author | Hanjie Lin <hanjie.lin@amlogic.com> | 2019-08-27 08:21:46 (GMT) |
---|---|---|
committer | Jianxin Pan <jianxin.pan@amlogic.com> | 2019-09-18 05:35:56 (GMT) |
commit | 270cbf3b28a98f2071f54862e7cf967e52e98356 (patch) | |
tree | 5047820d793ec85ecc8a4cff68582378e4ba2011 | |
parent | ed434f226ee7db58064e22d9c7d613c39867a182 (diff) | |
download | common-270cbf3b28a98f2071f54862e7cf967e52e98356.zip common-270cbf3b28a98f2071f54862e7cf967e52e98356.tar.gz common-270cbf3b28a98f2071f54862e7cf967e52e98356.tar.bz2 |
perf: pmu fine-tune for aarch32/64 of A53/A55/A73 [1/1]
PD#SWPL-13243
Problem:
PMU events are inaccurate or incomplete on A53/A55/A73.
Solution:
1. Modify the event configuration for A53/A55/A73.
2. The perf executable must be compiled from a recent kernel (5.1+).
3. A55 events are the most complete; A73 events are the least complete (e.g. fewer ld_retired/st_retired/stall/prefetch events).
4. A55 and A53 events with the same name have similar meanings, but A73 differs more (e.g. the meanings of the L1/L2 dcache/icache loads).
sample commands:
a55 arm64:
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv8_pmuv3/ld_retired/,armv8_pmuv3/st_retired/,cycles,branch-loads,branch-load-misses,armv8_pmuv3/a55_l1d_cache_rd/,armv8_pmuv3/a55_l1d_cache_refill_rd/,armv8_pmuv3/a55_l1d_cache_wr/,armv8_pmuv3/a55_l1d_cache_refill_wr/,L1-icache-loads,L1-icache-load-misses,armv8_pmuv3/a55_l2d_cache_rd/,armv8_pmuv3/a55_l2d_cache_refill_rd/,armv8_pmuv3/a55_l1d_cache_refill_inner/,armv8_pmuv3/a55_l1d_cache_refill_outer/,armv8_pmuv3/a55_l1d_cache_refill_prefetch/,armv8_pmuv3/a55_l2d_cache_refill_prefetch/,armv8_pmuv3/a5x_stall_frontend_cache/,armv8_pmuv3/a5x_stall_frontend_tlb/,armv8_pmuv3/a5x_stall_backend_ld/,armv8_pmuv3/a55_stall_backend_ld_cache/,armv8_pmuv3/a55_stall_backend_ld_tlb/,armv8_pmuv3/a5x_stall_backend_st/,armv8_pmuv3/a5x_stall_backend_ilock_agu/,armv8_pmuv3/a5x_stall_backend_ilock_fpu/ ls
a53 arm64:
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv8_pmuv3/ld_retired/,armv8_pmuv3/st_retired/,cycles,branch-loads,branch-load-misses,armv8_pmuv3/l1d_cache/,armv8_pmuv3/l1d_cache_refill/,L1-icache-loads,L1-icache-load-misses,armv8_pmuv3/a5x_l2d_cache/,armv8_pmuv3/a5x_l2d_cache_refill/,armv8_pmuv3/a53_cache_refill_prefetch/,armv8_pmuv3/a53_scu_snooped/,armv8_pmuv3/a5x_stall_frontend_cache/,armv8_pmuv3/a5x_stall_frontend_tlb/,armv8_pmuv3/a5x_stall_backend_ld/,armv8_pmuv3/a5x_stall_backend_st/,armv8_pmuv3/a5x_stall_backend_ilock_agu/,armv8_pmuv3/a5x_stall_backend_ilock_fpu/ ls
a73 arm64: (on w400, bound to A73 cpu2)
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,cycles,branch-loads,branch-load-misses,armv8_pmuv3/l1d_cache/,armv8_pmuv3/l1d_cache_refill/,armv8_pmuv3/a55_l1d_cache_rd/,armv8_pmuv3/a55_l1d_cache_wr/,armv8_pmuv3/a5x_l2d_cache/,armv8_pmuv3/a5x_l2d_cache_refill/,armv8_pmuv3/a55_l2d_cache_rd/,armv8_pmuv3/a55_l2d_cache_wr/ busybox taskset 4 ls
a55 arm:
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv7_cortex_a15/ld_retired/,armv7_cortex_a15/st_retired/,cycles,branch-loads,branch-load-misses,armv7_cortex_a15/a55_l1d_cache_rd/,armv7_cortex_a15/a55_l1d_cache_refill_rd/,armv7_cortex_a15/a55_l1d_cache_wr/,armv7_cortex_a15/a55_l1d_cache_refill_wr/,L1-icache-loads,L1-icache-load-misses,armv7_cortex_a15/a55_l2d_cache_rd/,armv7_cortex_a15/a55_l2d_cache_refill_rd/,armv7_cortex_a15/a55_l1d_cache_refill_inner/,armv7_cortex_a15/a55_l1d_cache_refill_outer/,armv7_cortex_a15/a55_l1d_cache_refill_prefetch/,armv7_cortex_a15/a55_l2d_cache_refill_prefetch/,armv7_cortex_a15/a5x_stall_frontend_cache/,armv7_cortex_a15/a5x_stall_frontend_tlb/,armv7_cortex_a15/a5x_stall_backend_ld/,armv7_cortex_a15/a55_stall_backend_ld_cache/,armv7_cortex_a15/a55_stall_backend_ld_tlb/,armv7_cortex_a15/a5x_stall_backend_st/,armv7_cortex_a15/a5x_stall_backend_ilock_agu/,armv7_cortex_a15/a5x_stall_backend_ilock_fpu/ ls
a53 arm:
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv7_cortex_a15/ld_retired/,armv7_cortex_a15/st_retired/,cycles,branch-loads,branch-load-misses,armv7_cortex_a15/l1d_cache/,armv7_cortex_a15/l1d_cache_refill/,L1-icache-loads,L1-icache-load-misses,armv7_cortex_a15/a5x_l2d_cache/,armv7_cortex_a15/a5x_l2d_cache_refill/,armv7_cortex_a15/a53_cache_refill_prefetch/,armv7_cortex_a15/a53_scu_snooped/,armv7_cortex_a15/a5x_stall_frontend_cache/,armv7_cortex_a15/a5x_stall_frontend_tlb/,armv7_cortex_a15/a5x_stall_backend_ld/,armv7_cortex_a15/a5x_stall_backend_st/,armv7_cortex_a15/a5x_stall_backend_ilock_agu/,armv7_cortex_a15/a5x_stall_backend_ilock_fpu/ ls
a73 arm: (on w400, bound to A73 cpu2)
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,cycles,branch-loads,branch-load-misses,armv7_cortex_a15/l1d_cache/,armv7_cortex_a15/l1d_cache_refill/,armv7_cortex_a15/a55_l1d_cache_rd/,armv7_cortex_a15/a55_l1d_cache_wr/,armv7_cortex_a15/a5x_l2d_cache/,armv7_cortex_a15/a5x_l2d_cache_refill/,armv7_cortex_a15/a55_l2d_cache_rd/,armv7_cortex_a15/a55_l2d_cache_wr/ busybox taskset 4 ls
Verify:
ac200/u200/w400
Change-Id: I7f11e1480c3c27d016b011d2a84c33e824f69b08
Signed-off-by: Hanjie Lin <hanjie.lin@amlogic.com>
-rw-r--r-- | arch/arm/kernel/perf_event_v7.c | 83 | ||||
-rw-r--r-- | arch/arm64/kernel/perf_event.c | 87 |
2 files changed, 170 insertions, 0 deletions
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index e3a3ebc..a35b803 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -612,6 +612,49 @@ ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC); ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE); ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES); +#ifdef CONFIG_AMLOGIC_MODIFY +/* a53/a55 common events */ +ARMV7_EVENT_ATTR(a5x_stall_frontend_cache, 0xe1); +ARMV7_EVENT_ATTR(a5x_stall_frontend_tlb, 0xe2); +ARMV7_EVENT_ATTR(a5x_stall_frontend_pderr, 0xe3); +ARMV7_EVENT_ATTR(a5x_stall_backend_ilock_agu, 0xe5); +ARMV7_EVENT_ATTR(a5x_stall_backend_ilock_fpu, 0xe6); +ARMV7_EVENT_ATTR(a5x_stall_backend_ld, 0xe7); +ARMV7_EVENT_ATTR(a5x_stall_backend_st, 0xe8); +ARMV7_EVENT_ATTR(a5x_l2d_cache, 0x16); +ARMV7_EVENT_ATTR(a5x_l2d_cache_refill, 0x17); + +/* a55 events */ +ARMV7_EVENT_ATTR(a55_stall_frontend, 0x23); +ARMV7_EVENT_ATTR(a55_stall_backend, 0x24); +ARMV7_EVENT_ATTR(a55_stall_backend_ilock, 0xe4); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_inner, 0x44); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_outer, 0x45); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_prefetch, 0xc2); +ARMV7_EVENT_ATTR(a55_l2d_cache_refill_prefetch, 0xc1); +ARMV7_EVENT_ATTR(a55_l3d_cache_refill_prefetch, 0xc0); +ARMV7_EVENT_ATTR(a55_stall_backend_ld_cache, 0xe9); +ARMV7_EVENT_ATTR(a55_stall_backend_ld_tlb, 0xea); +ARMV7_EVENT_ATTR(a55_stall_backend_st_stb, 0xeb); +ARMV7_EVENT_ATTR(a55_stall_backend_st_tlb, 0xec); +ARMV7_EVENT_ATTR(a55_l1d_cache_rd, 0x40); +ARMV7_EVENT_ATTR(a55_l1d_cache_wr, 0x41); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_rd, 0x42); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_wr, 0x43); +ARMV7_EVENT_ATTR(a55_l2d_cache_rd, 0x50); +ARMV7_EVENT_ATTR(a55_l2d_cache_wr, 0x51); +ARMV7_EVENT_ATTR(a55_l2d_cache_refill_rd, 0x52); +ARMV7_EVENT_ATTR(a55_l2d_cache_refill_wr, 0x53); +ARMV7_EVENT_ATTR(a55_l3d_cache_rd, 0xa0); +ARMV7_EVENT_ATTR(a55_l3d_cache_refill_rd, 0xa2); + +/* a53 
events */ +ARMV7_EVENT_ATTR(a53_cache_refill_prefetch, 0xc2); +ARMV7_EVENT_ATTR(a53_scu_snooped, 0xc8); +ARMV7_EVENT_ATTR(a53_stall_backend_st_stb, 0xc7); +ARMV7_EVENT_ATTR(a53_stall_frontend_other, 0xe0); +#endif + static struct attribute *armv7_pmuv2_event_attrs[] = { &armv7_event_attr_sw_incr.attr.attr, &armv7_event_attr_l1i_cache_refill.attr.attr, @@ -643,6 +686,46 @@ static struct attribute *armv7_pmuv2_event_attrs[] = { &armv7_event_attr_inst_spec.attr.attr, &armv7_event_attr_ttbr_write_retired.attr.attr, &armv7_event_attr_bus_cycles.attr.attr, +#ifdef CONFIG_AMLOGIC_MODIFY + /* a55/a53 common events */ + &armv7_event_attr_a5x_stall_frontend_cache.attr.attr, //0xe1 + &armv7_event_attr_a5x_stall_frontend_tlb.attr.attr, //0xe2 + &armv7_event_attr_a5x_stall_frontend_pderr.attr.attr, //0xe3 + &armv7_event_attr_a5x_stall_backend_ilock_agu.attr.attr, //0xe5 + &armv7_event_attr_a5x_stall_backend_ilock_fpu.attr.attr, //0xe6 + &armv7_event_attr_a5x_stall_backend_ld.attr.attr, //0xe7 + &armv7_event_attr_a5x_stall_backend_st.attr.attr, //0xe8 + &armv7_event_attr_a5x_l2d_cache.attr.attr, //0x16 + &armv7_event_attr_a5x_l2d_cache_refill.attr.attr, //0x17 + /* a55 events */ + &armv7_event_attr_a55_stall_frontend.attr.attr, //0x23 + &armv7_event_attr_a55_stall_backend.attr.attr, //0x24 + &armv7_event_attr_a55_stall_backend_ilock.attr.attr, //0xe4 + &armv7_event_attr_a55_stall_backend_ld_cache.attr.attr, //0xe9 + &armv7_event_attr_a55_stall_backend_ld_tlb.attr.attr, //0xea + &armv7_event_attr_a55_stall_backend_st_stb.attr.attr, //0xeb + &armv7_event_attr_a55_stall_backend_st_tlb.attr.attr, //0xec + &armv7_event_attr_a55_l1d_cache_refill_inner.attr.attr, //0x44 + &armv7_event_attr_a55_l1d_cache_refill_outer.attr.attr, //0x45 + &armv7_event_attr_a55_l1d_cache_refill_prefetch.attr.attr, //0xc2 + &armv7_event_attr_a55_l2d_cache_refill_prefetch.attr.attr, //0xc1 + &armv7_event_attr_a55_l3d_cache_refill_prefetch.attr.attr, //0xc0 + &armv7_event_attr_a55_l1d_cache_rd.attr.attr, 
//0x40 + &armv7_event_attr_a55_l1d_cache_wr.attr.attr, //0x41 + &armv7_event_attr_a55_l1d_cache_refill_rd.attr.attr, //0x42 + &armv7_event_attr_a55_l1d_cache_refill_wr.attr.attr, //0x43 + &armv7_event_attr_a55_l2d_cache_rd.attr.attr, //0x50 + &armv7_event_attr_a55_l2d_cache_wr.attr.attr, //0x51 + &armv7_event_attr_a55_l2d_cache_refill_rd.attr.attr, //0x52 + &armv7_event_attr_a55_l2d_cache_refill_wr.attr.attr, //0x53 + &armv7_event_attr_a55_l3d_cache_rd.attr.attr, //0xa0 + &armv7_event_attr_a55_l3d_cache_refill_rd.attr.attr, //0xa2 + /* a53 events */ + &armv7_event_attr_a53_cache_refill_prefetch.attr.attr, //0xc2 + &armv7_event_attr_a53_scu_snooped.attr.attr, //0xc8 + &armv7_event_attr_a53_stall_backend_st_stb.attr.attr, //0xc7 + &armv7_event_attr_a53_stall_frontend_other.attr.attr, //0xe0 +#endif NULL, }; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9b3b5dd..0042224 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -449,6 +449,49 @@ ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL); ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB); ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB); +#ifdef CONFIG_AMLOGIC_MODIFY +/* a53/a55 common */ +ARMV8_EVENT_ATTR(a5x_stall_frontend_cache, 0xe1); +ARMV8_EVENT_ATTR(a5x_stall_frontend_tlb, 0xe2); +ARMV8_EVENT_ATTR(a5x_stall_frontend_pderr, 0xe3); +ARMV8_EVENT_ATTR(a5x_stall_backend_ilock_agu, 0xe5); +ARMV8_EVENT_ATTR(a5x_stall_backend_ilock_fpu, 0xe6); +ARMV8_EVENT_ATTR(a5x_stall_backend_ld, 0xe7); +ARMV8_EVENT_ATTR(a5x_stall_backend_st, 0xe8); +ARMV8_EVENT_ATTR(a5x_l2d_cache, 0x16); +ARMV8_EVENT_ATTR(a5x_l2d_cache_refill, 0x17); + +/* a55 events */ +ARMV8_EVENT_ATTR(a55_stall_frontend, 0x23); +ARMV8_EVENT_ATTR(a55_stall_backend, 0x24); +ARMV8_EVENT_ATTR(a55_stall_backend_ilock, 0xe4); +ARMV8_EVENT_ATTR(a55_l1d_cache_refill_inner, 0x44); +ARMV8_EVENT_ATTR(a55_l1d_cache_refill_outer, 0x45); 
+ARMV8_EVENT_ATTR(a55_l1d_cache_refill_prefetch, 0xc2); +ARMV8_EVENT_ATTR(a55_l2d_cache_refill_prefetch, 0xc1); +ARMV8_EVENT_ATTR(a55_l3d_cache_refill_prefetch, 0xc0); +ARMV8_EVENT_ATTR(a55_stall_backend_ld_cache, 0xe9); +ARMV8_EVENT_ATTR(a55_stall_backend_ld_tlb, 0xea); +ARMV8_EVENT_ATTR(a55_stall_backend_st_stb, 0xeb); +ARMV8_EVENT_ATTR(a55_stall_backend_st_tlb, 0xec); +ARMV8_EVENT_ATTR(a55_l1d_cache_rd, 0x40); +ARMV8_EVENT_ATTR(a55_l1d_cache_wr, 0x41); +ARMV8_EVENT_ATTR(a55_l1d_cache_refill_rd, 0x42); +ARMV8_EVENT_ATTR(a55_l1d_cache_refill_wr, 0x43); +ARMV8_EVENT_ATTR(a55_l2d_cache_rd, 0x50); +ARMV8_EVENT_ATTR(a55_l2d_cache_wr, 0x51); +ARMV8_EVENT_ATTR(a55_l2d_cache_refill_rd, 0x52); +ARMV8_EVENT_ATTR(a55_l2d_cache_refill_wr, 0x53); +ARMV8_EVENT_ATTR(a55_l3d_cache_rd, 0xa0); +ARMV8_EVENT_ATTR(a55_l3d_cache_refill_rd, 0xa2); + +/* a53 events */ +ARMV8_EVENT_ATTR(a53_cache_refill_prefetch, 0xc2); +ARMV8_EVENT_ATTR(a53_scu_snooped, 0xc8); +ARMV8_EVENT_ATTR(a53_stall_backend_st_stb, 0xc7); +ARMV8_EVENT_ATTR(a53_stall_frontend_other, 0xe0); +#endif + static struct attribute *armv8_pmuv3_event_attrs[] = { &armv8_event_attr_sw_incr.attr.attr, &armv8_event_attr_l1i_cache_refill.attr.attr, @@ -498,6 +541,46 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { &armv8_event_attr_l2i_tlb_refill.attr.attr, &armv8_event_attr_l2d_tlb.attr.attr, &armv8_event_attr_l2i_tlb.attr.attr, +#ifdef CONFIG_AMLOGIC_MODIFY + /* a55/a53 common events */ + &armv8_event_attr_a5x_stall_frontend_cache.attr.attr, //0xe1 + &armv8_event_attr_a5x_stall_frontend_tlb.attr.attr, //0xe2 + &armv8_event_attr_a5x_stall_frontend_pderr.attr.attr, //0xe3 + &armv8_event_attr_a5x_stall_backend_ilock_agu.attr.attr, //0xe5 + &armv8_event_attr_a5x_stall_backend_ilock_fpu.attr.attr, //0xe6 + &armv8_event_attr_a5x_stall_backend_ld.attr.attr, //0xe7 + &armv8_event_attr_a5x_stall_backend_st.attr.attr, //0xe8 + &armv8_event_attr_a5x_l2d_cache.attr.attr, //0x16 + &armv8_event_attr_a5x_l2d_cache_refill.attr.attr, 
//0x17 + /* a55 events */ + &armv8_event_attr_a55_stall_frontend.attr.attr, //0x23 + &armv8_event_attr_a55_stall_backend.attr.attr, //0x24 + &armv8_event_attr_a55_stall_backend_ilock.attr.attr, //0xe4 + &armv8_event_attr_a55_stall_backend_ld_cache.attr.attr, //0xe9 + &armv8_event_attr_a55_stall_backend_ld_tlb.attr.attr, //0xea + &armv8_event_attr_a55_stall_backend_st_stb.attr.attr, //0xeb + &armv8_event_attr_a55_stall_backend_st_tlb.attr.attr, //0xec + &armv8_event_attr_a55_l1d_cache_refill_inner.attr.attr, //0x44 + &armv8_event_attr_a55_l1d_cache_refill_outer.attr.attr, //0x45 + &armv8_event_attr_a55_l1d_cache_refill_prefetch.attr.attr, //0xc2 + &armv8_event_attr_a55_l2d_cache_refill_prefetch.attr.attr, //0xc1 + &armv8_event_attr_a55_l3d_cache_refill_prefetch.attr.attr, //0xc0 + &armv8_event_attr_a55_l1d_cache_rd.attr.attr, //0x40 + &armv8_event_attr_a55_l1d_cache_wr.attr.attr, //0x41 + &armv8_event_attr_a55_l1d_cache_refill_rd.attr.attr, //0x42 + &armv8_event_attr_a55_l1d_cache_refill_wr.attr.attr, //0x43 + &armv8_event_attr_a55_l2d_cache_rd.attr.attr, //0x50 + &armv8_event_attr_a55_l2d_cache_wr.attr.attr, //0x51 + &armv8_event_attr_a55_l2d_cache_refill_rd.attr.attr, //0x52 + &armv8_event_attr_a55_l2d_cache_refill_wr.attr.attr, //0x53 + &armv8_event_attr_a55_l3d_cache_rd.attr.attr, //0xa0 + &armv8_event_attr_a55_l3d_cache_refill_rd.attr.attr, //0xa2 + /* a53 events */ + &armv8_event_attr_a53_cache_refill_prefetch.attr.attr, //0xc2 + &armv8_event_attr_a53_scu_snooped.attr.attr, //0xc8 + &armv8_event_attr_a53_stall_backend_st_stb.attr.attr, //0xc7 + &armv8_event_attr_a53_stall_frontend_other.attr.attr, //0xe0 +#endif NULL, }; @@ -505,6 +588,9 @@ static umode_t armv8pmu_event_attr_is_visible(struct kobject *kobj, struct attribute *attr, int unused) { +#ifdef CONFIG_AMLOGIC_MODIFY + return 0444; +#else struct device *dev = kobj_to_dev(kobj); struct pmu *pmu = dev_get_drvdata(dev); struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); @@ -516,6 +602,7 @@ 
armv8pmu_event_attr_is_visible(struct kobject *kobj, return attr->mode; return 0; +#endif } static struct attribute_group armv8_pmuv3_events_attr_group = { |