perf annotate: Add fusion logic for AMD microarchs
AMD family 15h and above microarchs fuse a subset of cmp/test/ALU instructions with branch instructions[1][2]. Add perf annotate fused instruction support for these microarchs. Before: │ testb $0x80,0x51(%rax) │ ┌──jne 5b3 0.78 │ │ mov %r13,%rdi │ │→ callq mark_page_accessed 1.08 │5b3:└─→mov 0x8(%r13),%rax After: │ ┌──testb $0x80,0x51(%rax) │ ├──jne 5b3 0.78 │ │ mov %r13,%rdi │ │→ callq mark_page_accessed 1.08 │5b3:└─→mov 0x8(%r13),%rax [1] https://bugzilla.kernel.org/attachment.cgi?id=298553 [2] https://bugzilla.kernel.org/attachment.cgi?id=298555 Committer testing: On a: $ grep -m1 "model name" /proc/cpuinfo model name : AMD Ryzen 9 3900X 12-Core Processor $ Samples: 44K of event 'cycles', 4000 Hz, Event count (approx.): 7533249650 _int_malloc /usr/lib64/libc-2.33.so [Percent: local period] Percent│ ┌──test %eax,%eax │ ├──jne 884 │ │↓ jmpq 943 │ │ nop │878:│ add $0x10,%rdx 0.64 │ │ add %eax,%eax 0.57 │ │↓ je cc9 0.77 │884:└─→test %esi,%eax │ ↑ je 878 │ mov 0x18(%rdx),%r15 Reported-by: Kim Phillips <kim.phillips@amd.com> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jin Yao <yao.jin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Link: https://lore.kernel.org/r/20210911043854.8373-2-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
316346243b
commit
3149733584
@ -144,8 +144,31 @@ static struct ins x86__instructions[] = {
|
||||
{ .name = "xorps", .ops = &mov_ops, },
|
||||
};
|
||||
|
||||
static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
|
||||
static bool amd__ins_is_fused(struct arch *arch, const char *ins1,
|
||||
const char *ins2)
|
||||
{
|
||||
if (strstr(ins2, "jmp"))
|
||||
return false;
|
||||
|
||||
/* Family >= 15h supports cmp/test + branch fusion */
|
||||
if (arch->family >= 0x15 && (strstarts(ins1, "test") ||
|
||||
(strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Family >= 19h supports some ALU + branch fusion */
|
||||
if (arch->family >= 0x19 && (strstarts(ins1, "add") ||
|
||||
strstarts(ins1, "sub") || strstarts(ins1, "and") ||
|
||||
strstarts(ins1, "inc") || strstarts(ins1, "dec") ||
|
||||
strstarts(ins1, "or") || strstarts(ins1, "xor"))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool intel__ins_is_fused(struct arch *arch, const char *ins1,
|
||||
const char *ins2)
|
||||
{
|
||||
if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
|
||||
return false;
|
||||
@ -184,6 +207,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid)
|
||||
if (ret == 3) {
|
||||
arch->family = family;
|
||||
arch->model = model;
|
||||
arch->ins_is_fused = strstarts(cpuid, "AuthenticAMD") ?
|
||||
amd__ins_is_fused :
|
||||
intel__ins_is_fused;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -183,7 +183,6 @@ static struct arch architectures[] = {
|
||||
.init = x86__annotate_init,
|
||||
.instructions = x86__instructions,
|
||||
.nr_instructions = ARRAY_SIZE(x86__instructions),
|
||||
.ins_is_fused = x86__ins_is_fused,
|
||||
.objdump = {
|
||||
.comment_char = '#',
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user