Performance events changes for v6.13:

  - Uprobes:
     - Add BPF session support (Jiri Olsa)
     - Switch to RCU Tasks Trace flavor for better performance (Andrii Nakryiko)
     - Massively increase uretprobe SMP scalability by SRCU-protecting
       the uretprobe lifetime (Andrii Nakryiko)
     - Kill xol_area->slot_count (Oleg Nesterov)
 
  - Core facilities:
     - Implement targeted high-frequency profiling by adding the ability
       for an event to "pause" or "resume" AUX area tracing (Adrian Hunter)
 
  - VM profiling/sampling:
     - Correct perf sampling with guest VMs (Colton Lewis)
 
  - New hardware support:
     - x86/intel: Add PMU support for Intel ArrowLake-H CPUs (Dapeng Mi)
 
  - Misc fixes and enhancements:
     - x86/intel/pt: Fix buffer full but size is 0 case (Adrian Hunter)
     - x86/amd: Warn only on new bits set (Breno Leitao)
     - x86/amd/uncore: Avoid a false positive warning about snprintf
                       truncation in amd_uncore_umc_ctx_init (Jean Delvare)
     - uprobes: Re-order struct uprobe_task to save some space (Christophe JAILLET)
     - x86/rapl: Move the pmu allocation out of CPU hotplug (Kan Liang)
     - x86/rapl: Clean up cpumask and hotplug (Kan Liang)
     - uprobes: Deuglify xol_get_insn_slot/xol_free_insn_slot paths (Oleg Nesterov)
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmc7eKERHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1i57A/+KQ6TrIoICVTE+BPlDfUw8NU+N3DagVb0
 dzoyDxlDRsnsYzeXZipPn+3IitX1w+DrGxBNIojSoiFVCLnHIKgo4uHbj7cVrR7J
 fBTVSnoJ94SGAk5ySebvLwMLce/YhXBeHK2lx6W/pI6acNcxzDfIabjjETeqltUo
 g7hmT9lo10pzZEZyuUfYX9khlWBxda1dKHc9pMIq7baeLe4iz/fCGlJ0K4d4M4z3
 NPZw239Np6iHUwu3Lcs4gNKe4rcDe7Bt47hpedemHe0Y+7c4s2HaPxbXWxvDtE76
 mlsg93i28f8SYxeV83pREn0EOCptXcljhiek+US+GR7NSbltMnV+uUiDfPKIE9+Y
 vYP/DYF9hx73FsOucEFrHxYYcePorn3pne5/khBYWdQU6TnlrBYWpoLQsjgCKTTR
 4JhCFlBZ5cDpc6ihtpwCwVTQ4Q/H7vM1XOlDwx0hPhcIPPHDreaQD/wxo61jBdXf
 PY0EPAxh3BcQxfPYuDS+XiYjQ8qO8MtXMKz5bZyHBZlbHwccV6T4ExjsLKxFk5As
 6BG8pkBWLg7drXAgVdleIY0ux+34w/Zzv7gemdlQxvWLlZrVvpjiG93oU3PTpZeq
 A2UD9eAOuXVD6+HsF/dmn88sFmcLWbrMskFWujkvhEUmCvSGAnz3YSS/mLEawBiT
 2xI8xykNWSY=
 =ItOT
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:
 "Uprobes:
    - Add BPF session support (Jiri Olsa)
    - Switch to RCU Tasks Trace flavor for better performance (Andrii
      Nakryiko)
    - Massively increase uretprobe SMP scalability by SRCU-protecting
      the uretprobe lifetime (Andrii Nakryiko)
    - Kill xol_area->slot_count (Oleg Nesterov)

  Core facilities:
    - Implement targeted high-frequency profiling by adding the ability
      for an event to "pause" or "resume" AUX area tracing (Adrian
      Hunter)

  VM profiling/sampling:
    - Correct perf sampling with guest VMs (Colton Lewis)

  New hardware support:
    - x86/intel: Add PMU support for Intel ArrowLake-H CPUs (Dapeng Mi)

  Misc fixes and enhancements:
    - x86/intel/pt: Fix buffer full but size is 0 case (Adrian Hunter)
    - x86/amd: Warn only on new bits set (Breno Leitao)
    - x86/amd/uncore: Avoid a false positive warning about snprintf
      truncation in amd_uncore_umc_ctx_init (Jean Delvare)
    - uprobes: Re-order struct uprobe_task to save some space
      (Christophe JAILLET)
    - x86/rapl: Move the pmu allocation out of CPU hotplug (Kan Liang)
    - x86/rapl: Clean up cpumask and hotplug (Kan Liang)
    - uprobes: Deuglify xol_get_insn_slot/xol_free_insn_slot paths (Oleg
      Nesterov)"

* tag 'perf-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (32 commits)
  perf/core: Correct perf sampling with guest VMs
  perf/x86: Refactor misc flag assignments
  perf/powerpc: Use perf_arch_instruction_pointer()
  perf/core: Hoist perf_instruction_pointer() and perf_misc_flags()
  perf/arm: Drop unused functions
  uprobes: Re-order struct uprobe_task to save some space
  perf/x86/amd/uncore: Avoid a false positive warning about snprintf truncation in amd_uncore_umc_ctx_init
  perf/x86/intel: Do not enable large PEBS for events with aux actions or aux sampling
  perf/x86/intel/pt: Add support for pause / resume
  perf/core: Add aux_pause, aux_resume, aux_start_paused
  perf/x86/intel/pt: Fix buffer full but size is 0 case
  uprobes: SRCU-protect uretprobe lifetime (with timeout)
  uprobes: allow put_uprobe() from non-sleepable softirq context
  perf/x86/rapl: Clean up cpumask and hotplug
  perf/x86/rapl: Move the pmu allocation out of CPU hotplug
  uprobe: Add support for session consumer
  uprobe: Add data pointer to consumer handlers
  perf/x86/amd: Warn only on new bits set
  uprobes: fold xol_take_insn_slot() into xol_get_insn_slot()
  uprobes: kill xol_area->slot_count
  ...
This commit is contained in:
Linus Torvalds 2024-11-19 13:34:06 -08:00
commit f41dac3efb
34 changed files with 1073 additions and 422 deletions

View File

@ -135,6 +135,7 @@ config KPROBES_ON_FTRACE
config UPROBES config UPROBES
def_bool n def_bool n
depends on ARCH_SUPPORTS_UPROBES depends on ARCH_SUPPORTS_UPROBES
select TASKS_TRACE_RCU
help help
Uprobes is the user-space counterpart to kprobes: they Uprobes is the user-space counterpart to kprobes: they
enable instrumentation applications (such as 'perf probe') enable instrumentation applications (such as 'perf probe')

View File

@ -8,13 +8,6 @@
#ifndef __ARM_PERF_EVENT_H__ #ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__
#ifdef CONFIG_PERF_EVENTS
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
#endif
#define perf_arch_fetch_caller_regs(regs, __ip) { \ #define perf_arch_fetch_caller_regs(regs, __ip) { \
(regs)->ARM_pc = (__ip); \ (regs)->ARM_pc = (__ip); \
frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \ frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \

View File

@ -96,20 +96,3 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
arm_get_current_stackframe(regs, &fr); arm_get_current_stackframe(regs, &fr);
walk_stackframe(&fr, callchain_trace, entry); walk_stackframe(&fr, callchain_trace, entry);
} }
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
return instruction_pointer(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
int misc = 0;
if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
return misc;
}

View File

@ -10,10 +10,6 @@
#include <asm/ptrace.h> #include <asm/ptrace.h>
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs #define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
#endif #endif

View File

@ -38,31 +38,3 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
arch_stack_walk(callchain_trace, entry, current, regs); arch_stack_walk(callchain_trace, entry, current, regs);
} }
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
if (perf_guest_state())
return perf_guest_get_ip();
return instruction_pointer(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
unsigned int guest_state = perf_guest_state();
int misc = 0;
if (guest_state) {
if (guest_state & PERF_GUEST_USER)
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
} else {
if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
}
return misc;
}

View File

@ -102,8 +102,8 @@ struct power_pmu {
int __init register_power_pmu(struct power_pmu *pmu); int __init register_power_pmu(struct power_pmu *pmu);
struct pt_regs; struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs); extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
extern unsigned long int read_bhrb(int n); extern unsigned long int read_bhrb(int n);
/* /*
@ -111,7 +111,7 @@ extern unsigned long int read_bhrb(int n);
* if we have hardware PMU support. * if we have hardware PMU support.
*/ */
#ifdef CONFIG_PPC_PERF_CTRS #ifdef CONFIG_PPC_PERF_CTRS
#define perf_misc_flags(regs) perf_misc_flags(regs) #define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
#endif #endif
/* /*

View File

@ -51,7 +51,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
lr = regs->link; lr = regs->link;
sp = regs->gpr[1]; sp = regs->gpr[1];
perf_callchain_store(entry, perf_instruction_pointer(regs)); perf_callchain_store(entry, perf_arch_instruction_pointer(regs));
if (!validate_sp(sp, current)) if (!validate_sp(sp, current))
return; return;

View File

@ -139,7 +139,7 @@ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
long level = 0; long level = 0;
unsigned int __user *fp, *uregs; unsigned int __user *fp, *uregs;
next_ip = perf_instruction_pointer(regs); next_ip = perf_arch_instruction_pointer(regs);
lr = regs->link; lr = regs->link;
sp = regs->gpr[1]; sp = regs->gpr[1];
perf_callchain_store(entry, next_ip); perf_callchain_store(entry, next_ip);

View File

@ -74,7 +74,7 @@ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
struct signal_frame_64 __user *sigframe; struct signal_frame_64 __user *sigframe;
unsigned long __user *fp, *uregs; unsigned long __user *fp, *uregs;
next_ip = perf_instruction_pointer(regs); next_ip = perf_arch_instruction_pointer(regs);
lr = regs->link; lr = regs->link;
sp = regs->gpr[1]; sp = regs->gpr[1];
perf_callchain_store(entry, next_ip); perf_callchain_store(entry, next_ip);

View File

@ -2332,7 +2332,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
* Called from generic code to get the misc flags (i.e. processor mode) * Called from generic code to get the misc flags (i.e. processor mode)
* for an event_id. * for an event_id.
*/ */
unsigned long perf_misc_flags(struct pt_regs *regs) unsigned long perf_arch_misc_flags(struct pt_regs *regs)
{ {
u32 flags = perf_get_misc_flags(regs); u32 flags = perf_get_misc_flags(regs);
@ -2346,7 +2346,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
* Called from generic code to get the instruction pointer * Called from generic code to get the instruction pointer
* for an event_id. * for an event_id.
*/ */
unsigned long perf_instruction_pointer(struct pt_regs *regs) unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{ {
unsigned long siar = mfspr(SPRN_SIAR); unsigned long siar = mfspr(SPRN_SIAR);

View File

@ -37,9 +37,9 @@ extern ssize_t cpumf_events_sysfs_show(struct device *dev,
/* Perf callbacks */ /* Perf callbacks */
struct pt_regs; struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs); extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs) #define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs #define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
/* Perf pt_regs extension for sample-data-entry indicators */ /* Perf pt_regs extension for sample-data-entry indicators */

View File

@ -57,7 +57,7 @@ static unsigned long instruction_pointer_guest(struct pt_regs *regs)
return sie_block(regs)->gpsw.addr; return sie_block(regs)->gpsw.addr;
} }
unsigned long perf_instruction_pointer(struct pt_regs *regs) unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{ {
return is_in_guest(regs) ? instruction_pointer_guest(regs) return is_in_guest(regs) ? instruction_pointer_guest(regs)
: instruction_pointer(regs); : instruction_pointer(regs);
@ -84,7 +84,7 @@ static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
return flags; return flags;
} }
unsigned long perf_misc_flags(struct pt_regs *regs) unsigned long perf_arch_misc_flags(struct pt_regs *regs)
{ {
/* Check if the cpum_sf PMU has created the pt_regs structure. /* Check if the cpum_sf PMU has created the pt_regs structure.
* In this case, perf misc flags can be easily extracted. Otherwise, * In this case, perf misc flags can be easily extracted. Otherwise,

View File

@ -943,11 +943,12 @@ static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, u
static int amd_pmu_v2_handle_irq(struct pt_regs *regs) static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
{ {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
static atomic64_t status_warned = ATOMIC64_INIT(0);
u64 reserved, status, mask, new_bits, prev_bits;
struct perf_sample_data data; struct perf_sample_data data;
struct hw_perf_event *hwc; struct hw_perf_event *hwc;
struct perf_event *event; struct perf_event *event;
int handled = 0, idx; int handled = 0, idx;
u64 reserved, status, mask;
bool pmu_enabled; bool pmu_enabled;
/* /*
@ -1012,7 +1013,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
* the corresponding PMCs are expected to be inactive according to the * the corresponding PMCs are expected to be inactive according to the
* active_mask * active_mask
*/ */
WARN_ON(status > 0); if (status > 0) {
prev_bits = atomic64_fetch_or(status, &status_warned);
// A new bit was set for the very first time.
new_bits = status & ~prev_bits;
WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
}
/* Clear overflow and freeze bits */ /* Clear overflow and freeze bits */
amd_pmu_ack_global_status(~status); amd_pmu_ack_global_status(~status);

View File

@ -916,7 +916,8 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 }; u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
union amd_uncore_info info; union amd_uncore_info info;
struct amd_uncore_pmu *pmu; struct amd_uncore_pmu *pmu;
int index = 0, gid, i; int gid, i;
u16 index = 0;
if (pmu_version < 2) if (pmu_version < 2)
return 0; return 0;
@ -948,7 +949,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) { for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
for (i = 0; i < group_num_pmus[gid]; i++) { for (i = 0; i < group_num_pmus[gid]; i++) {
pmu = &uncore->pmus[index]; pmu = &uncore->pmus[index];
snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index); snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index);
pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid]; pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2; pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
pmu->rdpmc_base = -1; pmu->rdpmc_base = -1;

View File

@ -3003,35 +3003,57 @@ static unsigned long code_segment_base(struct pt_regs *regs)
return 0; return 0;
} }
unsigned long perf_instruction_pointer(struct pt_regs *regs) unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{ {
if (perf_guest_state())
return perf_guest_get_ip();
return regs->ip + code_segment_base(regs); return regs->ip + code_segment_base(regs);
} }
unsigned long perf_misc_flags(struct pt_regs *regs) static unsigned long common_misc_flags(struct pt_regs *regs)
{ {
unsigned int guest_state = perf_guest_state();
int misc = 0;
if (guest_state) {
if (guest_state & PERF_GUEST_USER)
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
} else {
if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
}
if (regs->flags & PERF_EFLAGS_EXACT) if (regs->flags & PERF_EFLAGS_EXACT)
misc |= PERF_RECORD_MISC_EXACT_IP; return PERF_RECORD_MISC_EXACT_IP;
return misc; return 0;
}
/*
 * Translate the current guest state into a PERF_RECORD_MISC_GUEST_* flag.
 *
 * Returns 0 when perf_guest_state() does not have PERF_GUEST_ACTIVE set,
 * PERF_RECORD_MISC_GUEST_USER when PERF_GUEST_USER is set, and
 * PERF_RECORD_MISC_GUEST_KERNEL otherwise. @regs is not consulted here.
 */
static unsigned long guest_misc_flags(struct pt_regs *regs)
{
unsigned long guest_state = perf_guest_state();
/* No active guest reported: contribute no guest flag. */
if (!(guest_state & PERF_GUEST_ACTIVE))
return 0;
if (guest_state & PERF_GUEST_USER)
return PERF_RECORD_MISC_GUEST_USER;
else
return PERF_RECORD_MISC_GUEST_KERNEL;
}
/*
 * Classify host-side @regs: PERF_RECORD_MISC_USER when user_mode(regs) is
 * true, PERF_RECORD_MISC_KERNEL otherwise.
 */
static unsigned long host_misc_flags(struct pt_regs *regs)
{
if (user_mode(regs))
return PERF_RECORD_MISC_USER;
else
return PERF_RECORD_MISC_KERNEL;
}
/*
 * Misc flags for a guest sample: the result of common_misc_flags(regs)
 * OR-ed with the guest user/kernel flag from guest_misc_flags(regs).
 */
unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
{
unsigned long flags = common_misc_flags(regs);
flags |= guest_misc_flags(regs);
return flags;
}
unsigned long perf_arch_misc_flags(struct pt_regs *regs)
{
unsigned long flags = common_misc_flags(regs);
flags |= host_misc_flags(regs);
return flags;
} }
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)

View File

@ -3962,8 +3962,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type & if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event)) &&
~intel_pmu_large_pebs_flags(event))) { !has_aux_action(event)) {
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
event->attach_state |= PERF_ATTACH_SCHED_CB; event->attach_state |= PERF_ATTACH_SCHED_CB;
} }
@ -4599,6 +4599,28 @@ static inline bool erratum_hsw11(struct perf_event *event)
X86_CONFIG(.event=0xc0, .umask=0x01); X86_CONFIG(.event=0xc0, .umask=0x01);
} }
/*
 * Event-constraint lookup installed as x86_pmu.get_event_constraints for
 * INTEL_ARROWLAKE_H.
 *
 * Events whose PMU is of type hybrid_tiny are handed to
 * cmt_get_event_constraints(); events on any other hybrid PMU type are
 * handed to mtl_get_event_constraints(). Arguments are passed through
 * unchanged.
 */
static struct event_constraint *
arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
if (pmu->pmu_type == hybrid_tiny)
return cmt_get_event_constraints(cpuc, idx, event);
return mtl_get_event_constraints(cpuc, idx, event);
}
/*
 * hw_config callback installed as x86_pmu.hw_config for INTEL_ARROWLAKE_H.
 *
 * Events on the hybrid_tiny PMU use intel_pmu_hw_config(); events on any
 * other hybrid PMU type use adl_hw_config(). The callee's return value is
 * returned as-is.
 */
static int arl_h_hw_config(struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
if (pmu->pmu_type == hybrid_tiny)
return intel_pmu_hw_config(event);
return adl_hw_config(event);
}
/* /*
* The HSW11 requires a period larger than 100 which is the same as the BDM11. * The HSW11 requires a period larger than 100 which is the same as the BDM11.
* A minimum period of 128 is enforced as well for the INST_RETIRED.ALL. * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
@ -4924,17 +4946,26 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
/* /*
* This essentially just maps between the 'hybrid_cpu_type' * This essentially just maps between the 'hybrid_cpu_type'
* and 'hybrid_pmu_type' enums: * and 'hybrid_pmu_type' enums except for ARL-H processor
* which needs to compare atom uarch native id since ARL-H
* contains two different atom uarchs.
*/ */
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
u32 native_id;
if (cpu_type == HYBRID_INTEL_CORE && if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big)
pmu_type == hybrid_big)
return &x86_pmu.hybrid_pmu[i];
if (cpu_type == HYBRID_INTEL_ATOM &&
pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i]; return &x86_pmu.hybrid_pmu[i];
if (cpu_type == HYBRID_INTEL_ATOM) {
if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i];
native_id = get_this_hybrid_cpu_native_id();
if (native_id == skt_native_id && pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i];
if (native_id == cmt_native_id && pmu_type == hybrid_tiny)
return &x86_pmu.hybrid_pmu[i];
}
} }
return NULL; return NULL;
@ -5965,6 +5996,37 @@ static struct attribute *lnl_hybrid_events_attrs[] = {
NULL NULL
}; };
/*
 * ArrowLake-H topdown event aliases.
 *
 * The event string must be in PMU IDX order. Each string below carries
 * three ';'-separated encodings and is tagged hybrid_big_small_tiny
 * (presumably one encoding per hybrid PMU - confirm against
 * EVENT_ATTR_STR_HYBRID).
 */
EVENT_ATTR_STR_HYBRID(topdown-retiring,
td_retiring_arl_h,
"event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0",
hybrid_big_small_tiny);
EVENT_ATTR_STR_HYBRID(topdown-bad-spec,
td_bad_spec_arl_h,
"event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0",
hybrid_big_small_tiny);
EVENT_ATTR_STR_HYBRID(topdown-fe-bound,
td_fe_bound_arl_h,
"event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0",
hybrid_big_small_tiny);
EVENT_ATTR_STR_HYBRID(topdown-be-bound,
td_be_bound_arl_h,
"event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0",
hybrid_big_small_tiny);
/*
 * NULL-terminated topdown attribute list assigned to td_attr in the
 * INTEL_ARROWLAKE_H case of intel_pmu_init(). It combines the four
 * ARL-H specific aliases defined above with existing *_adl entries.
 */
static struct attribute *arl_h_hybrid_events_attrs[] = {
EVENT_PTR(slots_adl),
EVENT_PTR(td_retiring_arl_h),
EVENT_PTR(td_bad_spec_arl_h),
EVENT_PTR(td_fe_bound_arl_h),
EVENT_PTR(td_be_bound_arl_h),
EVENT_PTR(td_heavy_ops_adl),
EVENT_PTR(td_br_mis_adl),
EVENT_PTR(td_fetch_lat_adl),
EVENT_PTR(td_mem_bound_adl),
NULL,
};
/* Must be in IDX order */ /* Must be in IDX order */
EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small); EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small); EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small);
@ -5983,6 +6045,21 @@ static struct attribute *mtl_hybrid_mem_attrs[] = {
NULL NULL
}; };
/*
 * ArrowLake-H mem-loads / mem-stores aliases. As with the other hybrid
 * event strings, each carries three ';'-separated encodings and is tagged
 * hybrid_big_small_tiny; the first and third encodings are identical in
 * both strings.
 */
EVENT_ATTR_STR_HYBRID(mem-loads,
mem_ld_arl_h,
"event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3",
hybrid_big_small_tiny);
EVENT_ATTR_STR_HYBRID(mem-stores,
mem_st_arl_h,
"event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6",
hybrid_big_small_tiny);
/*
 * NULL-terminated memory event attribute list assigned to mem_attr in the
 * INTEL_ARROWLAKE_H case of intel_pmu_init().
 */
static struct attribute *arl_h_hybrid_mem_attrs[] = {
EVENT_PTR(mem_ld_arl_h),
EVENT_PTR(mem_st_arl_h),
NULL,
};
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
@ -6006,8 +6083,8 @@ static struct attribute *adl_hybrid_tsx_attrs[] = {
FORMAT_ATTR_HYBRID(in_tx, hybrid_big); FORMAT_ATTR_HYBRID(in_tx, hybrid_big);
FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big); FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big);
FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small); FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny);
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small); FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny);
FORMAT_ATTR_HYBRID(frontend, hybrid_big); FORMAT_ATTR_HYBRID(frontend, hybrid_big);
#define ADL_HYBRID_RTM_FORMAT_ATTR \ #define ADL_HYBRID_RTM_FORMAT_ATTR \
@ -6030,7 +6107,7 @@ static struct attribute *adl_hybrid_extra_attr[] = {
NULL NULL
}; };
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small); FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny);
static struct attribute *mtl_hybrid_extra_attr_rtm[] = { static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
ADL_HYBRID_RTM_FORMAT_ATTR, ADL_HYBRID_RTM_FORMAT_ATTR,
@ -6238,8 +6315,9 @@ static inline int intel_pmu_v6_addr_offset(int index, bool eventsel)
} }
static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
{ hybrid_small, "cpu_atom" }, { hybrid_small, "cpu_atom" },
{ hybrid_big, "cpu_core" }, { hybrid_big, "cpu_core" },
{ hybrid_tiny, "cpu_lowpower" },
}; };
static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
@ -6272,7 +6350,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
0, x86_pmu_num_counters(&pmu->pmu), 0, 0); 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
if (pmu->pmu_type & hybrid_small) { if (pmu->pmu_type & hybrid_small_tiny) {
pmu->intel_cap.perf_metrics = 0; pmu->intel_cap.perf_metrics = 0;
pmu->intel_cap.pebs_output_pt_available = 1; pmu->intel_cap.pebs_output_pt_available = 1;
pmu->mid_ack = true; pmu->mid_ack = true;
@ -7111,6 +7189,37 @@ __init int intel_pmu_init(void)
name = "lunarlake_hybrid"; name = "lunarlake_hybrid";
break; break;
case INTEL_ARROWLAKE_H:
intel_pmu_init_hybrid(hybrid_big_small_tiny);
x86_pmu.pebs_latency_data = arl_h_latency_data;
x86_pmu.get_event_constraints = arl_h_get_event_constraints;
x86_pmu.hw_config = arl_h_hw_config;
td_attr = arl_h_hybrid_events_attrs;
mem_attr = arl_h_hybrid_mem_attrs;
tsx_attr = adl_hybrid_tsx_attrs;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
/* Initialize big core specific PerfMon capabilities. */
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
intel_pmu_init_lnc(&pmu->pmu);
/* Initialize Atom core specific PerfMon capabilities. */
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
intel_pmu_init_skt(&pmu->pmu);
/* Initialize Lower Power Atom specific PerfMon capabilities. */
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX];
intel_pmu_init_grt(&pmu->pmu);
pmu->extra_regs = intel_cmt_extra_regs;
intel_pmu_pebs_data_source_arl_h();
pr_cont("ArrowLake-H Hybrid events, ");
name = "arrowlake_h_hybrid";
break;
default: default:
switch (x86_pmu.version) { switch (x86_pmu.version) {
case 1: case 1:

View File

@ -177,6 +177,17 @@ void __init intel_pmu_pebs_data_source_mtl(void)
__intel_pmu_pebs_data_source_cmt(data_source); __intel_pmu_pebs_data_source_cmt(data_source);
} }
/*
 * Set up the PEBS data-source tables for ArrowLake-H.
 *
 * Runs the LNL setup first, then fills the table of the hybrid PMU at
 * X86_HYBRID_PMU_TINY_IDX with a copy of the default pebs_data_source[]
 * and applies __intel_pmu_pebs_data_source_cmt() to that copy.
 */
void __init intel_pmu_pebs_data_source_arl_h(void)
{
u64 *data_source;
/* NOTE(review): presumably covers the non-tiny PMUs - confirm in the LNL helper. */
intel_pmu_pebs_data_source_lnl();
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
__intel_pmu_pebs_data_source_cmt(data_source);
}
void __init intel_pmu_pebs_data_source_cmt(void) void __init intel_pmu_pebs_data_source_cmt(void)
{ {
__intel_pmu_pebs_data_source_cmt(pebs_data_source); __intel_pmu_pebs_data_source_cmt(pebs_data_source);
@ -388,6 +399,16 @@ u64 lnl_latency_data(struct perf_event *event, u64 status)
return lnc_latency_data(event, status); return lnc_latency_data(event, status);
} }
/*
 * PEBS latency-data decoder installed as x86_pmu.pebs_latency_data for
 * INTEL_ARROWLAKE_H.
 *
 * Events on the hybrid_tiny PMU are decoded by cmt_latency_data(); all
 * other events are decoded by lnl_latency_data(). @event and @status are
 * passed through unchanged.
 */
u64 arl_h_latency_data(struct perf_event *event, u64 status)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
if (pmu->pmu_type == hybrid_tiny)
return cmt_latency_data(event, status);
return lnl_latency_data(event, status);
}
static u64 load_latency_data(struct perf_event *event, u64 status) static u64 load_latency_data(struct perf_event *event, u64 status)
{ {
union intel_x86_pebs_dse dse; union intel_x86_pebs_dse dse;

View File

@ -418,6 +418,9 @@ static void pt_config_start(struct perf_event *event)
struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 ctl = event->hw.aux_config; u64 ctl = event->hw.aux_config;
if (READ_ONCE(event->hw.aux_paused))
return;
ctl |= RTIT_CTL_TRACEEN; ctl |= RTIT_CTL_TRACEEN;
if (READ_ONCE(pt->vmx_on)) if (READ_ONCE(pt->vmx_on))
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL); perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
@ -534,7 +537,24 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK); reg |= (event->attr.config & PT_CONFIG_MASK);
event->hw.aux_config = reg; event->hw.aux_config = reg;
/*
* Allow resume before starting so as not to overwrite a value set by a
* PMI.
*/
barrier();
WRITE_ONCE(pt->resume_allowed, 1);
/* Configuration is complete, it is now OK to handle an NMI */
barrier();
WRITE_ONCE(pt->handle_nmi, 1);
barrier();
pt_config_start(event); pt_config_start(event);
barrier();
/*
* Allow pause after starting so its pt_config_stop() doesn't race with
* pt_config_start().
*/
WRITE_ONCE(pt->pause_allowed, 1);
} }
static void pt_config_stop(struct perf_event *event) static void pt_config_stop(struct perf_event *event)
@ -828,11 +848,13 @@ static void pt_buffer_advance(struct pt_buffer *buf)
buf->cur_idx++; buf->cur_idx++;
if (buf->cur_idx == buf->cur->last) { if (buf->cur_idx == buf->cur->last) {
if (buf->cur == buf->last) if (buf->cur == buf->last) {
buf->cur = buf->first; buf->cur = buf->first;
else buf->wrapped = true;
} else {
buf->cur = list_entry(buf->cur->list.next, struct topa, buf->cur = list_entry(buf->cur->list.next, struct topa,
list); list);
}
buf->cur_idx = 0; buf->cur_idx = 0;
} }
} }
@ -846,8 +868,11 @@ static void pt_buffer_advance(struct pt_buffer *buf)
static void pt_update_head(struct pt *pt) static void pt_update_head(struct pt *pt)
{ {
struct pt_buffer *buf = perf_get_aux(&pt->handle); struct pt_buffer *buf = perf_get_aux(&pt->handle);
bool wrapped = buf->wrapped;
u64 topa_idx, base, old; u64 topa_idx, base, old;
buf->wrapped = false;
if (buf->single) { if (buf->single) {
local_set(&buf->data_size, buf->output_off); local_set(&buf->data_size, buf->output_off);
return; return;
@ -865,7 +890,7 @@ static void pt_update_head(struct pt *pt)
} else { } else {
old = (local64_xchg(&buf->head, base) & old = (local64_xchg(&buf->head, base) &
((buf->nr_pages << PAGE_SHIFT) - 1)); ((buf->nr_pages << PAGE_SHIFT) - 1));
if (base < old) if (base < old || (base == old && wrapped))
base += buf->nr_pages << PAGE_SHIFT; base += buf->nr_pages << PAGE_SHIFT;
local_add(base - old, &buf->data_size); local_add(base - old, &buf->data_size);
@ -1511,6 +1536,7 @@ void intel_pt_interrupt(void)
buf = perf_aux_output_begin(&pt->handle, event); buf = perf_aux_output_begin(&pt->handle, event);
if (!buf) { if (!buf) {
event->hw.state = PERF_HES_STOPPED; event->hw.state = PERF_HES_STOPPED;
WRITE_ONCE(pt->resume_allowed, 0);
return; return;
} }
@ -1519,6 +1545,7 @@ void intel_pt_interrupt(void)
ret = pt_buffer_reset_markers(buf, &pt->handle); ret = pt_buffer_reset_markers(buf, &pt->handle);
if (ret) { if (ret) {
perf_aux_output_end(&pt->handle, 0); perf_aux_output_end(&pt->handle, 0);
WRITE_ONCE(pt->resume_allowed, 0);
return; return;
} }
@ -1573,6 +1600,26 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf; struct pt_buffer *buf;
if (mode & PERF_EF_RESUME) {
if (READ_ONCE(pt->resume_allowed)) {
u64 status;
/*
* Only if the trace is not active and the error and
* stopped bits are clear, is it safe to start, but a
* PMI might have just cleared these, so resume_allowed
* must be checked again also.
*/
rdmsrl(MSR_IA32_RTIT_STATUS, status);
if (!(status & (RTIT_STATUS_TRIGGEREN |
RTIT_STATUS_ERROR |
RTIT_STATUS_STOPPED)) &&
READ_ONCE(pt->resume_allowed))
pt_config_start(event);
}
return;
}
buf = perf_aux_output_begin(&pt->handle, event); buf = perf_aux_output_begin(&pt->handle, event);
if (!buf) if (!buf)
goto fail_stop; goto fail_stop;
@ -1583,7 +1630,6 @@ static void pt_event_start(struct perf_event *event, int mode)
goto fail_end_stop; goto fail_end_stop;
} }
WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0; hwc->state = 0;
pt_config_buffer(buf); pt_config_buffer(buf);
@ -1601,6 +1647,12 @@ static void pt_event_stop(struct perf_event *event, int mode)
{ {
struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt *pt = this_cpu_ptr(&pt_ctx);
if (mode & PERF_EF_PAUSE) {
if (READ_ONCE(pt->pause_allowed))
pt_config_stop(event);
return;
}
/* /*
* Protect against the PMI racing with disabling wrmsr, * Protect against the PMI racing with disabling wrmsr,
* see comment in intel_pt_interrupt(). * see comment in intel_pt_interrupt().
@ -1608,6 +1660,15 @@ static void pt_event_stop(struct perf_event *event, int mode)
WRITE_ONCE(pt->handle_nmi, 0); WRITE_ONCE(pt->handle_nmi, 0);
barrier(); barrier();
/*
* Prevent a resume from attempting to restart tracing, or a pause
* during a subsequent start. Do this after clearing handle_nmi so that
* pt_event_snapshot_aux() will not re-allow them.
*/
WRITE_ONCE(pt->pause_allowed, 0);
WRITE_ONCE(pt->resume_allowed, 0);
barrier();
pt_config_stop(event); pt_config_stop(event);
if (event->hw.state == PERF_HES_STOPPED) if (event->hw.state == PERF_HES_STOPPED)
@ -1657,6 +1718,10 @@ static long pt_event_snapshot_aux(struct perf_event *event,
if (WARN_ON_ONCE(!buf->snapshot)) if (WARN_ON_ONCE(!buf->snapshot))
return 0; return 0;
/* Prevent pause/resume from attempting to start/stop tracing */
WRITE_ONCE(pt->pause_allowed, 0);
WRITE_ONCE(pt->resume_allowed, 0);
barrier();
/* /*
* There is no PT interrupt in this mode, so stop the trace and it will * There is no PT interrupt in this mode, so stop the trace and it will
* remain stopped while the buffer is copied. * remain stopped while the buffer is copied.
@ -1676,8 +1741,13 @@ static long pt_event_snapshot_aux(struct perf_event *event,
* Here, handle_nmi tells us if the tracing was on. * Here, handle_nmi tells us if the tracing was on.
* If the tracing was on, restart it. * If the tracing was on, restart it.
*/ */
if (READ_ONCE(pt->handle_nmi)) if (READ_ONCE(pt->handle_nmi)) {
WRITE_ONCE(pt->resume_allowed, 1);
barrier();
pt_config_start(event); pt_config_start(event);
barrier();
WRITE_ONCE(pt->pause_allowed, 1);
}
return ret; return ret;
} }
@ -1793,7 +1863,9 @@ static __init int pt_init(void)
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
PERF_PMU_CAP_ITRACE |
PERF_PMU_CAP_AUX_PAUSE;
pt_pmu.pmu.attr_groups = pt_attr_groups; pt_pmu.pmu.attr_groups = pt_attr_groups;
pt_pmu.pmu.task_ctx_nr = perf_sw_context; pt_pmu.pmu.task_ctx_nr = perf_sw_context;
pt_pmu.pmu.event_init = pt_event_init; pt_pmu.pmu.event_init = pt_event_init;

View File

@ -65,6 +65,7 @@ struct pt_pmu {
* @head: logical write offset inside the buffer * @head: logical write offset inside the buffer
* @snapshot: if this is for a snapshot/overwrite counter * @snapshot: if this is for a snapshot/overwrite counter
* @single: use Single Range Output instead of ToPA * @single: use Single Range Output instead of ToPA
* @wrapped: buffer advance wrapped back to the first topa table
* @stop_pos: STOP topa entry index * @stop_pos: STOP topa entry index
* @intr_pos: INT topa entry index * @intr_pos: INT topa entry index
* @stop_te: STOP topa entry pointer * @stop_te: STOP topa entry pointer
@ -82,6 +83,7 @@ struct pt_buffer {
local64_t head; local64_t head;
bool snapshot; bool snapshot;
bool single; bool single;
bool wrapped;
long stop_pos, intr_pos; long stop_pos, intr_pos;
struct topa_entry *stop_te, *intr_te; struct topa_entry *stop_te, *intr_te;
void **data_pages; void **data_pages;
@ -117,6 +119,8 @@ struct pt_filters {
* @filters: last configured filters * @filters: last configured filters
* @handle_nmi: do handle PT PMI on this cpu, there's an active event * @handle_nmi: do handle PT PMI on this cpu, there's an active event
* @vmx_on: 1 if VMX is ON on this cpu * @vmx_on: 1 if VMX is ON on this cpu
* @pause_allowed: PERF_EF_PAUSE is allowed to stop tracing
* @resume_allowed: PERF_EF_RESUME is allowed to start tracing
* @output_base: cached RTIT_OUTPUT_BASE MSR value * @output_base: cached RTIT_OUTPUT_BASE MSR value
* @output_mask: cached RTIT_OUTPUT_MASK MSR value * @output_mask: cached RTIT_OUTPUT_MASK MSR value
*/ */
@ -125,6 +129,8 @@ struct pt {
struct pt_filters filters; struct pt_filters filters;
int handle_nmi; int handle_nmi;
int vmx_on; int vmx_on;
int pause_allowed;
int resume_allowed;
u64 output_base; u64 output_base;
u64 output_mask; u64 output_mask;
}; };

View File

@ -668,24 +668,38 @@ enum {
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) #define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
/*
* CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
* of the core. Bits [31:24] indicate its core type (Core or Atom)
* and bits [23:0] indicate the native model ID of the core.
* Core type and native model ID are defined in the enumerations below.
*/
enum hybrid_cpu_type { enum hybrid_cpu_type {
HYBRID_INTEL_NONE, HYBRID_INTEL_NONE,
HYBRID_INTEL_ATOM = 0x20, HYBRID_INTEL_ATOM = 0x20,
HYBRID_INTEL_CORE = 0x40, HYBRID_INTEL_CORE = 0x40,
}; };
enum hybrid_pmu_type {
not_hybrid,
hybrid_small = BIT(0),
hybrid_big = BIT(1),
hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */
};
#define X86_HYBRID_PMU_ATOM_IDX 0 #define X86_HYBRID_PMU_ATOM_IDX 0
#define X86_HYBRID_PMU_CORE_IDX 1 #define X86_HYBRID_PMU_CORE_IDX 1
#define X86_HYBRID_PMU_TINY_IDX 2
#define X86_HYBRID_NUM_PMUS 2 enum hybrid_pmu_type {
not_hybrid,
hybrid_small = BIT(X86_HYBRID_PMU_ATOM_IDX),
hybrid_big = BIT(X86_HYBRID_PMU_CORE_IDX),
hybrid_tiny = BIT(X86_HYBRID_PMU_TINY_IDX),
/* The entries below are only used for matching */
hybrid_big_small = hybrid_big | hybrid_small,
hybrid_small_tiny = hybrid_small | hybrid_tiny,
hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
};
/*
* Native model IDs, i.e. the value reported in CPUID.1AH.EAX[23:0].
* NOTE(review): going by the enum name these apply to Atom-type cores
* only - confirm against the users of this enum.
*/
enum atom_native_id {
cmt_native_id = 0x2, /* Crestmont */
skt_native_id = 0x3, /* Skymont */
};
struct x86_hybrid_pmu { struct x86_hybrid_pmu {
struct pmu pmu; struct pmu pmu;
@ -1578,6 +1592,8 @@ u64 cmt_latency_data(struct perf_event *event, u64 status);
u64 lnl_latency_data(struct perf_event *event, u64 status); u64 lnl_latency_data(struct perf_event *event, u64 status);
u64 arl_h_latency_data(struct perf_event *event, u64 status);
extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[];
@ -1697,6 +1713,8 @@ void intel_pmu_pebs_data_source_grt(void);
void intel_pmu_pebs_data_source_mtl(void); void intel_pmu_pebs_data_source_mtl(void);
void intel_pmu_pebs_data_source_arl_h(void);
void intel_pmu_pebs_data_source_cmt(void); void intel_pmu_pebs_data_source_cmt(void);
void intel_pmu_pebs_data_source_lnl(void); void intel_pmu_pebs_data_source_lnl(void);

View File

@ -148,7 +148,6 @@ struct rapl_model {
/* 1/2^hw_unit Joule */ /* 1/2^hw_unit Joule */
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus; static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask; static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms; static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs; static struct perf_msr *rapl_msrs;
@ -369,8 +368,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
if (event->cpu < 0) if (event->cpu < 0)
return -EINVAL; return -EINVAL;
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
return -EINVAL; return -EINVAL;
@ -389,7 +386,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
pmu = cpu_to_rapl_pmu(event->cpu); pmu = cpu_to_rapl_pmu(event->cpu);
if (!pmu) if (!pmu)
return -EINVAL; return -EINVAL;
event->cpu = pmu->cpu;
event->pmu_private = pmu; event->pmu_private = pmu;
event->hw.event_base = rapl_msrs[bit].msr; event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg; event->hw.config = cfg;
@ -403,23 +399,6 @@ static void rapl_pmu_event_read(struct perf_event *event)
rapl_event_update(event); rapl_event_update(event);
} }
static ssize_t rapl_get_attr_cpumask(struct device *dev,
struct device_attribute *attr, char *buf)
{
return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
static struct attribute *rapl_pmu_attrs[] = {
&dev_attr_cpumask.attr,
NULL,
};
static struct attribute_group rapl_pmu_attr_group = {
.attrs = rapl_pmu_attrs,
};
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
@ -467,7 +446,6 @@ static struct attribute_group rapl_pmu_format_group = {
}; };
static const struct attribute_group *rapl_attr_groups[] = { static const struct attribute_group *rapl_attr_groups[] = {
&rapl_pmu_attr_group,
&rapl_pmu_format_group, &rapl_pmu_format_group,
&rapl_pmu_events_group, &rapl_pmu_events_group,
NULL, NULL,
@ -570,65 +548,6 @@ static struct perf_msr amd_rapl_msrs[] = {
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
}; };
/*
 * CPU hotplug "offline" callback. If @cpu was the CPU collecting RAPL events
 * for its PMU, hand that role (and the perf context) over to another CPU
 * taken from get_rapl_pmu_cpumask(), if there is one.
 *
 * Always returns 0.
 */
static int rapl_cpu_offline(unsigned int cpu)
{
	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
	int new_cpu;

	/* Nothing to do unless the exiting cpu was collecting rapl events */
	if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
		return 0;

	pmu->cpu = -1;

	/* Look for a replacement cpu to collect rapl events */
	new_cpu = cpumask_any_but(get_rapl_pmu_cpumask(cpu), cpu);
	if (new_cpu >= nr_cpu_ids)
		return 0;

	/* Migrate rapl events to the replacement cpu */
	cpumask_set_cpu(new_cpu, &rapl_cpu_mask);
	pmu->cpu = new_cpu;
	perf_pmu_migrate_context(pmu->pmu, cpu, new_cpu);

	return 0;
}
/*
 * CPU hotplug "online" callback. Makes sure the RAPL PMU covering @cpu is
 * allocated and that one CPU from get_rapl_pmu_cpumask(@cpu) is designated
 * to collect its events.
 *
 * Returns 0 on success, -EINVAL if get_rapl_pmu_idx() yields a negative
 * index for @cpu, or -ENOMEM if the PMU cannot be allocated.
 */
static int rapl_cpu_online(unsigned int cpu)
{
	s32 rapl_pmu_idx = get_rapl_pmu_idx(cpu);
	struct rapl_pmu *pmu;
	int target;

	if (rapl_pmu_idx < 0) {
		/* Newline-terminated so the message is a complete log record. */
		pr_err("topology_logical_(package/die)_id() returned a negative value\n");
		return -EINVAL;
	}

	/* Looked up only after the index has been validated, as before. */
	pmu = cpu_to_rapl_pmu(cpu);
	if (!pmu) {
		pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
		if (!pmu)
			return -ENOMEM;

		raw_spin_lock_init(&pmu->lock);
		INIT_LIST_HEAD(&pmu->active_list);
		pmu->pmu = &rapl_pmus->pmu;
		pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
		rapl_hrtimer_init(pmu);

		rapl_pmus->pmus[rapl_pmu_idx] = pmu;
	}

	/*
	 * Check if there is an online cpu in the package which collects rapl
	 * events already.
	 */
	target = cpumask_any_and(&rapl_cpu_mask, get_rapl_pmu_cpumask(cpu));
	if (target < nr_cpu_ids)
		return 0;

	cpumask_set_cpu(cpu, &rapl_cpu_mask);
	pmu->cpu = cpu;
	return 0;
}
static int rapl_check_hw_unit(struct rapl_model *rm) static int rapl_check_hw_unit(struct rapl_model *rm)
{ {
u64 msr_rapl_power_unit_bits; u64 msr_rapl_power_unit_bits;
@ -707,12 +626,41 @@ static const struct attribute_group *rapl_attr_update[] = {
NULL, NULL,
}; };
/*
 * Allocate and initialise one struct rapl_pmu for each of the
 * rapl_pmus->nr_rapl_pmu slots of rapl_pmus->pmus[].
 *
 * Returns 0 on success. If an allocation fails, every PMU allocated so far
 * is freed, its slot is reset to NULL, and -ENOMEM is returned.
 */
static int __init init_rapl_pmu(void)
{
	struct rapl_pmu *pmu;
	int idx;

	for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) {
		pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
		if (!pmu)
			goto free;

		raw_spin_lock_init(&pmu->lock);
		INIT_LIST_HEAD(&pmu->active_list);
		pmu->pmu = &rapl_pmus->pmu;
		pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
		rapl_hrtimer_init(pmu);

		rapl_pmus->pmus[idx] = pmu;
	}

	return 0;

free:
	/*
	 * Unwind slots [0, idx). Clear each slot after freeing it so that no
	 * dangling pointer is left behind for a later cleanup pass to free a
	 * second time.
	 */
	while (idx-- > 0) {
		kfree(rapl_pmus->pmus[idx]);
		rapl_pmus->pmus[idx] = NULL;
	}
	return -ENOMEM;
}
static int __init init_rapl_pmus(void) static int __init init_rapl_pmus(void)
{ {
int nr_rapl_pmu = topology_max_packages(); int nr_rapl_pmu = topology_max_packages();
int rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
if (!rapl_pmu_is_pkg_scope()) if (!rapl_pmu_is_pkg_scope()) {
nr_rapl_pmu *= topology_max_dies_per_package(); nr_rapl_pmu *= topology_max_dies_per_package();
rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
}
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL); rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus) if (!rapl_pmus)
@ -728,9 +676,11 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.start = rapl_pmu_event_start; rapl_pmus->pmu.start = rapl_pmu_event_start;
rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read; rapl_pmus->pmu.read = rapl_pmu_event_read;
rapl_pmus->pmu.scope = rapl_pmu_scope;
rapl_pmus->pmu.module = THIS_MODULE; rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
return 0;
return init_rapl_pmu();
} }
static struct rapl_model model_snb = { static struct rapl_model model_snb = {
@ -876,24 +826,13 @@ static int __init rapl_pmu_init(void)
if (ret) if (ret)
return ret; return ret;
/*
* Install callbacks. Core will call them for each online cpu.
*/
ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
"perf/x86/rapl:online",
rapl_cpu_online, rapl_cpu_offline);
if (ret)
goto out;
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
if (ret) if (ret)
goto out1; goto out;
rapl_advertise(); rapl_advertise();
return 0; return 0;
out1:
cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
out: out:
pr_warn("Initialization failed (%d), disabled\n", ret); pr_warn("Initialization failed (%d), disabled\n", ret);
cleanup_rapl_pmus(); cleanup_rapl_pmus();
@ -903,7 +842,6 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void) static void __exit intel_rapl_exit(void)
{ {
cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
perf_pmu_unregister(&rapl_pmus->pmu); perf_pmu_unregister(&rapl_pmus->pmu);
cleanup_rapl_pmus(); cleanup_rapl_pmus();
} }

View File

@ -32,6 +32,7 @@ extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip); extern bool handle_guest_split_lock(unsigned long ip);
extern void handle_bus_lock(struct pt_regs *regs); extern void handle_bus_lock(struct pt_regs *regs);
u8 get_this_hybrid_cpu_type(void); u8 get_this_hybrid_cpu_type(void);
u32 get_this_hybrid_cpu_native_id(void);
#else #else
static inline void __init sld_setup(struct cpuinfo_x86 *c) {} static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
@ -50,6 +51,11 @@ static inline u8 get_this_hybrid_cpu_type(void)
{ {
return 0; return 0;
} }
static inline u32 get_this_hybrid_cpu_native_id(void)
{
return 0;
}
#endif #endif
#ifdef CONFIG_IA32_FEAT_CTL #ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c); void init_ia32_feat_ctl(struct cpuinfo_x86 *c);

View File

@ -536,15 +536,17 @@ struct x86_perf_regs {
u64 *xmm_regs; u64 *xmm_regs;
}; };
extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs); extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs) extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs);
#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
#include <asm/stacktrace.h> #include <asm/stacktrace.h>
/* /*
* We abuse bit 3 from flags to pass exact information, see perf_misc_flags * We abuse bit 3 from flags to pass exact information, see
* and the comment with PERF_EFLAGS_EXACT. * perf_arch_misc_flags() and the comment with PERF_EFLAGS_EXACT.
*/ */
#define perf_arch_fetch_caller_regs(regs, __ip) { \ #define perf_arch_fetch_caller_regs(regs, __ip) { \
(regs)->ip = (__ip); \ (regs)->ip = (__ip); \

View File

@ -1299,3 +1299,18 @@ u8 get_this_hybrid_cpu_type(void)
return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT; return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
} }
/**
 * get_this_hybrid_cpu_native_id() - Get the native id of this hybrid CPU
 *
 * Reads CPUID leaf 0x1a and keeps only the EAX bits below
 * X86_HYBRID_CPU_TYPE_ID_SHIFT, i.e. the uarch native ID [23:0] of a CPU in
 * a hybrid processor.
 *
 * Returns 0 if the processor is not hybrid.
 */
u32 get_this_hybrid_cpu_native_id(void)
{
	const u32 native_id_mask = BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1;

	if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
		return cpuid_eax(0x0000001a) & native_id_mask;

	return 0;
}

View File

@ -208,7 +208,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_UNCORE_ONLINE, CPUHP_AP_PERF_X86_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
CPUHP_AP_PERF_X86_RAPL_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE,

View File

@ -170,6 +170,12 @@ struct hw_perf_event {
}; };
struct { /* aux / Intel-PT */ struct { /* aux / Intel-PT */
u64 aux_config; u64 aux_config;
/*
* For AUX area events, aux_paused cannot be a state
* flag because it can be updated asynchronously to
* state.
*/
unsigned int aux_paused;
}; };
struct { /* software */ struct { /* software */
struct hrtimer hrtimer; struct hrtimer hrtimer;
@ -294,6 +300,7 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 #define PERF_PMU_CAP_NO_EXCLUDE 0x0040
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 #define PERF_PMU_CAP_AUX_OUTPUT 0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
#define PERF_PMU_CAP_AUX_PAUSE 0x0200
/** /**
* pmu::scope * pmu::scope
@ -384,6 +391,8 @@ struct pmu {
#define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_START 0x01 /* start the counter when adding */
#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */
#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */
/* /*
* Adds/Removes a counter to/from the PMU, can be done inside a * Adds/Removes a counter to/from the PMU, can be done inside a
@ -423,6 +432,18 @@ struct pmu {
* *
* ->start() with PERF_EF_RELOAD will reprogram the counter * ->start() with PERF_EF_RELOAD will reprogram the counter
* value, must be preceded by a ->stop() with PERF_EF_UPDATE. * value, must be preceded by a ->stop() with PERF_EF_UPDATE.
*
* ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
* overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
* PERF_EF_RESUME.
*
* ->start() with PERF_EF_RESUME will start as simply as possible but
* only if the counter is not otherwise stopped. Will not overlap
* another ->start() with PERF_EF_RESUME nor ->stop() with
* PERF_EF_PAUSE.
*
* Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
* ->stop()/->start() invocations, just not itself.
*/ */
void (*start) (struct perf_event *event, int flags); void (*start) (struct perf_event *event, int flags);
void (*stop) (struct perf_event *event, int flags); void (*stop) (struct perf_event *event, int flags);
@ -1655,15 +1676,35 @@ extern void perf_tp_event(u16 event_type, u64 count, void *record,
struct task_struct *task); struct task_struct *task);
extern void perf_bp_event(struct perf_event *event, void *data); extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs);
# define perf_misc_flags(regs) \ extern unsigned long perf_instruction_pointer(struct perf_event *event,
struct pt_regs *regs);
#ifndef perf_arch_misc_flags
# define perf_arch_misc_flags(regs) \
(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
# define perf_instruction_pointer(regs) instruction_pointer(regs) # define perf_arch_instruction_pointer(regs) instruction_pointer(regs)
#endif #endif
#ifndef perf_arch_bpf_user_pt_regs #ifndef perf_arch_bpf_user_pt_regs
# define perf_arch_bpf_user_pt_regs(regs) regs # define perf_arch_bpf_user_pt_regs(regs) regs
#endif #endif
#ifndef perf_arch_guest_misc_flags
/*
 * Default guest misc flags: derive PERF_RECORD_MISC_GUEST_{USER,KERNEL} from
 * perf_guest_state(). Returns 0 when PERF_GUEST_ACTIVE is not set. @regs is
 * not used by this generic version.
 */
static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
{
	unsigned long state = perf_guest_state();

	if (state & PERF_GUEST_ACTIVE) {
		if (state & PERF_GUEST_USER)
			return PERF_RECORD_MISC_GUEST_USER;
		return PERF_RECORD_MISC_GUEST_KERNEL;
	}

	return 0;
}
# define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
#endif
static inline bool has_branch_stack(struct perf_event *event) static inline bool has_branch_stack(struct perf_event *event)
{ {
return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
@ -1679,6 +1720,13 @@ static inline bool has_aux(struct perf_event *event)
return event->pmu->setup_aux; return event->pmu->setup_aux;
} }
/*
 * True if the event's attributes request any AUX area action: AUX sampling
 * (aux_sample_size), pausing (aux_pause) or resuming (aux_resume).
 */
static inline bool has_aux_action(struct perf_event *event)
{
	if (event->attr.aux_sample_size)
		return true;
	if (event->attr.aux_pause)
		return true;
	return event->attr.aux_resume;
}
static inline bool is_write_backward(struct perf_event *event) static inline bool is_write_backward(struct perf_event *event)
{ {
return !!event->attr.write_backward; return !!event->attr.write_backward;

View File

@ -15,6 +15,7 @@
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/timer.h>
struct uprobe; struct uprobe;
struct vm_area_struct; struct vm_area_struct;
@ -23,8 +24,17 @@ struct inode;
struct notifier_block; struct notifier_block;
struct page; struct page;
/*
* Allowed return values from uprobe consumer's handler callback
* with following meaning:
*
* UPROBE_HANDLER_REMOVE
* - Remove the uprobe breakpoint from current->mm.
* UPROBE_HANDLER_IGNORE
* - Ignore ret_handler callback for this consumer.
*/
#define UPROBE_HANDLER_REMOVE 1 #define UPROBE_HANDLER_REMOVE 1
#define UPROBE_HANDLER_MASK 1 #define UPROBE_HANDLER_IGNORE 2
#define MAX_URETPROBE_DEPTH 64 #define MAX_URETPROBE_DEPTH 64
@ -37,13 +47,15 @@ struct uprobe_consumer {
* for the current process. If filter() is omitted or returns true, * for the current process. If filter() is omitted or returns true,
* UPROBE_HANDLER_REMOVE is effectively ignored. * UPROBE_HANDLER_REMOVE is effectively ignored.
*/ */
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
int (*ret_handler)(struct uprobe_consumer *self, int (*ret_handler)(struct uprobe_consumer *self,
unsigned long func, unsigned long func,
struct pt_regs *regs); struct pt_regs *regs, __u64 *data);
bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
struct list_head cons_node; struct list_head cons_node;
__u64 id; /* set when uprobe_consumer is registered */
}; };
#ifdef CONFIG_UPROBES #ifdef CONFIG_UPROBES
@ -56,12 +68,62 @@ enum uprobe_task_state {
UTASK_SSTEP_TRAPPED, UTASK_SSTEP_TRAPPED,
}; };
/*
* The state of hybrid-lifetime uprobe inside struct return_instance.
* This is the type of the 'state' member of struct hprobe.
*/
enum hprobe_state {
HPROBE_LEASED, /* uretprobes_srcu-protected uprobe */
HPROBE_STABLE, /* refcounted uprobe */
HPROBE_GONE, /* NULL uprobe, SRCU expired, refcount failed */
HPROBE_CONSUMED, /* uprobe "consumed" by uretprobe handler */
};
/*
* Hybrid lifetime uprobe. Represents a uprobe instance that could be either
* SRCU protected (with SRCU protection eventually potentially timing out),
* refcounted using uprobe->ref, or there could be no valid uprobe (NULL).
*
* hprobe's internal state is set up such that background timer thread can
* atomically "downgrade" temporarily RCU-protected uprobe into refcounted one
* (or no uprobe, if refcounting failed).
*
* *stable* pointer always points to the uprobe (or could be NULL if there
* was no valid underlying uprobe to begin with).
*
* *leased* pointer is the key to achieving race-free atomic lifetime state
* transition and can have three possible states:
* - either the same non-NULL value as *stable*, in which case uprobe is
* SRCU-protected;
* - NULL, in which case uprobe (if there is any) is refcounted;
* - special __UPROBE_DEAD value, which represents an uprobe that was SRCU
* protected initially, but SRCU period timed out and we attempted to
* convert it to refcounted, but refcount_inc_not_zero() failed, because
* uprobe effectively went away (the last consumer unsubscribed). In this
* case it's important to know that *stable* pointer (which still has
* non-NULL uprobe pointer) shouldn't be used, because lifetime of
* underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just an
* internal marker and is handled transparently by hprobe_fetch() helper.
*
* When uprobe is SRCU-protected, we also record srcu_idx value, necessary for
* SRCU unlocking.
*
* See hprobe_expire() and hprobe_fetch() for the details of race-free uprobe
* state transitions. It all hinges on atomic xchg() over *leased*
* pointer. *stable* pointer, once initially set, is not modified concurrently.
*
* NOTE(review): the structure below has 'state', 'srcu_idx' and 'uprobe'
* members rather than separate *stable* and *leased* pointers; presumably
* the lifetime states described above are what enum hprobe_state encodes -
* confirm and update this description accordingly.
*/
struct hprobe {
/* which of the lifetime states listed in enum hprobe_state applies */
enum hprobe_state state;
/* recorded while the uprobe is SRCU-protected; needed for SRCU unlocking */
int srcu_idx;
/* the underlying uprobe; may be NULL (see the description above) */
struct uprobe *uprobe;
};
/* /*
* uprobe_task: Metadata of a task while it singlesteps. * uprobe_task: Metadata of a task while it singlesteps.
*/ */
struct uprobe_task { struct uprobe_task {
enum uprobe_task_state state; enum uprobe_task_state state;
unsigned int depth;
struct return_instance *return_instances;
union { union {
struct { struct {
struct arch_uprobe_task autask; struct arch_uprobe_task autask;
@ -75,23 +137,30 @@ struct uprobe_task {
}; };
struct uprobe *active_uprobe; struct uprobe *active_uprobe;
struct timer_list ri_timer;
unsigned long xol_vaddr; unsigned long xol_vaddr;
struct arch_uprobe *auprobe; struct arch_uprobe *auprobe;
};
struct return_instance *return_instances; struct return_consumer {
unsigned int depth; __u64 cookie;
__u64 id;
}; };
struct return_instance { struct return_instance {
struct uprobe *uprobe; struct hprobe hprobe;
unsigned long func; unsigned long func;
unsigned long stack; /* stack pointer */ unsigned long stack; /* stack pointer */
unsigned long orig_ret_vaddr; /* original return address */ unsigned long orig_ret_vaddr; /* original return address */
bool chained; /* true, if instance is nested */ bool chained; /* true, if instance is nested */
int consumers_cnt;
struct return_instance *next; /* keep as stack */ struct return_instance *next; /* keep as stack */
}; struct rcu_head rcu;
struct return_consumer consumers[] __counted_by(consumers_cnt);
} ____cacheline_aligned;
enum rp_check { enum rp_check {
RP_CHECK_CALL, RP_CHECK_CALL,

View File

@ -511,7 +511,16 @@ struct perf_event_attr {
__u16 sample_max_stack; __u16 sample_max_stack;
__u16 __reserved_2; __u16 __reserved_2;
__u32 aux_sample_size; __u32 aux_sample_size;
__u32 __reserved_3;
union {
__u32 aux_action;
struct {
__u32 aux_start_paused : 1, /* start AUX area tracing paused */
aux_pause : 1, /* on overflow, pause AUX area tracing */
aux_resume : 1, /* on overflow, resume AUX area tracing */
__reserved_3 : 29;
};
};
/* /*
* User provided data if sigtrap=1, passed back to user via * User provided data if sigtrap=1, passed back to user via

View File

@ -2142,7 +2142,7 @@ static void perf_put_aux_event(struct perf_event *event)
static bool perf_need_aux_event(struct perf_event *event) static bool perf_need_aux_event(struct perf_event *event)
{ {
return !!event->attr.aux_output || !!event->attr.aux_sample_size; return event->attr.aux_output || has_aux_action(event);
} }
static int perf_get_aux_event(struct perf_event *event, static int perf_get_aux_event(struct perf_event *event,
@ -2167,6 +2167,10 @@ static int perf_get_aux_event(struct perf_event *event,
!perf_aux_output_match(event, group_leader)) !perf_aux_output_match(event, group_leader))
return 0; return 0;
if ((event->attr.aux_pause || event->attr.aux_resume) &&
!(group_leader->pmu->capabilities & PERF_PMU_CAP_AUX_PAUSE))
return 0;
if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux) if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
return 0; return 0;
@ -7003,6 +7007,29 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
#endif #endif
/*
 * A sample is attributed to the guest only if the event does not exclude
 * guests and perf_guest_state() reports a non-zero guest state.
 * perf_guest_state() is not consulted for exclude_guest events.
 */
static bool should_sample_guest(struct perf_event *event)
{
	if (event->attr.exclude_guest)
		return false;

	return perf_guest_state();
}
/*
 * Misc flags for a sample of @event: the arch guest flags when the sample
 * should be attributed to a guest, the arch host flags otherwise.
 */
unsigned long perf_misc_flags(struct perf_event *event,
			      struct pt_regs *regs)
{
	unsigned long flags;

	if (should_sample_guest(event))
		flags = perf_arch_guest_misc_flags(regs);
	else
		flags = perf_arch_misc_flags(regs);

	return flags;
}
/*
 * Instruction pointer for a sample of @event: the guest IP from
 * perf_guest_get_ip() when the sample should be attributed to a guest,
 * otherwise the arch instruction pointer taken from @regs.
 */
unsigned long perf_instruction_pointer(struct perf_event *event,
				       struct pt_regs *regs)
{
	unsigned long ip;

	if (should_sample_guest(event))
		ip = perf_guest_get_ip();
	else
		ip = perf_arch_instruction_pointer(regs);

	return ip;
}
static void static void
perf_output_sample_regs(struct perf_output_handle *handle, perf_output_sample_regs(struct perf_output_handle *handle,
struct pt_regs *regs, u64 mask) struct pt_regs *regs, u64 mask)
@ -7820,7 +7847,7 @@ void perf_prepare_sample(struct perf_sample_data *data,
__perf_event_header__init_id(data, event, filtered_sample_type); __perf_event_header__init_id(data, event, filtered_sample_type);
if (filtered_sample_type & PERF_SAMPLE_IP) { if (filtered_sample_type & PERF_SAMPLE_IP) {
data->ip = perf_instruction_pointer(regs); data->ip = perf_instruction_pointer(event, regs);
data->sample_flags |= PERF_SAMPLE_IP; data->sample_flags |= PERF_SAMPLE_IP;
} }
@ -7984,7 +8011,7 @@ void perf_prepare_header(struct perf_event_header *header,
{ {
header->type = PERF_RECORD_SAMPLE; header->type = PERF_RECORD_SAMPLE;
header->size = perf_sample_data_size(data, event); header->size = perf_sample_data_size(data, event);
header->misc = perf_misc_flags(regs); header->misc = perf_misc_flags(event, regs);
/* /*
* If you're adding more sample types here, you likely need to do * If you're adding more sample types here, you likely need to do
@ -7997,6 +8024,49 @@ void perf_prepare_header(struct perf_event_header *header,
WARN_ON_ONCE(header->size & 7); WARN_ON_ONCE(header->size & 7);
} }
/*
 * Pause or resume AUX area tracing for @event, but only on an actual state
 * change: hw.aux_paused is updated first, then the PMU's ->stop() is called
 * with PERF_EF_PAUSE or ->start() with PERF_EF_RESUME.
 */
static void __perf_event_aux_pause(struct perf_event *event, bool pause)
{
	if (pause) {
		/* Already paused: nothing to do */
		if (event->hw.aux_paused)
			return;

		event->hw.aux_paused = 1;
		event->pmu->stop(event, PERF_EF_PAUSE);
		return;
	}

	/* Not paused: nothing to resume */
	if (!event->hw.aux_paused)
		return;

	event->hw.aux_paused = 0;
	event->pmu->start(event, PERF_EF_RESUME);
}
/*
* Pause (@pause true) or resume (@pause false) AUX area tracing of @event.
*
* Warns once and returns if @event is NULL; returns silently if
* ring_buffer_get() yields no buffer. The actual work is done by
* __perf_event_aux_pause() inside an irqsave scoped guard, bracketed by the
* buffer's aux_in_pause_resume flag so that a nested invocation on the same
* buffer backs off instead of recursing.
*/
static void perf_event_aux_pause(struct perf_event *event, bool pause)
{
struct perf_buffer *rb;
if (WARN_ON_ONCE(!event))
return;
rb = ring_buffer_get(event);
if (!rb)
return;
scoped_guard (irqsave) {
/*
* Guard against self-recursion here. Another event could trigger
* this same path from NMI context.
*/
if (READ_ONCE(rb->aux_in_pause_resume))
break;
WRITE_ONCE(rb->aux_in_pause_resume, 1);
/* compiler barriers keep the flag updates ordered around the call */
barrier();
__perf_event_aux_pause(event, pause);
barrier();
WRITE_ONCE(rb->aux_in_pause_resume, 0);
}
/* drop the buffer obtained by ring_buffer_get() above */
ring_buffer_put(rb);
}
static __always_inline int static __always_inline int
__perf_event_output(struct perf_event *event, __perf_event_output(struct perf_event *event,
struct perf_sample_data *data, struct perf_sample_data *data,
@ -9799,9 +9869,12 @@ static int __perf_event_overflow(struct perf_event *event,
ret = __perf_event_account_interrupt(event, throttle); ret = __perf_event_account_interrupt(event, throttle);
if (event->attr.aux_pause)
perf_event_aux_pause(event->aux_event, true);
if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT && if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
!bpf_overflow_handler(event, data, regs)) !bpf_overflow_handler(event, data, regs))
return ret; goto out;
/* /*
* XXX event_limit might not quite work as expected on inherited * XXX event_limit might not quite work as expected on inherited
@ -9863,6 +9936,9 @@ static int __perf_event_overflow(struct perf_event *event,
event->pending_wakeup = 1; event->pending_wakeup = 1;
irq_work_queue(&event->pending_irq); irq_work_queue(&event->pending_irq);
} }
out:
if (event->attr.aux_resume)
perf_event_aux_pause(event->aux_event, false);
return ret; return ret;
} }
@ -12254,11 +12330,25 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
} }
if (event->attr.aux_output && if (event->attr.aux_output &&
!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) { (!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT) ||
event->attr.aux_pause || event->attr.aux_resume)) {
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
goto err_pmu; goto err_pmu;
} }
if (event->attr.aux_pause && event->attr.aux_resume) {
err = -EINVAL;
goto err_pmu;
}
if (event->attr.aux_start_paused) {
if (!(pmu->capabilities & PERF_PMU_CAP_AUX_PAUSE)) {
err = -EOPNOTSUPP;
goto err_pmu;
}
event->hw.aux_paused = 1;
}
if (cgroup_fd != -1) { if (cgroup_fd != -1) {
err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
if (err) if (err)
@ -13052,7 +13142,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
* Grouping is not supported for kernel events, neither is 'AUX', * Grouping is not supported for kernel events, neither is 'AUX',
* make sure the caller's intentions are adjusted. * make sure the caller's intentions are adjusted.
*/ */
if (attr->aux_output) if (attr->aux_output || attr->aux_action)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
event = perf_event_alloc(attr, cpu, task, NULL, NULL, event = perf_event_alloc(attr, cpu, task, NULL, NULL,

View File

@ -52,6 +52,7 @@ struct perf_buffer {
void (*free_aux)(void *); void (*free_aux)(void *);
refcount_t aux_refcount; refcount_t aux_refcount;
int aux_in_sampling; int aux_in_sampling;
int aux_in_pause_resume;
void **aux_pages; void **aux_pages;
void *aux_priv; void *aux_priv;

View File

@ -26,6 +26,9 @@
#include <linux/task_work.h> #include <linux/task_work.h>
#include <linux/shmem_fs.h> #include <linux/shmem_fs.h>
#include <linux/khugepaged.h> #include <linux/khugepaged.h>
#include <linux/rcupdate_trace.h>
#include <linux/workqueue.h>
#include <linux/srcu.h>
#include <linux/uprobes.h> #include <linux/uprobes.h>
@ -42,8 +45,6 @@ static struct rb_root uprobes_tree = RB_ROOT;
static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */ static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */
static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock); static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock);
DEFINE_STATIC_SRCU(uprobes_srcu);
#define UPROBES_HASH_SZ 13 #define UPROBES_HASH_SZ 13
/* serialize uprobe->pending_list */ /* serialize uprobe->pending_list */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
@ -51,6 +52,9 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
/* Covers return_instance's uprobe lifetime. */
DEFINE_STATIC_SRCU(uretprobes_srcu);
/* Have a copy of original instruction */ /* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0 #define UPROBE_COPY_INSN 0
@ -62,10 +66,13 @@ struct uprobe {
struct list_head pending_list; struct list_head pending_list;
struct list_head consumers; struct list_head consumers;
struct inode *inode; /* Also hold a ref to inode */ struct inode *inode; /* Also hold a ref to inode */
struct rcu_head rcu; union {
struct rcu_head rcu;
struct work_struct work;
};
loff_t offset; loff_t offset;
loff_t ref_ctr_offset; loff_t ref_ctr_offset;
unsigned long flags; unsigned long flags; /* "unsigned long" so bitops work */
/* /*
* The generic code assumes that it has two members of unknown type * The generic code assumes that it has two members of unknown type
@ -100,7 +107,6 @@ static LIST_HEAD(delayed_uprobe_list);
*/ */
struct xol_area { struct xol_area {
wait_queue_head_t wq; /* if all slots are busy */ wait_queue_head_t wq; /* if all slots are busy */
atomic_t slot_count; /* number of in-use slots */
unsigned long *bitmap; /* 0 = free slot */ unsigned long *bitmap; /* 0 = free slot */
struct page *page; struct page *page;
@ -620,17 +626,23 @@ static inline bool uprobe_is_active(struct uprobe *uprobe)
return !RB_EMPTY_NODE(&uprobe->rb_node); return !RB_EMPTY_NODE(&uprobe->rb_node);
} }
static void uprobe_free_rcu(struct rcu_head *rcu) static void uprobe_free_rcu_tasks_trace(struct rcu_head *rcu)
{ {
struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
kfree(uprobe); kfree(uprobe);
} }
static void put_uprobe(struct uprobe *uprobe) static void uprobe_free_srcu(struct rcu_head *rcu)
{ {
if (!refcount_dec_and_test(&uprobe->ref)) struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
return;
call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu_tasks_trace);
}
static void uprobe_free_deferred(struct work_struct *work)
{
struct uprobe *uprobe = container_of(work, struct uprobe, work);
write_lock(&uprobes_treelock); write_lock(&uprobes_treelock);
@ -651,7 +663,162 @@ static void put_uprobe(struct uprobe *uprobe)
delayed_uprobe_remove(uprobe, NULL); delayed_uprobe_remove(uprobe, NULL);
mutex_unlock(&delayed_uprobe_lock); mutex_unlock(&delayed_uprobe_lock);
call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu); /* start srcu -> rcu_tasks_trace -> kfree chain */
call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_srcu);
}
static void put_uprobe(struct uprobe *uprobe)
{
if (!refcount_dec_and_test(&uprobe->ref))
return;
INIT_WORK(&uprobe->work, uprobe_free_deferred);
schedule_work(&uprobe->work);
}
/*
 * Set up @hprobe in the HPROBE_LEASED state: it refers to @uprobe under SRCU
 * protection, and @srcu_idx is remembered for the later SRCU unlock.
 * A NULL @uprobe triggers a warning.
 */
static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx)
{
	WARN_ON(!uprobe);

	hprobe->srcu_idx = srcu_idx;
	hprobe->uprobe = uprobe;
	hprobe->state = HPROBE_LEASED;
}
/*
 * Set up @hprobe as a refcounted ("stable") handle for @uprobe.
 * @uprobe may be NULL, in which case the handle starts out as HPROBE_GONE.
 * No SRCU index is associated with it (srcu_idx is set to -1).
 */
static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe)
{
	hprobe->srcu_idx = -1;
	hprobe->uprobe = uprobe;

	if (uprobe)
		hprobe->state = HPROBE_STABLE;
	else
		hprobe->state = HPROBE_GONE;
}
/*
 * hprobe_consume() atomically switches @hprobe to HPROBE_CONSUMED and stores
 * the state it had before in *@hstate, which tells the caller whether the
 * underlying uprobe was SRCU protected (LEASED) or refcounted (STABLE).
 * It can be used only once for a given hprobe.
 *
 * Returns the underlying uprobe for the LEASED and STABLE states and NULL
 * otherwise (an unknown previous state additionally triggers a warning).
 *
 * The caller has to follow up with hprobe_finalize(), passing the previous
 * state, so that the SRCU unlock or the uprobe put, whichever is
 * appropriate, is performed.
 */
static inline struct uprobe *hprobe_consume(struct hprobe *hprobe, enum hprobe_state *hstate)
{
	*hstate = xchg(&hprobe->state, HPROBE_CONSUMED);

	if (*hstate == HPROBE_LEASED || *hstate == HPROBE_STABLE)
		return hprobe->uprobe;

	/*
	 * HPROBE_GONE: uprobe is NULL, no SRCU.
	 * HPROBE_CONSUMED: uprobe was finalized already, do nothing.
	 * Anything else is not a valid state.
	 */
	if (*hstate != HPROBE_GONE && *hstate != HPROBE_CONSUMED)
		WARN(1, "hprobe invalid state %d", *hstate);

	return NULL;
}
/*
 * Release whatever was keeping the hprobe's uprobe alive, based on @hstate,
 * the state reported by a preceding hprobe_consume() call in the current
 * context:
 *
 *   HPROBE_LEASED            - drop the SRCU read lock recorded in @hprobe
 *   HPROBE_STABLE            - put the uprobe reference
 *   HPROBE_GONE / _CONSUMED  - nothing to release
 *
 * Any other value triggers a warning.
 */
static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate)
{
	if (hstate == HPROBE_LEASED) {
		__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
		return;
	}

	if (hstate == HPROBE_STABLE) {
		put_uprobe(hprobe->uprobe);
		return;
	}

	if (hstate == HPROBE_GONE || hstate == HPROBE_CONSUMED)
		return;

	WARN(1, "hprobe invalid state %d", hstate);
}
/*
* Attempt to switch (atomically) uprobe from being SRCU protected (LEASED)
* to refcounted (STABLE) state. Competes with hprobe_consume(); only one of
* them can win the race to perform SRCU unlocking. Whoever wins must perform
* SRCU unlock.
*
* Returns underlying valid uprobe or NULL, if there was no underlying uprobe
* to begin with or we failed to bump its refcount and it's going away.
*
* Returned non-NULL uprobe can be still safely used within an ongoing SRCU
* locked region. If `get` is true, it's guaranteed that non-NULL uprobe has
* an extra refcount for caller to assume and use. Otherwise, it's not
* guaranteed that returned uprobe has a positive refcount, so caller has to
* attempt try_get_uprobe(), if it needs to preserve uprobe beyond current
* SRCU lock region. See dup_utask().
*/
static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
{
enum hprobe_state hstate;
/*
* return_instance's hprobe is protected by RCU.
* Underlying uprobe is itself protected from reuse by SRCU.
*/
lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
hstate = READ_ONCE(hprobe->state);
switch (hstate) {
case HPROBE_STABLE:
/* uprobe has positive refcount, bump refcount, if necessary */
return get ? get_uprobe(hprobe->uprobe) : hprobe->uprobe;
case HPROBE_GONE:
/*
* SRCU was unlocked earlier and we didn't manage to take
* uprobe refcnt, so it's effectively NULL
*/
return NULL;
case HPROBE_CONSUMED:
/*
* uprobe was consumed, so it's effectively NULL as far as
* uretprobe processing logic is concerned
*/
return NULL;
case HPROBE_LEASED: {
struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe);
/*
* Try to switch hprobe state, guarding against
* hprobe_consume() or another hprobe_expire() racing with us.
* Note, if we failed to get uprobe refcount, we use special
* HPROBE_GONE state to signal that hprobe->uprobe shouldn't
* be used as it will be freed after SRCU is unlocked.
*/
if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) {
/* We won the race, we are the ones to unlock SRCU */
__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
return get ? get_uprobe(uprobe) : uprobe;
}
/*
* We lost the race, undo refcount bump (if it ever happened),
* unless caller would like an extra refcount anyways.
*/
if (uprobe && !get)
put_uprobe(uprobe);
/*
* Even if hprobe_consume() or another hprobe_expire() wins
* the state update race and unlocks SRCU from under us, we
* still have a guarantee that underlying uprobe won't be
* freed due to ongoing caller's SRCU lock region, so we can
* return it regardless. Also, if `get` was true, we also have
* an extra ref for the caller to own. This is used in dup_utask().
*/
return uprobe;
}
default:
WARN(1, "unknown hprobe state %d", hstate);
return NULL;
}
} }
static __always_inline static __always_inline
@ -706,7 +873,7 @@ static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
struct rb_node *node; struct rb_node *node;
unsigned int seq; unsigned int seq;
lockdep_assert(srcu_read_lock_held(&uprobes_srcu)); lockdep_assert(rcu_read_lock_trace_held());
do { do {
seq = read_seqcount_begin(&uprobes_seqcount); seq = read_seqcount_begin(&uprobes_seqcount);
@ -825,8 +992,11 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
{ {
static atomic64_t id;
down_write(&uprobe->consumer_rwsem); down_write(&uprobe->consumer_rwsem);
list_add_rcu(&uc->cons_node, &uprobe->consumers); list_add_rcu(&uc->cons_node, &uprobe->consumers);
uc->id = (__u64) atomic64_inc_return(&id);
up_write(&uprobe->consumer_rwsem); up_write(&uprobe->consumer_rwsem);
} }
@ -934,8 +1104,7 @@ static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
bool ret = false; bool ret = false;
down_read(&uprobe->consumer_rwsem); down_read(&uprobe->consumer_rwsem);
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node, list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
srcu_read_lock_held(&uprobes_srcu)) {
ret = consumer_filter(uc, mm); ret = consumer_filter(uc, mm);
if (ret) if (ret)
break; break;
@ -1156,7 +1325,8 @@ void uprobe_unregister_sync(void)
* unlucky enough caller can free consumer's memory and cause * unlucky enough caller can free consumer's memory and cause
* handler_chain() or handle_uretprobe_chain() to do an use-after-free. * handler_chain() or handle_uretprobe_chain() to do an use-after-free.
*/ */
synchronize_srcu(&uprobes_srcu); synchronize_rcu_tasks_trace();
synchronize_srcu(&uretprobes_srcu);
} }
EXPORT_SYMBOL_GPL(uprobe_unregister_sync); EXPORT_SYMBOL_GPL(uprobe_unregister_sync);
@ -1240,19 +1410,18 @@ EXPORT_SYMBOL_GPL(uprobe_register);
int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add) int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
{ {
struct uprobe_consumer *con; struct uprobe_consumer *con;
int ret = -ENOENT, srcu_idx; int ret = -ENOENT;
down_write(&uprobe->register_rwsem); down_write(&uprobe->register_rwsem);
srcu_idx = srcu_read_lock(&uprobes_srcu); rcu_read_lock_trace();
list_for_each_entry_srcu(con, &uprobe->consumers, cons_node, list_for_each_entry_rcu(con, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
srcu_read_lock_held(&uprobes_srcu)) {
if (con == uc) { if (con == uc) {
ret = register_for_each_vma(uprobe, add ? uc : NULL); ret = register_for_each_vma(uprobe, add ? uc : NULL);
break; break;
} }
} }
srcu_read_unlock(&uprobes_srcu, srcu_idx); rcu_read_unlock_trace();
up_write(&uprobe->register_rwsem); up_write(&uprobe->register_rwsem);
@ -1475,9 +1644,15 @@ static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
return 0; return 0;
} }
/*
 * ->mremap hook of xol_mapping (the "[uprobes]" special mapping).
 * Unconditionally returns -EPERM; neither argument is inspected.
 * NOTE(review): presumably this is what prevents the XOL area from being
 * moved with mremap() - confirm against the special-mapping mremap caller.
 */
static int xol_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
{
return -EPERM;
}
static const struct vm_special_mapping xol_mapping = { static const struct vm_special_mapping xol_mapping = {
.name = "[uprobes]", .name = "[uprobes]",
.fault = xol_fault, .fault = xol_fault,
.mremap = xol_mremap,
}; };
/* Slot allocation for XOL */ /* Slot allocation for XOL */
@ -1553,7 +1728,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
init_waitqueue_head(&area->wq); init_waitqueue_head(&area->wq);
/* Reserve the 1st slot for get_trampoline_vaddr() */ /* Reserve the 1st slot for get_trampoline_vaddr() */
set_bit(0, area->bitmap); set_bit(0, area->bitmap);
atomic_set(&area->slot_count, 1);
insns = arch_uprobe_trampoline(&insns_size); insns = arch_uprobe_trampoline(&insns_size);
arch_uprobe_copy_ixol(area->page, 0, insns, insns_size); arch_uprobe_copy_ixol(area->page, 0, insns, insns_size);
@ -1626,92 +1800,57 @@ void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
} }
} }
/* static unsigned long xol_get_slot_nr(struct xol_area *area)
* - search for a free slot.
*/
static unsigned long xol_take_insn_slot(struct xol_area *area)
{ {
unsigned long slot_addr; unsigned long slot_nr;
int slot_nr;
do { slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE); if (slot_nr < UINSNS_PER_PAGE) {
if (slot_nr < UINSNS_PER_PAGE) { if (!test_and_set_bit(slot_nr, area->bitmap))
if (!test_and_set_bit(slot_nr, area->bitmap)) return slot_nr;
break; }
slot_nr = UINSNS_PER_PAGE; return UINSNS_PER_PAGE;
continue;
}
wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
} while (slot_nr >= UINSNS_PER_PAGE);
slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES);
atomic_inc(&area->slot_count);
return slot_addr;
} }
/* /*
* xol_get_insn_slot - allocate a slot for xol. * xol_get_insn_slot - allocate a slot for xol.
* Returns the allocated slot address or 0.
*/ */
static unsigned long xol_get_insn_slot(struct uprobe *uprobe) static bool xol_get_insn_slot(struct uprobe *uprobe, struct uprobe_task *utask)
{ {
struct xol_area *area; struct xol_area *area = get_xol_area();
unsigned long xol_vaddr; unsigned long slot_nr;
area = get_xol_area();
if (!area) if (!area)
return 0; return false;
xol_vaddr = xol_take_insn_slot(area); wait_event(area->wq, (slot_nr = xol_get_slot_nr(area)) < UINSNS_PER_PAGE);
if (unlikely(!xol_vaddr))
return 0;
arch_uprobe_copy_ixol(area->page, xol_vaddr, utask->xol_vaddr = area->vaddr + slot_nr * UPROBE_XOL_SLOT_BYTES;
arch_uprobe_copy_ixol(area->page, utask->xol_vaddr,
&uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); &uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
return true;
return xol_vaddr;
} }
/* /*
* xol_free_insn_slot - If slot was earlier allocated by * xol_free_insn_slot - free the slot allocated by xol_get_insn_slot()
* @xol_get_insn_slot(), make the slot available for
* subsequent requests.
*/ */
static void xol_free_insn_slot(struct task_struct *tsk) static void xol_free_insn_slot(struct uprobe_task *utask)
{ {
struct xol_area *area; struct xol_area *area = current->mm->uprobes_state.xol_area;
unsigned long vma_end; unsigned long offset = utask->xol_vaddr - area->vaddr;
unsigned long slot_addr; unsigned int slot_nr;
if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask) utask->xol_vaddr = 0;
/* xol_vaddr must fit into [area->vaddr, area->vaddr + PAGE_SIZE) */
if (WARN_ON_ONCE(offset >= PAGE_SIZE))
return; return;
slot_addr = tsk->utask->xol_vaddr; slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
if (unlikely(!slot_addr)) clear_bit(slot_nr, area->bitmap);
return; smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
if (waitqueue_active(&area->wq))
area = tsk->mm->uprobes_state.xol_area; wake_up(&area->wq);
vma_end = area->vaddr + PAGE_SIZE;
if (area->vaddr <= slot_addr && slot_addr < vma_end) {
unsigned long offset;
int slot_nr;
offset = slot_addr - area->vaddr;
slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
if (slot_nr >= UINSNS_PER_PAGE)
return;
clear_bit(slot_nr, area->bitmap);
atomic_dec(&area->slot_count);
smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
if (waitqueue_active(&area->wq))
wake_up(&area->wq);
tsk->utask->xol_vaddr = 0;
}
} }
void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
@ -1750,11 +1889,18 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
return instruction_pointer(regs); return instruction_pointer(regs);
} }
static struct return_instance *free_ret_instance(struct return_instance *ri) static struct return_instance *free_ret_instance(struct return_instance *ri, bool cleanup_hprobe)
{ {
struct return_instance *next = ri->next; struct return_instance *next = ri->next;
put_uprobe(ri->uprobe);
kfree(ri); if (cleanup_hprobe) {
enum hprobe_state hstate;
(void)hprobe_consume(&ri->hprobe, &hstate);
hprobe_finalize(&ri->hprobe, hstate);
}
kfree_rcu(ri, rcu);
return next; return next;
} }
@ -1770,18 +1916,50 @@ void uprobe_free_utask(struct task_struct *t)
if (!utask) if (!utask)
return; return;
if (utask->active_uprobe) WARN_ON_ONCE(utask->active_uprobe || utask->xol_vaddr);
put_uprobe(utask->active_uprobe);
timer_delete_sync(&utask->ri_timer);
ri = utask->return_instances; ri = utask->return_instances;
while (ri) while (ri)
ri = free_ret_instance(ri); ri = free_ret_instance(ri, true /* cleanup_hprobe */);
xol_free_insn_slot(t);
kfree(utask); kfree(utask);
t->utask = NULL; t->utask = NULL;
} }
#define RI_TIMER_PERIOD (HZ / 10) /* 100 ms */
#define for_each_ret_instance_rcu(pos, head) \
for (pos = rcu_dereference_raw(head); pos; pos = rcu_dereference_raw(pos->next))
/*
 * Callback of uprobe_task::ri_timer (installed by timer_setup() in
 * alloc_utask()): walks the owning task's return_instances list and calls
 * hprobe_expire() on every entry's hprobe.
 */
static void ri_timer(struct timer_list *timer)
{
struct uprobe_task *utask = container_of(timer, struct uprobe_task, ri_timer);
struct return_instance *ri;
/* SRCU protects uprobe from reuse for the cmpxchg() inside hprobe_expire(). */
guard(srcu)(&uretprobes_srcu);
/* RCU protects return_instance from freeing. */
guard(rcu)();
/* get == false: this caller does not ask for an extra uprobe reference. */
for_each_ret_instance_rcu(ri, utask->return_instances)
hprobe_expire(&ri->hprobe, false);
}
/*
 * Allocate a zeroed uprobe_task and arm its ri_timer with the ri_timer()
 * callback. Returns NULL when the allocation fails.
 */
static struct uprobe_task *alloc_utask(void)
{
	struct uprobe_task *utask = kzalloc(sizeof(*utask), GFP_KERNEL);

	if (utask)
		timer_setup(&utask->ri_timer, ri_timer, 0);

	return utask;
}
/* /*
* Allocate a uprobe_task object for the task if necessary. * Allocate a uprobe_task object for the task if necessary.
* Called when the thread hits a breakpoint. * Called when the thread hits a breakpoint.
@ -1793,38 +1971,73 @@ void uprobe_free_utask(struct task_struct *t)
static struct uprobe_task *get_utask(void) static struct uprobe_task *get_utask(void)
{ {
if (!current->utask) if (!current->utask)
current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); current->utask = alloc_utask();
return current->utask; return current->utask;
} }
/*
 * Size in bytes of a return_instance that carries @consumers_cnt entries in
 * its trailing consumers[] array.
 */
static size_t ri_size(int consumers_cnt)
{
	struct return_instance *ri;
	size_t base = sizeof(*ri);
	size_t per_consumer = sizeof(ri->consumers[0]);

	return base + per_consumer * consumers_cnt;
}
#define DEF_CNT 4
/*
 * Allocate a zeroed return_instance with room for DEF_CNT consumers.
 *
 * On allocation failure ZERO_SIZE_PTR (not NULL) is returned, so callers
 * have to test the result with ZERO_OR_NULL_PTR() or compare against
 * ZERO_SIZE_PTR before dereferencing it.
 */
static struct return_instance *alloc_return_instance(void)
{
	struct return_instance *ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL);

	if (ri) {
		ri->consumers_cnt = DEF_CNT;
		return ri;
	}

	return ZERO_SIZE_PTR;
}
/*
 * Byte-wise copy of @old, including its old->consumers_cnt trailing
 * consumer entries. Returns the kmemdup() result, i.e. NULL on failure.
 */
static struct return_instance *dup_return_instance(struct return_instance *old)
{
	return kmemdup(old, ri_size(old->consumers_cnt), GFP_KERNEL);
}
static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
{ {
struct uprobe_task *n_utask; struct uprobe_task *n_utask;
struct return_instance **p, *o, *n; struct return_instance **p, *o, *n;
struct uprobe *uprobe;
n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); n_utask = alloc_utask();
if (!n_utask) if (!n_utask)
return -ENOMEM; return -ENOMEM;
t->utask = n_utask; t->utask = n_utask;
/* protect uprobes from freeing, we'll need try_get_uprobe() them */
guard(srcu)(&uretprobes_srcu);
p = &n_utask->return_instances; p = &n_utask->return_instances;
for (o = o_utask->return_instances; o; o = o->next) { for (o = o_utask->return_instances; o; o = o->next) {
n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); n = dup_return_instance(o);
if (!n) if (!n)
return -ENOMEM; return -ENOMEM;
*n = *o; /* if uprobe is non-NULL, we'll have an extra refcount for uprobe */
/* uprobe = hprobe_expire(&o->hprobe, true);
* uprobe's refcnt has to be positive at this point, kept by
* utask->return_instances items; return_instances can't be
* removed right now, as task is blocked due to duping; so
* get_uprobe() is safe to use here.
*/
get_uprobe(n->uprobe);
n->next = NULL;
*p = n; /*
* New utask will have stable properly refcounted uprobe or
* NULL. Even if we failed to get refcounted uprobe, we still
* need to preserve full set of return_instances for proper
* uretprobe handling and nesting in forked task.
*/
hprobe_init_stable(&n->hprobe, uprobe);
n->next = NULL;
rcu_assign_pointer(*p, n);
p = &n->next; p = &n->next;
n_utask->depth++; n_utask->depth++;
} }
@ -1900,45 +2113,34 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL; enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL;
while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) { while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) {
ri = free_ret_instance(ri); ri = free_ret_instance(ri, true /* cleanup_hprobe */);
utask->depth--; utask->depth--;
} }
utask->return_instances = ri; rcu_assign_pointer(utask->return_instances, ri);
} }
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
struct return_instance *ri)
{ {
struct return_instance *ri; struct uprobe_task *utask = current->utask;
struct uprobe_task *utask;
unsigned long orig_ret_vaddr, trampoline_vaddr; unsigned long orig_ret_vaddr, trampoline_vaddr;
bool chained; bool chained;
int srcu_idx;
if (!get_xol_area()) if (!get_xol_area())
return; goto free;
utask = get_utask();
if (!utask)
return;
if (utask->depth >= MAX_URETPROBE_DEPTH) { if (utask->depth >= MAX_URETPROBE_DEPTH) {
printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
" nestedness limit pid/tgid=%d/%d\n", " nestedness limit pid/tgid=%d/%d\n",
current->pid, current->tgid); current->pid, current->tgid);
return; goto free;
} }
/* we need to bump refcount to store uprobe in utask */
if (!try_get_uprobe(uprobe))
return;
ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
if (!ri)
goto fail;
trampoline_vaddr = uprobe_get_trampoline_vaddr(); trampoline_vaddr = uprobe_get_trampoline_vaddr();
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
if (orig_ret_vaddr == -1) if (orig_ret_vaddr == -1)
goto fail; goto free;
/* drop the entries invalidated by longjmp() */ /* drop the entries invalidated by longjmp() */
chained = (orig_ret_vaddr == trampoline_vaddr); chained = (orig_ret_vaddr == trampoline_vaddr);
@ -1956,53 +2158,51 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
* attack from user-space. * attack from user-space.
*/ */
uprobe_warn(current, "handle tail call"); uprobe_warn(current, "handle tail call");
goto fail; goto free;
} }
orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
} }
ri->uprobe = uprobe;
/* __srcu_read_lock() because SRCU lock survives switch to user space */
srcu_idx = __srcu_read_lock(&uretprobes_srcu);
ri->func = instruction_pointer(regs); ri->func = instruction_pointer(regs);
ri->stack = user_stack_pointer(regs); ri->stack = user_stack_pointer(regs);
ri->orig_ret_vaddr = orig_ret_vaddr; ri->orig_ret_vaddr = orig_ret_vaddr;
ri->chained = chained; ri->chained = chained;
utask->depth++; utask->depth++;
hprobe_init_leased(&ri->hprobe, uprobe, srcu_idx);
ri->next = utask->return_instances; ri->next = utask->return_instances;
utask->return_instances = ri; rcu_assign_pointer(utask->return_instances, ri);
mod_timer(&utask->ri_timer, jiffies + RI_TIMER_PERIOD);
return; return;
fail: free:
kfree(ri); kfree(ri);
put_uprobe(uprobe);
} }
/* Prepare to single-step probed instruction out of line. */ /* Prepare to single-step probed instruction out of line. */
static int static int
pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
{ {
struct uprobe_task *utask; struct uprobe_task *utask = current->utask;
unsigned long xol_vaddr;
int err; int err;
utask = get_utask();
if (!utask)
return -ENOMEM;
if (!try_get_uprobe(uprobe)) if (!try_get_uprobe(uprobe))
return -EINVAL; return -EINVAL;
xol_vaddr = xol_get_insn_slot(uprobe); if (!xol_get_insn_slot(uprobe, utask)) {
if (!xol_vaddr) {
err = -ENOMEM; err = -ENOMEM;
goto err_out; goto err_out;
} }
utask->xol_vaddr = xol_vaddr;
utask->vaddr = bp_vaddr; utask->vaddr = bp_vaddr;
err = arch_uprobe_pre_xol(&uprobe->arch, regs); err = arch_uprobe_pre_xol(&uprobe->arch, regs);
if (unlikely(err)) { if (unlikely(err)) {
xol_free_insn_slot(current); xol_free_insn_slot(utask);
goto err_out; goto err_out;
} }
@ -2125,35 +2325,90 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
return uprobe; return uprobe;
} }
/*
 * Store (@id, @cookie) in slot @idx of @ri->consumers[], growing @ri by
 * DEF_CNT slots when @idx is beyond the currently allocated count.
 *
 * Returns the return_instance to keep using: growing goes through krealloc(),
 * so the caller must replace its pointer with the returned one. If @ri is
 * already ZERO_SIZE_PTR (the failure marker alloc_return_instance() returns)
 * it is passed through untouched. If growing fails, the old instance is freed
 * and ZERO_SIZE_PTR is returned.
 */
static struct return_instance*
push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie)
{
/* An earlier allocation failed; keep propagating the marker. */
if (unlikely(ri == ZERO_SIZE_PTR))
return ri;
if (unlikely(idx >= ri->consumers_cnt)) {
struct return_instance *old_ri = ri;
/* Bump the count first: it is read back below to size the new buffer. */
ri->consumers_cnt += DEF_CNT;
ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL);
if (!ri) {
/* krealloc() failed, so the old buffer is still ours to free. */
kfree(old_ri);
return ZERO_SIZE_PTR;
}
}
ri->consumers[idx].id = id;
ri->consumers[idx].cookie = cookie;
return ri;
}
static struct return_consumer *
return_consumer_find(struct return_instance *ri, int *iter, int id)
{
struct return_consumer *ric;
int idx = *iter;
for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
if (ric->id == id) {
*iter = idx + 1;
return ric;
}
}
return NULL;
}
static bool ignore_ret_handler(int rc)
{
return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE;
}
static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
{ {
struct uprobe_consumer *uc; struct uprobe_consumer *uc;
int remove = UPROBE_HANDLER_REMOVE; bool has_consumers = false, remove = true;
bool need_prep = false; /* prepare return uprobe, when needed */ struct return_instance *ri = NULL;
bool has_consumers = false; int push_idx = 0;
current->utask->auprobe = &uprobe->arch; current->utask->auprobe = &uprobe->arch;
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node, list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
srcu_read_lock_held(&uprobes_srcu)) { bool session = uc->handler && uc->ret_handler;
__u64 cookie = 0;
int rc = 0; int rc = 0;
if (uc->handler) { if (uc->handler) {
rc = uc->handler(uc, regs); rc = uc->handler(uc, regs, &cookie);
WARN(rc & ~UPROBE_HANDLER_MASK, WARN(rc < 0 || rc > 2,
"bad rc=0x%x from %ps()\n", rc, uc->handler); "bad rc=0x%x from %ps()\n", rc, uc->handler);
} }
if (uc->ret_handler) remove &= rc == UPROBE_HANDLER_REMOVE;
need_prep = true;
remove &= rc;
has_consumers = true; has_consumers = true;
if (!uc->ret_handler || ignore_ret_handler(rc))
continue;
if (!ri)
ri = alloc_return_instance();
if (session)
ri = push_consumer(ri, push_idx++, uc->id, cookie);
} }
current->utask->auprobe = NULL; current->utask->auprobe = NULL;
if (need_prep && !remove) if (!ZERO_OR_NULL_PTR(ri)) {
prepare_uretprobe(uprobe, regs); /* put bp at return */ /*
* The push_idx value has the final number of return consumers,
* and ri->consumers_cnt has number of allocated consumers.
*/
ri->consumers_cnt = push_idx;
prepare_uretprobe(uprobe, regs, ri);
}
if (remove && has_consumers) { if (remove && has_consumers) {
down_read(&uprobe->register_rwsem); down_read(&uprobe->register_rwsem);
@ -2169,19 +2424,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
} }
static void static void
handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) handle_uretprobe_chain(struct return_instance *ri, struct uprobe *uprobe, struct pt_regs *regs)
{ {
struct uprobe *uprobe = ri->uprobe; struct return_consumer *ric;
struct uprobe_consumer *uc; struct uprobe_consumer *uc;
int srcu_idx; int ric_idx = 0;
srcu_idx = srcu_read_lock(&uprobes_srcu); /* all consumers unsubscribed meanwhile */
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node, if (unlikely(!uprobe))
srcu_read_lock_held(&uprobes_srcu)) { return;
if (uc->ret_handler)
uc->ret_handler(uc, ri->func, regs); rcu_read_lock_trace();
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
bool session = uc->handler && uc->ret_handler;
if (uc->ret_handler) {
ric = return_consumer_find(ri, &ric_idx, uc->id);
if (!session || ric)
uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL);
}
} }
srcu_read_unlock(&uprobes_srcu, srcu_idx); rcu_read_unlock_trace();
} }
static struct return_instance *find_next_ret_chain(struct return_instance *ri) static struct return_instance *find_next_ret_chain(struct return_instance *ri)
@ -2200,6 +2463,8 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
{ {
struct uprobe_task *utask; struct uprobe_task *utask;
struct return_instance *ri, *next; struct return_instance *ri, *next;
struct uprobe *uprobe;
enum hprobe_state hstate;
bool valid; bool valid;
utask = current->utask; utask = current->utask;
@ -2230,21 +2495,24 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
* trampoline addresses on the stack are replaced with correct * trampoline addresses on the stack are replaced with correct
* original return addresses * original return addresses
*/ */
utask->return_instances = ri->next; rcu_assign_pointer(utask->return_instances, ri->next);
uprobe = hprobe_consume(&ri->hprobe, &hstate);
if (valid) if (valid)
handle_uretprobe_chain(ri, regs); handle_uretprobe_chain(ri, uprobe, regs);
ri = free_ret_instance(ri); hprobe_finalize(&ri->hprobe, hstate);
/* We already took care of hprobe, no need to waste more time on that. */
ri = free_ret_instance(ri, false /* !cleanup_hprobe */);
utask->depth--; utask->depth--;
} while (ri != next); } while (ri != next);
} while (!valid); } while (!valid);
utask->return_instances = ri;
return; return;
sigill: sigill:
uprobe_warn(current, "handle uretprobe, sending SIGILL."); uprobe_warn(current, "handle uretprobe, sending SIGILL.");
force_sig(SIGILL); force_sig(SIGILL);
} }
bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs) bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
@ -2266,13 +2534,13 @@ static void handle_swbp(struct pt_regs *regs)
{ {
struct uprobe *uprobe; struct uprobe *uprobe;
unsigned long bp_vaddr; unsigned long bp_vaddr;
int is_swbp, srcu_idx; int is_swbp;
bp_vaddr = uprobe_get_swbp_addr(regs); bp_vaddr = uprobe_get_swbp_addr(regs);
if (bp_vaddr == uprobe_get_trampoline_vaddr()) if (bp_vaddr == uprobe_get_trampoline_vaddr())
return uprobe_handle_trampoline(regs); return uprobe_handle_trampoline(regs);
srcu_idx = srcu_read_lock(&uprobes_srcu); rcu_read_lock_trace();
uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp); uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
if (!uprobe) { if (!uprobe) {
@ -2330,7 +2598,7 @@ static void handle_swbp(struct pt_regs *regs)
out: out:
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */ /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
srcu_read_unlock(&uprobes_srcu, srcu_idx); rcu_read_unlock_trace();
} }
/* /*
@ -2353,7 +2621,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
put_uprobe(uprobe); put_uprobe(uprobe);
utask->active_uprobe = NULL; utask->active_uprobe = NULL;
utask->state = UTASK_RUNNING; utask->state = UTASK_RUNNING;
xol_free_insn_slot(current); xol_free_insn_slot(utask);
spin_lock_irq(&current->sighand->siglock); spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* see uprobe_deny_signal() */ recalc_sigpending(); /* see uprobe_deny_signal() */

View File

@ -3240,7 +3240,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
} }
static int static int
uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
__u64 *data)
{ {
struct bpf_uprobe *uprobe; struct bpf_uprobe *uprobe;
@ -3249,7 +3250,8 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
} }
static int static int
uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
__u64 *data)
{ {
struct bpf_uprobe *uprobe; struct bpf_uprobe *uprobe;

View File

@ -89,9 +89,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
static int register_uprobe_event(struct trace_uprobe *tu); static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu); static int unregister_uprobe_event(struct trace_uprobe *tu);
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
__u64 *data);
static int uretprobe_dispatcher(struct uprobe_consumer *con, static int uretprobe_dispatcher(struct uprobe_consumer *con,
unsigned long func, struct pt_regs *regs); unsigned long func, struct pt_regs *regs,
__u64 *data);
#ifdef CONFIG_STACK_GROWSUP #ifdef CONFIG_STACK_GROWSUP
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
@ -1522,7 +1524,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
} }
} }
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
__u64 *data)
{ {
struct trace_uprobe *tu; struct trace_uprobe *tu;
struct uprobe_dispatch_data udd; struct uprobe_dispatch_data udd;
@ -1553,7 +1556,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
} }
static int uretprobe_dispatcher(struct uprobe_consumer *con, static int uretprobe_dispatcher(struct uprobe_consumer *con,
unsigned long func, struct pt_regs *regs) unsigned long func, struct pt_regs *regs,
__u64 *data)
{ {
struct trace_uprobe *tu; struct trace_uprobe *tu;
struct uprobe_dispatch_data udd; struct uprobe_dispatch_data udd;

View File

@ -461,7 +461,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
static int static int
uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
struct pt_regs *regs) struct pt_regs *regs, __u64 *data)
{ {
regs->ax = 0x12345678deadbeef; regs->ax = 0x12345678deadbeef;