perf/core improvements and fixes:
Fixes: - Support setting probes in versioned user space symbols, such as pthread_create@@GLIBC_2.1, picking the default one, more work needed to make it possible to set it on the other versions, as the 'perf probe' syntax already uses @ for other purposes. (Paul Clarke) - Do not special case address zero as an error for routines that return addresses (symbol lookup), instead use the return as the success/error indication and pass a pointer to return the address, fixing 'perf test vmlinux' (the one that compares address between vmlinux and kallsyms) on s/390, where the '_text' address is equal to zero (Arnaldo Carvalho de Melo) Infrastructure: - More header sanitization, moving stuff out of util.h into more appropriate headers and objects and sometimes creating new ones (Arnaldo Carvalho de Melo) - Refactor a duplicated code for obtaining config file name (Taeung Song) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQIcBAABCAAGBQJZCd/DAAoJENZQFvNTUqpAHLkP/i834z2r9/CQBIMiPOq3ciCd W3K4JsHp3IGFg2rFwit6QnRtTycaZyhupBBNnBj+4OLLT5zujemP3VHbLRf3rvrY Xhx7dlpSYkvpDXOB4lUElrIknIp4jLO329aGW9plRk7vGYa2q97NfDcQYqwRKnd+ 1Y4Z2Bg2ImTWhsrmD+YuI8MwzFcQnG5oAavbbXFP5Bnmorh56auJ4Y6doEThmVbC T0CnYyG29i9KlN1pIm4CDpjVH/aGNZpGhKBJlYGhCWDgxQwstMY2bKwa+6VyITpv FgtU/YKW9ebqT0v2nENjU2XAoFktd3Chn3b8nhuNqN3081mGvIdr4ugMuh7bP0k2 XGiO7ILQAfpO9b0uxGlUX9evvduvM7GMIwdRuJ/jurxxIn4cHy1i6rcU/l096Y0b 9s81bd11NyK4eE7c4Z1IX9JNV0Jw3Knb9B2XEHXfbOx4s7QPsNUQvE0zXUefwmS+ h0YZ1GcAwxIc92JC7gy2iuik1tJ18Nd8Y9/Qnfziem8AIVX205d4miEz9Zx1NUJI pRB4CB9HnrdFZW1rgZ5ob53ToVTdFLAziKq2tEJPdCq2+e2VZfrb3KqeVeGvgRUN xDRvTwc2rgeGynn80t/ShsSpbXPwnmbBapbp5MQdF5T5ObSQOnYVmIGQ3SN3ST5y azaqjBjikhiPzxQJxIHM =gqm+ -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo-4.12-20170503' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: Fixes: - Support setting probes in versioned user space symbols, such as pthread_create@@GLIBC_2.1, picking the default one, more work needed to make it possible to set it on the other versions, as the 'perf probe' syntax already uses @ for other purposes. (Paul Clarke) - Do not special case address zero as an error for routines that return addresses (symbol lookup), instead use the return as the success/error indication and pass a pointer to return the address, fixing 'perf test vmlinux' (the one that compares address between vmlinux and kallsyms) on s/390, where the '_text' address is equal to zero (Arnaldo Carvalho de Melo) Infrastructure changes: - More header sanitization, moving stuff out of util.h into more appropriate headers and objects and sometimes creating new ones (Arnaldo Carvalho de Melo) - Refactor a duplicated code for obtaining config file name (Taeung Song) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
12c1c2fd78
@ -8,8 +8,9 @@ Overview
|
||||
--------
|
||||
These events are similar to tracepoint based events. Instead of Tracepoint,
|
||||
this is based on kprobes (kprobe and kretprobe). So it can probe wherever
|
||||
kprobes can probe (this means, all functions body except for __kprobes
|
||||
functions). Unlike the Tracepoint based event, this can be added and removed
|
||||
kprobes can probe (this means, all functions except those with
|
||||
__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
|
||||
Unlike the Tracepoint based event, this can be added and removed
|
||||
dynamically, on the fly.
|
||||
|
||||
To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
|
||||
|
@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd);
|
||||
extern void maybe_sigio_broken(int fd, int read);
|
||||
extern void sigio_broken(int fd, int read);
|
||||
|
||||
/* sys-x86_64/prctl.c */
|
||||
extern int os_arch_prctl(int pid, int code, unsigned long *addr);
|
||||
/* prctl.c */
|
||||
extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
|
||||
|
||||
/* tty.c */
|
||||
extern int get_pty(void);
|
||||
|
@ -390,3 +390,4 @@
|
||||
381 i386 pkey_alloc sys_pkey_alloc
|
||||
382 i386 pkey_free sys_pkey_free
|
||||
383 i386 statx sys_statx
|
||||
384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
|
||||
|
@ -11,6 +11,8 @@
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "perf/amd_iommu: " fmt
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/cpumask.h>
|
||||
@ -21,44 +23,42 @@
|
||||
|
||||
#define COUNTER_SHIFT 16
|
||||
|
||||
#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
|
||||
#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
|
||||
/* iommu pmu conf masks */
|
||||
#define GET_CSOURCE(x) ((x)->conf & 0xFFULL)
|
||||
#define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL)
|
||||
#define GET_DOMID(x) (((x)->conf >> 24) & 0xFFFFULL)
|
||||
#define GET_PASID(x) (((x)->conf >> 40) & 0xFFFFFULL)
|
||||
|
||||
/* iommu pmu config masks */
|
||||
#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
|
||||
#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
|
||||
#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
|
||||
#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
|
||||
#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
|
||||
#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
|
||||
#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
|
||||
/* iommu pmu conf1 masks */
|
||||
#define GET_DEVID_MASK(x) ((x)->conf1 & 0xFFFFULL)
|
||||
#define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL)
|
||||
#define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL)
|
||||
|
||||
static struct perf_amd_iommu __perf_iommu;
|
||||
#define IOMMU_NAME_SIZE 16
|
||||
|
||||
struct perf_amd_iommu {
|
||||
struct list_head list;
|
||||
struct pmu pmu;
|
||||
struct amd_iommu *iommu;
|
||||
char name[IOMMU_NAME_SIZE];
|
||||
u8 max_banks;
|
||||
u8 max_counters;
|
||||
u64 cntr_assign_mask;
|
||||
raw_spinlock_t lock;
|
||||
const struct attribute_group *attr_groups[4];
|
||||
};
|
||||
|
||||
#define format_group attr_groups[0]
|
||||
#define cpumask_group attr_groups[1]
|
||||
#define events_group attr_groups[2]
|
||||
#define null_group attr_groups[3]
|
||||
static LIST_HEAD(perf_amd_iommu_list);
|
||||
|
||||
/*---------------------------------------------
|
||||
* sysfs format attributes
|
||||
*---------------------------------------------*/
|
||||
PMU_FORMAT_ATTR(csource, "config:0-7");
|
||||
PMU_FORMAT_ATTR(devid, "config:8-23");
|
||||
PMU_FORMAT_ATTR(pasid, "config:24-39");
|
||||
PMU_FORMAT_ATTR(domid, "config:40-55");
|
||||
PMU_FORMAT_ATTR(domid, "config:24-39");
|
||||
PMU_FORMAT_ATTR(pasid, "config:40-59");
|
||||
PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
|
||||
PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
|
||||
PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
|
||||
PMU_FORMAT_ATTR(domid_mask, "config1:16-31");
|
||||
PMU_FORMAT_ATTR(pasid_mask, "config1:32-51");
|
||||
|
||||
static struct attribute *iommu_format_attrs[] = {
|
||||
&format_attr_csource.attr,
|
||||
@ -79,6 +79,10 @@ static struct attribute_group amd_iommu_format_group = {
|
||||
/*---------------------------------------------
|
||||
* sysfs events attributes
|
||||
*---------------------------------------------*/
|
||||
static struct attribute_group amd_iommu_events_group = {
|
||||
.name = "events",
|
||||
};
|
||||
|
||||
struct amd_iommu_event_desc {
|
||||
struct kobj_attribute attr;
|
||||
const char *event;
|
||||
@ -150,30 +154,34 @@ static struct attribute_group amd_iommu_cpumask_group = {
|
||||
|
||||
/*---------------------------------------------*/
|
||||
|
||||
static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
|
||||
static int get_next_avail_iommu_bnk_cntr(struct perf_event *event)
|
||||
{
|
||||
struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
int max_cntrs = piommu->max_counters;
|
||||
int max_banks = piommu->max_banks;
|
||||
u32 shift, bank, cntr;
|
||||
unsigned long flags;
|
||||
int shift, bank, cntr, retval;
|
||||
int max_banks = perf_iommu->max_banks;
|
||||
int max_cntrs = perf_iommu->max_counters;
|
||||
int retval;
|
||||
|
||||
raw_spin_lock_irqsave(&perf_iommu->lock, flags);
|
||||
raw_spin_lock_irqsave(&piommu->lock, flags);
|
||||
|
||||
for (bank = 0, shift = 0; bank < max_banks; bank++) {
|
||||
for (cntr = 0; cntr < max_cntrs; cntr++) {
|
||||
shift = bank + (bank*3) + cntr;
|
||||
if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
|
||||
if (piommu->cntr_assign_mask & BIT_ULL(shift)) {
|
||||
continue;
|
||||
} else {
|
||||
perf_iommu->cntr_assign_mask |= (1ULL<<shift);
|
||||
retval = ((u16)((u16)bank<<8) | (u8)(cntr));
|
||||
piommu->cntr_assign_mask |= BIT_ULL(shift);
|
||||
event->hw.iommu_bank = bank;
|
||||
event->hw.iommu_cntr = cntr;
|
||||
retval = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
retval = -ENOSPC;
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
|
||||
raw_spin_unlock_irqrestore(&piommu->lock, flags);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -202,8 +210,6 @@ static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
|
||||
static int perf_iommu_event_init(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_amd_iommu *perf_iommu;
|
||||
u64 config, config1;
|
||||
|
||||
/* test the event attr type check for PMU enumeration */
|
||||
if (event->attr.type != event->pmu->type)
|
||||
@ -225,80 +231,62 @@ static int perf_iommu_event_init(struct perf_event *event)
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
perf_iommu = &__perf_iommu;
|
||||
|
||||
if (event->pmu != &perf_iommu->pmu)
|
||||
return -ENOENT;
|
||||
|
||||
if (perf_iommu) {
|
||||
config = event->attr.config;
|
||||
config1 = event->attr.config1;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* integrate with iommu base devid (0000), assume one iommu */
|
||||
perf_iommu->max_banks =
|
||||
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
|
||||
perf_iommu->max_counters =
|
||||
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
|
||||
if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
|
||||
return -EINVAL;
|
||||
|
||||
/* update the hw_perf_event struct with the iommu config data */
|
||||
hwc->config = config;
|
||||
hwc->extra_reg.config = config1;
|
||||
hwc->conf = event->attr.config;
|
||||
hwc->conf1 = event->attr.config1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev)
|
||||
{
|
||||
return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu;
|
||||
}
|
||||
|
||||
static void perf_iommu_enable_event(struct perf_event *ev)
|
||||
{
|
||||
u8 csource = _GET_CSOURCE(ev);
|
||||
u16 devid = _GET_DEVID(ev);
|
||||
struct amd_iommu *iommu = perf_event_2_iommu(ev);
|
||||
struct hw_perf_event *hwc = &ev->hw;
|
||||
u8 bank = hwc->iommu_bank;
|
||||
u8 cntr = hwc->iommu_cntr;
|
||||
u64 reg = 0ULL;
|
||||
|
||||
reg = csource;
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®, true);
|
||||
reg = GET_CSOURCE(hwc);
|
||||
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, ®);
|
||||
|
||||
reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
|
||||
reg = GET_DEVID_MASK(hwc);
|
||||
reg = GET_DEVID(hwc) | (reg << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_DEVID_MATCH_REG, ®, true);
|
||||
reg |= BIT(31);
|
||||
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, ®);
|
||||
|
||||
reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
|
||||
reg = GET_PASID_MASK(hwc);
|
||||
reg = GET_PASID(hwc) | (reg << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_PASID_MATCH_REG, ®, true);
|
||||
reg |= BIT(31);
|
||||
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, ®);
|
||||
|
||||
reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
|
||||
reg = GET_DOMID_MASK(hwc);
|
||||
reg = GET_DOMID(hwc) | (reg << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_DOMID_MATCH_REG, ®, true);
|
||||
reg |= BIT(31);
|
||||
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, ®);
|
||||
}
|
||||
|
||||
static void perf_iommu_disable_event(struct perf_event *event)
|
||||
{
|
||||
struct amd_iommu *iommu = perf_event_2_iommu(event);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 reg = 0ULL;
|
||||
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®, true);
|
||||
amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®);
|
||||
}
|
||||
|
||||
static void perf_iommu_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_start\n");
|
||||
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
@ -306,10 +294,11 @@ static void perf_iommu_start(struct perf_event *event, int flags)
|
||||
hwc->state = 0;
|
||||
|
||||
if (flags & PERF_EF_RELOAD) {
|
||||
u64 prev_raw_count = local64_read(&hwc->prev_count);
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
|
||||
u64 prev_raw_count = local64_read(&hwc->prev_count);
|
||||
struct amd_iommu *iommu = perf_event_2_iommu(event);
|
||||
|
||||
amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
|
||||
IOMMU_PC_COUNTER_REG, &prev_raw_count);
|
||||
}
|
||||
|
||||
perf_iommu_enable_event(event);
|
||||
@ -319,37 +308,30 @@ static void perf_iommu_start(struct perf_event *event, int flags)
|
||||
|
||||
static void perf_iommu_read(struct perf_event *event)
|
||||
{
|
||||
u64 count = 0ULL;
|
||||
u64 prev_raw_count = 0ULL;
|
||||
u64 delta = 0ULL;
|
||||
u64 count, prev, delta;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
pr_debug("perf: amd_iommu:perf_iommu_read\n");
|
||||
struct amd_iommu *iommu = perf_event_2_iommu(event);
|
||||
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_REG, &count, false);
|
||||
|
||||
/* IOMMU pc counter register is only 48 bits */
|
||||
count &= 0xFFFFFFFFFFFFULL;
|
||||
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
count) != prev_raw_count)
|
||||
if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
|
||||
IOMMU_PC_COUNTER_REG, &count))
|
||||
return;
|
||||
|
||||
/* Handling 48-bit counter overflowing */
|
||||
delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
|
||||
/* IOMMU pc counter register is only 48 bits */
|
||||
count &= GENMASK_ULL(47, 0);
|
||||
|
||||
prev = local64_read(&hwc->prev_count);
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev)
|
||||
return;
|
||||
|
||||
/* Handle 48-bit counter overflow */
|
||||
delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
|
||||
delta >>= COUNTER_SHIFT;
|
||||
local64_add(delta, &event->count);
|
||||
|
||||
}
|
||||
|
||||
static void perf_iommu_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 config;
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_stop\n");
|
||||
|
||||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
@ -361,7 +343,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
|
||||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
|
||||
config = hwc->config;
|
||||
perf_iommu_read(event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
@ -369,17 +350,12 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
|
||||
static int perf_iommu_add(struct perf_event *event, int flags)
|
||||
{
|
||||
int retval;
|
||||
struct perf_amd_iommu *perf_iommu =
|
||||
container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_add\n");
|
||||
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
/* request an iommu bank/counter */
|
||||
retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
|
||||
if (retval != -ENOSPC)
|
||||
event->hw.extra_reg.reg = (u16)retval;
|
||||
else
|
||||
retval = get_next_avail_iommu_bnk_cntr(event);
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
@ -390,115 +366,124 @@ static int perf_iommu_add(struct perf_event *event, int flags)
|
||||
|
||||
static void perf_iommu_del(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_amd_iommu *perf_iommu =
|
||||
container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_del\n");
|
||||
perf_iommu_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
/* clear the assigned iommu bank/counter */
|
||||
clear_avail_iommu_bnk_cntr(perf_iommu,
|
||||
_GET_BANK(event),
|
||||
_GET_CNTR(event));
|
||||
hwc->iommu_bank, hwc->iommu_cntr);
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
|
||||
static __init int _init_events_attrs(void)
|
||||
{
|
||||
struct attribute **attrs;
|
||||
struct attribute_group *attr_group;
|
||||
int i = 0, j;
|
||||
struct attribute **attrs;
|
||||
|
||||
while (amd_iommu_v2_event_descs[i].attr.attr.name)
|
||||
i++;
|
||||
|
||||
attr_group = kzalloc(sizeof(struct attribute *)
|
||||
* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
|
||||
if (!attr_group)
|
||||
attrs = kzalloc(sizeof(struct attribute **) * (i + 1), GFP_KERNEL);
|
||||
if (!attrs)
|
||||
return -ENOMEM;
|
||||
|
||||
attrs = (struct attribute **)(attr_group + 1);
|
||||
for (j = 0; j < i; j++)
|
||||
attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
|
||||
|
||||
attr_group->name = "events";
|
||||
attr_group->attrs = attrs;
|
||||
perf_iommu->events_group = attr_group;
|
||||
|
||||
amd_iommu_events_group.attrs = attrs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init void amd_iommu_pc_exit(void)
|
||||
{
|
||||
if (__perf_iommu.events_group != NULL) {
|
||||
kfree(__perf_iommu.events_group);
|
||||
__perf_iommu.events_group = NULL;
|
||||
}
|
||||
}
|
||||
const struct attribute_group *amd_iommu_attr_groups[] = {
|
||||
&amd_iommu_format_group,
|
||||
&amd_iommu_cpumask_group,
|
||||
&amd_iommu_events_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static __init int _init_perf_amd_iommu(
|
||||
struct perf_amd_iommu *perf_iommu, char *name)
|
||||
static struct pmu iommu_pmu = {
|
||||
.event_init = perf_iommu_event_init,
|
||||
.add = perf_iommu_add,
|
||||
.del = perf_iommu_del,
|
||||
.start = perf_iommu_start,
|
||||
.stop = perf_iommu_stop,
|
||||
.read = perf_iommu_read,
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.attr_groups = amd_iommu_attr_groups,
|
||||
};
|
||||
|
||||
static __init int init_one_iommu(unsigned int idx)
|
||||
{
|
||||
struct perf_amd_iommu *perf_iommu;
|
||||
int ret;
|
||||
|
||||
perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
|
||||
if (!perf_iommu)
|
||||
return -ENOMEM;
|
||||
|
||||
raw_spin_lock_init(&perf_iommu->lock);
|
||||
|
||||
/* Init format attributes */
|
||||
perf_iommu->format_group = &amd_iommu_format_group;
|
||||
perf_iommu->pmu = iommu_pmu;
|
||||
perf_iommu->iommu = get_amd_iommu(idx);
|
||||
perf_iommu->max_banks = amd_iommu_pc_get_max_banks(idx);
|
||||
perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);
|
||||
|
||||
/* Init cpumask attributes to only core 0 */
|
||||
cpumask_set_cpu(0, &iommu_cpumask);
|
||||
perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
|
||||
|
||||
/* Init events attributes */
|
||||
if (_init_events_attrs(perf_iommu) != 0)
|
||||
pr_err("perf: amd_iommu: Only support raw events.\n");
|
||||
|
||||
/* Init null attributes */
|
||||
perf_iommu->null_group = NULL;
|
||||
perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
|
||||
|
||||
ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
|
||||
if (ret) {
|
||||
pr_err("perf: amd_iommu: Failed to initialized.\n");
|
||||
amd_iommu_pc_exit();
|
||||
} else {
|
||||
pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
|
||||
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
|
||||
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
|
||||
if (!perf_iommu->iommu ||
|
||||
!perf_iommu->max_banks ||
|
||||
!perf_iommu->max_counters) {
|
||||
kfree(perf_iommu);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx);
|
||||
|
||||
ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
|
||||
if (!ret) {
|
||||
pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n",
|
||||
idx, perf_iommu->max_banks, perf_iommu->max_counters);
|
||||
list_add_tail(&perf_iommu->list, &perf_amd_iommu_list);
|
||||
} else {
|
||||
pr_warn("Error initializing IOMMU %d.\n", idx);
|
||||
kfree(perf_iommu);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct perf_amd_iommu __perf_iommu = {
|
||||
.pmu = {
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.event_init = perf_iommu_event_init,
|
||||
.add = perf_iommu_add,
|
||||
.del = perf_iommu_del,
|
||||
.start = perf_iommu_start,
|
||||
.stop = perf_iommu_stop,
|
||||
.read = perf_iommu_read,
|
||||
},
|
||||
.max_banks = 0x00,
|
||||
.max_counters = 0x00,
|
||||
.cntr_assign_mask = 0ULL,
|
||||
.format_group = NULL,
|
||||
.cpumask_group = NULL,
|
||||
.events_group = NULL,
|
||||
.null_group = NULL,
|
||||
};
|
||||
|
||||
static __init int amd_iommu_pc_init(void)
|
||||
{
|
||||
unsigned int i, cnt = 0;
|
||||
int ret;
|
||||
|
||||
/* Make sure the IOMMU PC resource is available */
|
||||
if (!amd_iommu_pc_supported())
|
||||
return -ENODEV;
|
||||
|
||||
_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
|
||||
ret = _init_events_attrs();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* An IOMMU PMU is specific to an IOMMU, and can function independently.
|
||||
* So we go through all IOMMUs and ignore the one that fails init
|
||||
* unless all IOMMU are failing.
|
||||
*/
|
||||
for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
|
||||
ret = init_one_iommu(i);
|
||||
if (!ret)
|
||||
cnt++;
|
||||
}
|
||||
|
||||
if (!cnt) {
|
||||
kfree(amd_iommu_events_group.attrs);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* Init cpumask attributes to only core 0 */
|
||||
cpumask_set_cpu(0, &iommu_cpumask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -24,17 +24,23 @@
|
||||
#define PC_MAX_SPEC_BNKS 64
|
||||
#define PC_MAX_SPEC_CNTRS 16
|
||||
|
||||
/* iommu pc reg masks*/
|
||||
#define IOMMU_BASE_DEVID 0x0000
|
||||
struct amd_iommu;
|
||||
|
||||
/* amd_iommu_init.c external support functions */
|
||||
extern int amd_iommu_get_num_iommus(void);
|
||||
|
||||
extern bool amd_iommu_pc_supported(void);
|
||||
|
||||
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
|
||||
extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
|
||||
|
||||
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
|
||||
extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
|
||||
|
||||
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value, bool is_write);
|
||||
extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value);
|
||||
|
||||
extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value);
|
||||
|
||||
extern struct amd_iommu *get_amd_iommu(int idx);
|
||||
|
||||
#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
|
||||
|
@ -30,6 +30,9 @@
|
||||
|
||||
#define COUNTER_SHIFT 16
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "amd_uncore: " fmt
|
||||
|
||||
static int num_counters_llc;
|
||||
static int num_counters_nb;
|
||||
|
||||
@ -509,51 +512,34 @@ static int __init amd_uncore_init(void)
|
||||
int ret = -ENODEV;
|
||||
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
|
||||
goto fail_nodev;
|
||||
|
||||
switch(boot_cpu_data.x86) {
|
||||
case 23:
|
||||
/* Family 17h: */
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L3;
|
||||
/*
|
||||
* For Family17h, the NorthBridge counters are
|
||||
* re-purposed as Data Fabric counters. Also, support is
|
||||
* added for L3 counters. The pmus are exported based on
|
||||
* family as either L2 or L3 and NB or DF.
|
||||
*/
|
||||
amd_nb_pmu.name = "amd_df";
|
||||
amd_llc_pmu.name = "amd_l3";
|
||||
format_attr_event_df.show = &event_show_df;
|
||||
format_attr_event_l3.show = &event_show_l3;
|
||||
break;
|
||||
case 22:
|
||||
/* Family 16h - may change: */
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L2;
|
||||
amd_nb_pmu.name = "amd_nb";
|
||||
amd_llc_pmu.name = "amd_l2";
|
||||
format_attr_event_df = format_attr_event;
|
||||
format_attr_event_l3 = format_attr_event;
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* All prior families have the same number of
|
||||
* NorthBridge and Last Level Cache counters
|
||||
*/
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L2;
|
||||
amd_nb_pmu.name = "amd_nb";
|
||||
amd_llc_pmu.name = "amd_l2";
|
||||
format_attr_event_df = format_attr_event;
|
||||
format_attr_event_l3 = format_attr_event;
|
||||
break;
|
||||
}
|
||||
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
|
||||
amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
|
||||
return -ENODEV;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
|
||||
goto fail_nodev;
|
||||
return -ENODEV;
|
||||
|
||||
if (boot_cpu_data.x86 == 0x17) {
|
||||
/*
|
||||
* For F17h, the Northbridge counters are repurposed as Data
|
||||
* Fabric counters. Also, L3 counters are supported too. The PMUs
|
||||
* are exported based on family as either L2 or L3 and NB or DF.
|
||||
*/
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L3;
|
||||
amd_nb_pmu.name = "amd_df";
|
||||
amd_llc_pmu.name = "amd_l3";
|
||||
format_attr_event_df.show = &event_show_df;
|
||||
format_attr_event_l3.show = &event_show_l3;
|
||||
} else {
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L2;
|
||||
amd_nb_pmu.name = "amd_nb";
|
||||
amd_llc_pmu.name = "amd_l2";
|
||||
format_attr_event_df = format_attr_event;
|
||||
format_attr_event_l3 = format_attr_event;
|
||||
}
|
||||
|
||||
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
|
||||
amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
|
||||
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
|
||||
@ -565,7 +551,7 @@ static int __init amd_uncore_init(void)
|
||||
if (ret)
|
||||
goto fail_nb;
|
||||
|
||||
pr_info("perf: AMD NB counters detected\n");
|
||||
pr_info("AMD NB counters detected\n");
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
@ -579,7 +565,7 @@ static int __init amd_uncore_init(void)
|
||||
if (ret)
|
||||
goto fail_llc;
|
||||
|
||||
pr_info("perf: AMD LLC counters detected\n");
|
||||
pr_info("AMD LLC counters detected\n");
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
@ -615,7 +601,6 @@ fail_nb:
|
||||
if (amd_uncore_nb)
|
||||
free_percpu(amd_uncore_nb);
|
||||
|
||||
fail_nodev:
|
||||
return ret;
|
||||
}
|
||||
device_initcall(amd_uncore_init);
|
||||
|
@ -63,7 +63,6 @@ struct bts_buffer {
|
||||
unsigned int cur_buf;
|
||||
bool snapshot;
|
||||
local_t data_size;
|
||||
local_t lost;
|
||||
local_t head;
|
||||
unsigned long end;
|
||||
void **data_pages;
|
||||
@ -199,7 +198,8 @@ static void bts_update(struct bts_ctx *bts)
|
||||
return;
|
||||
|
||||
if (ds->bts_index >= ds->bts_absolute_maximum)
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(&bts->handle,
|
||||
PERF_AUX_FLAG_TRUNCATED);
|
||||
|
||||
/*
|
||||
* old and head are always in the same physical buffer, so we
|
||||
@ -276,7 +276,7 @@ static void bts_event_start(struct perf_event *event, int flags)
|
||||
return;
|
||||
|
||||
fail_end_stop:
|
||||
perf_aux_output_end(&bts->handle, 0, false);
|
||||
perf_aux_output_end(&bts->handle, 0);
|
||||
|
||||
fail_stop:
|
||||
event->hw.state = PERF_HES_STOPPED;
|
||||
@ -319,9 +319,8 @@ static void bts_event_stop(struct perf_event *event, int flags)
|
||||
bts->handle.head =
|
||||
local_xchg(&buf->data_size,
|
||||
buf->nr_pages << PAGE_SHIFT);
|
||||
|
||||
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
|
||||
!!local_xchg(&buf->lost, 0));
|
||||
perf_aux_output_end(&bts->handle,
|
||||
local_xchg(&buf->data_size, 0));
|
||||
}
|
||||
|
||||
cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
|
||||
@ -484,8 +483,7 @@ int intel_bts_interrupt(void)
|
||||
if (old_head == local_read(&buf->head))
|
||||
return handled;
|
||||
|
||||
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
|
||||
!!local_xchg(&buf->lost, 0));
|
||||
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0));
|
||||
|
||||
buf = perf_aux_output_begin(&bts->handle, event);
|
||||
if (buf)
|
||||
@ -500,7 +498,7 @@ int intel_bts_interrupt(void)
|
||||
* cleared handle::event
|
||||
*/
|
||||
barrier();
|
||||
perf_aux_output_end(&bts->handle, 0, false);
|
||||
perf_aux_output_end(&bts->handle, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1553,6 +1553,27 @@ static __initconst const u64 slm_hw_cache_event_ids
|
||||
},
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c");
|
||||
EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3");
|
||||
/* UOPS_NOT_DELIVERED.ANY */
|
||||
EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c");
|
||||
/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
|
||||
EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02");
|
||||
/* UOPS_RETIRED.ANY */
|
||||
EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2");
|
||||
/* UOPS_ISSUED.ANY */
|
||||
EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e");
|
||||
|
||||
static struct attribute *glm_events_attrs[] = {
|
||||
EVENT_PTR(td_total_slots_glm),
|
||||
EVENT_PTR(td_total_slots_scale_glm),
|
||||
EVENT_PTR(td_fetch_bubbles_glm),
|
||||
EVENT_PTR(td_recovery_bubbles_glm),
|
||||
EVENT_PTR(td_slots_issued_glm),
|
||||
EVENT_PTR(td_slots_retired_glm),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
|
||||
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
|
||||
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
|
||||
@ -2130,7 +2151,7 @@ again:
|
||||
* counters from the GLOBAL_STATUS mask and we always process PEBS
|
||||
* events via drain_pebs().
|
||||
*/
|
||||
status &= ~cpuc->pebs_enabled;
|
||||
status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
|
||||
|
||||
/*
|
||||
* PEBS overflow sets bit 62 in the global status register
|
||||
@ -3750,6 +3771,7 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.pebs_prec_dist = true;
|
||||
x86_pmu.lbr_pt_coexist = true;
|
||||
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
|
||||
x86_pmu.cpu_events = glm_events_attrs;
|
||||
pr_cont("Goldmont events, ");
|
||||
break;
|
||||
|
||||
|
@ -1222,7 +1222,7 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
|
||||
|
||||
/* clear non-PEBS bit and re-check */
|
||||
pebs_status = p->status & cpuc->pebs_enabled;
|
||||
pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
|
||||
pebs_status &= PEBS_COUNTER_MASK;
|
||||
if (pebs_status == (1 << bit))
|
||||
return at;
|
||||
}
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <asm/insn.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/intel_pt.h>
|
||||
#include <asm/intel-family.h>
|
||||
|
||||
#include "../perf_event.h"
|
||||
#include "pt.h"
|
||||
@ -98,6 +99,7 @@ static struct attribute_group pt_cap_group = {
|
||||
.name = "caps",
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(pt, "config:0" );
|
||||
PMU_FORMAT_ATTR(cyc, "config:1" );
|
||||
PMU_FORMAT_ATTR(pwr_evt, "config:4" );
|
||||
PMU_FORMAT_ATTR(fup_on_ptw, "config:5" );
|
||||
@ -105,11 +107,13 @@ PMU_FORMAT_ATTR(mtc, "config:9" );
|
||||
PMU_FORMAT_ATTR(tsc, "config:10" );
|
||||
PMU_FORMAT_ATTR(noretcomp, "config:11" );
|
||||
PMU_FORMAT_ATTR(ptw, "config:12" );
|
||||
PMU_FORMAT_ATTR(branch, "config:13" );
|
||||
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
|
||||
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
|
||||
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
|
||||
|
||||
static struct attribute *pt_formats_attr[] = {
|
||||
&format_attr_pt.attr,
|
||||
&format_attr_cyc.attr,
|
||||
&format_attr_pwr_evt.attr,
|
||||
&format_attr_fup_on_ptw.attr,
|
||||
@ -117,6 +121,7 @@ static struct attribute *pt_formats_attr[] = {
|
||||
&format_attr_tsc.attr,
|
||||
&format_attr_noretcomp.attr,
|
||||
&format_attr_ptw.attr,
|
||||
&format_attr_branch.attr,
|
||||
&format_attr_mtc_period.attr,
|
||||
&format_attr_cyc_thresh.attr,
|
||||
&format_attr_psb_period.attr,
|
||||
@ -197,6 +202,19 @@ static int __init pt_pmu_hw_init(void)
|
||||
pt_pmu.tsc_art_den = eax;
|
||||
}
|
||||
|
||||
/* model-specific quirks */
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case INTEL_FAM6_BROADWELL_CORE:
|
||||
case INTEL_FAM6_BROADWELL_XEON_D:
|
||||
case INTEL_FAM6_BROADWELL_GT3E:
|
||||
case INTEL_FAM6_BROADWELL_X:
|
||||
/* not setting BRANCH_EN will #GP, erratum BDM106 */
|
||||
pt_pmu.branch_en_always_on = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_VMX)) {
|
||||
/*
|
||||
* Intel SDM, 36.5 "Tracing post-VMXON" says that
|
||||
@ -263,8 +281,20 @@ fail:
|
||||
#define RTIT_CTL_PTW (RTIT_CTL_PTW_EN | \
|
||||
RTIT_CTL_FUP_ON_PTW)
|
||||
|
||||
#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \
|
||||
/*
|
||||
* Bit 0 (TraceEn) in the attr.config is meaningless as the
|
||||
* corresponding bit in the RTIT_CTL can only be controlled
|
||||
* by the driver; therefore, repurpose it to mean: pass
|
||||
* through the bit that was previously assumed to be always
|
||||
* on for PT, thereby allowing the user to *not* set it if
|
||||
* they so wish. See also pt_event_valid() and pt_config().
|
||||
*/
|
||||
#define RTIT_CTL_PASSTHROUGH RTIT_CTL_TRACEEN
|
||||
|
||||
#define PT_CONFIG_MASK (RTIT_CTL_TRACEEN | \
|
||||
RTIT_CTL_TSC_EN | \
|
||||
RTIT_CTL_DISRETC | \
|
||||
RTIT_CTL_BRANCH_EN | \
|
||||
RTIT_CTL_CYC_PSB | \
|
||||
RTIT_CTL_MTC | \
|
||||
RTIT_CTL_PWR_EVT_EN | \
|
||||
@ -332,6 +362,33 @@ static bool pt_event_valid(struct perf_event *event)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config
|
||||
* clears the assomption that BranchEn must always be enabled,
|
||||
* as was the case with the first implementation of PT.
|
||||
* If this bit is not set, the legacy behavior is preserved
|
||||
* for compatibility with the older userspace.
|
||||
*
|
||||
* Re-using bit 0 for this purpose is fine because it is never
|
||||
* directly set by the user; previous attempts at setting it in
|
||||
* the attr.config resulted in -EINVAL.
|
||||
*/
|
||||
if (config & RTIT_CTL_PASSTHROUGH) {
|
||||
/*
|
||||
* Disallow not setting BRANCH_EN where BRANCH_EN is
|
||||
* always required.
|
||||
*/
|
||||
if (pt_pmu.branch_en_always_on &&
|
||||
!(config & RTIT_CTL_BRANCH_EN))
|
||||
return false;
|
||||
} else {
|
||||
/*
|
||||
* Disallow BRANCH_EN without the PASSTHROUGH.
|
||||
*/
|
||||
if (config & RTIT_CTL_BRANCH_EN)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -411,6 +468,7 @@ static u64 pt_config_filters(struct perf_event *event)
|
||||
|
||||
static void pt_config(struct perf_event *event)
|
||||
{
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
u64 reg;
|
||||
|
||||
if (!event->hw.itrace_started) {
|
||||
@ -419,7 +477,20 @@ static void pt_config(struct perf_event *event)
|
||||
}
|
||||
|
||||
reg = pt_config_filters(event);
|
||||
reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
|
||||
reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
|
||||
|
||||
/*
|
||||
* Previously, we had BRANCH_EN on by default, but now that PT has
|
||||
* grown features outside of branch tracing, it is useful to allow
|
||||
* the user to disable it. Setting bit 0 in the event's attr.config
|
||||
* allows BRANCH_EN to pass through instead of being always on. See
|
||||
* also the comment in pt_event_valid().
|
||||
*/
|
||||
if (event->attr.config & BIT(0)) {
|
||||
reg |= event->attr.config & RTIT_CTL_BRANCH_EN;
|
||||
} else {
|
||||
reg |= RTIT_CTL_BRANCH_EN;
|
||||
}
|
||||
|
||||
if (!event->attr.exclude_kernel)
|
||||
reg |= RTIT_CTL_OS;
|
||||
@ -429,11 +500,15 @@ static void pt_config(struct perf_event *event)
|
||||
reg |= (event->attr.config & PT_CONFIG_MASK);
|
||||
|
||||
event->hw.config = reg;
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, reg);
|
||||
if (READ_ONCE(pt->vmx_on))
|
||||
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
|
||||
else
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, reg);
|
||||
}
|
||||
|
||||
static void pt_config_stop(struct perf_event *event)
|
||||
{
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
u64 ctl = READ_ONCE(event->hw.config);
|
||||
|
||||
/* may be already stopped by a PMI */
|
||||
@ -441,7 +516,8 @@ static void pt_config_stop(struct perf_event *event)
|
||||
return;
|
||||
|
||||
ctl &= ~RTIT_CTL_TRACEEN;
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
|
||||
if (!READ_ONCE(pt->vmx_on))
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
|
||||
|
||||
WRITE_ONCE(event->hw.config, ctl);
|
||||
|
||||
@ -753,7 +829,8 @@ static void pt_handle_status(struct pt *pt)
|
||||
*/
|
||||
if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
|
||||
buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(&pt->handle,
|
||||
PERF_AUX_FLAG_TRUNCATED);
|
||||
advance++;
|
||||
}
|
||||
}
|
||||
@ -846,8 +923,10 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
|
||||
|
||||
/* can't stop in the middle of an output region */
|
||||
if (buf->output_off + handle->size + 1 <
|
||||
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
|
||||
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
|
||||
@ -1171,12 +1250,6 @@ void intel_pt_interrupt(void)
|
||||
if (!READ_ONCE(pt->handle_nmi))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If VMX is on and PT does not support it, don't touch anything.
|
||||
*/
|
||||
if (READ_ONCE(pt->vmx_on))
|
||||
return;
|
||||
|
||||
if (!event)
|
||||
return;
|
||||
|
||||
@ -1192,8 +1265,7 @@ void intel_pt_interrupt(void)
|
||||
|
||||
pt_update_head(pt);
|
||||
|
||||
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
|
||||
local_xchg(&buf->lost, 0));
|
||||
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
|
||||
|
||||
if (!event->hw.state) {
|
||||
int ret;
|
||||
@ -1208,7 +1280,7 @@ void intel_pt_interrupt(void)
|
||||
/* snapshot counters don't use PMI, so it's safe */
|
||||
ret = pt_buffer_reset_markers(buf, &pt->handle);
|
||||
if (ret) {
|
||||
perf_aux_output_end(&pt->handle, 0, true);
|
||||
perf_aux_output_end(&pt->handle, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1237,12 +1309,19 @@ void intel_pt_handle_vmx(int on)
|
||||
local_irq_save(flags);
|
||||
WRITE_ONCE(pt->vmx_on, on);
|
||||
|
||||
if (on) {
|
||||
/* prevent pt_config_stop() from writing RTIT_CTL */
|
||||
event = pt->handle.event;
|
||||
if (event)
|
||||
event->hw.config = 0;
|
||||
}
|
||||
/*
|
||||
* If an AUX transaction is in progress, it will contain
|
||||
* gap(s), so flag it PARTIAL to inform the user.
|
||||
*/
|
||||
event = pt->handle.event;
|
||||
if (event)
|
||||
perf_aux_output_flag(&pt->handle,
|
||||
PERF_AUX_FLAG_PARTIAL);
|
||||
|
||||
/* Turn PTs back on */
|
||||
if (!on && event)
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
|
||||
@ -1257,9 +1336,6 @@ static void pt_event_start(struct perf_event *event, int mode)
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
struct pt_buffer *buf;
|
||||
|
||||
if (READ_ONCE(pt->vmx_on))
|
||||
return;
|
||||
|
||||
buf = perf_aux_output_begin(&pt->handle, event);
|
||||
if (!buf)
|
||||
goto fail_stop;
|
||||
@ -1280,7 +1356,7 @@ static void pt_event_start(struct perf_event *event, int mode)
|
||||
return;
|
||||
|
||||
fail_end_stop:
|
||||
perf_aux_output_end(&pt->handle, 0, true);
|
||||
perf_aux_output_end(&pt->handle, 0);
|
||||
fail_stop:
|
||||
hwc->state = PERF_HES_STOPPED;
|
||||
}
|
||||
@ -1321,8 +1397,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
|
||||
pt->handle.head =
|
||||
local_xchg(&buf->data_size,
|
||||
buf->nr_pages << PAGE_SHIFT);
|
||||
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
|
||||
local_xchg(&buf->lost, 0));
|
||||
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -110,6 +110,7 @@ struct pt_pmu {
|
||||
struct pmu pmu;
|
||||
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
|
||||
bool vmx;
|
||||
bool branch_en_always_on;
|
||||
unsigned long max_nonturbo_ratio;
|
||||
unsigned int tsc_art_num;
|
||||
unsigned int tsc_art_den;
|
||||
@ -143,7 +144,6 @@ struct pt_buffer {
|
||||
size_t output_off;
|
||||
unsigned long nr_pages;
|
||||
local_t data_size;
|
||||
local_t lost;
|
||||
local64_t head;
|
||||
bool snapshot;
|
||||
unsigned long stop_pos, intr_pos;
|
||||
|
@ -79,6 +79,7 @@ struct amd_nb {
|
||||
|
||||
/* The maximal number of PEBS events: */
|
||||
#define MAX_PEBS_EVENTS 8
|
||||
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
|
||||
|
||||
/*
|
||||
* Flags PEBS can handle without an PMI.
|
||||
|
@ -187,6 +187,7 @@
|
||||
* Reuse free bits when adding new feature flags!
|
||||
*/
|
||||
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
|
||||
#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
|
||||
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
|
||||
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
|
||||
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
|
||||
|
@ -72,14 +72,13 @@ struct arch_specific_insn {
|
||||
/* copy of the original instruction */
|
||||
kprobe_opcode_t *insn;
|
||||
/*
|
||||
* boostable = -1: This instruction type is not boostable.
|
||||
* boostable = 0: This instruction type is boostable.
|
||||
* boostable = 1: This instruction has been boosted: we have
|
||||
* boostable = false: This instruction type is not boostable.
|
||||
* boostable = true: This instruction has been boosted: we have
|
||||
* added a relative jump after the instruction copy in insn,
|
||||
* so no single-step and fixup are needed (unless there's
|
||||
* a post_handler or break_handler).
|
||||
*/
|
||||
int boostable;
|
||||
bool boostable;
|
||||
bool if_modifier;
|
||||
};
|
||||
|
||||
|
@ -45,6 +45,8 @@
|
||||
#define MSR_IA32_PERFCTR1 0x000000c2
|
||||
#define MSR_FSB_FREQ 0x000000cd
|
||||
#define MSR_PLATFORM_INFO 0x000000ce
|
||||
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
|
||||
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
|
||||
|
||||
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
|
||||
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
|
||||
@ -127,6 +129,7 @@
|
||||
|
||||
/* DEBUGCTLMSR bits (others vary by model): */
|
||||
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
|
||||
#define DEBUGCTLMSR_BTF_SHIFT 1
|
||||
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
|
||||
#define DEBUGCTLMSR_TR (1UL << 6)
|
||||
#define DEBUGCTLMSR_BTS (1UL << 7)
|
||||
@ -552,10 +555,12 @@
|
||||
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
|
||||
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
|
||||
|
||||
/* MISC_FEATURE_ENABLES non-architectural features */
|
||||
#define MSR_MISC_FEATURE_ENABLES 0x00000140
|
||||
/* MISC_FEATURES_ENABLES non-architectural features */
|
||||
#define MSR_MISC_FEATURES_ENABLES 0x00000140
|
||||
|
||||
#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1
|
||||
#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
|
||||
#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
|
||||
#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
|
||||
|
||||
#define MSR_IA32_TSC_DEADLINE 0x000006E0
|
||||
|
||||
|
@ -884,6 +884,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
|
||||
extern int get_tsc_mode(unsigned long adr);
|
||||
extern int set_tsc_mode(unsigned int val);
|
||||
|
||||
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
|
||||
|
||||
/* Register/unregister a process' MPX related resource */
|
||||
#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
|
||||
#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
|
||||
|
@ -9,6 +9,7 @@ void syscall_init(void);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
void entry_SYSCALL_64(void);
|
||||
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
@ -30,6 +31,7 @@ void x86_report_nx(void);
|
||||
|
||||
extern int reboot_force;
|
||||
|
||||
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
|
||||
long do_arch_prctl_common(struct task_struct *task, int option,
|
||||
unsigned long cpuid_enabled);
|
||||
|
||||
#endif /* _ASM_X86_PROTO_H */
|
||||
|
@ -87,6 +87,7 @@ struct thread_info {
|
||||
#define TIF_SECCOMP 8 /* secure computing */
|
||||
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
|
||||
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
|
||||
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
|
||||
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
|
||||
#define TIF_IA32 17 /* IA32 compatibility process */
|
||||
#define TIF_NOHZ 19 /* in adaptive nohz mode */
|
||||
@ -110,6 +111,7 @@ struct thread_info {
|
||||
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
|
||||
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
|
||||
#define _TIF_UPROBE (1 << TIF_UPROBE)
|
||||
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
|
||||
#define _TIF_NOTSC (1 << TIF_NOTSC)
|
||||
#define _TIF_IA32 (1 << TIF_IA32)
|
||||
#define _TIF_NOHZ (1 << TIF_NOHZ)
|
||||
@ -138,7 +140,7 @@ struct thread_info {
|
||||
|
||||
/* flags to check in __switch_to() */
|
||||
#define _TIF_WORK_CTXSW \
|
||||
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
|
||||
(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
|
||||
|
||||
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
|
||||
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
|
||||
@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack,
|
||||
extern void arch_task_cache_init(void);
|
||||
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
|
||||
extern void arch_release_task_struct(struct task_struct *tsk);
|
||||
extern void arch_setup_new_exec(void);
|
||||
#define arch_setup_new_exec arch_setup_new_exec
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_X86_THREAD_INFO_H */
|
||||
|
@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void cr4_toggle_bits(unsigned long mask)
|
||||
{
|
||||
unsigned long cr4;
|
||||
|
||||
cr4 = this_cpu_read(cpu_tlbstate.cr4);
|
||||
cr4 ^= mask;
|
||||
this_cpu_write(cpu_tlbstate.cr4, cr4);
|
||||
__write_cr4(cr4);
|
||||
}
|
||||
|
||||
/* Read the CR4 shadow. */
|
||||
static inline unsigned long cr4_read_shadow(void)
|
||||
{
|
||||
|
@ -1,10 +1,13 @@
|
||||
#ifndef _ASM_X86_PRCTL_H
|
||||
#define _ASM_X86_PRCTL_H
|
||||
|
||||
#define ARCH_SET_GS 0x1001
|
||||
#define ARCH_SET_FS 0x1002
|
||||
#define ARCH_GET_FS 0x1003
|
||||
#define ARCH_GET_GS 0x1004
|
||||
#define ARCH_SET_GS 0x1001
|
||||
#define ARCH_SET_FS 0x1002
|
||||
#define ARCH_GET_FS 0x1003
|
||||
#define ARCH_GET_GS 0x1004
|
||||
|
||||
#define ARCH_GET_CPUID 0x1011
|
||||
#define ARCH_SET_CPUID 0x1012
|
||||
|
||||
#define ARCH_MAP_VDSO_X32 0x2001
|
||||
#define ARCH_MAP_VDSO_32 0x2002
|
||||
|
@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
|
||||
return;
|
||||
}
|
||||
|
||||
if (ring3mwait_disabled) {
|
||||
msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
|
||||
MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
|
||||
if (ring3mwait_disabled)
|
||||
return;
|
||||
}
|
||||
|
||||
msr_set_bit(MSR_MISC_FEATURE_ENABLES,
|
||||
MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
|
||||
|
||||
set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
|
||||
this_cpu_or(msr_misc_features_shadow,
|
||||
1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
|
||||
|
||||
if (c == &boot_cpu_data)
|
||||
ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
|
||||
@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c)
|
||||
init_intel_energy_perf(c);
|
||||
}
|
||||
|
||||
static void init_cpuid_fault(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 msr;
|
||||
|
||||
if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
|
||||
if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
|
||||
set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
|
||||
}
|
||||
}
|
||||
|
||||
static void init_intel_misc_features(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 msr;
|
||||
|
||||
if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
|
||||
return;
|
||||
|
||||
/* Clear all MISC features */
|
||||
this_cpu_write(msr_misc_features_shadow, 0);
|
||||
|
||||
/* Check features and update capabilities and shadow control bits */
|
||||
init_cpuid_fault(c);
|
||||
probe_xeon_phi_r3mwait(c);
|
||||
|
||||
msr = this_cpu_read(msr_misc_features_shadow);
|
||||
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
|
||||
}
|
||||
|
||||
static void init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int l2 = 0;
|
||||
@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c)
|
||||
|
||||
init_intel_energy_perf(c);
|
||||
|
||||
probe_xeon_phi_r3mwait(c);
|
||||
init_intel_misc_features(c);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
@ -67,7 +67,7 @@
|
||||
#endif
|
||||
|
||||
/* Ensure if the instruction can be boostable */
|
||||
extern int can_boost(kprobe_opcode_t *instruction, void *addr);
|
||||
extern int can_boost(struct insn *insn, void *orig_addr);
|
||||
/* Recover instruction if given address is probed */
|
||||
extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
|
||||
unsigned long addr);
|
||||
@ -75,7 +75,7 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
|
||||
* Copy an instruction and adjust the displacement if the instruction
|
||||
* uses the %rip-relative addressing mode.
|
||||
*/
|
||||
extern int __copy_instruction(u8 *dest, u8 *src);
|
||||
extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn);
|
||||
|
||||
/* Generate a relative-jump/call instruction */
|
||||
extern void synthesize_reljump(void *from, void *to);
|
||||
|
@ -164,42 +164,38 @@ static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
|
||||
NOKPROBE_SYMBOL(skip_prefixes);
|
||||
|
||||
/*
|
||||
* Returns non-zero if opcode is boostable.
|
||||
* Returns non-zero if INSN is boostable.
|
||||
* RIP relative instructions are adjusted at copying time in 64 bits mode
|
||||
*/
|
||||
int can_boost(kprobe_opcode_t *opcodes, void *addr)
|
||||
int can_boost(struct insn *insn, void *addr)
|
||||
{
|
||||
kprobe_opcode_t opcode;
|
||||
kprobe_opcode_t *orig_opcodes = opcodes;
|
||||
|
||||
if (search_exception_tables((unsigned long)addr))
|
||||
return 0; /* Page fault may occur on this address. */
|
||||
|
||||
retry:
|
||||
if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
|
||||
return 0;
|
||||
opcode = *(opcodes++);
|
||||
|
||||
/* 2nd-byte opcode */
|
||||
if (opcode == 0x0f) {
|
||||
if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
|
||||
return 0;
|
||||
return test_bit(*opcodes,
|
||||
if (insn->opcode.nbytes == 2)
|
||||
return test_bit(insn->opcode.bytes[1],
|
||||
(unsigned long *)twobyte_is_boostable);
|
||||
}
|
||||
|
||||
if (insn->opcode.nbytes != 1)
|
||||
return 0;
|
||||
|
||||
/* Can't boost Address-size override prefix */
|
||||
if (unlikely(inat_is_address_size_prefix(insn->attr)))
|
||||
return 0;
|
||||
|
||||
opcode = insn->opcode.bytes[0];
|
||||
|
||||
switch (opcode & 0xf0) {
|
||||
#ifdef CONFIG_X86_64
|
||||
case 0x40:
|
||||
goto retry; /* REX prefix is boostable */
|
||||
#endif
|
||||
case 0x60:
|
||||
if (0x63 < opcode && opcode < 0x67)
|
||||
goto retry; /* prefixes */
|
||||
/* can't boost Address-size override and bound */
|
||||
return (opcode != 0x62 && opcode != 0x67);
|
||||
/* can't boost "bound" */
|
||||
return (opcode != 0x62);
|
||||
case 0x70:
|
||||
return 0; /* can't boost conditional jump */
|
||||
case 0x90:
|
||||
return opcode != 0x9a; /* can't boost call far */
|
||||
case 0xc0:
|
||||
/* can't boost software-interruptions */
|
||||
return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
|
||||
@ -210,14 +206,9 @@ retry:
|
||||
/* can boost in/out and absolute jmps */
|
||||
return ((opcode & 0x04) || opcode == 0xea);
|
||||
case 0xf0:
|
||||
if ((opcode & 0x0c) == 0 && opcode != 0xf1)
|
||||
goto retry; /* lock/rep(ne) prefix */
|
||||
/* clear and set flags are boostable */
|
||||
return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
|
||||
default:
|
||||
/* segment override prefixes are boostable */
|
||||
if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
|
||||
goto retry; /* prefixes */
|
||||
/* CS override prefix and call are not boostable */
|
||||
return (opcode != 0x2e && opcode != 0x9a);
|
||||
}
|
||||
@ -264,7 +255,10 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
|
||||
* Fortunately, we know that the original code is the ideal 5-byte
|
||||
* long NOP.
|
||||
*/
|
||||
memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
|
||||
if (probe_kernel_read(buf, (void *)addr,
|
||||
MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
|
||||
return 0UL;
|
||||
|
||||
if (faddr)
|
||||
memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
|
||||
else
|
||||
@ -276,7 +270,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
|
||||
* Recover the probed instruction at addr for further analysis.
|
||||
* Caller must lock kprobes by kprobe_mutex, or disable preemption
|
||||
* for preventing to release referencing kprobes.
|
||||
* Returns zero if the instruction can not get recovered.
|
||||
* Returns zero if the instruction can not get recovered (or access failed).
|
||||
*/
|
||||
unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
|
||||
{
|
||||
@ -348,37 +342,36 @@ static int is_IF_modifier(kprobe_opcode_t *insn)
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy an instruction and adjust the displacement if the instruction
|
||||
* uses the %rip-relative addressing mode.
|
||||
* If it does, Return the address of the 32-bit displacement word.
|
||||
* If not, return null.
|
||||
* Only applicable to 64-bit x86.
|
||||
* Copy an instruction with recovering modified instruction by kprobes
|
||||
* and adjust the displacement if the instruction uses the %rip-relative
|
||||
* addressing mode.
|
||||
* This returns the length of copied instruction, or 0 if it has an error.
|
||||
*/
|
||||
int __copy_instruction(u8 *dest, u8 *src)
|
||||
int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
|
||||
{
|
||||
struct insn insn;
|
||||
kprobe_opcode_t buf[MAX_INSN_SIZE];
|
||||
int length;
|
||||
unsigned long recovered_insn =
|
||||
recover_probed_instruction(buf, (unsigned long)src);
|
||||
|
||||
if (!recovered_insn)
|
||||
if (!recovered_insn || !insn)
|
||||
return 0;
|
||||
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
|
||||
insn_get_length(&insn);
|
||||
length = insn.length;
|
||||
|
||||
/* This can access kernel text if given address is not recovered */
|
||||
if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE))
|
||||
return 0;
|
||||
|
||||
kernel_insn_init(insn, dest, MAX_INSN_SIZE);
|
||||
insn_get_length(insn);
|
||||
|
||||
/* Another subsystem puts a breakpoint, failed to recover */
|
||||
if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
|
||||
if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
|
||||
return 0;
|
||||
memcpy(dest, insn.kaddr, length);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (insn_rip_relative(&insn)) {
|
||||
/* Only x86_64 has RIP relative instructions */
|
||||
if (insn_rip_relative(insn)) {
|
||||
s64 newdisp;
|
||||
u8 *disp;
|
||||
kernel_insn_init(&insn, dest, length);
|
||||
insn_get_displacement(&insn);
|
||||
/*
|
||||
* The copied instruction uses the %rip-relative addressing
|
||||
* mode. Adjust the displacement for the difference between
|
||||
@ -391,36 +384,57 @@ int __copy_instruction(u8 *dest, u8 *src)
|
||||
* extension of the original signed 32-bit displacement would
|
||||
* have given.
|
||||
*/
|
||||
newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
|
||||
newdisp = (u8 *) src + (s64) insn->displacement.value
|
||||
- (u8 *) dest;
|
||||
if ((s64) (s32) newdisp != newdisp) {
|
||||
pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
|
||||
pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value);
|
||||
pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
|
||||
src, dest, insn->displacement.value);
|
||||
return 0;
|
||||
}
|
||||
disp = (u8 *) dest + insn_offset_displacement(&insn);
|
||||
disp = (u8 *) dest + insn_offset_displacement(insn);
|
||||
*(s32 *) disp = (s32) newdisp;
|
||||
}
|
||||
#endif
|
||||
return length;
|
||||
return insn->length;
|
||||
}
|
||||
|
||||
/* Prepare reljump right after instruction to boost */
|
||||
static void prepare_boost(struct kprobe *p, struct insn *insn)
|
||||
{
|
||||
if (can_boost(insn, p->addr) &&
|
||||
MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) {
|
||||
/*
|
||||
* These instructions can be executed directly if it
|
||||
* jumps back to correct address.
|
||||
*/
|
||||
synthesize_reljump(p->ainsn.insn + insn->length,
|
||||
p->addr + insn->length);
|
||||
p->ainsn.boostable = true;
|
||||
} else {
|
||||
p->ainsn.boostable = false;
|
||||
}
|
||||
}
|
||||
|
||||
static int arch_copy_kprobe(struct kprobe *p)
|
||||
{
|
||||
int ret;
|
||||
struct insn insn;
|
||||
int len;
|
||||
|
||||
set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
|
||||
|
||||
/* Copy an instruction with recovering if other optprobe modifies it.*/
|
||||
ret = __copy_instruction(p->ainsn.insn, p->addr);
|
||||
if (!ret)
|
||||
len = __copy_instruction(p->ainsn.insn, p->addr, &insn);
|
||||
if (!len)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* __copy_instruction can modify the displacement of the instruction,
|
||||
* but it doesn't affect boostable check.
|
||||
*/
|
||||
if (can_boost(p->ainsn.insn, p->addr))
|
||||
p->ainsn.boostable = 0;
|
||||
else
|
||||
p->ainsn.boostable = -1;
|
||||
prepare_boost(p, &insn);
|
||||
|
||||
set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
|
||||
|
||||
/* Check whether the instruction modifies Interrupt Flag or not */
|
||||
p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
|
||||
@ -459,7 +473,7 @@ void arch_disarm_kprobe(struct kprobe *p)
|
||||
void arch_remove_kprobe(struct kprobe *p)
|
||||
{
|
||||
if (p->ainsn.insn) {
|
||||
free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
|
||||
free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
|
||||
p->ainsn.insn = NULL;
|
||||
}
|
||||
}
|
||||
@ -531,7 +545,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
|
||||
return;
|
||||
|
||||
#if !defined(CONFIG_PREEMPT)
|
||||
if (p->ainsn.boostable == 1 && !p->post_handler) {
|
||||
if (p->ainsn.boostable && !p->post_handler) {
|
||||
/* Boost up -- we can execute copied instructions directly */
|
||||
if (!reenter)
|
||||
reset_current_kprobe();
|
||||
@ -851,7 +865,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
|
||||
case 0xcf:
|
||||
case 0xea: /* jmp absolute -- ip is correct */
|
||||
/* ip is already adjusted, no more changes required */
|
||||
p->ainsn.boostable = 1;
|
||||
p->ainsn.boostable = true;
|
||||
goto no_change;
|
||||
case 0xe8: /* call relative - Fix return addr */
|
||||
*tos = orig_ip + (*tos - copy_ip);
|
||||
@ -876,28 +890,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
|
||||
* jmp near and far, absolute indirect
|
||||
* ip is correct. And this is boostable
|
||||
*/
|
||||
p->ainsn.boostable = 1;
|
||||
p->ainsn.boostable = true;
|
||||
goto no_change;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (p->ainsn.boostable == 0) {
|
||||
if ((regs->ip > copy_ip) &&
|
||||
(regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
|
||||
/*
|
||||
* These instructions can be executed directly if it
|
||||
* jumps back to correct address.
|
||||
*/
|
||||
synthesize_reljump((void *)regs->ip,
|
||||
(void *)orig_ip + (regs->ip - copy_ip));
|
||||
p->ainsn.boostable = 1;
|
||||
} else {
|
||||
p->ainsn.boostable = -1;
|
||||
}
|
||||
}
|
||||
|
||||
regs->ip += orig_ip - copy_ip;
|
||||
|
||||
no_change:
|
||||
|
@ -94,6 +94,6 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler);
|
||||
int arch_prepare_kprobe_ftrace(struct kprobe *p)
|
||||
{
|
||||
p->ainsn.insn = NULL;
|
||||
p->ainsn.boostable = -1;
|
||||
p->ainsn.boostable = false;
|
||||
return 0;
|
||||
}
|
||||
|
@ -65,7 +65,10 @@ found:
|
||||
* overwritten by jump destination address. In this case, original
|
||||
* bytes must be recovered from op->optinsn.copied_insn buffer.
|
||||
*/
|
||||
memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
|
||||
if (probe_kernel_read(buf, (void *)addr,
|
||||
MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
|
||||
return 0UL;
|
||||
|
||||
if (addr == (unsigned long)kp->addr) {
|
||||
buf[0] = kp->opcode;
|
||||
memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
|
||||
@ -174,11 +177,12 @@ NOKPROBE_SYMBOL(optimized_callback);
|
||||
|
||||
static int copy_optimized_instructions(u8 *dest, u8 *src)
|
||||
{
|
||||
struct insn insn;
|
||||
int len = 0, ret;
|
||||
|
||||
while (len < RELATIVEJUMP_SIZE) {
|
||||
ret = __copy_instruction(dest + len, src + len);
|
||||
if (!ret || !can_boost(dest + len, src + len))
|
||||
ret = __copy_instruction(dest + len, src + len, &insn);
|
||||
if (!ret || !can_boost(&insn, src + len))
|
||||
return -EINVAL;
|
||||
len += ret;
|
||||
}
|
||||
@ -350,6 +354,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
|
||||
}
|
||||
|
||||
buf = (u8 *)op->optinsn.insn;
|
||||
set_memory_rw((unsigned long)buf & PAGE_MASK, 1);
|
||||
|
||||
/* Copy instructions into the out-of-line buffer */
|
||||
ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
|
||||
@ -372,6 +377,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
|
||||
synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
|
||||
(u8 *)op->kp.addr + op->optinsn.size);
|
||||
|
||||
set_memory_ro((unsigned long)buf & PAGE_MASK, 1);
|
||||
|
||||
flush_icache_range((unsigned long) buf,
|
||||
(unsigned long) buf + TMPL_END_IDX +
|
||||
op->optinsn.size + RELATIVEJUMP_SIZE);
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <asm/vm86.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/prctl.h>
|
||||
|
||||
/*
|
||||
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
|
||||
@ -124,11 +125,6 @@ void flush_thread(void)
|
||||
fpu__clear(&tsk->thread.fpu);
|
||||
}
|
||||
|
||||
static void hard_disable_TSC(void)
|
||||
{
|
||||
cr4_set_bits(X86_CR4_TSD);
|
||||
}
|
||||
|
||||
void disable_TSC(void)
|
||||
{
|
||||
preempt_disable();
|
||||
@ -137,15 +133,10 @@ void disable_TSC(void)
|
||||
* Must flip the CPU state synchronously with
|
||||
* TIF_NOTSC in the current running context.
|
||||
*/
|
||||
hard_disable_TSC();
|
||||
cr4_set_bits(X86_CR4_TSD);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void hard_enable_TSC(void)
|
||||
{
|
||||
cr4_clear_bits(X86_CR4_TSD);
|
||||
}
|
||||
|
||||
static void enable_TSC(void)
|
||||
{
|
||||
preempt_disable();
|
||||
@ -154,7 +145,7 @@ static void enable_TSC(void)
|
||||
* Must flip the CPU state synchronously with
|
||||
* TIF_NOTSC in the current running context.
|
||||
*/
|
||||
hard_enable_TSC();
|
||||
cr4_clear_bits(X86_CR4_TSD);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
||||
struct tss_struct *tss)
|
||||
DEFINE_PER_CPU(u64, msr_misc_features_shadow);
|
||||
|
||||
static void set_cpuid_faulting(bool on)
|
||||
{
|
||||
struct thread_struct *prev, *next;
|
||||
u64 msrval;
|
||||
|
||||
prev = &prev_p->thread;
|
||||
next = &next_p->thread;
|
||||
msrval = this_cpu_read(msr_misc_features_shadow);
|
||||
msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
|
||||
msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
|
||||
this_cpu_write(msr_misc_features_shadow, msrval);
|
||||
wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
|
||||
}
|
||||
|
||||
if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
|
||||
test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
|
||||
unsigned long debugctl = get_debugctlmsr();
|
||||
|
||||
debugctl &= ~DEBUGCTLMSR_BTF;
|
||||
if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
|
||||
debugctl |= DEBUGCTLMSR_BTF;
|
||||
|
||||
update_debugctlmsr(debugctl);
|
||||
static void disable_cpuid(void)
|
||||
{
|
||||
preempt_disable();
|
||||
if (!test_and_set_thread_flag(TIF_NOCPUID)) {
|
||||
/*
|
||||
* Must flip the CPU state synchronously with
|
||||
* TIF_NOCPUID in the current running context.
|
||||
*/
|
||||
set_cpuid_faulting(true);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
|
||||
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
|
||||
/* prev and next are different */
|
||||
if (test_tsk_thread_flag(next_p, TIF_NOTSC))
|
||||
hard_disable_TSC();
|
||||
else
|
||||
hard_enable_TSC();
|
||||
static void enable_cpuid(void)
|
||||
{
|
||||
preempt_disable();
|
||||
if (test_and_clear_thread_flag(TIF_NOCPUID)) {
|
||||
/*
|
||||
* Must flip the CPU state synchronously with
|
||||
* TIF_NOCPUID in the current running context.
|
||||
*/
|
||||
set_cpuid_faulting(false);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
|
||||
static int get_cpuid_mode(void)
|
||||
{
|
||||
return !test_thread_flag(TIF_NOCPUID);
|
||||
}
|
||||
|
||||
static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
|
||||
{
|
||||
if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
|
||||
return -ENODEV;
|
||||
|
||||
if (cpuid_enabled)
|
||||
enable_cpuid();
|
||||
else
|
||||
disable_cpuid();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called immediately after a successful exec.
|
||||
*/
|
||||
void arch_setup_new_exec(void)
|
||||
{
|
||||
/* If cpuid was previously disabled for this task, re-enable it. */
|
||||
if (test_thread_flag(TIF_NOCPUID))
|
||||
enable_cpuid();
|
||||
}
|
||||
|
||||
static inline void switch_to_bitmap(struct tss_struct *tss,
|
||||
struct thread_struct *prev,
|
||||
struct thread_struct *next,
|
||||
unsigned long tifp, unsigned long tifn)
|
||||
{
|
||||
if (tifn & _TIF_IO_BITMAP) {
|
||||
/*
|
||||
* Copy the relevant range of the IO bitmap.
|
||||
* Normally this is 128 bytes or less:
|
||||
*/
|
||||
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
|
||||
max(prev->io_bitmap_max, next->io_bitmap_max));
|
||||
|
||||
/*
|
||||
* Make sure that the TSS limit is correct for the CPU
|
||||
* to notice the IO bitmap.
|
||||
*/
|
||||
refresh_tss_limit();
|
||||
} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
|
||||
} else if (tifp & _TIF_IO_BITMAP) {
|
||||
/*
|
||||
* Clear any possible leftover bits:
|
||||
*/
|
||||
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
|
||||
}
|
||||
}
|
||||
|
||||
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
||||
struct tss_struct *tss)
|
||||
{
|
||||
struct thread_struct *prev, *next;
|
||||
unsigned long tifp, tifn;
|
||||
|
||||
prev = &prev_p->thread;
|
||||
next = &next_p->thread;
|
||||
|
||||
tifn = READ_ONCE(task_thread_info(next_p)->flags);
|
||||
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
|
||||
switch_to_bitmap(tss, prev, next, tifp, tifn);
|
||||
|
||||
propagate_user_return_notify(prev_p, next_p);
|
||||
|
||||
if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
|
||||
arch_has_block_step()) {
|
||||
unsigned long debugctl, msk;
|
||||
|
||||
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
||||
debugctl &= ~DEBUGCTLMSR_BTF;
|
||||
msk = tifn & _TIF_BLOCKSTEP;
|
||||
debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
|
||||
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
||||
}
|
||||
|
||||
if ((tifp ^ tifn) & _TIF_NOTSC)
|
||||
cr4_toggle_bits(X86_CR4_TSD);
|
||||
|
||||
if ((tifp ^ tifn) & _TIF_NOCPUID)
|
||||
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -550,3 +616,16 @@ out:
|
||||
put_task_stack(p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long do_arch_prctl_common(struct task_struct *task, int option,
|
||||
unsigned long cpuid_enabled)
|
||||
{
|
||||
switch (option) {
|
||||
case ARCH_GET_CPUID:
|
||||
return get_cpuid_mode();
|
||||
case ARCH_SET_CPUID:
|
||||
return set_cpuid_mode(task, cpuid_enabled);
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/ldt.h>
|
||||
@ -56,6 +57,7 @@
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/vm86.h>
|
||||
#include <asm/intel_rdt.h>
|
||||
#include <asm/proto.h>
|
||||
|
||||
void __show_regs(struct pt_regs *regs, int all)
|
||||
{
|
||||
@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
|
||||
return prev_p;
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
|
||||
{
|
||||
return do_arch_prctl_common(current, option, arg2);
|
||||
}
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/processor.h>
|
||||
@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
|
||||
(struct user_desc __user *)tls, 0);
|
||||
else
|
||||
#endif
|
||||
err = do_arch_prctl(p, ARCH_SET_FS, tls);
|
||||
err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
|
||||
}
|
||||
#endif
|
||||
|
||||
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
|
||||
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
|
||||
{
|
||||
int ret = 0;
|
||||
int doit = task == current;
|
||||
int cpu;
|
||||
|
||||
switch (code) {
|
||||
switch (option) {
|
||||
case ARCH_SET_GS:
|
||||
if (addr >= TASK_SIZE_MAX)
|
||||
if (arg2 >= TASK_SIZE_MAX)
|
||||
return -EPERM;
|
||||
cpu = get_cpu();
|
||||
task->thread.gsindex = 0;
|
||||
task->thread.gsbase = addr;
|
||||
task->thread.gsbase = arg2;
|
||||
if (doit) {
|
||||
load_gs_index(0);
|
||||
ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
|
||||
ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
|
||||
}
|
||||
put_cpu();
|
||||
break;
|
||||
case ARCH_SET_FS:
|
||||
/* Not strictly needed for fs, but do it for symmetry
|
||||
with gs */
|
||||
if (addr >= TASK_SIZE_MAX)
|
||||
if (arg2 >= TASK_SIZE_MAX)
|
||||
return -EPERM;
|
||||
cpu = get_cpu();
|
||||
task->thread.fsindex = 0;
|
||||
task->thread.fsbase = addr;
|
||||
task->thread.fsbase = arg2;
|
||||
if (doit) {
|
||||
/* set the selector to 0 to not confuse __switch_to */
|
||||
loadsegment(fs, 0);
|
||||
ret = wrmsrl_safe(MSR_FS_BASE, addr);
|
||||
ret = wrmsrl_safe(MSR_FS_BASE, arg2);
|
||||
}
|
||||
put_cpu();
|
||||
break;
|
||||
case ARCH_GET_FS: {
|
||||
unsigned long base;
|
||||
|
||||
if (doit)
|
||||
rdmsrl(MSR_FS_BASE, base);
|
||||
else
|
||||
base = task->thread.fsbase;
|
||||
ret = put_user(base, (unsigned long __user *)addr);
|
||||
ret = put_user(base, (unsigned long __user *)arg2);
|
||||
break;
|
||||
}
|
||||
case ARCH_GET_GS: {
|
||||
unsigned long base;
|
||||
|
||||
if (doit)
|
||||
rdmsrl(MSR_KERNEL_GS_BASE, base);
|
||||
else
|
||||
base = task->thread.gsbase;
|
||||
ret = put_user(base, (unsigned long __user *)addr);
|
||||
ret = put_user(base, (unsigned long __user *)arg2);
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CHECKPOINT_RESTORE
|
||||
# ifdef CONFIG_X86_X32_ABI
|
||||
case ARCH_MAP_VDSO_X32:
|
||||
return prctl_map_vdso(&vdso_image_x32, addr);
|
||||
return prctl_map_vdso(&vdso_image_x32, arg2);
|
||||
# endif
|
||||
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
|
||||
case ARCH_MAP_VDSO_32:
|
||||
return prctl_map_vdso(&vdso_image_32, addr);
|
||||
return prctl_map_vdso(&vdso_image_32, arg2);
|
||||
# endif
|
||||
case ARCH_MAP_VDSO_64:
|
||||
return prctl_map_vdso(&vdso_image_64, addr);
|
||||
return prctl_map_vdso(&vdso_image_64, arg2);
|
||||
#endif
|
||||
|
||||
default:
|
||||
@ -621,11 +624,24 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
|
||||
return ret;
|
||||
}
|
||||
|
||||
long sys_arch_prctl(int code, unsigned long addr)
|
||||
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
|
||||
{
|
||||
return do_arch_prctl(current, code, addr);
|
||||
long ret;
|
||||
|
||||
ret = do_arch_prctl_64(current, option, arg2);
|
||||
if (ret == -EINVAL)
|
||||
ret = do_arch_prctl_common(current, option, arg2);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
|
||||
{
|
||||
return do_arch_prctl_common(current, option, arg2);
|
||||
}
|
||||
#endif
|
||||
|
||||
unsigned long KSTK_ESP(struct task_struct *task)
|
||||
{
|
||||
return task_pt_regs(task)->sp;
|
||||
|
@ -396,12 +396,12 @@ static int putreg(struct task_struct *child,
|
||||
if (value >= TASK_SIZE_MAX)
|
||||
return -EIO;
|
||||
/*
|
||||
* When changing the segment base, use do_arch_prctl
|
||||
* When changing the segment base, use do_arch_prctl_64
|
||||
* to set either thread.fs or thread.fsindex and the
|
||||
* corresponding GDT slot.
|
||||
*/
|
||||
if (child->thread.fsbase != value)
|
||||
return do_arch_prctl(child, ARCH_SET_FS, value);
|
||||
return do_arch_prctl_64(child, ARCH_SET_FS, value);
|
||||
return 0;
|
||||
case offsetof(struct user_regs_struct,gs_base):
|
||||
/*
|
||||
@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
|
||||
if (value >= TASK_SIZE_MAX)
|
||||
return -EIO;
|
||||
if (child->thread.gsbase != value)
|
||||
return do_arch_prctl(child, ARCH_SET_GS, value);
|
||||
return do_arch_prctl_64(child, ARCH_SET_GS, value);
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request,
|
||||
Works just like arch_prctl, except that the arguments
|
||||
are reversed. */
|
||||
case PTRACE_ARCH_PRCTL:
|
||||
ret = do_arch_prctl(child, data, addr);
|
||||
ret = do_arch_prctl_64(child, data, addr);
|
||||
break;
|
||||
#endif
|
||||
|
||||
|
@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
|
||||
|
||||
ifeq ($(CONFIG_X86_32),y)
|
||||
|
||||
obj-y += checksum_32.o
|
||||
obj-y += checksum_32.o syscalls_32.o
|
||||
obj-$(CONFIG_ELF_CORE) += elfcore.o
|
||||
|
||||
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
|
||||
|
@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
extern long arch_prctl(struct task_struct *task, int code,
|
||||
extern long arch_prctl(struct task_struct *task, int option,
|
||||
unsigned long __user *addr);
|
||||
|
||||
#endif
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <sys/ptrace.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
int os_arch_prctl(int pid, int code, unsigned long *addr)
|
||||
int os_arch_prctl(int pid, int option, unsigned long *arg2)
|
||||
{
|
||||
return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code);
|
||||
return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
|
||||
}
|
||||
|
7
arch/x86/um/syscalls_32.c
Normal file
7
arch/x86/um/syscalls_32.c
Normal file
@ -0,0 +1,7 @@
|
||||
#include <linux/syscalls.h>
|
||||
#include <os.h>
|
||||
|
||||
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
@ -7,13 +7,15 @@
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/prctl.h> /* XXX This should get the constants from libc */
|
||||
#include <os.h>
|
||||
|
||||
long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
|
||||
long arch_prctl(struct task_struct *task, int option)
|
||||
unsigned long __user *arg2)
|
||||
{
|
||||
unsigned long *ptr = addr, tmp;
|
||||
unsigned long *ptr = arg2, tmp;
|
||||
long ret;
|
||||
int pid = task->mm->context.id.u.pid;
|
||||
|
||||
@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
|
||||
* arch_prctl is run on the host, then the registers are read
|
||||
* back.
|
||||
*/
|
||||
switch (code) {
|
||||
switch (option) {
|
||||
case ARCH_SET_FS:
|
||||
case ARCH_SET_GS:
|
||||
ret = restore_registers(pid, ¤t->thread.regs.regs);
|
||||
@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
|
||||
ptr = &tmp;
|
||||
}
|
||||
|
||||
ret = os_arch_prctl(pid, code, ptr);
|
||||
ret = os_arch_prctl(pid, option, ptr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (code) {
|
||||
switch (option) {
|
||||
case ARCH_SET_FS:
|
||||
current->thread.arch.fs = (unsigned long) ptr;
|
||||
ret = save_registers(pid, ¤t->thread.regs.regs);
|
||||
@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
|
||||
ret = save_registers(pid, ¤t->thread.regs.regs);
|
||||
break;
|
||||
case ARCH_GET_FS:
|
||||
ret = put_user(tmp, addr);
|
||||
ret = put_user(tmp, arg2);
|
||||
break;
|
||||
case ARCH_GET_GS:
|
||||
ret = put_user(tmp, addr);
|
||||
ret = put_user(tmp, arg2);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
long sys_arch_prctl(int code, unsigned long addr)
|
||||
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
|
||||
{
|
||||
return arch_prctl(current, code, (unsigned long __user *) addr);
|
||||
return arch_prctl(current, option, (unsigned long __user *) arg2);
|
||||
}
|
||||
|
||||
void arch_switch_to(struct task_struct *to)
|
||||
|
@ -321,7 +321,7 @@ static int etb_set_buffer(struct coresight_device *csdev,
|
||||
|
||||
static unsigned long etb_reset_buffer(struct coresight_device *csdev,
|
||||
struct perf_output_handle *handle,
|
||||
void *sink_config, bool *lost)
|
||||
void *sink_config)
|
||||
{
|
||||
unsigned long size = 0;
|
||||
struct cs_buffers *buf = sink_config;
|
||||
@ -343,7 +343,6 @@ static unsigned long etb_reset_buffer(struct coresight_device *csdev,
|
||||
* resetting parameters here and squaring off with the ring
|
||||
* buffer API in the tracer PMU is fine.
|
||||
*/
|
||||
*lost = !!local_xchg(&buf->lost, 0);
|
||||
size = local_xchg(&buf->data_size, 0);
|
||||
}
|
||||
|
||||
@ -385,7 +384,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
|
||||
(unsigned long)write_ptr);
|
||||
|
||||
write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1);
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -396,7 +395,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
|
||||
*/
|
||||
status = readl_relaxed(drvdata->base + ETB_STATUS_REG);
|
||||
if (status & ETB_STATUS_RAM_FULL) {
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
to_read = capacity;
|
||||
read_ptr = write_ptr;
|
||||
} else {
|
||||
@ -429,7 +428,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
|
||||
if (read_ptr > (drvdata->buffer_depth - 1))
|
||||
read_ptr -= drvdata->buffer_depth;
|
||||
/* let the decoder know we've skipped ahead */
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
}
|
||||
|
||||
/* finally tell HW where we want to start reading from */
|
||||
|
@ -302,7 +302,8 @@ out:
|
||||
return;
|
||||
|
||||
fail_end_stop:
|
||||
perf_aux_output_end(handle, 0, true);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
perf_aux_output_end(handle, 0);
|
||||
fail:
|
||||
event->hw.state = PERF_HES_STOPPED;
|
||||
goto out;
|
||||
@ -310,7 +311,6 @@ fail:
|
||||
|
||||
static void etm_event_stop(struct perf_event *event, int mode)
|
||||
{
|
||||
bool lost;
|
||||
int cpu = smp_processor_id();
|
||||
unsigned long size;
|
||||
struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
|
||||
@ -348,10 +348,9 @@ static void etm_event_stop(struct perf_event *event, int mode)
|
||||
return;
|
||||
|
||||
size = sink_ops(sink)->reset_buffer(sink, handle,
|
||||
event_data->snk_config,
|
||||
&lost);
|
||||
event_data->snk_config);
|
||||
|
||||
perf_aux_output_end(handle, size, lost);
|
||||
perf_aux_output_end(handle, size);
|
||||
}
|
||||
|
||||
/* Disabling the path make its elements available to other sessions */
|
||||
|
@ -76,7 +76,6 @@ enum cs_mode {
|
||||
* @nr_pages: max number of pages granted to us
|
||||
* @offset: offset within the current buffer
|
||||
* @data_size: how much we collected in this run
|
||||
* @lost: other than zero if we had a HW buffer wrap around
|
||||
* @snapshot: is this run in snapshot mode
|
||||
* @data_pages: a handle the ring buffer
|
||||
*/
|
||||
@ -85,7 +84,6 @@ struct cs_buffers {
|
||||
unsigned int nr_pages;
|
||||
unsigned long offset;
|
||||
local_t data_size;
|
||||
local_t lost;
|
||||
bool snapshot;
|
||||
void **data_pages;
|
||||
};
|
||||
|
@ -329,7 +329,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev,
|
||||
|
||||
static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
|
||||
struct perf_output_handle *handle,
|
||||
void *sink_config, bool *lost)
|
||||
void *sink_config)
|
||||
{
|
||||
long size = 0;
|
||||
struct cs_buffers *buf = sink_config;
|
||||
@ -350,7 +350,6 @@ static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
|
||||
* resetting parameters here and squaring off with the ring
|
||||
* buffer API in the tracer PMU is fine.
|
||||
*/
|
||||
*lost = !!local_xchg(&buf->lost, 0);
|
||||
size = local_xchg(&buf->data_size, 0);
|
||||
}
|
||||
|
||||
@ -389,7 +388,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
|
||||
*/
|
||||
status = readl_relaxed(drvdata->base + TMC_STS);
|
||||
if (status & TMC_STS_FULL) {
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
to_read = drvdata->size;
|
||||
} else {
|
||||
to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size);
|
||||
@ -434,7 +433,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
|
||||
read_ptr -= drvdata->size;
|
||||
/* Tell the HW */
|
||||
writel_relaxed(read_ptr, drvdata->base + TMC_RRP);
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
}
|
||||
|
||||
cur = buf->cur;
|
||||
|
@ -1234,7 +1234,7 @@ static void __domain_flush_pages(struct protection_domain *domain,
|
||||
|
||||
build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
|
||||
|
||||
for (i = 0; i < amd_iommus_present; ++i) {
|
||||
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
|
||||
if (!domain->dev_iommu[i])
|
||||
continue;
|
||||
|
||||
@ -1278,7 +1278,7 @@ static void domain_flush_complete(struct protection_domain *domain)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < amd_iommus_present; ++i) {
|
||||
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
|
||||
if (domain && !domain->dev_iommu[i])
|
||||
continue;
|
||||
|
||||
@ -3363,7 +3363,7 @@ static int __flush_pasid(struct protection_domain *domain, int pasid,
|
||||
* IOMMU TLB needs to be flushed before Device TLB to
|
||||
* prevent device TLB refill from IOMMU TLB
|
||||
*/
|
||||
for (i = 0; i < amd_iommus_present; ++i) {
|
||||
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
|
||||
if (domain->dev_iommu[i] == 0)
|
||||
continue;
|
||||
|
||||
|
@ -167,7 +167,9 @@ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
|
||||
|
||||
/* Array to assign indices to IOMMUs*/
|
||||
struct amd_iommu *amd_iommus[MAX_IOMMUS];
|
||||
int amd_iommus_present;
|
||||
|
||||
/* Number of IOMMUs present in the system */
|
||||
static int amd_iommus_present;
|
||||
|
||||
/* IOMMUs have a non-present cache? */
|
||||
bool amd_iommu_np_cache __read_mostly;
|
||||
@ -254,10 +256,6 @@ static int amd_iommu_enable_interrupts(void);
|
||||
static int __init iommu_go_to_state(enum iommu_init_state state);
|
||||
static void init_device_table_dma(void);
|
||||
|
||||
static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
|
||||
u8 bank, u8 cntr, u8 fxn,
|
||||
u64 *value, bool is_write);
|
||||
|
||||
static inline void update_last_devid(u16 devid)
|
||||
{
|
||||
if (devid > amd_iommu_last_bdf)
|
||||
@ -272,6 +270,11 @@ static inline unsigned long tbl_size(int entry_size)
|
||||
return 1UL << shift;
|
||||
}
|
||||
|
||||
int amd_iommu_get_num_iommus(void)
|
||||
{
|
||||
return amd_iommus_present;
|
||||
}
|
||||
|
||||
/* Access to l1 and l2 indexed register spaces */
|
||||
|
||||
static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
|
||||
@ -1336,7 +1339,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
|
||||
|
||||
/* Add IOMMU to internal data structures */
|
||||
list_add_tail(&iommu->list, &amd_iommu_list);
|
||||
iommu->index = amd_iommus_present++;
|
||||
iommu->index = amd_iommus_present++;
|
||||
|
||||
if (unlikely(iommu->index >= MAX_IOMMUS)) {
|
||||
WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
|
||||
@ -1477,6 +1480,8 @@ static int __init init_iommu_all(struct acpi_table_header *table)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value, bool is_write);
|
||||
|
||||
static void init_iommu_perf_ctr(struct amd_iommu *iommu)
|
||||
{
|
||||
@ -1488,8 +1493,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
|
||||
amd_iommu_pc_present = true;
|
||||
|
||||
/* Check if the performance counters can be written to */
|
||||
if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
|
||||
(0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
|
||||
if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
|
||||
(iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
|
||||
(val != val2)) {
|
||||
pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
|
||||
amd_iommu_pc_present = false;
|
||||
@ -2711,6 +2716,18 @@ bool amd_iommu_v2_supported(void)
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_v2_supported);
|
||||
|
||||
struct amd_iommu *get_amd_iommu(unsigned int idx)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
struct amd_iommu *iommu;
|
||||
|
||||
for_each_iommu(iommu)
|
||||
if (i++ == idx)
|
||||
return iommu;
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(get_amd_iommu);
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* IOMMU EFR Performance Counter support functionality. This code allows
|
||||
@ -2718,17 +2735,14 @@ EXPORT_SYMBOL(amd_iommu_v2_supported);
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
u8 amd_iommu_pc_get_max_banks(u16 devid)
|
||||
u8 amd_iommu_pc_get_max_banks(unsigned int idx)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
u8 ret = 0;
|
||||
struct amd_iommu *iommu = get_amd_iommu(idx);
|
||||
|
||||
/* locate the iommu governing the devid */
|
||||
iommu = amd_iommu_rlookup_table[devid];
|
||||
if (iommu)
|
||||
ret = iommu->max_banks;
|
||||
return iommu->max_banks;
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
|
||||
|
||||
@ -2738,62 +2752,69 @@ bool amd_iommu_pc_supported(void)
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_supported);
|
||||
|
||||
u8 amd_iommu_pc_get_max_counters(u16 devid)
|
||||
u8 amd_iommu_pc_get_max_counters(unsigned int idx)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
u8 ret = 0;
|
||||
struct amd_iommu *iommu = get_amd_iommu(idx);
|
||||
|
||||
/* locate the iommu governing the devid */
|
||||
iommu = amd_iommu_rlookup_table[devid];
|
||||
if (iommu)
|
||||
ret = iommu->max_counters;
|
||||
return iommu->max_counters;
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
|
||||
|
||||
static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
|
||||
u8 bank, u8 cntr, u8 fxn,
|
||||
u64 *value, bool is_write)
|
||||
static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value, bool is_write)
|
||||
{
|
||||
u32 offset;
|
||||
u32 max_offset_lim;
|
||||
|
||||
/* Check for valid iommu and pc register indexing */
|
||||
if (WARN_ON((fxn > 0x28) || (fxn & 7)))
|
||||
/* Make sure the IOMMU PC resource is available */
|
||||
if (!amd_iommu_pc_present)
|
||||
return -ENODEV;
|
||||
|
||||
offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
|
||||
/* Check for valid iommu and pc register indexing */
|
||||
if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
|
||||
return -ENODEV;
|
||||
|
||||
offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
|
||||
|
||||
/* Limit the offset to the hw defined mmio region aperture */
|
||||
max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) |
|
||||
max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
|
||||
(iommu->max_counters << 8) | 0x28);
|
||||
if ((offset < MMIO_CNTR_REG_OFFSET) ||
|
||||
(offset > max_offset_lim))
|
||||
return -EINVAL;
|
||||
|
||||
if (is_write) {
|
||||
writel((u32)*value, iommu->mmio_base + offset);
|
||||
writel((*value >> 32), iommu->mmio_base + offset + 4);
|
||||
u64 val = *value & GENMASK_ULL(47, 0);
|
||||
|
||||
writel((u32)val, iommu->mmio_base + offset);
|
||||
writel((val >> 32), iommu->mmio_base + offset + 4);
|
||||
} else {
|
||||
*value = readl(iommu->mmio_base + offset + 4);
|
||||
*value <<= 32;
|
||||
*value = readl(iommu->mmio_base + offset);
|
||||
*value |= readl(iommu->mmio_base + offset);
|
||||
*value &= GENMASK_ULL(47, 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
|
||||
|
||||
int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
|
||||
u64 *value, bool is_write)
|
||||
int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
|
||||
{
|
||||
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
|
||||
if (!iommu)
|
||||
return -EINVAL;
|
||||
|
||||
/* Make sure the IOMMU PC resource is available */
|
||||
if (!amd_iommu_pc_present || iommu == NULL)
|
||||
return -ENODEV;
|
||||
|
||||
return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn,
|
||||
value, is_write);
|
||||
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_get_reg);
|
||||
|
||||
int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
|
||||
{
|
||||
if (!iommu)
|
||||
return -EINVAL;
|
||||
|
||||
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_set_reg);
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include "amd_iommu_types.h"
|
||||
|
||||
extern int amd_iommu_get_num_iommus(void);
|
||||
extern int amd_iommu_init_dma_ops(void);
|
||||
extern int amd_iommu_init_passthrough(void);
|
||||
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
|
||||
@ -56,13 +57,6 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
|
||||
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
|
||||
extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
|
||||
|
||||
/* IOMMU Performance Counter functions */
|
||||
extern bool amd_iommu_pc_supported(void);
|
||||
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
|
||||
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
|
||||
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
|
||||
u64 *value, bool is_write);
|
||||
|
||||
#ifdef CONFIG_IRQ_REMAP
|
||||
extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
|
||||
#else
|
||||
|
@ -611,9 +611,6 @@ extern struct list_head amd_iommu_list;
|
||||
*/
|
||||
extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
|
||||
|
||||
/* Number of IOMMUs present in the system */
|
||||
extern int amd_iommus_present;
|
||||
|
||||
/*
|
||||
* Declarations for the global list of all protection domains
|
||||
*/
|
||||
|
@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm)
|
||||
else
|
||||
set_dumpable(current->mm, suid_dumpable);
|
||||
|
||||
arch_setup_new_exec();
|
||||
perf_event_exec();
|
||||
__set_task_comm(current, kbasename(bprm->filename), true);
|
||||
|
||||
|
@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
|
||||
asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
|
||||
int, const char __user *);
|
||||
|
||||
asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2);
|
||||
|
||||
/*
|
||||
* For most but not all architectures, "am I in a compat syscall?" and
|
||||
* "am I a compat task?" are the same question. For architectures on which
|
||||
|
@ -201,7 +201,7 @@ struct coresight_ops_sink {
|
||||
void *sink_config);
|
||||
unsigned long (*reset_buffer)(struct coresight_device *csdev,
|
||||
struct perf_output_handle *handle,
|
||||
void *sink_config, bool *lost);
|
||||
void *sink_config);
|
||||
void (*update_buffer)(struct coresight_device *csdev,
|
||||
struct perf_output_handle *handle,
|
||||
void *sink_config);
|
||||
|
@ -267,6 +267,8 @@ extern int arch_init_kprobes(void);
|
||||
extern void show_registers(struct pt_regs *regs);
|
||||
extern void kprobes_inc_nmissed_count(struct kprobe *p);
|
||||
extern bool arch_within_kprobe_blacklist(unsigned long addr);
|
||||
extern bool arch_function_offset_within_entry(unsigned long offset);
|
||||
extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
|
||||
|
||||
extern bool within_kprobe_blacklist(unsigned long addr);
|
||||
|
||||
|
@ -165,6 +165,13 @@ struct hw_perf_event {
|
||||
struct list_head bp_list;
|
||||
};
|
||||
#endif
|
||||
struct { /* amd_iommu */
|
||||
u8 iommu_bank;
|
||||
u8 iommu_cntr;
|
||||
u16 padding;
|
||||
u64 conf;
|
||||
u64 conf1;
|
||||
};
|
||||
};
|
||||
/*
|
||||
* If the event is a per task event, this will point to the task in
|
||||
@ -801,6 +808,7 @@ struct perf_output_handle {
|
||||
struct ring_buffer *rb;
|
||||
unsigned long wakeup;
|
||||
unsigned long size;
|
||||
u64 aux_flags;
|
||||
union {
|
||||
void *addr;
|
||||
unsigned long head;
|
||||
@ -849,10 +857,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
|
||||
extern void *perf_aux_output_begin(struct perf_output_handle *handle,
|
||||
struct perf_event *event);
|
||||
extern void perf_aux_output_end(struct perf_output_handle *handle,
|
||||
unsigned long size, bool truncated);
|
||||
unsigned long size);
|
||||
extern int perf_aux_output_skip(struct perf_output_handle *handle,
|
||||
unsigned long size);
|
||||
extern void *perf_get_aux(struct perf_output_handle *handle);
|
||||
extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
|
||||
|
||||
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
|
||||
extern void perf_pmu_unregister(struct pmu *pmu);
|
||||
@ -1112,6 +1121,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
|
||||
|
||||
extern void perf_event_exec(void);
|
||||
extern void perf_event_comm(struct task_struct *tsk, bool exec);
|
||||
extern void perf_event_namespaces(struct task_struct *tsk);
|
||||
extern void perf_event_fork(struct task_struct *tsk);
|
||||
|
||||
/* Callchains */
|
||||
@ -1267,8 +1277,8 @@ static inline void *
|
||||
perf_aux_output_begin(struct perf_output_handle *handle,
|
||||
struct perf_event *event) { return NULL; }
|
||||
static inline void
|
||||
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
|
||||
bool truncated) { }
|
||||
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
|
||||
{ }
|
||||
static inline int
|
||||
perf_aux_output_skip(struct perf_output_handle *handle,
|
||||
unsigned long size) { return -EINVAL; }
|
||||
@ -1315,6 +1325,7 @@ static inline int perf_unregister_guest_info_callbacks
|
||||
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
|
||||
static inline void perf_event_exec(void) { }
|
||||
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
|
||||
static inline void perf_event_namespaces(struct task_struct *tsk) { }
|
||||
static inline void perf_event_fork(struct task_struct *tsk) { }
|
||||
static inline void perf_event_init(void) { }
|
||||
static inline int perf_swevent_get_recursion_context(void) { return -1; }
|
||||
|
@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n,
|
||||
{ }
|
||||
#endif /* CONFIG_HARDENED_USERCOPY */
|
||||
|
||||
#ifndef arch_setup_new_exec
|
||||
static inline void arch_setup_new_exec(void) { }
|
||||
#endif
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* _LINUX_THREAD_INFO_H */
|
||||
|
@ -344,7 +344,8 @@ struct perf_event_attr {
|
||||
use_clockid : 1, /* use @clockid for time fields */
|
||||
context_switch : 1, /* context switch data */
|
||||
write_backward : 1, /* Write ring buffer from end to beginning */
|
||||
__reserved_1 : 36;
|
||||
namespaces : 1, /* include namespaces data */
|
||||
__reserved_1 : 35;
|
||||
|
||||
union {
|
||||
__u32 wakeup_events; /* wakeup every n events */
|
||||
@ -610,6 +611,23 @@ struct perf_event_header {
|
||||
__u16 size;
|
||||
};
|
||||
|
||||
struct perf_ns_link_info {
|
||||
__u64 dev;
|
||||
__u64 ino;
|
||||
};
|
||||
|
||||
enum {
|
||||
NET_NS_INDEX = 0,
|
||||
UTS_NS_INDEX = 1,
|
||||
IPC_NS_INDEX = 2,
|
||||
PID_NS_INDEX = 3,
|
||||
USER_NS_INDEX = 4,
|
||||
MNT_NS_INDEX = 5,
|
||||
CGROUP_NS_INDEX = 6,
|
||||
|
||||
NR_NAMESPACES, /* number of available namespaces */
|
||||
};
|
||||
|
||||
enum perf_event_type {
|
||||
|
||||
/*
|
||||
@ -862,6 +880,18 @@ enum perf_event_type {
|
||||
*/
|
||||
PERF_RECORD_SWITCH_CPU_WIDE = 15,
|
||||
|
||||
/*
|
||||
* struct {
|
||||
* struct perf_event_header header;
|
||||
* u32 pid;
|
||||
* u32 tid;
|
||||
* u64 nr_namespaces;
|
||||
* { u64 dev, inode; } [nr_namespaces];
|
||||
* struct sample_id sample_id;
|
||||
* };
|
||||
*/
|
||||
PERF_RECORD_NAMESPACES = 16,
|
||||
|
||||
PERF_RECORD_MAX, /* non-ABI */
|
||||
};
|
||||
|
||||
@ -885,6 +915,7 @@ enum perf_callchain_context {
|
||||
*/
|
||||
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
|
||||
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
|
||||
#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
|
||||
|
||||
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
|
||||
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
|
||||
|
@ -48,6 +48,8 @@
|
||||
#include <linux/parser.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/proc_ns.h>
|
||||
#include <linux/mount.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
|
||||
|
||||
static atomic_t nr_mmap_events __read_mostly;
|
||||
static atomic_t nr_comm_events __read_mostly;
|
||||
static atomic_t nr_namespaces_events __read_mostly;
|
||||
static atomic_t nr_task_events __read_mostly;
|
||||
static atomic_t nr_freq_events __read_mostly;
|
||||
static atomic_t nr_switch_events __read_mostly;
|
||||
@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
|
||||
atomic_dec(&nr_mmap_events);
|
||||
if (event->attr.comm)
|
||||
atomic_dec(&nr_comm_events);
|
||||
if (event->attr.namespaces)
|
||||
atomic_dec(&nr_namespaces_events);
|
||||
if (event->attr.task)
|
||||
atomic_dec(&nr_task_events);
|
||||
if (event->attr.freq)
|
||||
@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
|
||||
void perf_event_fork(struct task_struct *task)
|
||||
{
|
||||
perf_event_task(task, NULL, 1);
|
||||
perf_event_namespaces(task);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
|
||||
perf_event_comm_event(&comm_event);
|
||||
}
|
||||
|
||||
/*
|
||||
* namespaces tracking
|
||||
*/
|
||||
|
||||
struct perf_namespaces_event {
|
||||
struct task_struct *task;
|
||||
|
||||
struct {
|
||||
struct perf_event_header header;
|
||||
|
||||
u32 pid;
|
||||
u32 tid;
|
||||
u64 nr_namespaces;
|
||||
struct perf_ns_link_info link_info[NR_NAMESPACES];
|
||||
} event_id;
|
||||
};
|
||||
|
||||
static int perf_event_namespaces_match(struct perf_event *event)
|
||||
{
|
||||
return event->attr.namespaces;
|
||||
}
|
||||
|
||||
static void perf_event_namespaces_output(struct perf_event *event,
|
||||
void *data)
|
||||
{
|
||||
struct perf_namespaces_event *namespaces_event = data;
|
||||
struct perf_output_handle handle;
|
||||
struct perf_sample_data sample;
|
||||
int ret;
|
||||
|
||||
if (!perf_event_namespaces_match(event))
|
||||
return;
|
||||
|
||||
perf_event_header__init_id(&namespaces_event->event_id.header,
|
||||
&sample, event);
|
||||
ret = perf_output_begin(&handle, event,
|
||||
namespaces_event->event_id.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
namespaces_event->event_id.pid = perf_event_pid(event,
|
||||
namespaces_event->task);
|
||||
namespaces_event->event_id.tid = perf_event_tid(event,
|
||||
namespaces_event->task);
|
||||
|
||||
perf_output_put(&handle, namespaces_event->event_id);
|
||||
|
||||
perf_event__output_id_sample(event, &handle, &sample);
|
||||
|
||||
perf_output_end(&handle);
|
||||
}
|
||||
|
||||
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
|
||||
struct task_struct *task,
|
||||
const struct proc_ns_operations *ns_ops)
|
||||
{
|
||||
struct path ns_path;
|
||||
struct inode *ns_inode;
|
||||
void *error;
|
||||
|
||||
error = ns_get_path(&ns_path, task, ns_ops);
|
||||
if (!error) {
|
||||
ns_inode = ns_path.dentry->d_inode;
|
||||
ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
|
||||
ns_link_info->ino = ns_inode->i_ino;
|
||||
}
|
||||
}
|
||||
|
||||
void perf_event_namespaces(struct task_struct *task)
|
||||
{
|
||||
struct perf_namespaces_event namespaces_event;
|
||||
struct perf_ns_link_info *ns_link_info;
|
||||
|
||||
if (!atomic_read(&nr_namespaces_events))
|
||||
return;
|
||||
|
||||
namespaces_event = (struct perf_namespaces_event){
|
||||
.task = task,
|
||||
.event_id = {
|
||||
.header = {
|
||||
.type = PERF_RECORD_NAMESPACES,
|
||||
.misc = 0,
|
||||
.size = sizeof(namespaces_event.event_id),
|
||||
},
|
||||
/* .pid */
|
||||
/* .tid */
|
||||
.nr_namespaces = NR_NAMESPACES,
|
||||
/* .link_info[NR_NAMESPACES] */
|
||||
},
|
||||
};
|
||||
|
||||
ns_link_info = namespaces_event.event_id.link_info;
|
||||
|
||||
perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
|
||||
task, &mntns_operations);
|
||||
|
||||
#ifdef CONFIG_USER_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
|
||||
task, &userns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_NET_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
|
||||
task, &netns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_UTS_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
|
||||
task, &utsns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_IPC_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
|
||||
task, &ipcns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_PID_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
|
||||
task, &pidns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_CGROUPS
|
||||
perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
|
||||
task, &cgroupns_operations);
|
||||
#endif
|
||||
|
||||
perf_iterate_sb(perf_event_namespaces_output,
|
||||
&namespaces_event,
|
||||
NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmap tracking
|
||||
*/
|
||||
@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
|
||||
atomic_inc(&nr_mmap_events);
|
||||
if (event->attr.comm)
|
||||
atomic_inc(&nr_comm_events);
|
||||
if (event->attr.namespaces)
|
||||
atomic_inc(&nr_namespaces_events);
|
||||
if (event->attr.task)
|
||||
atomic_inc(&nr_task_events);
|
||||
if (event->attr.freq)
|
||||
@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
if (attr.namespaces) {
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
if (attr.freq) {
|
||||
if (attr.sample_freq > sysctl_perf_event_sample_rate)
|
||||
return -EINVAL;
|
||||
|
@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
|
||||
rb->paused = 1;
|
||||
}
|
||||
|
||||
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
|
||||
{
|
||||
/*
|
||||
* OVERWRITE is determined by perf_aux_output_end() and can't
|
||||
* be passed in directly.
|
||||
*/
|
||||
if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE))
|
||||
return;
|
||||
|
||||
handle->aux_flags |= flags;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_aux_output_flag);
|
||||
|
||||
/*
|
||||
* This is called before hardware starts writing to the AUX area to
|
||||
* obtain an output handle and make sure there's room in the buffer.
|
||||
@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
|
||||
handle->event = event;
|
||||
handle->head = aux_head;
|
||||
handle->size = 0;
|
||||
handle->aux_flags = 0;
|
||||
|
||||
/*
|
||||
* In overwrite mode, AUX data stores do not depend on aux_tail,
|
||||
@ -408,34 +422,32 @@ err:
|
||||
* of the AUX buffer management code is that after pmu::stop(), the AUX
|
||||
* transaction must be stopped and therefore drop the AUX reference count.
|
||||
*/
|
||||
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
|
||||
bool truncated)
|
||||
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
|
||||
{
|
||||
bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
|
||||
struct ring_buffer *rb = handle->rb;
|
||||
bool wakeup = truncated;
|
||||
unsigned long aux_head;
|
||||
u64 flags = 0;
|
||||
|
||||
if (truncated)
|
||||
flags |= PERF_AUX_FLAG_TRUNCATED;
|
||||
|
||||
/* in overwrite mode, driver provides aux_head via handle */
|
||||
if (rb->aux_overwrite) {
|
||||
flags |= PERF_AUX_FLAG_OVERWRITE;
|
||||
handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE;
|
||||
|
||||
aux_head = handle->head;
|
||||
local_set(&rb->aux_head, aux_head);
|
||||
} else {
|
||||
handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE;
|
||||
|
||||
aux_head = local_read(&rb->aux_head);
|
||||
local_add(size, &rb->aux_head);
|
||||
}
|
||||
|
||||
if (size || flags) {
|
||||
if (size || handle->aux_flags) {
|
||||
/*
|
||||
* Only send RECORD_AUX if we have something useful to communicate
|
||||
*/
|
||||
|
||||
perf_event_aux_event(handle->event, aux_head, size, flags);
|
||||
perf_event_aux_event(handle->event, aux_head, size,
|
||||
handle->aux_flags);
|
||||
}
|
||||
|
||||
aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
|
||||
@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
|
||||
}
|
||||
|
||||
if (wakeup) {
|
||||
if (truncated)
|
||||
if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
|
||||
handle->event->pending_disable = 1;
|
||||
perf_output_wakeup(handle);
|
||||
}
|
||||
|
@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
|
||||
}
|
||||
}
|
||||
|
||||
perf_event_namespaces(current);
|
||||
|
||||
bad_unshare_cleanup_cred:
|
||||
if (new_cred)
|
||||
put_cred(new_cred);
|
||||
|
@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr)
|
||||
* This returns encoded errors if it fails to look up symbol or invalid
|
||||
* combination of parameters.
|
||||
*/
|
||||
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
||||
static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
|
||||
const char *symbol_name, unsigned int offset)
|
||||
{
|
||||
kprobe_opcode_t *addr = p->addr;
|
||||
|
||||
if ((p->symbol_name && p->addr) ||
|
||||
(!p->symbol_name && !p->addr))
|
||||
if ((symbol_name && addr) || (!symbol_name && !addr))
|
||||
goto invalid;
|
||||
|
||||
if (p->symbol_name) {
|
||||
kprobe_lookup_name(p->symbol_name, addr);
|
||||
if (symbol_name) {
|
||||
kprobe_lookup_name(symbol_name, addr);
|
||||
if (!addr)
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
|
||||
addr = (kprobe_opcode_t *)(((char *)addr) + offset);
|
||||
if (addr)
|
||||
return addr;
|
||||
|
||||
@ -1413,6 +1411,11 @@ invalid:
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
||||
{
|
||||
return _kprobe_addr(p->addr, p->symbol_name, p->offset);
|
||||
}
|
||||
|
||||
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
|
||||
static struct kprobe *__get_valid_kprobe(struct kprobe *p)
|
||||
{
|
||||
@ -1740,11 +1743,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobes);
|
||||
|
||||
int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
|
||||
unsigned long val, void *data)
|
||||
int __weak kprobe_exceptions_notify(struct notifier_block *self,
|
||||
unsigned long val, void *data)
|
||||
{
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
|
||||
|
||||
static struct notifier_block kprobe_exceptions_nb = {
|
||||
.notifier_call = kprobe_exceptions_notify,
|
||||
@ -1875,6 +1879,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
|
||||
}
|
||||
NOKPROBE_SYMBOL(pre_handler_kretprobe);
|
||||
|
||||
bool __weak arch_function_offset_within_entry(unsigned long offset)
|
||||
{
|
||||
return !offset;
|
||||
}
|
||||
|
||||
bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
|
||||
{
|
||||
kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
|
||||
|
||||
if (IS_ERR(kp_addr))
|
||||
return false;
|
||||
|
||||
if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
|
||||
!arch_function_offset_within_entry(offset))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
int ret = 0;
|
||||
@ -1882,6 +1905,9 @@ int register_kretprobe(struct kretprobe *rp)
|
||||
int i;
|
||||
void *addr;
|
||||
|
||||
if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
|
||||
return -EINVAL;
|
||||
|
||||
if (kretprobe_blacklist_size) {
|
||||
addr = kprobe_addr(&rp->kp);
|
||||
if (IS_ERR(addr))
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <linux/file.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
static struct kmem_cache *nsproxy_cachep;
|
||||
|
||||
@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
|
||||
goto out;
|
||||
}
|
||||
switch_task_namespaces(tsk, new_nsproxy);
|
||||
|
||||
perf_event_namespaces(tsk);
|
||||
out:
|
||||
fput(file);
|
||||
return err;
|
||||
|
@ -455,7 +455,7 @@ config UPROBE_EVENTS
|
||||
select UPROBES
|
||||
select PROBE_EVENTS
|
||||
select TRACING
|
||||
default n
|
||||
default y
|
||||
help
|
||||
This allows the user to add tracing events on top of userspace
|
||||
dynamic events (similar to tracepoints) on the fly via the trace
|
||||
|
@ -4355,6 +4355,7 @@ static const char readme_msg[] =
|
||||
"\t -:[<group>/]<event>\n"
|
||||
#ifdef CONFIG_KPROBE_EVENTS
|
||||
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
|
||||
"place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
|
||||
#endif
|
||||
#ifdef CONFIG_UPROBE_EVENTS
|
||||
"\t place: <path>:<offset>\n"
|
||||
|
@ -681,10 +681,6 @@ static int create_trace_kprobe(int argc, char **argv)
|
||||
return -EINVAL;
|
||||
}
|
||||
if (isdigit(argv[1][0])) {
|
||||
if (is_return) {
|
||||
pr_info("Return probe point must be a symbol.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
/* an address specified */
|
||||
ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
|
||||
if (ret) {
|
||||
@ -700,8 +696,9 @@ static int create_trace_kprobe(int argc, char **argv)
|
||||
pr_info("Failed to parse symbol.\n");
|
||||
return ret;
|
||||
}
|
||||
if (offset && is_return) {
|
||||
pr_info("Return probe must be used without offset.\n");
|
||||
if (offset && is_return &&
|
||||
!function_offset_within_entry(NULL, symbol, offset)) {
|
||||
pr_info("Given offset is not valid for return probe.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
@ -181,10 +181,23 @@ struct kvm_arch_memory_slot {
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
|
||||
(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
|
||||
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
|
||||
#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
|
||||
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
|
||||
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
|
||||
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
|
||||
|
||||
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
|
||||
|
||||
/* KVM_IRQ_LINE irq field index values */
|
||||
|
@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
|
||||
(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
|
||||
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
|
||||
#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
|
||||
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
|
||||
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
|
||||
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
|
||||
|
||||
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
|
||||
|
||||
/* Device Control API on vcpu fd */
|
||||
|
@ -413,6 +413,26 @@ struct kvm_get_htab_header {
|
||||
__u16 n_invalid;
|
||||
};
|
||||
|
||||
/* For KVM_PPC_CONFIGURE_V3_MMU */
|
||||
struct kvm_ppc_mmuv3_cfg {
|
||||
__u64 flags;
|
||||
__u64 process_table; /* second doubleword of partition table entry */
|
||||
};
|
||||
|
||||
/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
|
||||
#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
|
||||
#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */
|
||||
|
||||
/* For KVM_PPC_GET_RMMU_INFO */
|
||||
struct kvm_ppc_rmmu_info {
|
||||
struct kvm_ppc_radix_geom {
|
||||
__u8 page_shift;
|
||||
__u8 level_bits[4];
|
||||
__u8 pad[3];
|
||||
} geometries[8];
|
||||
__u32 ap_encodings[8];
|
||||
};
|
||||
|
||||
/* Per-vcpu XICS interrupt controller state */
|
||||
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
|
||||
|
||||
@ -613,5 +633,7 @@ struct kvm_get_htab_header {
|
||||
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
|
||||
#define KVM_XICS_MASKED (1ULL << 41)
|
||||
#define KVM_XICS_PENDING (1ULL << 42)
|
||||
#define KVM_XICS_PRESENTED (1ULL << 43)
|
||||
#define KVM_XICS_QUEUED (1ULL << 44)
|
||||
|
||||
#endif /* __LINUX_KVM_POWERPC_H */
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define LOCK_PREFIX "\n\tlock; "
|
||||
|
||||
#include <asm/cmpxchg.h>
|
||||
|
||||
/*
|
||||
* Atomic operations that C can't guarantee us. Useful for
|
||||
* resource counting etc..
|
||||
@ -62,4 +64,9 @@ static inline int atomic_dec_and_test(atomic_t *v)
|
||||
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
|
||||
}
|
||||
|
||||
static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
|
||||
{
|
||||
return cmpxchg(&v->counter, old, new);
|
||||
}
|
||||
|
||||
#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
|
||||
|
89
tools/arch/x86/include/asm/cmpxchg.h
Normal file
89
tools/arch/x86/include/asm/cmpxchg.h
Normal file
@ -0,0 +1,89 @@
|
||||
#ifndef TOOLS_ASM_X86_CMPXCHG_H
|
||||
#define TOOLS_ASM_X86_CMPXCHG_H
|
||||
|
||||
#include <linux/compiler.h>
|
||||
|
||||
/*
|
||||
* Non-existant functions to indicate usage errors at link time
|
||||
* (or compile-time if the compiler implements __compiletime_error().
|
||||
*/
|
||||
extern void __cmpxchg_wrong_size(void)
|
||||
__compiletime_error("Bad argument size for cmpxchg");
|
||||
|
||||
/*
|
||||
* Constants for operation sizes. On 32-bit, the 64-bit size it set to
|
||||
* -1 because sizeof will never return -1, thereby making those switch
|
||||
* case statements guaranteeed dead code which the compiler will
|
||||
* eliminate, and allowing the "missing symbol in the default case" to
|
||||
* indicate a usage error.
|
||||
*/
|
||||
#define __X86_CASE_B 1
|
||||
#define __X86_CASE_W 2
|
||||
#define __X86_CASE_L 4
|
||||
#ifdef __x86_64__
|
||||
#define __X86_CASE_Q 8
|
||||
#else
|
||||
#define __X86_CASE_Q -1 /* sizeof will never return -1 */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Atomic compare and exchange. Compare OLD with MEM, if identical,
|
||||
* store NEW in MEM. Return the initial value in MEM. Success is
|
||||
* indicated by comparing RETURN with OLD.
|
||||
*/
|
||||
#define __raw_cmpxchg(ptr, old, new, size, lock) \
|
||||
({ \
|
||||
__typeof__(*(ptr)) __ret; \
|
||||
__typeof__(*(ptr)) __old = (old); \
|
||||
__typeof__(*(ptr)) __new = (new); \
|
||||
switch (size) { \
|
||||
case __X86_CASE_B: \
|
||||
{ \
|
||||
volatile u8 *__ptr = (volatile u8 *)(ptr); \
|
||||
asm volatile(lock "cmpxchgb %2,%1" \
|
||||
: "=a" (__ret), "+m" (*__ptr) \
|
||||
: "q" (__new), "0" (__old) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
} \
|
||||
case __X86_CASE_W: \
|
||||
{ \
|
||||
volatile u16 *__ptr = (volatile u16 *)(ptr); \
|
||||
asm volatile(lock "cmpxchgw %2,%1" \
|
||||
: "=a" (__ret), "+m" (*__ptr) \
|
||||
: "r" (__new), "0" (__old) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
} \
|
||||
case __X86_CASE_L: \
|
||||
{ \
|
||||
volatile u32 *__ptr = (volatile u32 *)(ptr); \
|
||||
asm volatile(lock "cmpxchgl %2,%1" \
|
||||
: "=a" (__ret), "+m" (*__ptr) \
|
||||
: "r" (__new), "0" (__old) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
} \
|
||||
case __X86_CASE_Q: \
|
||||
{ \
|
||||
volatile u64 *__ptr = (volatile u64 *)(ptr); \
|
||||
asm volatile(lock "cmpxchgq %2,%1" \
|
||||
: "=a" (__ret), "+m" (*__ptr) \
|
||||
: "r" (__new), "0" (__old) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
} \
|
||||
default: \
|
||||
__cmpxchg_wrong_size(); \
|
||||
} \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define __cmpxchg(ptr, old, new, size) \
|
||||
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
|
||||
|
||||
#define cmpxchg(ptr, old, new) \
|
||||
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
|
||||
|
||||
|
||||
#endif /* TOOLS_ASM_X86_CMPXCHG_H */
|
@ -100,7 +100,7 @@
|
||||
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
|
||||
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
|
||||
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
|
||||
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
|
||||
#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
|
||||
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
|
||||
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
|
||||
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
|
||||
@ -186,7 +186,8 @@
|
||||
*
|
||||
* Reuse free bits when adding new feature flags!
|
||||
*/
|
||||
|
||||
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
|
||||
#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
|
||||
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
|
||||
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
|
||||
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
|
||||
@ -289,7 +290,8 @@
|
||||
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
|
||||
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
|
||||
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
|
||||
#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
|
||||
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
|
||||
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
|
||||
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
|
||||
@ -321,5 +323,4 @@
|
||||
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
|
||||
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
|
||||
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
|
||||
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
|
@ -286,7 +286,7 @@ ENDPROC(memcpy_mcsafe_unrolled)
|
||||
_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
|
||||
|
@ -63,6 +63,7 @@ FEATURE_TESTS_BASIC := \
|
||||
lzma \
|
||||
get_cpuid \
|
||||
bpf \
|
||||
sched_getcpu \
|
||||
sdt
|
||||
|
||||
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
|
||||
|
@ -48,21 +48,22 @@ FILES= \
|
||||
test-get_cpuid.bin \
|
||||
test-sdt.bin \
|
||||
test-cxx.bin \
|
||||
test-jvmti.bin
|
||||
test-jvmti.bin \
|
||||
test-sched_getcpu.bin
|
||||
|
||||
FILES := $(addprefix $(OUTPUT),$(FILES))
|
||||
|
||||
CC := $(CROSS_COMPILE)gcc -MD
|
||||
CXX := $(CROSS_COMPILE)g++ -MD
|
||||
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
|
||||
CC ?= $(CROSS_COMPILE)gcc
|
||||
CXX ?= $(CROSS_COMPILE)g++
|
||||
PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
|
||||
LLVM_CONFIG ?= llvm-config
|
||||
|
||||
all: $(FILES)
|
||||
|
||||
__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
|
||||
__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
|
||||
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
|
||||
|
||||
__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
|
||||
__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
|
||||
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
|
||||
|
||||
###############################
|
||||
@ -91,6 +92,9 @@ $(OUTPUT)test-libelf.bin:
|
||||
$(OUTPUT)test-glibc.bin:
|
||||
$(BUILD)
|
||||
|
||||
$(OUTPUT)test-sched_getcpu.bin:
|
||||
$(BUILD)
|
||||
|
||||
DWARFLIBS := -ldw
|
||||
ifeq ($(findstring -static,${LDFLAGS}),-static)
|
||||
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
|
||||
@ -171,7 +175,7 @@ $(OUTPUT)test-libperl.bin:
|
||||
$(BUILD) $(FLAGS_PERL_EMBED)
|
||||
|
||||
$(OUTPUT)test-libpython.bin:
|
||||
$(BUILD)
|
||||
$(BUILD) $(FLAGS_PYTHON_EMBED)
|
||||
|
||||
$(OUTPUT)test-libpython-version.bin:
|
||||
$(BUILD)
|
||||
|
@ -117,6 +117,10 @@
|
||||
# include "test-pthread-attr-setaffinity-np.c"
|
||||
#undef main
|
||||
|
||||
#define main main_test_sched_getcpu
|
||||
# include "test-sched_getcpu.c"
|
||||
#undef main
|
||||
|
||||
# if 0
|
||||
/*
|
||||
* Disable libbabeltrace check for test-all, because the requested
|
||||
@ -182,6 +186,7 @@ int main(int argc, char *argv[])
|
||||
main_test_get_cpuid();
|
||||
main_test_bpf();
|
||||
main_test_libcrypto();
|
||||
main_test_sched_getcpu();
|
||||
main_test_sdt();
|
||||
|
||||
return 0;
|
||||
|
7
tools/build/feature/test-sched_getcpu.c
Normal file
7
tools/build/feature/test-sched_getcpu.c
Normal file
@ -0,0 +1,7 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
return sched_getcpu();
|
||||
}
|
@ -60,4 +60,12 @@ static inline int atomic_dec_and_test(atomic_t *v)
|
||||
return __sync_sub_and_fetch(&v->counter, 1) == 0;
|
||||
}
|
||||
|
||||
#define cmpxchg(ptr, oldval, newval) \
|
||||
__sync_val_compare_and_swap(ptr, oldval, newval)
|
||||
|
||||
static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval)
|
||||
{
|
||||
return cmpxchg(&(v)->counter, oldval, newval);
|
||||
}
|
||||
|
||||
#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
|
||||
|
@ -3,4 +3,10 @@
|
||||
|
||||
#include <asm/atomic.h>
|
||||
|
||||
/* atomic_cmpxchg_relaxed */
|
||||
#ifndef atomic_cmpxchg_relaxed
|
||||
#define atomic_cmpxchg_relaxed atomic_cmpxchg
|
||||
#define atomic_cmpxchg_release atomic_cmpxchg
|
||||
#endif /* atomic_cmpxchg_relaxed */
|
||||
|
||||
#endif /* __TOOLS_LINUX_ATOMIC_H */
|
||||
|
10
tools/include/linux/bug.h
Normal file
10
tools/include/linux/bug.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef _TOOLS_PERF_LINUX_BUG_H
|
||||
#define _TOOLS_PERF_LINUX_BUG_H
|
||||
|
||||
/* Force a compilation error if condition is true, but also produce a
|
||||
result (of value 0 and type size_t), so the expression can be used
|
||||
e.g. in a structure initializer (or where-ever else comma expressions
|
||||
aren't permitted). */
|
||||
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
|
||||
|
||||
#endif /* _TOOLS_PERF_LINUX_BUG_H */
|
@ -12,3 +12,10 @@
|
||||
#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
|
||||
# define __fallthrough __attribute__ ((fallthrough))
|
||||
#endif
|
||||
|
||||
#if GCC_VERSION >= 40300
|
||||
# define __compiletime_error(message) __attribute__((error(message)))
|
||||
#endif /* GCC_VERSION >= 40300 */
|
||||
|
||||
/* &a[0] degrades to a pointer: a different type from an array */
|
||||
#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
|
||||
|
@ -5,6 +5,10 @@
|
||||
#include <linux/compiler-gcc.h>
|
||||
#endif
|
||||
|
||||
#ifndef __compiletime_error
|
||||
# define __compiletime_error(message)
|
||||
#endif
|
||||
|
||||
/* Optimization barrier */
|
||||
/* The "volatile" is due to gcc bugs */
|
||||
#define barrier() __asm__ __volatile__("": : :"memory")
|
||||
@ -13,6 +17,11 @@
|
||||
# define __always_inline inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
/* Are two types/vars the same type (ignoring qualifiers)? */
|
||||
#ifndef __same_type
|
||||
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
|
||||
#endif
|
||||
|
||||
#ifdef __ANDROID__
|
||||
/*
|
||||
* FIXME: Big hammer to get rid of tons of:
|
||||
|
@ -13,10 +13,6 @@
|
||||
#include <linux/hash.h>
|
||||
#include <linux/log2.h>
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
#endif
|
||||
|
||||
#define DEFINE_HASHTABLE(name, bits) \
|
||||
struct hlist_head name[1 << (bits)] = \
|
||||
{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
|
||||
|
@ -4,6 +4,11 @@
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
#include <assert.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
#ifndef UINT_MAX
|
||||
#define UINT_MAX (~0U)
|
||||
#endif
|
||||
|
||||
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
|
||||
|
||||
@ -72,6 +77,8 @@
|
||||
int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
|
||||
int scnprintf(char * buf, size_t size, const char * fmt, ...);
|
||||
|
||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
|
||||
|
||||
/*
|
||||
* This looks more complex than it should be. But we need to
|
||||
* get the type for the ~ right in round_down (it needs to be
|
||||
|
@ -12,6 +12,9 @@
|
||||
#ifndef _TOOLS_LINUX_LOG2_H
|
||||
#define _TOOLS_LINUX_LOG2_H
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* non-constant log of base 2 calculators
|
||||
* - the arch may override these in asm/bitops.h if they can be implemented
|
||||
|
151
tools/include/linux/refcount.h
Normal file
151
tools/include/linux/refcount.h
Normal file
@ -0,0 +1,151 @@
|
||||
#ifndef _TOOLS_LINUX_REFCOUNT_H
|
||||
#define _TOOLS_LINUX_REFCOUNT_H
|
||||
|
||||
/*
|
||||
* Variant of atomic_t specialized for reference counts.
|
||||
*
|
||||
* The interface matches the atomic_t interface (to aid in porting) but only
|
||||
* provides the few functions one should use for reference counting.
|
||||
*
|
||||
* It differs in that the counter saturates at UINT_MAX and will not move once
|
||||
* there. This avoids wrapping the counter and causing 'spurious'
|
||||
* use-after-free issues.
|
||||
*
|
||||
* Memory ordering rules are slightly relaxed wrt regular atomic_t functions
|
||||
* and provide only what is strictly required for refcounts.
|
||||
*
|
||||
* The increments are fully relaxed; these will not provide ordering. The
|
||||
* rationale is that whatever is used to obtain the object we're increasing the
|
||||
* reference count on will provide the ordering. For locked data structures,
|
||||
* its the lock acquire, for RCU/lockless data structures its the dependent
|
||||
* load.
|
||||
*
|
||||
* Do note that inc_not_zero() provides a control dependency which will order
|
||||
* future stores against the inc, this ensures we'll never modify the object
|
||||
* if we did not in fact acquire a reference.
|
||||
*
|
||||
* The decrements will provide release order, such that all the prior loads and
|
||||
* stores will be issued before, it also provides a control dependency, which
|
||||
* will order us against the subsequent free().
|
||||
*
|
||||
* The control dependency is against the load of the cmpxchg (ll/sc) that
|
||||
* succeeded. This means the stores aren't fully ordered, but this is fine
|
||||
* because the 1->0 transition indicates no concurrency.
|
||||
*
|
||||
* Note that the allocator is responsible for ordering things between free()
|
||||
* and alloc().
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define REFCOUNT_WARN(cond, str) (void)(cond)
|
||||
#define __refcount_check
|
||||
#else
|
||||
#define REFCOUNT_WARN(cond, str) BUG_ON(cond)
|
||||
#define __refcount_check __must_check
|
||||
#endif
|
||||
|
||||
typedef struct refcount_struct {
|
||||
atomic_t refs;
|
||||
} refcount_t;
|
||||
|
||||
#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
|
||||
|
||||
static inline void refcount_set(refcount_t *r, unsigned int n)
|
||||
{
|
||||
atomic_set(&r->refs, n);
|
||||
}
|
||||
|
||||
static inline unsigned int refcount_read(const refcount_t *r)
|
||||
{
|
||||
return atomic_read(&r->refs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller has guaranteed the
|
||||
* object memory to be stable (RCU, etc.). It does provide a control dependency
|
||||
* and thereby orders future stores. See the comment on top.
|
||||
*/
|
||||
static inline __refcount_check
|
||||
bool refcount_inc_not_zero(refcount_t *r)
|
||||
{
|
||||
unsigned int old, new, val = atomic_read(&r->refs);
|
||||
|
||||
for (;;) {
|
||||
new = val + 1;
|
||||
|
||||
if (!val)
|
||||
return false;
|
||||
|
||||
if (unlikely(!new))
|
||||
return true;
|
||||
|
||||
old = atomic_cmpxchg_relaxed(&r->refs, val, new);
|
||||
if (old == val)
|
||||
break;
|
||||
|
||||
val = old;
|
||||
}
|
||||
|
||||
REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller already has a
|
||||
* reference on the object, will WARN when this is not so.
|
||||
*/
|
||||
static inline void refcount_inc(refcount_t *r)
|
||||
{
|
||||
REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
|
||||
* decrement when saturated at UINT_MAX.
|
||||
*
|
||||
* Provides release memory ordering, such that prior loads and stores are done
|
||||
* before, and provides a control dependency such that free() must come after.
|
||||
* See the comment on top.
|
||||
*/
|
||||
static inline __refcount_check
|
||||
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
|
||||
{
|
||||
unsigned int old, new, val = atomic_read(&r->refs);
|
||||
|
||||
for (;;) {
|
||||
if (unlikely(val == UINT_MAX))
|
||||
return false;
|
||||
|
||||
new = val - i;
|
||||
if (new > val) {
|
||||
REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
old = atomic_cmpxchg_release(&r->refs, val, new);
|
||||
if (old == val)
|
||||
break;
|
||||
|
||||
val = old;
|
||||
}
|
||||
|
||||
return !new;
|
||||
}
|
||||
|
||||
static inline __refcount_check
|
||||
bool refcount_dec_and_test(refcount_t *r)
|
||||
{
|
||||
return refcount_sub_and_test(1, r);
|
||||
}
|
||||
|
||||
|
||||
#endif /* _ATOMIC_LINUX_REFCOUNT_H */
|
@ -18,4 +18,6 @@ extern size_t strlcpy(char *dest, const char *src, size_t size);
|
||||
|
||||
char *str_error_r(int errnum, char *buf, size_t buflen);
|
||||
|
||||
int prefixcmp(const char *str, const char *prefix);
|
||||
|
||||
#endif /* _LINUX_STRING_H_ */
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
|
||||
#include <asm/types.h>
|
||||
#include <asm/posix_types.h>
|
||||
|
||||
struct page;
|
||||
struct kmem_cache;
|
||||
|
72
tools/include/uapi/linux/fcntl.h
Normal file
72
tools/include/uapi/linux/fcntl.h
Normal file
@ -0,0 +1,72 @@
|
||||
#ifndef _UAPI_LINUX_FCNTL_H
|
||||
#define _UAPI_LINUX_FCNTL_H
|
||||
|
||||
#include <asm/fcntl.h>
|
||||
|
||||
#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
|
||||
#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
|
||||
|
||||
/*
|
||||
* Cancel a blocking posix lock; internal use only until we expose an
|
||||
* asynchronous lock api to userspace:
|
||||
*/
|
||||
#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5)
|
||||
|
||||
/* Create a file descriptor with FD_CLOEXEC set. */
|
||||
#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
|
||||
|
||||
/*
|
||||
* Request nofications on a directory.
|
||||
* See below for events that may be notified.
|
||||
*/
|
||||
#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
|
||||
|
||||
/*
|
||||
* Set and get of pipe page size array
|
||||
*/
|
||||
#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
|
||||
#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
|
||||
|
||||
/*
|
||||
* Set/Get seals
|
||||
*/
|
||||
#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
|
||||
#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
|
||||
|
||||
/*
|
||||
* Types of seals
|
||||
*/
|
||||
#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
|
||||
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
|
||||
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
|
||||
#define F_SEAL_WRITE 0x0008 /* prevent writes */
|
||||
/* (1U << 31) is reserved for signed error codes */
|
||||
|
||||
/*
|
||||
* Types of directory notifications that may be requested.
|
||||
*/
|
||||
#define DN_ACCESS 0x00000001 /* File accessed */
|
||||
#define DN_MODIFY 0x00000002 /* File modified */
|
||||
#define DN_CREATE 0x00000004 /* File created */
|
||||
#define DN_DELETE 0x00000008 /* File removed */
|
||||
#define DN_RENAME 0x00000010 /* File renamed */
|
||||
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
|
||||
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
|
||||
|
||||
#define AT_FDCWD -100 /* Special value used to indicate
|
||||
openat should use the current
|
||||
working directory. */
|
||||
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
|
||||
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
|
||||
unlinking file. */
|
||||
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
|
||||
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
|
||||
#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
|
||||
|
||||
#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
|
||||
#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
|
||||
#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
|
||||
#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
|
||||
|
||||
|
||||
#endif /* _UAPI_LINUX_FCNTL_H */
|
@ -344,7 +344,8 @@ struct perf_event_attr {
|
||||
use_clockid : 1, /* use @clockid for time fields */
|
||||
context_switch : 1, /* context switch data */
|
||||
write_backward : 1, /* Write ring buffer from end to beginning */
|
||||
__reserved_1 : 36;
|
||||
namespaces : 1, /* include namespaces data */
|
||||
__reserved_1 : 35;
|
||||
|
||||
union {
|
||||
__u32 wakeup_events; /* wakeup every n events */
|
||||
@ -610,6 +611,23 @@ struct perf_event_header {
|
||||
__u16 size;
|
||||
};
|
||||
|
||||
struct perf_ns_link_info {
|
||||
__u64 dev;
|
||||
__u64 ino;
|
||||
};
|
||||
|
||||
enum {
|
||||
NET_NS_INDEX = 0,
|
||||
UTS_NS_INDEX = 1,
|
||||
IPC_NS_INDEX = 2,
|
||||
PID_NS_INDEX = 3,
|
||||
USER_NS_INDEX = 4,
|
||||
MNT_NS_INDEX = 5,
|
||||
CGROUP_NS_INDEX = 6,
|
||||
|
||||
NR_NAMESPACES, /* number of available namespaces */
|
||||
};
|
||||
|
||||
enum perf_event_type {
|
||||
|
||||
/*
|
||||
@ -862,6 +880,18 @@ enum perf_event_type {
|
||||
*/
|
||||
PERF_RECORD_SWITCH_CPU_WIDE = 15,
|
||||
|
||||
/*
|
||||
* struct {
|
||||
* struct perf_event_header header;
|
||||
* u32 pid;
|
||||
* u32 tid;
|
||||
* u64 nr_namespaces;
|
||||
* { u64 dev, inode; } [nr_namespaces];
|
||||
* struct sample_id sample_id;
|
||||
* };
|
||||
*/
|
||||
PERF_RECORD_NAMESPACES = 16,
|
||||
|
||||
PERF_RECORD_MAX, /* non-ABI */
|
||||
};
|
||||
|
||||
@ -885,6 +915,7 @@ enum perf_callchain_context {
|
||||
*/
|
||||
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
|
||||
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
|
||||
#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
|
||||
|
||||
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
|
||||
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
|
||||
|
177
tools/include/uapi/linux/stat.h
Normal file
177
tools/include/uapi/linux/stat.h
Normal file
@ -0,0 +1,177 @@
|
||||
#ifndef _UAPI_LINUX_STAT_H
|
||||
#define _UAPI_LINUX_STAT_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
|
||||
|
||||
#define S_IFMT 00170000
|
||||
#define S_IFSOCK 0140000
|
||||
#define S_IFLNK 0120000
|
||||
#define S_IFREG 0100000
|
||||
#define S_IFBLK 0060000
|
||||
#define S_IFDIR 0040000
|
||||
#define S_IFCHR 0020000
|
||||
#define S_IFIFO 0010000
|
||||
#define S_ISUID 0004000
|
||||
#define S_ISGID 0002000
|
||||
#define S_ISVTX 0001000
|
||||
|
||||
#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
|
||||
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
|
||||
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
|
||||
#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
|
||||
#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
|
||||
#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
|
||||
#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
|
||||
|
||||
#define S_IRWXU 00700
|
||||
#define S_IRUSR 00400
|
||||
#define S_IWUSR 00200
|
||||
#define S_IXUSR 00100
|
||||
|
||||
#define S_IRWXG 00070
|
||||
#define S_IRGRP 00040
|
||||
#define S_IWGRP 00020
|
||||
#define S_IXGRP 00010
|
||||
|
||||
#define S_IRWXO 00007
|
||||
#define S_IROTH 00004
|
||||
#define S_IWOTH 00002
|
||||
#define S_IXOTH 00001
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Timestamp structure for the timestamps in struct statx.
|
||||
*
|
||||
* tv_sec holds the number of seconds before (negative) or after (positive)
|
||||
* 00:00:00 1st January 1970 UTC.
|
||||
*
|
||||
* tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
|
||||
* negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
|
||||
*
|
||||
* Note that if both tv_sec and tv_nsec are non-zero, then the two values must
|
||||
* either be both positive or both negative.
|
||||
*
|
||||
* __reserved is held in case we need a yet finer resolution.
|
||||
*/
|
||||
struct statx_timestamp {
|
||||
__s64 tv_sec;
|
||||
__s32 tv_nsec;
|
||||
__s32 __reserved;
|
||||
};
|
||||
|
||||
/*
|
||||
* Structures for the extended file attribute retrieval system call
|
||||
* (statx()).
|
||||
*
|
||||
* The caller passes a mask of what they're specifically interested in as a
|
||||
* parameter to statx(). What statx() actually got will be indicated in
|
||||
* st_mask upon return.
|
||||
*
|
||||
* For each bit in the mask argument:
|
||||
*
|
||||
* - if the datum is not supported:
|
||||
*
|
||||
* - the bit will be cleared, and
|
||||
*
|
||||
* - the datum will be set to an appropriate fabricated value if one is
|
||||
* available (eg. CIFS can take a default uid and gid), otherwise
|
||||
*
|
||||
* - the field will be cleared;
|
||||
*
|
||||
* - otherwise, if explicitly requested:
|
||||
*
|
||||
* - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
|
||||
* set or if the datum is considered out of date, and
|
||||
*
|
||||
* - the field will be filled in and the bit will be set;
|
||||
*
|
||||
* - otherwise, if not requested, but available in approximate form without any
|
||||
* effort, it will be filled in anyway, and the bit will be set upon return
|
||||
* (it might not be up to date, however, and no attempt will be made to
|
||||
* synchronise the internal state first);
|
||||
*
|
||||
* - otherwise the field and the bit will be cleared before returning.
|
||||
*
|
||||
* Items in STATX_BASIC_STATS may be marked unavailable on return, but they
|
||||
* will have values installed for compatibility purposes so that stat() and
|
||||
* co. can be emulated in userspace.
|
||||
*/
|
||||
struct statx {
|
||||
/* 0x00 */
|
||||
__u32 stx_mask; /* What results were written [uncond] */
|
||||
__u32 stx_blksize; /* Preferred general I/O size [uncond] */
|
||||
__u64 stx_attributes; /* Flags conveying information about the file [uncond] */
|
||||
/* 0x10 */
|
||||
__u32 stx_nlink; /* Number of hard links */
|
||||
__u32 stx_uid; /* User ID of owner */
|
||||
__u32 stx_gid; /* Group ID of owner */
|
||||
__u16 stx_mode; /* File mode */
|
||||
__u16 __spare0[1];
|
||||
/* 0x20 */
|
||||
__u64 stx_ino; /* Inode number */
|
||||
__u64 stx_size; /* File size */
|
||||
__u64 stx_blocks; /* Number of 512-byte blocks allocated */
|
||||
__u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
|
||||
/* 0x40 */
|
||||
struct statx_timestamp stx_atime; /* Last access time */
|
||||
struct statx_timestamp stx_btime; /* File creation time */
|
||||
struct statx_timestamp stx_ctime; /* Last attribute change time */
|
||||
struct statx_timestamp stx_mtime; /* Last data modification time */
|
||||
/* 0x80 */
|
||||
__u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
|
||||
__u32 stx_rdev_minor;
|
||||
__u32 stx_dev_major; /* ID of device containing file [uncond] */
|
||||
__u32 stx_dev_minor;
|
||||
/* 0x90 */
|
||||
__u64 __spare2[14]; /* Spare space for future expansion */
|
||||
/* 0x100 */
|
||||
};
|
||||
|
||||
/*
|
||||
* Flags to be stx_mask
|
||||
*
|
||||
* Query request/result mask for statx() and struct statx::stx_mask.
|
||||
*
|
||||
* These bits should be set in the mask argument of statx() to request
|
||||
* particular items when calling statx().
|
||||
*/
|
||||
#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */
|
||||
#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */
|
||||
#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */
|
||||
#define STATX_UID 0x00000008U /* Want/got stx_uid */
|
||||
#define STATX_GID 0x00000010U /* Want/got stx_gid */
|
||||
#define STATX_ATIME 0x00000020U /* Want/got stx_atime */
|
||||
#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */
|
||||
#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */
|
||||
#define STATX_INO 0x00000100U /* Want/got stx_ino */
|
||||
#define STATX_SIZE 0x00000200U /* Want/got stx_size */
|
||||
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
|
||||
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
|
||||
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
|
||||
#define STATX_ALL 0x00000fffU /* All currently supported flags */
|
||||
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
|
||||
|
||||
/*
|
||||
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
|
||||
*
|
||||
* These give information about the features or the state of a file that might
|
||||
* be of use to ordinary userspace programs such as GUIs or ls rather than
|
||||
* specialised tools.
|
||||
*
|
||||
* Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
|
||||
* semantically. Where possible, the numerical value is picked to correspond
|
||||
* also.
|
||||
*/
|
||||
#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */
|
||||
#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */
|
||||
#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
|
||||
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
|
||||
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
|
||||
|
||||
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
|
||||
|
||||
|
||||
#endif /* _UAPI_LINUX_STAT_H */
|
@ -439,6 +439,35 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
|
||||
return filename__read_str(path, buf, sizep);
|
||||
}
|
||||
|
||||
int sysfs__read_bool(const char *entry, bool *value)
|
||||
{
|
||||
char *buf;
|
||||
size_t size;
|
||||
int ret;
|
||||
|
||||
ret = sysfs__read_str(entry, &buf, &size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
switch (buf[0]) {
|
||||
case '1':
|
||||
case 'y':
|
||||
case 'Y':
|
||||
*value = true;
|
||||
break;
|
||||
case '0':
|
||||
case 'n':
|
||||
case 'N':
|
||||
*value = false;
|
||||
break;
|
||||
default:
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
free(buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
int sysctl__read_int(const char *sysctl, int *value)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
|
@ -37,4 +37,5 @@ int sysctl__read_int(const char *sysctl, int *value);
|
||||
int sysfs__read_int(const char *entry, int *value);
|
||||
int sysfs__read_ull(const char *entry, unsigned long long *value);
|
||||
int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
|
||||
int sysfs__read_bool(const char *entry, bool *value);
|
||||
#endif /* __API_FS__ */
|
||||
|
@ -87,3 +87,12 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size)
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int prefixcmp(const char *str, const char *prefix)
|
||||
{
|
||||
for (; ; str++, prefix++)
|
||||
if (!*prefix)
|
||||
return 0;
|
||||
else if (*str != *prefix)
|
||||
return (unsigned char)*prefix - (unsigned char)*str;
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <linux/string.h>
|
||||
#include <termios.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/types.h>
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define __SUBCMD_HELP_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
|
||||
struct cmdnames {
|
||||
size_t alloc;
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -79,13 +79,4 @@ static inline void astrcat(char **out, const char *add)
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
static inline int prefixcmp(const char *str, const char *prefix)
|
||||
{
|
||||
for (; ; str++, prefix++)
|
||||
if (!*prefix)
|
||||
return 0;
|
||||
else if (*str != *prefix)
|
||||
return (unsigned char)*prefix - (unsigned char)*str;
|
||||
}
|
||||
|
||||
#endif /* __SUBCMD_UTIL_H */
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <ctype.h>
|
||||
#include "symbol/kallsyms.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -36,8 +36,7 @@
|
||||
#include "warn.h"
|
||||
|
||||
#include <linux/hashtable.h>
|
||||
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#define STATE_FP_SAVED 0x1
|
||||
#define STATE_FP_SETUP 0x2
|
||||
|
@ -31,11 +31,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <subcmd/exec-cmd.h>
|
||||
#include <subcmd/pager.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "builtin.h"
|
||||
|
||||
#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
|
||||
|
||||
struct cmd_struct {
|
||||
const char *name;
|
||||
int (*fn)(int, const char **);
|
||||
|
2
tools/perf/.gitignore
vendored
2
tools/perf/.gitignore
vendored
@ -31,3 +31,5 @@ config.mak.autogen
|
||||
.config-detected
|
||||
util/intel-pt-decoder/inat-tables.c
|
||||
arch/*/include/generated/
|
||||
pmu-events/pmu-events.c
|
||||
pmu-events/jevents
|
||||
|
@ -50,5 +50,6 @@ libperf-y += util/
|
||||
libperf-y += arch/
|
||||
libperf-y += ui/
|
||||
libperf-y += scripts/
|
||||
libperf-y += trace/beauty/
|
||||
|
||||
gtk-y += ui/gtk/
|
||||
|
@ -30,6 +30,24 @@ OPTIONS
|
||||
--verbose=::
|
||||
Verbosity level.
|
||||
|
||||
-p::
|
||||
--pid=::
|
||||
Trace on existing process id (comma separated list).
|
||||
|
||||
-a::
|
||||
--all-cpus::
|
||||
Force system-wide collection. Scripts run without a <command>
|
||||
normally use -a by default, while scripts run with a <command>
|
||||
normally don't - this option allows the latter to be run in
|
||||
system-wide mode.
|
||||
|
||||
-C::
|
||||
--cpu=::
|
||||
Only trace for the list of CPUs provided. Multiple CPUs can
|
||||
be provided as a comma separated list with no space like: 0,1.
|
||||
Ranges of CPUs are specified with -: 0-2.
|
||||
Default is to trace on all online CPUs.
|
||||
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
|
@ -8,7 +8,7 @@ perf-list - List all symbolic event types
|
||||
SYNOPSIS
|
||||
--------
|
||||
[verse]
|
||||
'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob]
|
||||
'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
@ -24,6 +24,10 @@ Don't print descriptions.
|
||||
--long-desc::
|
||||
Print longer event descriptions.
|
||||
|
||||
--details::
|
||||
Print how named events are resolved internally into perf events, and also
|
||||
any extra expressions computed by perf stat.
|
||||
|
||||
|
||||
[[EVENT_MODIFIERS]]
|
||||
EVENT MODIFIERS
|
||||
@ -240,6 +244,8 @@ To limit the list use:
|
||||
|
||||
. 'pmu' to print the kernel supplied PMU events.
|
||||
|
||||
. 'sdt' to list all Statically Defined Tracepoint events.
|
||||
|
||||
. If none of the above is matched, it will apply the supplied glob to all
|
||||
events, printing the ones that match.
|
||||
|
||||
|
@ -347,6 +347,9 @@ Enable weightened sampling. An additional weight is recorded per sample and can
|
||||
displayed with the weight and local_weight sort keys. This currently works for TSX
|
||||
abort events and some memory events in precise mode on modern Intel CPUs.
|
||||
|
||||
--namespaces::
|
||||
Record events of type PERF_RECORD_NAMESPACES.
|
||||
|
||||
--transaction::
|
||||
Record transaction flags for transaction related events.
|
||||
|
||||
|
@ -72,7 +72,8 @@ OPTIONS
|
||||
--sort=::
|
||||
Sort histogram entries by given key(s) - multiple keys can be specified
|
||||
in CSV format. Following sort keys are available:
|
||||
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
|
||||
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
|
||||
local_weight, cgroup_id.
|
||||
|
||||
Each key has following meaning:
|
||||
|
||||
@ -80,6 +81,7 @@ OPTIONS
|
||||
- pid: command and tid of the task
|
||||
- dso: name of library or module executed at the time of sample
|
||||
- symbol: name of function executed at the time of sample
|
||||
- symbol_size: size of function executed at the time of sample
|
||||
- parent: name of function matched to the parent regex filter. Unmatched
|
||||
entries are displayed as "[other]".
|
||||
- cpu: cpu number the task ran at the time of sample
|
||||
@ -91,6 +93,7 @@ OPTIONS
|
||||
- weight: Event specific weight, e.g. memory latency or transaction
|
||||
abort cost. This is the global weight.
|
||||
- local_weight: Local weight version of the weight above.
|
||||
- cgroup_id: ID derived from cgroup namespace device and inode numbers.
|
||||
- transaction: Transaction abort flags.
|
||||
- overhead: Overhead percentage of sample
|
||||
- overhead_sys: Overhead percentage of sample running in system mode
|
||||
@ -172,6 +175,9 @@ OPTIONS
|
||||
By default, every sort keys not specified in -F will be appended
|
||||
automatically.
|
||||
|
||||
If the keys starts with a prefix '+', then it will append the specified
|
||||
field(s) to the default field order. For example: perf report -F +period,sample.
|
||||
|
||||
-p::
|
||||
--parent=<regex>::
|
||||
A regex filter to identify parent. The parent is a caller of this
|
||||
@ -229,6 +235,7 @@ OPTIONS
|
||||
sort_key can be:
|
||||
- function: compare on functions (default)
|
||||
- address: compare on individual code addresses
|
||||
- srcline: compare on source filename and line number
|
||||
|
||||
branch can be:
|
||||
- branch: include last branch information in callgraph when available.
|
||||
@ -424,6 +431,10 @@ include::itrace.txt[]
|
||||
--hierarchy::
|
||||
Enable hierarchical output.
|
||||
|
||||
--inline::
|
||||
If a callgraph address belongs to an inlined function, the inline stack
|
||||
will be printed. Each entry is function name or file/line.
|
||||
|
||||
include::callchain-overhead-calculation.txt[]
|
||||
|
||||
SEE ALSO
|
||||
|
@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist'
|
||||
--migrations::
|
||||
Show migration events.
|
||||
|
||||
-n::
|
||||
--next::
|
||||
Show next task.
|
||||
|
||||
-I::
|
||||
--idle-hist::
|
||||
Show idle-related events only.
|
||||
|
@ -116,7 +116,7 @@ OPTIONS
|
||||
--fields::
|
||||
Comma separated list of fields to print. Options are:
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
|
||||
srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
|
||||
srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
|
||||
callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
|
||||
to indicate to which event type the field list applies.
|
||||
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
|
||||
@ -189,15 +189,20 @@ OPTIONS
|
||||
i.e., -F "" is not allowed.
|
||||
|
||||
The brstack output includes branch related information with raw addresses using the
|
||||
/v/v/v/v/ syntax in the following order:
|
||||
/v/v/v/v/cycles syntax in the following order:
|
||||
FROM: branch source instruction
|
||||
TO : branch target instruction
|
||||
M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
|
||||
X/- : X=branch inside a transactional region, -=not in transaction region or not supported
|
||||
A/- : A=TSX abort entry, -=not aborted region or not supported
|
||||
cycles
|
||||
|
||||
The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
|
||||
|
||||
When brstackinsn is specified the full assembler sequences of branch sequences for each sample
|
||||
is printed. This is the full execution path leading to the sample. This is only supported when the
|
||||
sample was recorded with perf record -b or -j any.
|
||||
|
||||
-k::
|
||||
--vmlinux=<file>::
|
||||
vmlinux pathname
|
||||
@ -248,6 +253,9 @@ OPTIONS
|
||||
--show-mmap-events
|
||||
Display mmap related events (e.g. MMAP, MMAP2).
|
||||
|
||||
--show-namespace-events
|
||||
Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
|
||||
|
||||
--show-switch-events
|
||||
Display context switch events i.e. events of type PERF_RECORD_SWITCH or
|
||||
PERF_RECORD_SWITCH_CPU_WIDE.
|
||||
@ -299,6 +307,10 @@ include::itrace.txt[]
|
||||
stop time is not given (i.e, time string is 'x.y,') then analysis goes
|
||||
to end of file.
|
||||
|
||||
--max-blocks::
|
||||
Set the maximum number of program blocks to print with brstackasm for
|
||||
each sample.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1], linkperf:perf-script-perl[1],
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user