9f473ecfe7
As Chris has discovered on his Ivybridge, and later automated test runs have confirmed, on most of our platforms hrtimer faced with heavy GPU load can occasionally become sufficiently imprecise to affect PMU sampling calculations. This means we cannot assume sampling frequency is what we asked for, but we need to measure the interval ourselves. This patch is similar to Chris' original proposal for per-engine counters, but instead of introducing a new set to work around the problem with frequency sampling, it swaps around the way internal frequency accounting is done. Instead of accumulating current frequency and dividing by sampling frequency on readout, it accumulates frequency scaled by each period. v2: * Typo in commit message, comment on period calculation and USEC_PER_SEC. (Chris Wilson) Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20180605140253.3541-1-tvrtko.ursulin@linux.intel.com
124 lines
3.1 KiB
C
124 lines
3.1 KiB
C
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */
|
|
|
|
#ifndef __I915_PMU_H__
|
|
#define __I915_PMU_H__
|
|
|
|
#include <linux/hrtimer.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/spinlock_types.h>
|
|
#include <drm/i915_drm.h>
|
|
|
|
struct drm_i915_private;
|
|
|
|
/*
 * Indices of the global (non-engine) samplers kept in struct i915_pmu.
 *
 * __I915_NUM_PMU_SAMPLERS is not a sampler itself; it must stay last because
 * it is used as the length of the sample array.
 */
enum {
	__I915_SAMPLE_FREQ_ACT = 0,
	__I915_SAMPLE_FREQ_REQ,
	__I915_SAMPLE_RC6,
	__I915_SAMPLE_RC6_ESTIMATED,
	__I915_NUM_PMU_SAMPLERS
};
|
|
|
|
/*
 * How many different events we track in the global PMU mask.
 *
 * The total is the number of values representable in I915_PMU_SAMPLE_BITS
 * plus the number of events from __I915_PMU_OTHER(0) up to and including
 * I915_PMU_LAST.
 *
 * It is also used to know the needed number of event reference counters.
 */
#define I915_PMU_MASK_BITS \
	((1 << I915_PMU_SAMPLE_BITS) + \
	 (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0)))
|
|
|
|
struct i915_pmu_sample {
|
|
u64 cur;
|
|
};
|
|
|
|
struct i915_pmu {
|
|
/**
|
|
* @node: List node for CPU hotplug handling.
|
|
*/
|
|
struct hlist_node node;
|
|
/**
|
|
* @base: PMU base.
|
|
*/
|
|
struct pmu base;
|
|
/**
|
|
* @lock: Lock protecting enable mask and ref count handling.
|
|
*/
|
|
spinlock_t lock;
|
|
/**
|
|
* @timer: Timer for internal i915 PMU sampling.
|
|
*/
|
|
struct hrtimer timer;
|
|
/**
|
|
* @enable: Bitmask of all currently enabled events.
|
|
*
|
|
* Bits are derived from uAPI event numbers in a way that low 16 bits
|
|
* correspond to engine event _sample_ _type_ (I915_SAMPLE_QUEUED is
|
|
* bit 0), and higher bits correspond to other events (for instance
|
|
* I915_PMU_ACTUAL_FREQUENCY is bit 16 etc).
|
|
*
|
|
* In other words, low 16 bits are not per engine but per engine
|
|
* sampler type, while the upper bits are directly mapped to other
|
|
* event types.
|
|
*/
|
|
u64 enable;
|
|
|
|
/**
|
|
* @timer_last:
|
|
*
|
|
* Timestmap of the previous timer invocation.
|
|
*/
|
|
ktime_t timer_last;
|
|
|
|
/**
|
|
* @enable_count: Reference counts for the enabled events.
|
|
*
|
|
* Array indices are mapped in the same way as bits in the @enable field
|
|
* and they are used to control sampling on/off when multiple clients
|
|
* are using the PMU API.
|
|
*/
|
|
unsigned int enable_count[I915_PMU_MASK_BITS];
|
|
/**
|
|
* @timer_enabled: Should the internal sampling timer be running.
|
|
*/
|
|
bool timer_enabled;
|
|
/**
|
|
* @sample: Current and previous (raw) counters for sampling events.
|
|
*
|
|
* These counters are updated from the i915 PMU sampling timer.
|
|
*
|
|
* Only global counters are held here, while the per-engine ones are in
|
|
* struct intel_engine_cs.
|
|
*/
|
|
struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
|
|
/**
|
|
* @suspended_jiffies_last: Cached suspend time from PM core.
|
|
*/
|
|
unsigned long suspended_jiffies_last;
|
|
/**
|
|
* @i915_attr: Memory block holding device attributes.
|
|
*/
|
|
void *i915_attr;
|
|
/**
|
|
* @pmu_attr: Memory block holding device attributes.
|
|
*/
|
|
void *pmu_attr;
|
|
};
|
|
|
|
/*
 * PMU entry points.
 *
 * With CONFIG_PERF_EVENTS the functions are only declared here and are
 * defined elsewhere. Without it they become empty inline stubs, so callers
 * do not need their own #ifdef guards.
 */
#ifdef CONFIG_PERF_EVENTS
void i915_pmu_register(struct drm_i915_private *i915);
void i915_pmu_unregister(struct drm_i915_private *i915);
void i915_pmu_gt_parked(struct drm_i915_private *i915);
void i915_pmu_gt_unparked(struct drm_i915_private *i915);
#else
static inline void i915_pmu_register(struct drm_i915_private *i915) {}
static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {}
static inline void i915_pmu_gt_unparked(struct drm_i915_private *i915) {}
#endif
|
|
|
|
#endif
|