linux/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
Chris Wilson 7ce5b6850b drm/i915/selftests: Use mul_u32_u32() for 32b x 32b -> 64b result
As realised by commit 9e3d6223d2 ("math64, timers: Fix 32bit
mul_u64_u32_shr() and friends"), GCC does not always generate ideal code
for performing a 32b x 32b multiply returning a 64b result (i.e. where
we idiomatically use u64 result = (u64)x * (u32)x).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20170913105154.2910-2-chris@chris-wilson.co.uk
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
2017-09-13 13:27:20 +01:00

300 lines
8.0 KiB
C

/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "../i915_selftest.h"
#include "i915_random.h"
#include "mock_gem_device.h"
#include "mock_timeline.h"
struct __igt_sync {
const char *name;
u32 seqno;
bool expected;
bool set;
};
static int __igt_sync(struct intel_timeline *tl,
u64 ctx,
const struct __igt_sync *p,
const char *name)
{
int ret;
if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
name, p->name, ctx, p->seqno, yesno(p->expected));
return -EINVAL;
}
if (p->set) {
ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
if (ret)
return ret;
}
return 0;
}
static int igt_sync(void *arg)
{
const struct __igt_sync pass[] = {
{ "unset", 0, false, false },
{ "new", 0, false, true },
{ "0a", 0, true, true },
{ "1a", 1, false, true },
{ "1b", 1, true, true },
{ "0b", 0, true, false },
{ "2a", 2, false, true },
{ "4", 4, false, true },
{ "INT_MAX", INT_MAX, false, true },
{ "INT_MAX-1", INT_MAX-1, true, false },
{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
{ "INT_MAX", INT_MAX, true, false },
{ "UINT_MAX", UINT_MAX, false, true },
{ "wrap", 0, false, true },
{ "unwrap", UINT_MAX, true, false },
{},
}, *p;
struct intel_timeline *tl;
int order, offset;
int ret;
tl = mock_timeline(0);
if (!tl)
return -ENOMEM;
for (p = pass; p->name; p++) {
for (order = 1; order < 64; order++) {
for (offset = -1; offset <= (order > 1); offset++) {
u64 ctx = BIT_ULL(order) + offset;
ret = __igt_sync(tl, ctx, p, "1");
if (ret)
goto out;
}
}
}
mock_timeline_destroy(tl);
tl = mock_timeline(0);
if (!tl)
return -ENOMEM;
for (order = 1; order < 64; order++) {
for (offset = -1; offset <= (order > 1); offset++) {
u64 ctx = BIT_ULL(order) + offset;
for (p = pass; p->name; p++) {
ret = __igt_sync(tl, ctx, p, "2");
if (ret)
goto out;
}
}
}
out:
mock_timeline_destroy(tl);
return ret;
}
static unsigned int random_engine(struct rnd_state *rnd)
{
return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}
static int bench_sync(void *arg)
{
struct rnd_state prng;
struct intel_timeline *tl;
unsigned long end_time, count;
u64 prng32_1M;
ktime_t kt;
int order, last_order;
tl = mock_timeline(0);
if (!tl)
return -ENOMEM;
/* Lookups from cache are very fast and so the random number generation
* and the loop itself becomes a significant factor in the per-iteration
* timings. We try to compensate the results by measuring the overhead
* of the prng and subtract it from the reported results.
*/
prandom_seed_state(&prng, i915_selftest.random_seed);
count = 0;
kt = ktime_get();
end_time = jiffies + HZ/10;
do {
u32 x;
/* Make sure the compiler doesn't optimise away the prng call */
WRITE_ONCE(x, prandom_u32_state(&prng));
count++;
} while (!time_after(jiffies, end_time));
kt = ktime_sub(ktime_get(), kt);
pr_debug("%s: %lu random evaluations, %lluns/prng\n",
__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
/* Benchmark (only) setting random context ids */
prandom_seed_state(&prng, i915_selftest.random_seed);
count = 0;
kt = ktime_get();
end_time = jiffies + HZ/10;
do {
u64 id = i915_prandom_u64_state(&prng);
__intel_timeline_sync_set(tl, id, 0);
count++;
} while (!time_after(jiffies, end_time));
kt = ktime_sub(ktime_get(), kt);
kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
pr_info("%s: %lu random insertions, %lluns/insert\n",
__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
/* Benchmark looking up the exact same context ids as we just set */
prandom_seed_state(&prng, i915_selftest.random_seed);
end_time = count;
kt = ktime_get();
while (end_time--) {
u64 id = i915_prandom_u64_state(&prng);
if (!__intel_timeline_sync_is_later(tl, id, 0)) {
mock_timeline_destroy(tl);
pr_err("Lookup of %llu failed\n", id);
return -EINVAL;
}
}
kt = ktime_sub(ktime_get(), kt);
kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
pr_info("%s: %lu random lookups, %lluns/lookup\n",
__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
mock_timeline_destroy(tl);
cond_resched();
tl = mock_timeline(0);
if (!tl)
return -ENOMEM;
/* Benchmark setting the first N (in order) contexts */
count = 0;
kt = ktime_get();
end_time = jiffies + HZ/10;
do {
__intel_timeline_sync_set(tl, count++, 0);
} while (!time_after(jiffies, end_time));
kt = ktime_sub(ktime_get(), kt);
pr_info("%s: %lu in-order insertions, %lluns/insert\n",
__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
/* Benchmark looking up the exact same context ids as we just set */
end_time = count;
kt = ktime_get();
while (end_time--) {
if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
pr_err("Lookup of %lu failed\n", end_time);
mock_timeline_destroy(tl);
return -EINVAL;
}
}
kt = ktime_sub(ktime_get(), kt);
pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
mock_timeline_destroy(tl);
cond_resched();
tl = mock_timeline(0);
if (!tl)
return -ENOMEM;
/* Benchmark searching for a random context id and maybe changing it */
prandom_seed_state(&prng, i915_selftest.random_seed);
count = 0;
kt = ktime_get();
end_time = jiffies + HZ/10;
do {
u32 id = random_engine(&prng);
u32 seqno = prandom_u32_state(&prng);
if (!__intel_timeline_sync_is_later(tl, id, seqno))
__intel_timeline_sync_set(tl, id, seqno);
count++;
} while (!time_after(jiffies, end_time));
kt = ktime_sub(ktime_get(), kt);
kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
mock_timeline_destroy(tl);
cond_resched();
/* Benchmark searching for a known context id and changing the seqno */
for (last_order = 1, order = 1; order < 32;
({ int tmp = last_order; last_order = order; order += tmp; })) {
unsigned int mask = BIT(order) - 1;
tl = mock_timeline(0);
if (!tl)
return -ENOMEM;
count = 0;
kt = ktime_get();
end_time = jiffies + HZ/10;
do {
/* Without assuming too many details of the underlying
* implementation, try to identify its phase-changes
* (if any)!
*/
u64 id = (u64)(count & mask) << order;
__intel_timeline_sync_is_later(tl, id, 0);
__intel_timeline_sync_set(tl, id, 0);
count++;
} while (!time_after(jiffies, end_time));
kt = ktime_sub(ktime_get(), kt);
pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
__func__, count, order,
(long long)div64_ul(ktime_to_ns(kt), count));
mock_timeline_destroy(tl);
cond_resched();
}
return 0;
}
int i915_gem_timeline_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_sync),
SUBTEST(bench_sync),
};
return i915_subtests(tests, NULL);
}