linux/drivers/gpu/drm/nouveau/dispnv50/disp.h
Lyude Paul 12885ecbfe drm/nouveau/kms/nvd9-: Add CRC support
This introduces support for CRC readback on gf119+, using the
documentation generously provided to us by Nvidia:

https://github.com/NVIDIA/open-gpu-doc/blob/master/Display-CRC/display-crc.txt

We expose all available CRC sources. SF, SOR, PIOR, and DAC are exposed
through a single set of "outp" sources: outp-active/auto for a CRC of
the scanout region, outp-complete for a CRC of both the scanout and
blanking/sync region combined, and outp-inactive for a CRC of only the
blanking/sync region. For each source, nouveau selects the appropriate
tap point based on the output path in use. We also expose an "rg"
source, which allows for capturing CRCs of the scanout raster before
it's encoded into a video signal in the output path. This tap point is
referred to as the raster generator.

Note that while there's some other neat features that can be used with
CRC capture on nvidia hardware, like capturing from two CRC sources
simultaneously, I couldn't see any usecase for them and did not
implement them.

Nvidia only allows for accessing CRCs through a shared DMA region that
we program through the core EVO/NvDisplay channel which is referred to
as the notifier context. The notifier context is limited to either 255
(for Fermi-Pascal) or 2047 (Volta+) entries to store CRCs in, and
unfortunately the hardware simply drops CRCs and reports an overflow
once all available entries in the notifier context are filled.

Since the DRM CRC API and igt-gpu-tools don't expect there to be a limit
on how many CRCs can be captured, we work around this in nouveau by
allocating two separate notifier contexts for each head instead of one.
We schedule a vblank worker ahead of time so that once we start getting
close to filling up all of the available entries in the notifier
context, we can swap the currently used notifier context out with
another pre-prepared notifier context in a manner similar to page
flipping.

Unfortunately, the hardware only allows us to this by flushing two
separate updates on the core channel: one to release the current
notifier context handle, and one to program the next notifier context's
handle. When the hardware processes the first update, the CRC for the
current frame is lost. However, the second update can be flushed
immediately without waiting for the first to complete so that CRC
generation resumes on the next frame. According to Nvidia's hardware
engineers, there isn't any cleaner way of flipping notifier contexts
that would avoid this.

Since using vblank workers to swap out the notifier context will ensure
we can usually flush both updates to hardware within the timespan of a
single frame, we can also ensure that there will only be exactly one
frame lost between the first and second update being executed by the
hardware. This gives us the guarantee that we're always correctly
matching each CRC entry with it's respective frame even after a context
flip. And since IGT will retrieve the CRC entry for a frame by waiting
until it receives a CRC for any subsequent frames, this doesn't cause an
issue with any tests and is much simpler than trying to change the
current DRM API to accommodate.

In order to facilitate testing of correct handling of this limitation,
we also expose a debugfs interface to manually control the threshold for
when we start trying to flip the notifier context. We will use this in
igt to trigger a context flip for testing purposes without needing to
wait for the notifier to completely fill up. This threshold is reset
to the default value set by nouveau after each capture, and is exposed
in a separate folder within each CRTC's debugfs directory labelled
"nv_crc".

Changes since v1:
* Forgot to finish saving crc.h before saving, whoops. This just adds
  some corrections to the empty function declarations that we use if
  CONFIG_DEBUG_FS isn't enabled.
Changes since v2:
* Don't check return code from debugfs_create_dir() or
  debugfs_create_file() - Greg K-H
Changes since v3:
  (no functional changes)
* Fix SPDX license identifiers (checkpatch)
* s/uint32_t/u32/ (checkpatch)
* Fix indenting in switch cases (checkpatch)
Changes since v4:
* Remove unneeded param changes with nv50_head_flush_clr/set
* Rebase
Changes since v5:
* Remove set but unused variable (outp) in nv50_crc_atomic_check() -
  Kbuild bot

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Dave Airlie <airlied@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200627194657.156514-10-lyude@redhat.com
2020-07-16 18:16:33 -04:00

124 lines
3.4 KiB
C

#ifndef __NV50_KMS_H__
#define __NV50_KMS_H__
#include <linux/workqueue.h>
#include <nvif/mem.h>
#include "nouveau_display.h"
struct nv50_msto;
struct nouveau_encoder;
struct nv50_disp {
struct nvif_disp *disp;
struct nv50_core *core;
struct nvif_object caps;
#define NV50_DISP_SYNC(c, o) ((c) * 0x040 + (o))
#define NV50_DISP_CORE_NTFY NV50_DISP_SYNC(0 , 0x00)
#define NV50_DISP_WNDW_SEM0(c) NV50_DISP_SYNC(1 + (c), 0x00)
#define NV50_DISP_WNDW_SEM1(c) NV50_DISP_SYNC(1 + (c), 0x10)
#define NV50_DISP_WNDW_NTFY(c) NV50_DISP_SYNC(1 + (c), 0x20)
#define NV50_DISP_BASE_SEM0(c) NV50_DISP_WNDW_SEM0(0 + (c))
#define NV50_DISP_BASE_SEM1(c) NV50_DISP_WNDW_SEM1(0 + (c))
#define NV50_DISP_BASE_NTFY(c) NV50_DISP_WNDW_NTFY(0 + (c))
#define NV50_DISP_OVLY_SEM0(c) NV50_DISP_WNDW_SEM0(4 + (c))
#define NV50_DISP_OVLY_SEM1(c) NV50_DISP_WNDW_SEM1(4 + (c))
#define NV50_DISP_OVLY_NTFY(c) NV50_DISP_WNDW_NTFY(4 + (c))
struct nouveau_bo *sync;
struct mutex mutex;
};
static inline struct nv50_disp *
nv50_disp(struct drm_device *dev)
{
return nouveau_display(dev)->priv;
}
struct nv50_disp_interlock {
enum nv50_disp_interlock_type {
NV50_DISP_INTERLOCK_CORE = 0,
NV50_DISP_INTERLOCK_CURS,
NV50_DISP_INTERLOCK_BASE,
NV50_DISP_INTERLOCK_OVLY,
NV50_DISP_INTERLOCK_WNDW,
NV50_DISP_INTERLOCK_WIMM,
NV50_DISP_INTERLOCK__SIZE
} type;
u32 data;
u32 wimm;
};
void corec37d_ntfy_init(struct nouveau_bo *, u32);
void head907d_olut_load(struct drm_color_lut *, int size, void __iomem *);
struct nv50_chan {
struct nvif_object user;
struct nvif_device *device;
};
struct nv50_dmac {
struct nv50_chan base;
struct nvif_mem push;
u32 *ptr;
struct nvif_object sync;
struct nvif_object vram;
/* Protects against concurrent pushbuf access to this channel, lock is
* grabbed by evo_wait (if the pushbuf reservation is successful) and
* dropped again by evo_kick. */
struct mutex lock;
};
struct nv50_outp_atom {
struct list_head head;
struct drm_encoder *encoder;
bool flush_disable;
union nv50_outp_atom_mask {
struct {
bool ctrl:1;
};
u8 mask;
} set, clr;
};
int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
const s32 *oclass, u8 head, void *data, u32 size,
u64 syncbuf, struct nv50_dmac *dmac);
void nv50_dmac_destroy(struct nv50_dmac *);
/*
* For normal encoders this just returns the encoder. For active MST encoders,
* this returns the real outp that's driving displays on the topology.
* Inactive MST encoders return NULL, since they would have no real outp to
* return anyway.
*/
struct nouveau_encoder *nv50_real_outp(struct drm_encoder *encoder);
u32 *evo_wait(struct nv50_dmac *, int nr);
void evo_kick(u32 *, struct nv50_dmac *);
extern const u64 disp50xx_modifiers[];
extern const u64 disp90xx_modifiers[];
extern const u64 wndwc57e_modifiers[];
#define evo_mthd(p, m, s) do { \
const u32 _m = (m), _s = (s); \
if (drm_debug_enabled(DRM_UT_KMS)) \
pr_err("%04x %d %s\n", _m, _s, __func__); \
*((p)++) = ((_s << 18) | _m); \
} while(0)
#define evo_data(p, d) do { \
const u32 _d = (d); \
if (drm_debug_enabled(DRM_UT_KMS)) \
pr_err("\t%08x\n", _d); \
*((p)++) = _d; \
} while(0)
#endif