From bf4f6d16c89466bbbe2f9c959a5ae366856f3111 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 10 Apr 2019 08:38:13 +0200 Subject: [PATCH 01/30] drm: switch drm_fb_memcpy_dstclip to accept __iomem dst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all archs have the __io_virt() macro, so cirrus can't simply convert pointers that way. The drm format helpers have to use memcpy_toio() instead. This patch makes drm_fb_memcpy_dstclip() accept a __iomem dst pointer and use memcpy_toio() instead of memcpy(). With that, separating out the memcpy loop into the drm_fb_memcpy_lines() helper isn't useful any more, so move the code back into the calling functions. Signed-off-by: Gerd Hoffmann Reviewed-by: Noralf Trønnes Link: http://patchwork.freedesktop.org/patch/msgid/20190410063815.17062-2-kraxel@redhat.com --- drivers/gpu/drm/cirrus/cirrus.c | 2 +- drivers/gpu/drm/drm_format_helper.c | 45 +++++++++++++++-------------- include/drm/drm_format_helper.h | 3 +- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/cirrus/cirrus.c b/drivers/gpu/drm/cirrus/cirrus.c index 5095b8ce52c2..0fc3aa31b5a4 100644 --- a/drivers/gpu/drm/cirrus/cirrus.c +++ b/drivers/gpu/drm/cirrus/cirrus.c @@ -307,7 +307,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb, return -ENOMEM; if (cirrus->cpp == fb->format->cpp[0]) - drm_fb_memcpy_dstclip(__io_virt(cirrus->vram), + drm_fb_memcpy_dstclip(cirrus->vram, vmap, fb, rect); else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2) diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 00d716f14173..dace05638bc3 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -10,23 +10,17 @@ #include #include +#include #include #include #include #include -static void drm_fb_memcpy_lines(void *dst, unsigned int dst_pitch, - void *src, unsigned int src_pitch, - unsigned int linelength, unsigned int lines) 
+static unsigned int clip_offset(struct drm_rect *clip, + unsigned int pitch, unsigned int cpp) { - int line; - - for (line = 0; line < lines; line++) { - memcpy(dst, src, linelength); - src += src_pitch; - dst += dst_pitch; - } + return clip->y1 * pitch + clip->x1 * cpp; } /** @@ -43,35 +37,44 @@ void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip) { unsigned int cpp = drm_format_plane_cpp(fb->format->format, 0); - unsigned int offset = (clip->y1 * fb->pitches[0]) + (clip->x1 * cpp); size_t len = (clip->x2 - clip->x1) * cpp; + unsigned int y, lines = clip->y2 - clip->y1; - drm_fb_memcpy_lines(dst, len, - vaddr + offset, fb->pitches[0], - len, clip->y2 - clip->y1); + vaddr += clip_offset(clip, fb->pitches[0], cpp); + for (y = 0; y < lines; y++) { + memcpy(dst, vaddr, len); + vaddr += fb->pitches[0]; + dst += len; + } } EXPORT_SYMBOL(drm_fb_memcpy); /** * drm_fb_memcpy_dstclip - Copy clip buffer - * @dst: Destination buffer + * @dst: Destination buffer (iomem) * @vaddr: Source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * * This function applies clipping on dst, i.e. the destination is a - * full framebuffer but only the clip rect content is copied over. + * full (iomem) framebuffer but only the clip rect content is copied over. 
*/ -void drm_fb_memcpy_dstclip(void *dst, void *vaddr, struct drm_framebuffer *fb, +void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr, + struct drm_framebuffer *fb, struct drm_rect *clip) { unsigned int cpp = drm_format_plane_cpp(fb->format->format, 0); - unsigned int offset = (clip->y1 * fb->pitches[0]) + (clip->x1 * cpp); + unsigned int offset = clip_offset(clip, fb->pitches[0], cpp); size_t len = (clip->x2 - clip->x1) * cpp; + unsigned int y, lines = clip->y2 - clip->y1; - drm_fb_memcpy_lines(dst + offset, fb->pitches[0], - vaddr + offset, fb->pitches[0], - len, clip->y2 - clip->y1); + vaddr += offset; + dst += offset; + for (y = 0; y < lines; y++) { + memcpy_toio(dst, vaddr, len); + vaddr += fb->pitches[0]; + dst += fb->pitches[0]; + } } EXPORT_SYMBOL(drm_fb_memcpy_dstclip); diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index 6f84380757ee..bc2e1004e166 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -15,7 +15,8 @@ struct drm_rect; void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip); -void drm_fb_memcpy_dstclip(void *dst, void *vaddr, struct drm_framebuffer *fb, +void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr, + struct drm_framebuffer *fb, struct drm_rect *clip); void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip); From d653bd394823772f0696c0642be94d78d5562e41 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 10 Apr 2019 08:38:14 +0200 Subject: [PATCH 02/30] drm: switch drm_fb_xrgb8888_to_rgb565_dstclip to accept __iomem dst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all archs have the __io_virt() macro, so cirrus can't simply convert pointers that way. The drm format helpers have to use memcpy_toio() instead. This patch makes drm_fb_xrgb8888_to_rgb565_dstclip() accept a __iomem dst pointer and use memcpy_toio() instead of memcpy(). 
The helper function (drm_fb_xrgb8888_to_rgb565_line) has been changed to process a single scanline. Signed-off-by: Gerd Hoffmann Reviewed-by: Noralf Trønnes Link: http://patchwork.freedesktop.org/patch/msgid/20190410063815.17062-3-kraxel@redhat.com --- drivers/gpu/drm/cirrus/cirrus.c | 2 +- drivers/gpu/drm/drm_format_helper.c | 117 ++++++++++++++-------------- include/drm/drm_format_helper.h | 6 +- 3 files changed, 64 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/cirrus/cirrus.c b/drivers/gpu/drm/cirrus/cirrus.c index 0fc3aa31b5a4..ed2f2d8cfb6f 100644 --- a/drivers/gpu/drm/cirrus/cirrus.c +++ b/drivers/gpu/drm/cirrus/cirrus.c @@ -311,7 +311,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb, vmap, fb, rect); else if (fb->format->cpp[0] == 4 && cirrus->cpp == 2) - drm_fb_xrgb8888_to_rgb565_dstclip(__io_virt(cirrus->vram), + drm_fb_xrgb8888_to_rgb565_dstclip(cirrus->vram, cirrus->pitch, vmap, fb, rect, false); diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index dace05638bc3..7dbc43a4343e 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -113,42 +113,22 @@ void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb, } EXPORT_SYMBOL(drm_fb_swab16); -static void drm_fb_xrgb8888_to_rgb565_lines(void *dst, unsigned int dst_pitch, - void *src, unsigned int src_pitch, - unsigned int src_linelength, - unsigned int lines, - bool swap) +static void drm_fb_xrgb8888_to_rgb565_line(u16 *dbuf, u32 *sbuf, + unsigned int pixels, + bool swab) { - unsigned int linepixels = src_linelength / sizeof(u32); - unsigned int x, y; - u32 *sbuf; - u16 *dbuf, val16; + unsigned int x; + u16 val16; - /* - * The cma memory is write-combined so reads are uncached. - * Speed up by fetching one line at a time. 
- */ - sbuf = kmalloc(src_linelength, GFP_KERNEL); - if (!sbuf) - return; - - for (y = 0; y < lines; y++) { - memcpy(sbuf, src, src_linelength); - dbuf = dst; - for (x = 0; x < linepixels; x++) { - val16 = ((sbuf[x] & 0x00F80000) >> 8) | - ((sbuf[x] & 0x0000FC00) >> 5) | - ((sbuf[x] & 0x000000F8) >> 3); - if (swap) - *dbuf++ = swab16(val16); - else - *dbuf++ = val16; - } - src += src_pitch; - dst += dst_pitch; + for (x = 0; x < pixels; x++) { + val16 = ((sbuf[x] & 0x00F80000) >> 8) | + ((sbuf[x] & 0x0000FC00) >> 5) | + ((sbuf[x] & 0x000000F8) >> 3); + if (swab) + dbuf[x] = swab16(val16); + else + dbuf[x] = val16; } - - kfree(sbuf); } /** @@ -157,7 +137,7 @@ static void drm_fb_xrgb8888_to_rgb565_lines(void *dst, unsigned int dst_pitch, * @vaddr: XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy - * @swap: Swap bytes + * @swab: Swap bytes * * Drivers can use this function for RGB565 devices that don't natively * support XRGB8888. @@ -167,49 +147,72 @@ static void drm_fb_xrgb8888_to_rgb565_lines(void *dst, unsigned int dst_pitch, */ void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip, bool swap) + struct drm_rect *clip, bool swab) { - unsigned int src_offset = (clip->y1 * fb->pitches[0]) - + (clip->x1 * sizeof(u32)); - size_t src_len = (clip->x2 - clip->x1) * sizeof(u32); - size_t dst_len = (clip->x2 - clip->x1) * sizeof(u16); + size_t linepixels = clip->x2 - clip->x1; + size_t src_len = linepixels * sizeof(u32); + size_t dst_len = linepixels * sizeof(u16); + unsigned y, lines = clip->y2 - clip->y1; + void *sbuf; - drm_fb_xrgb8888_to_rgb565_lines(dst, dst_len, - vaddr + src_offset, fb->pitches[0], - src_len, clip->y2 - clip->y1, - swap); + /* + * The cma memory is write-combined so reads are uncached. + * Speed up by fetching one line at a time. 
+ */ + sbuf = kmalloc(src_len, GFP_KERNEL); + if (!sbuf) + return; + + vaddr += clip_offset(clip, fb->pitches[0], sizeof(u32)); + for (y = 0; y < lines; y++) { + memcpy(sbuf, vaddr, src_len); + drm_fb_xrgb8888_to_rgb565_line(dst, sbuf, linepixels, swab); + vaddr += fb->pitches[0]; + dst += dst_len; + } + + kfree(sbuf); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565); /** * drm_fb_xrgb8888_to_rgb565_dstclip - Convert XRGB8888 to RGB565 clip buffer - * @dst: RGB565 destination buffer + * @dst: RGB565 destination buffer (iomem) * @dst_pitch: destination buffer pitch * @vaddr: XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy - * @swap: Swap bytes + * @swab: Swap bytes * * Drivers can use this function for RGB565 devices that don't natively * support XRGB8888. * * This function applies clipping on dst, i.e. the destination is a - * full framebuffer but only the clip rect content is copied over. + * full (iomem) framebuffer but only the clip rect content is copied over. 
*/ -void drm_fb_xrgb8888_to_rgb565_dstclip(void *dst, unsigned int dst_pitch, +void drm_fb_xrgb8888_to_rgb565_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip, bool swap) + struct drm_rect *clip, bool swab) { - unsigned int src_offset = (clip->y1 * fb->pitches[0]) - + (clip->x1 * sizeof(u32)); - unsigned int dst_offset = (clip->y1 * dst_pitch) - + (clip->x1 * sizeof(u16)); - size_t src_len = (clip->x2 - clip->x1) * sizeof(u32); + size_t linepixels = clip->x2 - clip->x1; + size_t dst_len = linepixels * sizeof(u16); + unsigned y, lines = clip->y2 - clip->y1; + void *dbuf; - drm_fb_xrgb8888_to_rgb565_lines(dst + dst_offset, dst_pitch, - vaddr + src_offset, fb->pitches[0], - src_len, clip->y2 - clip->y1, - swap); + dbuf = kmalloc(dst_len, GFP_KERNEL); + if (!dbuf) + return; + + vaddr += clip_offset(clip, fb->pitches[0], sizeof(u32)); + dst += clip_offset(clip, dst_pitch, sizeof(u16)); + for (y = 0; y < lines; y++) { + drm_fb_xrgb8888_to_rgb565_line(dbuf, vaddr, linepixels, swab); + memcpy_toio(dst, dbuf, dst_len); + vaddr += fb->pitches[0]; + dst += dst_len; + } + + kfree(dbuf); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565_dstclip); diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index bc2e1004e166..490e9a06c910 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -22,10 +22,10 @@ void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip); void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip, bool swap); -void drm_fb_xrgb8888_to_rgb565_dstclip(void *dst, unsigned int dst_pitch, + struct drm_rect *clip, bool swab); +void drm_fb_xrgb8888_to_rgb565_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip, bool swap); + struct drm_rect *clip, bool swab); void drm_fb_xrgb8888_to_rgb888_dstclip(void *dst, 
unsigned int dst_pitch, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip); From 5c5373b51becbabb16390a5689722a7efd4600b2 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 10 Apr 2019 08:38:15 +0200 Subject: [PATCH 03/30] drm: switch drm_fb_xrgb8888_to_rgb888_dstclip to accept __iomem dst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all archs have the __io_virt() macro, so cirrus can't simply convert pointers that way. The drm format helpers have to use memcpy_toio() instead. This patch makes drm_fb_xrgb8888_to_rgb888_dstclip() accept a __iomem dst pointer and use memcpy_toio() instead of memcpy(). The helper function (drm_fb_xrgb8888_to_rgb888_line) has been changed to process a single scanline. Signed-off-by: Gerd Hoffmann Reviewed-by: Noralf Trønnes Link: http://patchwork.freedesktop.org/patch/msgid/20190410063815.17062-4-kraxel@redhat.com --- drivers/gpu/drm/cirrus/cirrus.c | 2 +- drivers/gpu/drm/drm_format_helper.c | 57 +++++++++++++---------------- include/drm/drm_format_helper.h | 2 +- 3 files changed, 27 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/cirrus/cirrus.c b/drivers/gpu/drm/cirrus/cirrus.c index ed2f2d8cfb6f..be4ea370ba31 100644 --- a/drivers/gpu/drm/cirrus/cirrus.c +++ b/drivers/gpu/drm/cirrus/cirrus.c @@ -316,7 +316,7 @@ static int cirrus_fb_blit_rect(struct drm_framebuffer *fb, vmap, fb, rect, false); else if (fb->format->cpp[0] == 4 && cirrus->cpp == 3) - drm_fb_xrgb8888_to_rgb888_dstclip(__io_virt(cirrus->vram), + drm_fb_xrgb8888_to_rgb888_dstclip(cirrus->vram, cirrus->pitch, vmap, fb, rect); diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 7dbc43a4343e..319de608a088 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -216,33 +216,16 @@ void drm_fb_xrgb8888_to_rgb565_dstclip(void __iomem *dst, unsigned int dst_pitch } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565_dstclip); -static 
void drm_fb_xrgb8888_to_rgb888_lines(void *dst, unsigned int dst_pitch, - void *src, unsigned int src_pitch, - unsigned int src_linelength, - unsigned int lines) +static void drm_fb_xrgb8888_to_rgb888_line(u8 *dbuf, u32 *sbuf, + unsigned int pixels) { - unsigned int linepixels = src_linelength / 3; - unsigned int x, y; - u32 *sbuf; - u8 *dbuf; + unsigned int x; - sbuf = kmalloc(src_linelength, GFP_KERNEL); - if (!sbuf) - return; - - for (y = 0; y < lines; y++) { - memcpy(sbuf, src, src_linelength); - dbuf = dst; - for (x = 0; x < linepixels; x++) { - *dbuf++ = (sbuf[x] & 0x000000FF) >> 0; - *dbuf++ = (sbuf[x] & 0x0000FF00) >> 8; - *dbuf++ = (sbuf[x] & 0x00FF0000) >> 16; - } - src += src_pitch; - dst += dst_pitch; + for (x = 0; x < pixels; x++) { + *dbuf++ = (sbuf[x] & 0x000000FF) >> 0; + *dbuf++ = (sbuf[x] & 0x0000FF00) >> 8; + *dbuf++ = (sbuf[x] & 0x00FF0000) >> 16; } - - kfree(sbuf); } /** @@ -264,15 +247,25 @@ void drm_fb_xrgb8888_to_rgb888_dstclip(void *dst, unsigned int dst_pitch, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip) { - unsigned int src_offset = (clip->y1 * fb->pitches[0]) - + (clip->x1 * sizeof(u32)); - unsigned int dst_offset = (clip->y1 * dst_pitch) - + (clip->x1 * 3); - size_t src_len = (clip->x2 - clip->x1) * sizeof(u32); + size_t linepixels = clip->x2 - clip->x1; + size_t dst_len = linepixels * 3; + unsigned y, lines = clip->y2 - clip->y1; + void *dbuf; - drm_fb_xrgb8888_to_rgb888_lines(dst + dst_offset, dst_pitch, - vaddr + src_offset, fb->pitches[0], - src_len, clip->y2 - clip->y1); + dbuf = kmalloc(dst_len, GFP_KERNEL); + if (!dbuf) + return; + + vaddr += clip_offset(clip, fb->pitches[0], sizeof(u32)); + dst += clip_offset(clip, dst_pitch, sizeof(u16)); + for (y = 0; y < lines; y++) { + drm_fb_xrgb8888_to_rgb888_line(dbuf, vaddr, linepixels); + memcpy_toio(dst, dbuf, dst_len); + vaddr += fb->pitches[0]; + dst += dst_len; + } + + kfree(dbuf); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888_dstclip); diff --git 
a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index 490e9a06c910..085d63faee12 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -26,7 +26,7 @@ void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr, void drm_fb_xrgb8888_to_rgb565_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip, bool swab); -void drm_fb_xrgb8888_to_rgb888_dstclip(void *dst, unsigned int dst_pitch, +void drm_fb_xrgb8888_to_rgb888_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip); void drm_fb_xrgb8888_to_gray8(u8 *dst, void *vaddr, struct drm_framebuffer *fb, From 669105a74a287b14cdec04c64eb51db1bb890f64 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 10 Apr 2019 09:48:28 +0200 Subject: [PATCH 04/30] drm/bochs: use simple display pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gerd Hoffmann Acked-by: Noralf Trønnes Link: http://patchwork.freedesktop.org/patch/msgid/20190410074828.10296-1-kraxel@redhat.com --- drivers/gpu/drm/bochs/bochs.h | 6 +- drivers/gpu/drm/bochs/bochs_kms.c | 178 +++++++++--------------------- 2 files changed, 53 insertions(+), 131 deletions(-) diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h index a7f6723bebdd..36f6ee725d13 100644 --- a/drivers/gpu/drm/bochs/bochs.h +++ b/drivers/gpu/drm/bochs/bochs.h @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -69,9 +70,8 @@ struct bochs_device { struct edid *edid; /* drm */ - struct drm_device *dev; - struct drm_crtc crtc; - struct drm_encoder encoder; + struct drm_device *dev; + struct drm_simple_display_pipe pipe; struct drm_connector connector; /* ttm */ diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c index 485f9cf05e8b..5e905f50449d 100644 --- a/drivers/gpu/drm/bochs/bochs_kms.c +++ 
b/drivers/gpu/drm/bochs/bochs_kms.c @@ -22,76 +22,55 @@ MODULE_PARM_DESC(defy, "default y resolution"); /* ---------------------------------------------------------------------- */ -static void bochs_crtc_mode_set_nofb(struct drm_crtc *crtc) -{ - struct bochs_device *bochs = - container_of(crtc, struct bochs_device, crtc); - - bochs_hw_setmode(bochs, &crtc->mode); -} - -static void bochs_crtc_atomic_enable(struct drm_crtc *crtc, - struct drm_crtc_state *old_crtc_state) -{ -} - -static void bochs_crtc_atomic_flush(struct drm_crtc *crtc, - struct drm_crtc_state *old_crtc_state) -{ - struct drm_device *dev = crtc->dev; - struct drm_pending_vblank_event *event; - - if (crtc->state && crtc->state->event) { - unsigned long irqflags; - - spin_lock_irqsave(&dev->event_lock, irqflags); - event = crtc->state->event; - crtc->state->event = NULL; - drm_crtc_send_vblank_event(crtc, event); - spin_unlock_irqrestore(&dev->event_lock, irqflags); - } -} - - -/* These provide the minimum set of functions required to handle a CRTC */ -static const struct drm_crtc_funcs bochs_crtc_funcs = { - .set_config = drm_atomic_helper_set_config, - .destroy = drm_crtc_cleanup, - .page_flip = drm_atomic_helper_page_flip, - .reset = drm_atomic_helper_crtc_reset, - .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, -}; - -static const struct drm_crtc_helper_funcs bochs_helper_funcs = { - .mode_set_nofb = bochs_crtc_mode_set_nofb, - .atomic_enable = bochs_crtc_atomic_enable, - .atomic_flush = bochs_crtc_atomic_flush, -}; - static const uint32_t bochs_formats[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_BGRX8888, }; -static void bochs_plane_atomic_update(struct drm_plane *plane, - struct drm_plane_state *old_state) +static void bochs_plane_update(struct bochs_device *bochs, + struct drm_plane_state *state) { - struct bochs_device *bochs = plane->dev->dev_private; struct bochs_bo *bo; - if (!plane->state->fb) + if (!state->fb || 
!bochs->stride) return; - bo = gem_to_bochs_bo(plane->state->fb->obj[0]); + + bo = gem_to_bochs_bo(state->fb->obj[0]); bochs_hw_setbase(bochs, - plane->state->crtc_x, - plane->state->crtc_y, + state->crtc_x, + state->crtc_y, bo->bo.offset); - bochs_hw_setformat(bochs, plane->state->fb->format); + bochs_hw_setformat(bochs, state->fb->format); } -static int bochs_plane_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) +static void bochs_pipe_enable(struct drm_simple_display_pipe *pipe, + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) +{ + struct bochs_device *bochs = pipe->crtc.dev->dev_private; + + bochs_hw_setmode(bochs, &crtc_state->mode); + bochs_plane_update(bochs, plane_state); +} + +static void bochs_pipe_update(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *old_state) +{ + struct bochs_device *bochs = pipe->crtc.dev->dev_private; + struct drm_crtc *crtc = &pipe->crtc; + + bochs_plane_update(bochs, pipe->plane.state); + + if (crtc->state->event) { + spin_lock_irq(&crtc->dev->event_lock); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + spin_unlock_irq(&crtc->dev->event_lock); + } +} + +static int bochs_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *new_state) { struct bochs_bo *bo; @@ -101,8 +80,8 @@ static int bochs_plane_prepare_fb(struct drm_plane *plane, return bochs_bo_pin(bo, TTM_PL_FLAG_VRAM); } -static void bochs_plane_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) +static void bochs_pipe_cleanup_fb(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *old_state) { struct bochs_bo *bo; @@ -112,73 +91,13 @@ static void bochs_plane_cleanup_fb(struct drm_plane *plane, bochs_bo_unpin(bo); } -static const struct drm_plane_helper_funcs bochs_plane_helper_funcs = { - .atomic_update = bochs_plane_atomic_update, - .prepare_fb = bochs_plane_prepare_fb, - .cleanup_fb = bochs_plane_cleanup_fb, 
+static const struct drm_simple_display_pipe_funcs bochs_pipe_funcs = { + .enable = bochs_pipe_enable, + .update = bochs_pipe_update, + .prepare_fb = bochs_pipe_prepare_fb, + .cleanup_fb = bochs_pipe_cleanup_fb, }; -static const struct drm_plane_funcs bochs_plane_funcs = { - .update_plane = drm_atomic_helper_update_plane, - .disable_plane = drm_atomic_helper_disable_plane, - .destroy = drm_primary_helper_destroy, - .reset = drm_atomic_helper_plane_reset, - .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, -}; - -static struct drm_plane *bochs_primary_plane(struct drm_device *dev) -{ - struct drm_plane *primary; - int ret; - - primary = kzalloc(sizeof(*primary), GFP_KERNEL); - if (primary == NULL) { - DRM_DEBUG_KMS("Failed to allocate primary plane\n"); - return NULL; - } - - ret = drm_universal_plane_init(dev, primary, 0, - &bochs_plane_funcs, - bochs_formats, - ARRAY_SIZE(bochs_formats), - NULL, - DRM_PLANE_TYPE_PRIMARY, NULL); - if (ret) { - kfree(primary); - return NULL; - } - - drm_plane_helper_add(primary, &bochs_plane_helper_funcs); - return primary; -} - -static void bochs_crtc_init(struct drm_device *dev) -{ - struct bochs_device *bochs = dev->dev_private; - struct drm_crtc *crtc = &bochs->crtc; - struct drm_plane *primary = bochs_primary_plane(dev); - - drm_crtc_init_with_planes(dev, crtc, primary, NULL, - &bochs_crtc_funcs, NULL); - drm_crtc_helper_add(crtc, &bochs_helper_funcs); -} - -static const struct drm_encoder_funcs bochs_encoder_encoder_funcs = { - .destroy = drm_encoder_cleanup, -}; - -static void bochs_encoder_init(struct drm_device *dev) -{ - struct bochs_device *bochs = dev->dev_private; - struct drm_encoder *encoder = &bochs->encoder; - - encoder->possible_crtcs = 0x1; - drm_encoder_init(dev, encoder, &bochs_encoder_encoder_funcs, - DRM_MODE_ENCODER_DAC, NULL); -} - - static int bochs_connector_get_modes(struct drm_connector *connector) { struct bochs_device 
*bochs = @@ -278,11 +197,14 @@ int bochs_kms_init(struct bochs_device *bochs) bochs->dev->mode_config.funcs = &bochs_mode_funcs; - bochs_crtc_init(bochs->dev); - bochs_encoder_init(bochs->dev); bochs_connector_init(bochs->dev); - drm_connector_attach_encoder(&bochs->connector, - &bochs->encoder); + drm_simple_display_pipe_init(bochs->dev, + &bochs->pipe, + &bochs_pipe_funcs, + bochs_formats, + ARRAY_SIZE(bochs_formats), + NULL, + &bochs->connector); drm_mode_config_reset(bochs->dev); From 564d6fd611f9c66dba4a3ad1da7a4589f8eb699a Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 1 Apr 2019 10:56:45 +0200 Subject: [PATCH 05/30] drm/sun4i: Rely on dma interconnect for our RAM offset Now that we can express our DMA topology, rely on that property instead of hardcoding an offset from the dma_addr_t, which wasn't really great. We still need to add some code to deal with the old DT that would lack that property, but we move the offset to the DRM device dma_pfn_offset to be able to rely on just the dma_addr_t associated to the GEM object. Acked-by: Daniel Vetter Acked-by: Robin Murphy Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/5df781318e7e05f780a11ed243dcf2b9fe8a08cb.1554108995.git-series.maxime.ripard@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_backend.c | 28 ++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index ee59da4a0172..4e5922c89d7b 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -361,13 +361,6 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend, paddr = drm_fb_cma_get_gem_addr(fb, state, 0); DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr); - /* - * backend DMA accesses DRAM directly, bypassing the system - * bus. As such, the address range is different and the buffer - * address needs to be corrected. 
- */ - paddr -= PHYS_OFFSET; - if (fb->format->is_yuv) return sun4i_backend_update_yuv_buffer(backend, fb, paddr); @@ -803,6 +796,27 @@ static int sun4i_backend_bind(struct device *dev, struct device *master, dev_set_drvdata(dev, backend); spin_lock_init(&backend->frontend_lock); + if (of_find_property(dev->of_node, "interconnects", NULL)) { + /* + * This assume we have the same DMA constraints for all our the + * devices in our pipeline (all the backends, but also the + * frontends). This sounds bad, but it has always been the case + * for us, and DRM doesn't do per-device allocation either, so + * we would need to fix DRM first... + */ + ret = of_dma_configure(drm->dev, dev->of_node, true); + if (ret) + return ret; + } else { + /* + * If we don't have the interconnect property, most likely + * because of an old DT, we need to set the DMA offset by hand + * on our device since the RAM mapping is at 0 for the DMA bus, + * unlike the CPU. + */ + drm->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + } + backend->engine.node = dev->of_node; backend->engine.ops = &sun4i_backend_engine_ops; backend->engine.id = sun4i_backend_of_get_id(dev->of_node); From e33898a20744de9c98cbb7d38506734ece824cb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Wed, 3 Apr 2019 14:56:58 +0200 Subject: [PATCH 06/30] drm/client: Rename drm_client_add() to drm_client_register() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is done to stay consistent with our naming scheme of _register() = others can start calling us from any thread. 
Suggested-by: Daniel Vetter Signed-off-by: Noralf Trønnes Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190403125658.32389-1-noralf@tronnes.org --- drivers/gpu/drm/drm_client.c | 11 ++++++----- drivers/gpu/drm/drm_fb_helper.c | 2 +- include/drm/drm_client.h | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 9b2bd28dde0a..f20d1dda3961 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -69,7 +69,8 @@ EXPORT_SYMBOL(drm_client_close); * @name: Client name * @funcs: DRM client functions (optional) * - * This initialises the client and opens a &drm_file. Use drm_client_add() to complete the process. + * This initialises the client and opens a &drm_file. + * Use drm_client_register() to complete the process. * The caller needs to hold a reference on @dev before calling this function. * The client is freed when the &drm_device is unregistered. See drm_client_release(). * @@ -108,16 +109,16 @@ err_put_module: EXPORT_SYMBOL(drm_client_init); /** - * drm_client_add - Add client to the device list + * drm_client_register - Register client * @client: DRM client * * Add the client to the &drm_device client list to activate its callbacks. * @client must be initialized by a call to drm_client_init(). After - * drm_client_add() it is no longer permissible to call drm_client_release() + * drm_client_register() it is no longer permissible to call drm_client_release() * directly (outside the unregister callback), instead cleanup will happen * automatically on driver unload. 
*/ -void drm_client_add(struct drm_client_dev *client) +void drm_client_register(struct drm_client_dev *client) { struct drm_device *dev = client->dev; @@ -125,7 +126,7 @@ void drm_client_add(struct drm_client_dev *client) list_add(&client->list, &dev->clientlist); mutex_unlock(&dev->clientlist_mutex); } -EXPORT_SYMBOL(drm_client_add); +EXPORT_SYMBOL(drm_client_register); /** * drm_client_release - Release DRM client resources diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 84791dd4a90d..367641c7754f 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -3322,7 +3322,7 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp) if (ret) DRM_DEV_DEBUG(dev->dev, "client hotplug ret=%d\n", ret); - drm_client_add(&fb_helper->client); + drm_client_register(&fb_helper->client); return 0; } diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index 8b552b1a6ce9..268b2cf0052a 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -90,7 +90,7 @@ struct drm_client_dev { int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, const char *name, const struct drm_client_funcs *funcs); void drm_client_release(struct drm_client_dev *client); -void drm_client_add(struct drm_client_dev *client); +void drm_client_register(struct drm_client_dev *client); void drm_client_dev_unregister(struct drm_device *dev); void drm_client_dev_hotplug(struct drm_device *dev); From 09ded8af57bcef7287b8242087d3e7556380de62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Sun, 7 Apr 2019 18:52:34 +0200 Subject: [PATCH 07/30] drm/i915/fbdev: Move intel_fb_initial_config() to fbdev helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is generic code and having it in the helper will let other drivers benefit from it. 
One change was necessary assuming this to be true: INTEL_INFO(dev_priv)->num_pipes == dev->mode_config.num_crtc Suggested-by: Daniel Vetter Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Noralf Trønnes Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190407165243.54043-4-noralf@tronnes.org --- drivers/gpu/drm/drm_fb_helper.c | 194 ++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_fbdev.c | 218 ----------------------------- include/drm/drm_fb_helper.h | 23 --- 3 files changed, 190 insertions(+), 245 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 367641c7754f..2339f0f8f5a8 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2559,6 +2559,194 @@ static void drm_setup_crtc_rotation(struct drm_fb_helper *fb_helper, fb_helper->sw_rotations |= DRM_MODE_ROTATE_0; } +static struct drm_fb_helper_crtc * +drm_fb_helper_crtc(struct drm_fb_helper *fb_helper, struct drm_crtc *crtc) +{ + int i; + + for (i = 0; i < fb_helper->crtc_count; i++) + if (fb_helper->crtc_info[i].mode_set.crtc == crtc) + return &fb_helper->crtc_info[i]; + + return NULL; +} + +/* Try to read the BIOS display configuration and use it for the initial config */ +static bool drm_fb_helper_firmware_config(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_crtc **crtcs, + struct drm_display_mode **modes, + struct drm_fb_offset *offsets, + bool *enabled, int width, int height) +{ + struct drm_device *dev = fb_helper->dev; + unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); + unsigned long conn_configured, conn_seq; + int i, j; + bool *save_enabled; + bool fallback = true, ret = true; + int num_connectors_enabled = 0; + int num_connectors_detected = 0; + struct drm_modeset_acquire_ctx ctx; + + save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); + if (!save_enabled) + return false; + + drm_modeset_acquire_init(&ctx, 
0); + + while (drm_modeset_lock_all_ctx(dev, &ctx) != 0) + drm_modeset_backoff(&ctx); + + memcpy(save_enabled, enabled, count); + conn_seq = GENMASK(count - 1, 0); + conn_configured = 0; +retry: + for (i = 0; i < count; i++) { + struct drm_fb_helper_connector *fb_conn; + struct drm_connector *connector; + struct drm_encoder *encoder; + struct drm_fb_helper_crtc *new_crtc; + + fb_conn = fb_helper->connector_info[i]; + connector = fb_conn->connector; + + if (conn_configured & BIT(i)) + continue; + + /* First pass, only consider tiled connectors */ + if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile) + continue; + + if (connector->status == connector_status_connected) + num_connectors_detected++; + + if (!enabled[i]) { + DRM_DEBUG_KMS("connector %s not enabled, skipping\n", + connector->name); + conn_configured |= BIT(i); + continue; + } + + if (connector->force == DRM_FORCE_OFF) { + DRM_DEBUG_KMS("connector %s is disabled by user, skipping\n", + connector->name); + enabled[i] = false; + continue; + } + + encoder = connector->state->best_encoder; + if (!encoder || WARN_ON(!connector->state->crtc)) { + if (connector->force > DRM_FORCE_OFF) + goto bail; + + DRM_DEBUG_KMS("connector %s has no encoder or crtc, skipping\n", + connector->name); + enabled[i] = false; + conn_configured |= BIT(i); + continue; + } + + num_connectors_enabled++; + + new_crtc = drm_fb_helper_crtc(fb_helper, connector->state->crtc); + + /* + * Make sure we're not trying to drive multiple connectors + * with a single CRTC, since our cloning support may not + * match the BIOS. 
+ */ + for (j = 0; j < count; j++) { + if (crtcs[j] == new_crtc) { + DRM_DEBUG_KMS("fallback: cloned configuration\n"); + goto bail; + } + } + + DRM_DEBUG_KMS("looking for cmdline mode on connector %s\n", + connector->name); + + /* go for command line mode first */ + modes[i] = drm_pick_cmdline_mode(fb_conn); + + /* try for preferred next */ + if (!modes[i]) { + DRM_DEBUG_KMS("looking for preferred mode on connector %s %d\n", + connector->name, connector->has_tile); + modes[i] = drm_has_preferred_mode(fb_conn, width, + height); + } + + /* No preferred mode marked by the EDID? Are there any modes? */ + if (!modes[i] && !list_empty(&connector->modes)) { + DRM_DEBUG_KMS("using first mode listed on connector %s\n", + connector->name); + modes[i] = list_first_entry(&connector->modes, + struct drm_display_mode, + head); + } + + /* last resort: use current mode */ + if (!modes[i]) { + /* + * IMPORTANT: We want to use the adjusted mode (i.e. + * after the panel fitter upscaling) as the initial + * config, not the input mode, which is what crtc->mode + * usually contains. But since our current + * code puts a mode derived from the post-pfit timings + * into crtc->mode this works out correctly. + * + * This is crtc->mode and not crtc->state->mode for the + * fastboot check to work correctly. + */ + DRM_DEBUG_KMS("looking for current mode on connector %s\n", + connector->name); + modes[i] = &connector->state->crtc->mode; + } + crtcs[i] = new_crtc; + + DRM_DEBUG_KMS("connector %s on [CRTC:%d:%s]: %dx%d%s\n", + connector->name, + connector->state->crtc->base.id, + connector->state->crtc->name, + modes[i]->hdisplay, modes[i]->vdisplay, + modes[i]->flags & DRM_MODE_FLAG_INTERLACE ? 
"i" : ""); + + fallback = false; + conn_configured |= BIT(i); + } + + if (conn_configured != conn_seq) { /* repeat until no more are found */ + conn_seq = conn_configured; + goto retry; + } + + /* + * If the BIOS didn't enable everything it could, fall back to have the + * same user experiencing of lighting up as much as possible like the + * fbdev helper library. + */ + if (num_connectors_enabled != num_connectors_detected && + num_connectors_enabled < dev->mode_config.num_crtc) { + DRM_DEBUG_KMS("fallback: Not all outputs enabled\n"); + DRM_DEBUG_KMS("Enabled: %i, detected: %i\n", num_connectors_enabled, + num_connectors_detected); + fallback = true; + } + + if (fallback) { +bail: + DRM_DEBUG_KMS("Not using firmware configuration\n"); + memcpy(enabled, save_enabled, count); + ret = false; + } + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + + kfree(save_enabled); + return ret; +} + static void drm_setup_crtcs(struct drm_fb_helper *fb_helper, u32 width, u32 height) { @@ -2591,10 +2779,8 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper, DRM_DEBUG_KMS("No connectors reported connected with modes\n"); drm_enable_connectors(fb_helper, enabled); - if (!(fb_helper->funcs->initial_config && - fb_helper->funcs->initial_config(fb_helper, crtcs, modes, - offsets, - enabled, width, height))) { + if (!drm_fb_helper_firmware_config(fb_helper, crtcs, modes, offsets, + enabled, width, height)) { memset(modes, 0, fb_helper->connector_count*sizeof(modes[0])); memset(crtcs, 0, fb_helper->connector_count*sizeof(crtcs[0])); memset(offsets, 0, fb_helper->connector_count*sizeof(offsets[0])); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index ef93c27e60b4..c4d17dda3355 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -284,225 +284,7 @@ out_unlock: return ret; } -static struct drm_fb_helper_crtc * -intel_fb_helper_crtc(struct drm_fb_helper *fb_helper, struct drm_crtc 
*crtc) -{ - int i; - - for (i = 0; i < fb_helper->crtc_count; i++) - if (fb_helper->crtc_info[i].mode_set.crtc == crtc) - return &fb_helper->crtc_info[i]; - - return NULL; -} - -/* - * Try to read the BIOS display configuration and use it for the initial - * fb configuration. - * - * The BIOS or boot loader will generally create an initial display - * configuration for us that includes some set of active pipes and displays. - * This routine tries to figure out which pipes and connectors are active - * and stuffs them into the crtcs and modes array given to us by the - * drm_fb_helper code. - * - * The overall sequence is: - * intel_fbdev_init - from driver load - * intel_fbdev_init_bios - initialize the intel_fbdev using BIOS data - * drm_fb_helper_init - build fb helper structs - * drm_fb_helper_single_add_all_connectors - more fb helper structs - * intel_fbdev_initial_config - apply the config - * drm_fb_helper_initial_config - call ->probe then register_framebuffer() - * drm_setup_crtcs - build crtc config for fbdev - * intel_fb_initial_config - find active connectors etc - * drm_fb_helper_single_fb_probe - set up fbdev - * intelfb_create - re-use or alloc fb, build out fbdev structs - * - * Note that we don't make special consideration whether we could actually - * switch to the selected modes without a full modeset. E.g. when the display - * is in VGA mode we need to recalculate watermarks and set a new high-res - * framebuffer anyway. 
- */ -static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, - struct drm_fb_helper_crtc **crtcs, - struct drm_display_mode **modes, - struct drm_fb_offset *offsets, - bool *enabled, int width, int height) -{ - struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); - unsigned long conn_configured, conn_seq; - int i, j; - bool *save_enabled; - bool fallback = true, ret = true; - int num_connectors_enabled = 0; - int num_connectors_detected = 0; - struct drm_modeset_acquire_ctx ctx; - - save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); - if (!save_enabled) - return false; - - drm_modeset_acquire_init(&ctx, 0); - - while (drm_modeset_lock_all_ctx(fb_helper->dev, &ctx) != 0) - drm_modeset_backoff(&ctx); - - memcpy(save_enabled, enabled, count); - conn_seq = GENMASK(count - 1, 0); - conn_configured = 0; -retry: - for (i = 0; i < count; i++) { - struct drm_fb_helper_connector *fb_conn; - struct drm_connector *connector; - struct drm_encoder *encoder; - struct drm_fb_helper_crtc *new_crtc; - - fb_conn = fb_helper->connector_info[i]; - connector = fb_conn->connector; - - if (conn_configured & BIT(i)) - continue; - - /* First pass, only consider tiled connectors */ - if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile) - continue; - - if (connector->status == connector_status_connected) - num_connectors_detected++; - - if (!enabled[i]) { - DRM_DEBUG_KMS("connector %s not enabled, skipping\n", - connector->name); - conn_configured |= BIT(i); - continue; - } - - if (connector->force == DRM_FORCE_OFF) { - DRM_DEBUG_KMS("connector %s is disabled by user, skipping\n", - connector->name); - enabled[i] = false; - continue; - } - - encoder = connector->state->best_encoder; - if (!encoder || WARN_ON(!connector->state->crtc)) { - if (connector->force > DRM_FORCE_OFF) - goto bail; - - DRM_DEBUG_KMS("connector %s has no encoder or crtc, skipping\n", - connector->name); - 
enabled[i] = false; - conn_configured |= BIT(i); - continue; - } - - num_connectors_enabled++; - - new_crtc = intel_fb_helper_crtc(fb_helper, - connector->state->crtc); - - /* - * Make sure we're not trying to drive multiple connectors - * with a single CRTC, since our cloning support may not - * match the BIOS. - */ - for (j = 0; j < count; j++) { - if (crtcs[j] == new_crtc) { - DRM_DEBUG_KMS("fallback: cloned configuration\n"); - goto bail; - } - } - - DRM_DEBUG_KMS("looking for cmdline mode on connector %s\n", - connector->name); - - /* go for command line mode first */ - modes[i] = drm_pick_cmdline_mode(fb_conn); - - /* try for preferred next */ - if (!modes[i]) { - DRM_DEBUG_KMS("looking for preferred mode on connector %s %d\n", - connector->name, connector->has_tile); - modes[i] = drm_has_preferred_mode(fb_conn, width, - height); - } - - /* No preferred mode marked by the EDID? Are there any modes? */ - if (!modes[i] && !list_empty(&connector->modes)) { - DRM_DEBUG_KMS("using first mode listed on connector %s\n", - connector->name); - modes[i] = list_first_entry(&connector->modes, - struct drm_display_mode, - head); - } - - /* last resort: use current mode */ - if (!modes[i]) { - /* - * IMPORTANT: We want to use the adjusted mode (i.e. - * after the panel fitter upscaling) as the initial - * config, not the input mode, which is what crtc->mode - * usually contains. But since our current - * code puts a mode derived from the post-pfit timings - * into crtc->mode this works out correctly. - * - * This is crtc->mode and not crtc->state->mode for the - * fastboot check to work correctly. crtc_state->mode has - * I915_MODE_FLAG_INHERITED, which we clear to force check - * state. 
- */ - DRM_DEBUG_KMS("looking for current mode on connector %s\n", - connector->name); - modes[i] = &connector->state->crtc->mode; - } - crtcs[i] = new_crtc; - - DRM_DEBUG_KMS("connector %s on [CRTC:%d:%s]: %dx%d%s\n", - connector->name, - connector->state->crtc->base.id, - connector->state->crtc->name, - modes[i]->hdisplay, modes[i]->vdisplay, - modes[i]->flags & DRM_MODE_FLAG_INTERLACE ? "i" :""); - - fallback = false; - conn_configured |= BIT(i); - } - - if (conn_configured != conn_seq) { /* repeat until no more are found */ - conn_seq = conn_configured; - goto retry; - } - - /* - * If the BIOS didn't enable everything it could, fall back to have the - * same user experiencing of lighting up as much as possible like the - * fbdev helper library. - */ - if (num_connectors_enabled != num_connectors_detected && - num_connectors_enabled < INTEL_INFO(dev_priv)->num_pipes) { - DRM_DEBUG_KMS("fallback: Not all outputs enabled\n"); - DRM_DEBUG_KMS("Enabled: %i, detected: %i\n", num_connectors_enabled, - num_connectors_detected); - fallback = true; - } - - if (fallback) { -bail: - DRM_DEBUG_KMS("Not using firmware configuration\n"); - memcpy(enabled, save_enabled, count); - ret = false; - } - - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); - - kfree(save_enabled); - return ret; -} - static const struct drm_fb_helper_funcs intel_fb_helper_funcs = { - .initial_config = intel_fb_initial_config, .fb_probe = intelfb_create, }; diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 17857e458ac3..40af2866c26a 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -102,29 +102,6 @@ struct drm_fb_helper_funcs { */ int (*fb_probe)(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes); - - /** - * @initial_config: - * - * Driver callback to setup an initial fbdev display configuration. - * Drivers can use this callback to tell the fbdev emulation what the - * preferred initial configuration is. 
This is useful to implement - * smooth booting where the fbdev (and subsequently all userspace) never - * changes the mode, but always inherits the existing configuration. - * - * This callback is optional. - * - * RETURNS: - * - * The driver should return true if a suitable initial configuration has - * been filled out and false when the fbdev helper should fall back to - * the default probing logic. - */ - bool (*initial_config)(struct drm_fb_helper *fb_helper, - struct drm_fb_helper_crtc **crtcs, - struct drm_display_mode **modes, - struct drm_fb_offset *offsets, - bool *enabled, int width, int height); }; struct drm_fb_helper_connector { From f47056e880574c923165577b77982ffe3395c510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Wed, 10 Apr 2019 14:43:45 +0200 Subject: [PATCH 08/30] drm/tinydrm: Fix fbdev pixel format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to a copy/paste error, the fbdev format was changed to 32bpp = XRGB8888 which is an emulated format for the RGB565 drivers. Revert to using the fallback which is dev->mode_config.preferred_depth for the drivers that set it or 32bpp for those that don't (repaper, st7586).
Fixes: 3eba3922819f ("drm/tinydrm: Drop using tinydrm_device") Signed-off-by: Noralf Trønnes Reviewed-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190410124345.25945-1-noralf@tronnes.org --- drivers/gpu/drm/tinydrm/hx8357d.c | 2 +- drivers/gpu/drm/tinydrm/ili9225.c | 2 +- drivers/gpu/drm/tinydrm/ili9341.c | 2 +- drivers/gpu/drm/tinydrm/mi0283qt.c | 2 +- drivers/gpu/drm/tinydrm/repaper.c | 2 +- drivers/gpu/drm/tinydrm/st7586.c | 2 +- drivers/gpu/drm/tinydrm/st7735r.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/tinydrm/hx8357d.c b/drivers/gpu/drm/tinydrm/hx8357d.c index fab961dded87..5773d0fb6ca1 100644 --- a/drivers/gpu/drm/tinydrm/hx8357d.c +++ b/drivers/gpu/drm/tinydrm/hx8357d.c @@ -267,7 +267,7 @@ static int hx8357d_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_fbdev_generic_setup(drm, 32); + drm_fbdev_generic_setup(drm, 0); return 0; } diff --git a/drivers/gpu/drm/tinydrm/ili9225.c b/drivers/gpu/drm/tinydrm/ili9225.c index e9116ef4b5bc..4b1a587c0134 100644 --- a/drivers/gpu/drm/tinydrm/ili9225.c +++ b/drivers/gpu/drm/tinydrm/ili9225.c @@ -433,7 +433,7 @@ static int ili9225_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_fbdev_generic_setup(drm, 32); + drm_fbdev_generic_setup(drm, 0); return 0; } diff --git a/drivers/gpu/drm/tinydrm/ili9341.c b/drivers/gpu/drm/tinydrm/ili9341.c index d15f85e837ae..4ade9e4b924f 100644 --- a/drivers/gpu/drm/tinydrm/ili9341.c +++ b/drivers/gpu/drm/tinydrm/ili9341.c @@ -229,7 +229,7 @@ static int ili9341_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_fbdev_generic_setup(drm, 32); + drm_fbdev_generic_setup(drm, 0); return 0; } diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c index c6dc31084a4e..8e169846fbd8 100644 --- a/drivers/gpu/drm/tinydrm/mi0283qt.c +++ b/drivers/gpu/drm/tinydrm/mi0283qt.c @@ -242,7 +242,7 @@ static int mi0283qt_probe(struct spi_device *spi) spi_set_drvdata(spi, 
drm); - drm_fbdev_generic_setup(drm, 32); + drm_fbdev_generic_setup(drm, 0); return 0; } diff --git a/drivers/gpu/drm/tinydrm/repaper.c b/drivers/gpu/drm/tinydrm/repaper.c index a29b8278324b..370629e2de94 100644 --- a/drivers/gpu/drm/tinydrm/repaper.c +++ b/drivers/gpu/drm/tinydrm/repaper.c @@ -1131,7 +1131,7 @@ static int repaper_probe(struct spi_device *spi) DRM_DEBUG_DRIVER("SPI speed: %uMHz\n", spi->max_speed_hz / 1000000); - drm_fbdev_generic_setup(drm, 32); + drm_fbdev_generic_setup(drm, 0); return 0; } diff --git a/drivers/gpu/drm/tinydrm/st7586.c b/drivers/gpu/drm/tinydrm/st7586.c index 560d7ac0cadc..36bb16a15f7e 100644 --- a/drivers/gpu/drm/tinydrm/st7586.c +++ b/drivers/gpu/drm/tinydrm/st7586.c @@ -408,7 +408,7 @@ static int st7586_probe(struct spi_device *spi) DRM_DEBUG_KMS("preferred_depth=%u, rotation = %u\n", drm->mode_config.preferred_depth, rotation); - drm_fbdev_generic_setup(drm, 32); + drm_fbdev_generic_setup(drm, 0); return 0; } diff --git a/drivers/gpu/drm/tinydrm/st7735r.c b/drivers/gpu/drm/tinydrm/st7735r.c index 022e9849b95b..ce9109e613e0 100644 --- a/drivers/gpu/drm/tinydrm/st7735r.c +++ b/drivers/gpu/drm/tinydrm/st7735r.c @@ -207,7 +207,7 @@ static int st7735r_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_fbdev_generic_setup(drm, 32); + drm_fbdev_generic_setup(drm, 0); return 0; } From 79b979735e85ec62aeaf007287a871f0b42b027e Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Apr 2019 06:49:32 +0200 Subject: [PATCH 09/30] drm: fix drm_fb_xrgb8888_to_rgb888_dstclip() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Oops, the __iomem annotation was added to the header file only. Add it to the implementation (and documentation) too. 
Fixes: 5c5373b51bec ("drm: switch drm_fb_xrgb8888_to_rgb888_dstclip to accept __iomem dst") Signed-off-by: Gerd Hoffmann Reviewed-by: Noralf Trønnes Link: http://patchwork.freedesktop.org/patch/msgid/20190411044932.13247-1-kraxel@redhat.com --- drivers/gpu/drm/drm_format_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 319de608a088..da388012df2a 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -230,7 +230,7 @@ static void drm_fb_xrgb8888_to_rgb888_line(u8 *dbuf, u32 *sbuf, /** * drm_fb_xrgb8888_to_rgb888_dstclip - Convert XRGB8888 to RGB888 clip buffer - * @dst: RGB565 destination buffer + * @dst: RGB565 destination buffer (iomem) * @dst_pitch: destination buffer pitch * @vaddr: XRGB8888 source buffer * @fb: DRM framebuffer @@ -241,9 +241,9 @@ static void drm_fb_xrgb8888_to_rgb888_line(u8 *dbuf, u32 *sbuf, * support XRGB8888. * * This function applies clipping on dst, i.e. the destination is a - * full framebuffer but only the clip rect content is copied over. + * full (iomem) framebuffer but only the clip rect content is copied over. */ -void drm_fb_xrgb8888_to_rgb888_dstclip(void *dst, unsigned int dst_pitch, +void drm_fb_xrgb8888_to_rgb888_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip) { From beb941b970fb129dae206050593d3e768859b146 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 10 Apr 2019 13:42:25 +0200 Subject: [PATCH 10/30] virtio-gpu api: comment feature flags Add comments to the existing feature flags, documenting which commands belong to them. 
Signed-off-by: Gerd Hoffmann Reviewed-by: Gurchetan Singh Link: http://patchwork.freedesktop.org/patch/msgid/20190410114227.25846-2-kraxel@redhat.com --- include/uapi/linux/virtio_gpu.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 8e88eba1fa7a..0c85914d9369 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -40,8 +40,16 @@ #include -#define VIRTIO_GPU_F_VIRGL 0 -#define VIRTIO_GPU_F_EDID 1 +/* + * VIRTIO_GPU_CMD_CTX_* + * VIRTIO_GPU_CMD_*_3D + */ +#define VIRTIO_GPU_F_VIRGL 0 + +/* + * VIRTIO_GPU_CMD_GET_EDID + */ +#define VIRTIO_GPU_F_EDID 1 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, From 3d31e21522819925313a95174b3071ee408c12dd Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 1 Apr 2019 15:33:42 +0300 Subject: [PATCH 11/30] drm/bridge: ti-tfp410: Fall back to HPD polling if HPD irq is not available In case either the HPD gpio is not specified or when the HPD gpio can not be used as interrupt we should tell the core that the HPD needs to be polled for detecting hotplug. 
Signed-off-by: Peter Ujfalusi Reviewed-by: Laurent Pinchart Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190401123342.15767-1-peter.ujfalusi@ti.com --- drivers/gpu/drm/bridge/ti-tfp410.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c index 285be4a0f4bd..6fc831eb3804 100644 --- a/drivers/gpu/drm/bridge/ti-tfp410.c +++ b/drivers/gpu/drm/bridge/ti-tfp410.c @@ -31,6 +31,7 @@ struct tfp410 { struct i2c_adapter *ddc; struct gpio_desc *hpd; + int hpd_irq; struct delayed_work hpd_work; struct gpio_desc *powerdown; @@ -124,8 +125,10 @@ static int tfp410_attach(struct drm_bridge *bridge) return -ENODEV; } - if (dvi->hpd) + if (dvi->hpd_irq >= 0) dvi->connector.polled = DRM_CONNECTOR_POLL_HPD; + else + dvi->connector.polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; drm_connector_helper_add(&dvi->connector, &tfp410_con_helper_funcs); @@ -324,10 +327,15 @@ static int tfp410_init(struct device *dev, bool i2c) return PTR_ERR(dvi->powerdown); } - if (dvi->hpd) { + if (dvi->hpd) + dvi->hpd_irq = gpiod_to_irq(dvi->hpd); + else + dvi->hpd_irq = -ENXIO; + + if (dvi->hpd_irq >= 0) { INIT_DELAYED_WORK(&dvi->hpd_work, tfp410_hpd_work_func); - ret = devm_request_threaded_irq(dev, gpiod_to_irq(dvi->hpd), + ret = devm_request_threaded_irq(dev, dvi->hpd_irq, NULL, tfp410_hpd_irq_thread, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "hdmi-hpd", dvi); From 51c7b4477c8bf1b34b69f7697f70cd8396492ca6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 1 Apr 2019 15:41:42 +0300 Subject: [PATCH 12/30] dt-bindings: display: tfp410: Add bus-width parameter property tfp410 can be connected to the host processor in 24bit, single-edge (24 lines) or 12bit, dual-edge (12 lines) mode. Add bus-width to the documentation so it can be used to select between the two connection schemes.
Signed-off-by: Peter Ujfalusi Reviewed-by: Laurent Pinchart Reviewed-by: Rob Herring Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190401124143.17179-2-peter.ujfalusi@ti.com --- .../devicetree/bindings/display/bridge/ti,tfp410.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt b/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt index 3f903af93949..5ff4f64ef8e8 100644 --- a/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt +++ b/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt @@ -18,7 +18,14 @@ This device has two video ports. Their connections are modeled using the OF graph bindings specified in [1]. Each port node shall have a single endpoint. - Port 0 is the DPI input port. Its endpoint subnode shall contain a - pclk-sample property and a remote-endpoint property as specified in [1]. + pclk-sample and bus-width property and a remote-endpoint property as specified + in [1]. + - If pclk-sample is not defined, pclk-sample = 0 should be assumed for + backward compatibility. + - If bus-width is not defined then bus-width = 24 should be assumed for + backward compatibility. + bus-width = 24: 24 data lines are connected and single-edge mode + bus-width = 12: 12 data lines are connected and dual-edge mode - Port 1 is the DVI output port. Its endpoint subnode shall contain a remote-endpoint property is specified in [1]. @@ -43,6 +50,7 @@ tfp410: encoder@0 { tfp410_in: endpoint@0 { pclk-sample = <1>; + bus-width = <24>; remote-endpoint = <&dpi_out>; }; }; From 0eb2766dd6f366d42448121c383420bb0307bcc7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 1 Apr 2019 15:41:43 +0300 Subject: [PATCH 13/30] drm/bridge: ti-tfp410: Set the bus_format The TFP410 supports 24 bit, single-edge and 12 bit, dual-edge modes. Depending on how many wires are used (24/12) the driver can set the correct bus_format. 
If the information is not available in DT then assume 24 bit, single-edge setup. Signed-off-by: Peter Ujfalusi Reviewed-by: Laurent Pinchart Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190401124143.17179-3-peter.ujfalusi@ti.com --- drivers/gpu/drm/bridge/ti-tfp410.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c index 6fc831eb3804..8b0e71bd3ca7 100644 --- a/drivers/gpu/drm/bridge/ti-tfp410.c +++ b/drivers/gpu/drm/bridge/ti-tfp410.c @@ -29,6 +29,7 @@ struct tfp410 { struct drm_connector connector; unsigned int connector_type; + u32 bus_format; struct i2c_adapter *ddc; struct gpio_desc *hpd; int hpd_irq; @@ -139,6 +140,9 @@ static int tfp410_attach(struct drm_bridge *bridge) return ret; } + drm_display_info_set_bus_formats(&dvi->connector.display_info, + &dvi->bus_format, 1); + drm_connector_attach_encoder(&dvi->connector, bridge->encoder); @@ -197,6 +201,7 @@ static int tfp410_parse_timings(struct tfp410 *dvi, bool i2c) struct drm_bridge_timings *timings = &dvi->timings; struct device_node *ep; u32 pclk_sample = 0; + u32 bus_width = 24; s32 deskew = 0; /* Start with defaults. */ @@ -221,6 +226,7 @@ static int tfp410_parse_timings(struct tfp410 *dvi, bool i2c) /* Get the sampling edge from the endpoint. */ of_property_read_u32(ep, "pclk-sample", &pclk_sample); + of_property_read_u32(ep, "bus-width", &bus_width); of_node_put(ep); timings->input_bus_flags = DRM_BUS_FLAG_DE_HIGH; @@ -238,6 +244,17 @@ static int tfp410_parse_timings(struct tfp410 *dvi, bool i2c) return -EINVAL; } + switch (bus_width) { + case 12: + dvi->bus_format = MEDIA_BUS_FMT_RGB888_2X12_LE; + break; + case 24: + dvi->bus_format = MEDIA_BUS_FMT_RGB888_1X24; + break; + default: + return -EINVAL; + } + /* Get the setup and hold time from vendor-specific properties. 
*/ of_property_read_u32(dvi->dev->of_node, "ti,deskew", (u32 *)&deskew); if (deskew < -4 || deskew > 3) From d08d42de6432d5064045159aed060e3db9fa7807 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 21 Feb 2019 14:23:25 -0600 Subject: [PATCH 14/30] iommu: io-pgtable: Add ARM Mali midgard MMU page table format The ARM Mali midgard GPU page table format is similar to standard 64-bit stage 1 page tables, but has a few differences. Add a new format type to represent the format. The input address size is 48 bits and the output address size is 40 bits (and possibly less?). Note that the later bifrost GPUs follow the standard 64-bit stage 1 format. The differences in the format compared to 64-bit stage 1 format are: The 3rd level page entry bits are 0x1 instead of 0x3 for page entries. The access flags are not read-only and unprivileged, but read and write. This is similar to stage 2 entries, but the memory attributes field matches stage 1 being an index. The nG bit is not set by the vendor driver. This one didn't seem to matter, but we'll keep it aligned to the vendor driver.
Cc: Will Deacon Acked-by: Robin Murphy Cc: linux-arm-kernel@lists.infradead.org Cc: iommu@lists.linux-foundation.org Acked-by: Alyssa Rosenzweig Acked-by: Joerg Roedel Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20190409205427.6943-2-robh@kernel.org --- drivers/iommu/io-pgtable-arm.c | 91 ++++++++++++++++++++++++++-------- drivers/iommu/io-pgtable.c | 1 + include/linux/io-pgtable.h | 7 +++ 3 files changed, 77 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index d3700ec15cbd..4e21efbc4459 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -172,6 +172,10 @@ #define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2 +#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0) +#define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2) +#define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4) + /* IOPTE accessors */ #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d)) @@ -180,11 +184,6 @@ #define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) -#define iopte_leaf(pte,l) \ - (l == (ARM_LPAE_MAX_LEVELS - 1) ? 
\ - (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) : \ - (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK)) - struct arm_lpae_io_pgtable { struct io_pgtable iop; @@ -198,6 +197,15 @@ struct arm_lpae_io_pgtable { typedef u64 arm_lpae_iopte; +static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl, + enum io_pgtable_fmt fmt) +{ + if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE) + return iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_PAGE; + + return iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_BLOCK; +} + static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr, struct arm_lpae_io_pgtable *data) { @@ -303,12 +311,14 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) pte |= ARM_LPAE_PTE_NS; - if (lvl == ARM_LPAE_MAX_LEVELS - 1) + if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1) pte |= ARM_LPAE_PTE_TYPE_PAGE; else pte |= ARM_LPAE_PTE_TYPE_BLOCK; - pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; + if (data->iop.fmt != ARM_MALI_LPAE) + pte |= ARM_LPAE_PTE_AF; + pte |= ARM_LPAE_PTE_SH_IS; pte |= paddr_to_iopte(paddr, data); __arm_lpae_set_pte(ptep, pte, &data->iop.cfg); @@ -321,7 +331,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte = *ptep; - if (iopte_leaf(pte, lvl)) { + if (iopte_leaf(pte, lvl, data->iop.fmt)) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; @@ -409,7 +419,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, __arm_lpae_sync_pte(ptep, cfg); } - if (pte && !iopte_leaf(pte, lvl)) { + if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) { cptep = iopte_deref(pte, data); } else if (pte) { /* We require an unmap first */ @@ -429,31 +439,37 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, if (data->iop.fmt == ARM_64_LPAE_S1 || data->iop.fmt == ARM_32_LPAE_S1) { pte = ARM_LPAE_PTE_nG; - if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) pte |= 
ARM_LPAE_PTE_AP_RDONLY; - if (!(prot & IOMMU_PRIV)) pte |= ARM_LPAE_PTE_AP_UNPRIV; - - if (prot & IOMMU_MMIO) - pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV - << ARM_LPAE_PTE_ATTRINDX_SHIFT); - else if (prot & IOMMU_CACHE) - pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE - << ARM_LPAE_PTE_ATTRINDX_SHIFT); } else { pte = ARM_LPAE_PTE_HAP_FAULT; if (prot & IOMMU_READ) pte |= ARM_LPAE_PTE_HAP_READ; if (prot & IOMMU_WRITE) pte |= ARM_LPAE_PTE_HAP_WRITE; + } + + /* + * Note that this logic is structured to accommodate Mali LPAE + * having stage-1-like attributes but stage-2-like permissions. + */ + if (data->iop.fmt == ARM_64_LPAE_S2 || + data->iop.fmt == ARM_32_LPAE_S2) { if (prot & IOMMU_MMIO) pte |= ARM_LPAE_PTE_MEMATTR_DEV; else if (prot & IOMMU_CACHE) pte |= ARM_LPAE_PTE_MEMATTR_OIWB; else pte |= ARM_LPAE_PTE_MEMATTR_NC; + } else { + if (prot & IOMMU_MMIO) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV + << ARM_LPAE_PTE_ATTRINDX_SHIFT); + else if (prot & IOMMU_CACHE) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE + << ARM_LPAE_PTE_ATTRINDX_SHIFT); } if (prot & IOMMU_NOEXEC) @@ -511,7 +527,7 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, while (ptep != end) { arm_lpae_iopte pte = *ptep++; - if (!pte || iopte_leaf(pte, lvl)) + if (!pte || iopte_leaf(pte, lvl, data->iop.fmt)) continue; __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); @@ -602,7 +618,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { __arm_lpae_set_pte(ptep, 0, &iop->cfg); - if (!iopte_leaf(pte, lvl)) { + if (!iopte_leaf(pte, lvl, iop->fmt)) { /* Also flush any partial walks */ io_pgtable_tlb_add_flush(iop, iova, size, ARM_LPAE_GRANULE(data), false); @@ -621,7 +637,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, } return size; - } else if (iopte_leaf(pte, lvl)) { + } else if (iopte_leaf(pte, lvl, iop->fmt)) { /* * Insert a table at the next level to map the old region, * minus the part we want to unmap @@ 
-669,7 +685,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return 0; /* Leaf entry? */ - if (iopte_leaf(pte,lvl)) + if (iopte_leaf(pte, lvl, data->iop.fmt)) goto found_translation; /* Take it to the next level */ @@ -995,6 +1011,32 @@ arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) return iop; } +static struct io_pgtable * +arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct io_pgtable *iop; + + if (cfg->ias != 48 || cfg->oas > 40) + return NULL; + + cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); + iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); + if (iop) { + u64 mair, ttbr; + + /* Copy values as union fields overlap */ + mair = cfg->arm_lpae_s1_cfg.mair[0]; + ttbr = cfg->arm_lpae_s1_cfg.ttbr[0]; + + cfg->arm_mali_lpae_cfg.memattr = mair; + cfg->arm_mali_lpae_cfg.transtab = ttbr | + ARM_MALI_LPAE_TTBR_READ_INNER | + ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; + } + + return iop; +} + struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { .alloc = arm_64_lpae_alloc_pgtable_s1, .free = arm_lpae_free_pgtable, @@ -1015,6 +1057,11 @@ struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = { .free = arm_lpae_free_pgtable, }; +struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = { + .alloc = arm_mali_lpae_alloc_pgtable, + .free = arm_lpae_free_pgtable, +}; + #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST static struct io_pgtable_cfg *cfg_cookie; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index 93f2880be6c6..5227cfdbb65b 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -30,6 +30,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { [ARM_32_LPAE_S2] = &io_pgtable_arm_32_lpae_s2_init_fns, [ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns, [ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns, + [ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns, #endif #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S [ARM_V7S] = 
&io_pgtable_arm_v7s_init_fns, diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 47d5ae559329..76969a564831 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -12,6 +12,7 @@ enum io_pgtable_fmt { ARM_64_LPAE_S1, ARM_64_LPAE_S2, ARM_V7S, + ARM_MALI_LPAE, IO_PGTABLE_NUM_FMTS, }; @@ -108,6 +109,11 @@ struct io_pgtable_cfg { u32 nmrr; u32 prrr; } arm_v7s_cfg; + + struct { + u64 transtab; + u64 memattr; + } arm_mali_lpae_cfg; }; }; @@ -209,5 +215,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; +extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; #endif /* __IO_PGTABLE_H */ From c117aa4d8701a713e6dee82657291ae26f0c3c7c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 8 Mar 2019 14:26:02 -0600 Subject: [PATCH 15/30] drm: Add a drm_gem_objects_lookup helper Similar to the single handle drm_gem_object_lookup(), drm_gem_objects_lookup() takes an array of handles and returns an array of GEM objects. v2: - Take the userspace pointer directly and allocate the array. - Expand the function documentation. 
Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sean Paul Cc: David Airlie Cc: Daniel Vetter Acked-by: Alyssa Rosenzweig Acked-by: Tomeu Vizoso Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20190409205427.6943-3-robh@kernel.org --- drivers/gpu/drm/drm_gem.c | 93 ++++++++++++++++++++++++++++++++++----- include/drm/drm_gem.h | 2 + 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 52c0a837a3b2..e93043b64c2d 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -646,6 +646,85 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, } EXPORT_SYMBOL(drm_gem_put_pages); +static int objects_lookup(struct drm_file *filp, u32 *handle, int count, + struct drm_gem_object **objs) +{ + int i, ret = 0; + struct drm_gem_object *obj; + + spin_lock(&filp->table_lock); + + for (i = 0; i < count; i++) { + /* Check if we currently have a reference on the object */ + obj = idr_find(&filp->object_idr, handle[i]); + if (!obj) { + ret = -ENOENT; + break; + } + drm_gem_object_get(obj); + objs[i] = obj; + } + spin_unlock(&filp->table_lock); + + return ret; +} + +/** + * drm_gem_objects_lookup - look up GEM objects from an array of handles + * @filp: DRM file private data + * @bo_handles: user pointer to array of userspace handles + * @count: size of handle array + * @objs_out: returned pointer to array of drm_gem_object pointers + * + * Takes an array of userspace handles and returns a newly allocated array of + * GEM objects. + * + * For a single handle lookup, use drm_gem_object_lookup(). + * + * Returns: + * + * @objs_out filled in with GEM object pointers. Returned GEM objects need to be + * released with drm_gem_object_put(). -ENOENT is returned on a lookup + * failure. 0 is returned on success. 
+ * + */ +int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles, + int count, struct drm_gem_object ***objs_out) +{ + int ret; + u32 *handles; + struct drm_gem_object **objs; + + if (!count) + return 0; + + objs = kvmalloc_array(count, sizeof(struct drm_gem_object *), + GFP_KERNEL | __GFP_ZERO); + if (!objs) + return -ENOMEM; + + /* Hand the array to the caller up front: it is not freed on the error paths below */ + *objs_out = objs; + handles = kvmalloc_array(count, sizeof(u32), GFP_KERNEL); + if (!handles) { + ret = -ENOMEM; + goto out; + } + + if (copy_from_user(handles, bo_handles, count * sizeof(u32))) { + ret = -EFAULT; + DRM_DEBUG("Failed to copy in GEM handles\n"); + goto out; + } + + ret = objects_lookup(filp, handles, count, objs); + +out: + kvfree(handles); + return ret; +} +EXPORT_SYMBOL(drm_gem_objects_lookup); + /** * drm_gem_object_lookup - look up a GEM object from its handle * @filp: DRM file private date @@ -655,21 +734,15 @@ EXPORT_SYMBOL(drm_gem_put_pages); * * A reference to the object named by the handle if such exists on @filp, NULL * otherwise. + * + * If looking up an array of handles, use drm_gem_objects_lookup(). 
*/ struct drm_gem_object * drm_gem_object_lookup(struct drm_file *filp, u32 handle) { - struct drm_gem_object *obj; - - spin_lock(&filp->table_lock); - - /* Check if we currently have a reference on the object */ - obj = idr_find(&filp->object_idr, handle); - if (obj) - drm_gem_object_get(obj); - - spin_unlock(&filp->table_lock); + struct drm_gem_object *obj = NULL; + objects_lookup(filp, &handle, 1, &obj); return obj; } EXPORT_SYMBOL(drm_gem_object_lookup); diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 2955aaab3dca..5ee85c9eaa9d 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -381,6 +381,8 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj); void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, bool dirty, bool accessed); +int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles, + int count, struct drm_gem_object ***objs_out); struct drm_gem_object *drm_gem_object_lookup(struct drm_file *filp, u32 handle); long drm_gem_reservation_object_wait(struct drm_file *filep, u32 handle, bool wait_all, unsigned long timeout); From f3ba91228e8e917e5bd6c4b72bfe846933d17370 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 10 Sep 2018 14:27:58 -0500 Subject: [PATCH 16/30] drm/panfrost: Add initial panfrost driver This adds the initial driver for panfrost which supports Arm Mali Midgard and Bifrost family of GPUs. Currently, only the T860 and T760 Midgard GPUs have been tested. 
v2: - Add GPU reset on job hangs (Tomeu) - Add RuntimePM and devfreq support (Tomeu) - Fix T760 support (Tomeu) - Add a TODO file (Rob, Tomeu) - Support multiple in fences (Tomeu) - Drop support for shared fences (Tomeu) - Fill in MMU de-init (Rob) - Move register definitions back to single header (Rob) - Clean-up hardcoded job submit todos (Rob) - Implement feature setup based on features/issues (Rob) - Add remaining Midgard DT compatible strings (Rob) v3: - Add support for reset lines (Neil) - Add a MAINTAINERS entry (Rob) - Call dma_set_mask_and_coherent (Rob) - Do MMU invalidate on map and unmap. Restructure to do a single operation per map/unmap call. (Rob) - Add a missing explicit padding to struct drm_panfrost_create_bo (Rob) - Fix 0-day error: "panfrost_devfreq.c:151:9-16: ERROR: PTR_ERR applied after initialization to constant on line 150" - Drop HW_FEATURE_AARCH64_MMU conditional (Rob) - s/DRM_PANFROST_PARAM_GPU_ID/DRM_PANFROST_PARAM_GPU_PROD_ID/ (Rob) - Check drm_gem_shmem_prime_import_sg_table() error code (Rob) - Re-order power on sequence (Rob) - Move panfrost_acquire_object_fences() before scheduling job (Rob) - Add NULL checks on array pointers in job clean-up (Rob) - Rework devfreq (Tomeu) - Fix devfreq init with no regulator (Rob) - Various WS and comments clean-up (Rob) Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sean Paul Cc: David Airlie Cc: Daniel Vetter Cc: Lyude Paul Reviewed-by: Alyssa Rosenzweig Reviewed-by: Eric Anholt Reviewed-by: Steven Price Signed-off-by: Marty E. 
Plummer Signed-off-by: Tomeu Vizoso Signed-off-by: Neil Armstrong Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20190409205427.6943-4-robh@kernel.org --- MAINTAINERS | 9 + drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/panfrost/Kconfig | 14 + drivers/gpu/drm/panfrost/Makefile | 12 + drivers/gpu/drm/panfrost/TODO | 27 + drivers/gpu/drm/panfrost/panfrost_devfreq.c | 218 ++++++++ drivers/gpu/drm/panfrost/panfrost_devfreq.h | 14 + drivers/gpu/drm/panfrost/panfrost_device.c | 252 +++++++++ drivers/gpu/drm/panfrost/panfrost_device.h | 124 ++++ drivers/gpu/drm/panfrost/panfrost_drv.c | 463 +++++++++++++++ drivers/gpu/drm/panfrost/panfrost_features.h | 309 ++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.c | 95 ++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 29 + drivers/gpu/drm/panfrost/panfrost_gpu.c | 362 ++++++++++++ drivers/gpu/drm/panfrost/panfrost_gpu.h | 19 + drivers/gpu/drm/panfrost/panfrost_issues.h | 176 ++++++ drivers/gpu/drm/panfrost/panfrost_job.c | 560 +++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_job.h | 51 ++ drivers/gpu/drm/panfrost/panfrost_mmu.c | 370 ++++++++++++ drivers/gpu/drm/panfrost/panfrost_mmu.h | 17 + drivers/gpu/drm/panfrost/panfrost_regs.h | 298 ++++++++++ include/uapi/drm/panfrost_drm.h | 142 +++++ 23 files changed, 3564 insertions(+) create mode 100644 drivers/gpu/drm/panfrost/Kconfig create mode 100644 drivers/gpu/drm/panfrost/Makefile create mode 100644 drivers/gpu/drm/panfrost/TODO create mode 100644 drivers/gpu/drm/panfrost/panfrost_devfreq.c create mode 100644 drivers/gpu/drm/panfrost/panfrost_devfreq.h create mode 100644 drivers/gpu/drm/panfrost/panfrost_device.c create mode 100644 drivers/gpu/drm/panfrost/panfrost_device.h create mode 100644 drivers/gpu/drm/panfrost/panfrost_drv.c create mode 100644 drivers/gpu/drm/panfrost/panfrost_features.h create mode 100644 drivers/gpu/drm/panfrost/panfrost_gem.c create mode 100644 drivers/gpu/drm/panfrost/panfrost_gem.h 
create mode 100644 drivers/gpu/drm/panfrost/panfrost_gpu.c create mode 100644 drivers/gpu/drm/panfrost/panfrost_gpu.h create mode 100644 drivers/gpu/drm/panfrost/panfrost_issues.h create mode 100644 drivers/gpu/drm/panfrost/panfrost_job.c create mode 100644 drivers/gpu/drm/panfrost/panfrost_job.h create mode 100644 drivers/gpu/drm/panfrost/panfrost_mmu.c create mode 100644 drivers/gpu/drm/panfrost/panfrost_mmu.h create mode 100644 drivers/gpu/drm/panfrost/panfrost_regs.h create mode 100644 include/uapi/drm/panfrost_drm.h diff --git a/MAINTAINERS b/MAINTAINERS index 8825dffebb4c..c227d2818c98 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1180,6 +1180,15 @@ F: drivers/gpu/drm/arm/ F: Documentation/devicetree/bindings/display/arm,malidp.txt F: Documentation/gpu/afbc.rst +ARM MALI PANFROST DRM DRIVER +M: Rob Herring +M: Tomeu Vizoso +L: dri-devel@lists.freedesktop.org +S: Supported +T: git git://anongit.freedesktop.org/drm/drm-misc +F: drivers/gpu/drm/panfrost/ +F: include/uapi/drm/panfrost_drm.h + ARM MFM AND FLOPPY DRIVERS M: Ian Molton S: Maintained diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index bcbc4234893a..39d5f7562f1c 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -337,6 +337,8 @@ source "drivers/gpu/drm/vboxvideo/Kconfig" source "drivers/gpu/drm/lima/Kconfig" +source "drivers/gpu/drm/panfrost/Kconfig" + source "drivers/gpu/drm/aspeed/Kconfig" # Keep legacy drivers last diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 7ebae3d45505..3d0c75cd687c 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -112,4 +112,5 @@ obj-$(CONFIG_DRM_TVE200) += tve200/ obj-$(CONFIG_DRM_XEN) += xen/ obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ obj-$(CONFIG_DRM_LIMA) += lima/ +obj-$(CONFIG_DRM_PANFROST) += panfrost/ obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig new file mode 100644 index 000000000000..7f5e572daa2d --- /dev/null 
+++ b/drivers/gpu/drm/panfrost/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +config DRM_PANFROST + tristate "Panfrost (DRM support for ARM Mali Midgard/Bifrost GPUs)" + depends on DRM + depends on ARM || ARM64 || COMPILE_TEST + depends on MMU + select DRM_SCHED + select IOMMU_SUPPORT + select IOMMU_IO_PGTABLE_LPAE + select DRM_GEM_SHMEM_HELPER + help + DRM driver for ARM Mali Midgard (T6xx, T7xx, T8xx) and + Bifrost (G3x, G5x, G7x) GPUs. diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile new file mode 100644 index 000000000000..6de72d13c58f --- /dev/null +++ b/drivers/gpu/drm/panfrost/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 + +panfrost-y := \ + panfrost_drv.o \ + panfrost_device.o \ + panfrost_devfreq.o \ + panfrost_gem.o \ + panfrost_gpu.o \ + panfrost_job.o \ + panfrost_mmu.o + +obj-$(CONFIG_DRM_PANFROST) += panfrost.o diff --git a/drivers/gpu/drm/panfrost/TODO b/drivers/gpu/drm/panfrost/TODO new file mode 100644 index 000000000000..c2e44add37d8 --- /dev/null +++ b/drivers/gpu/drm/panfrost/TODO @@ -0,0 +1,27 @@ +- Thermal support. + +- Bifrost support: + - DT bindings (Neil, WIP) + - MMU page table format and address space setup + - Bifrost specific feature and issue handling + - Coherent DMA support + +- Support for 2MB pages. The io-pgtable code already supports this. Finishing + support involves either copying or adapting the iommu API to handle passing + aligned addresses and sizes to the io-pgtable code. + +- Per FD address space support. The h/w supports multiple address spaces. + The hard part is handling when more address spaces are needed than what + the h/w provides. + +- Support pinning pages on demand (GPU page faults). + +- Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu) + +- Support for madvise and a shrinker. + +- Compute job support. So-called 'compute only' jobs need to be plumbed up to + userspace. + +- Performance counter support. 
(Boris) + diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c new file mode 100644 index 000000000000..a8121ae67ee3 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Collabora ltd. */ +#include +#include +#include +#include +#include + +#include "panfrost_device.h" +#include "panfrost_features.h" +#include "panfrost_issues.h" +#include "panfrost_gpu.h" +#include "panfrost_regs.h" + +static void panfrost_devfreq_update_utilization(struct panfrost_device *pfdev, int slot); + +static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev)); + struct dev_pm_opp *opp; + unsigned long old_clk_rate = pfdev->devfreq.cur_freq; + unsigned long target_volt, target_rate; + int err; + + opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(opp)) + return PTR_ERR(opp); + + target_rate = dev_pm_opp_get_freq(opp); + target_volt = dev_pm_opp_get_voltage(opp); + dev_pm_opp_put(opp); + + if (old_clk_rate == target_rate) + return 0; + + /* + * If frequency scaling from low to high, adjust voltage first. + * If frequency scaling from high to low, adjust frequency first. 
+ */ + if (old_clk_rate < target_rate) { + err = regulator_set_voltage(pfdev->regulator, target_volt, + target_volt); + if (err) { + dev_err(dev, "Cannot set voltage %lu uV\n", + target_volt); + return err; + } + } + + err = clk_set_rate(pfdev->clock, target_rate); + if (err) { + dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate, + err); + regulator_set_voltage(pfdev->regulator, pfdev->devfreq.cur_volt, + pfdev->devfreq.cur_volt); + return err; + } + + if (old_clk_rate > target_rate) { + err = regulator_set_voltage(pfdev->regulator, target_volt, + target_volt); + if (err) + dev_err(dev, "Cannot set voltage %lu uV\n", target_volt); + } + + pfdev->devfreq.cur_freq = target_rate; + pfdev->devfreq.cur_volt = target_volt; + + return 0; +} + +static void panfrost_devfreq_reset(struct panfrost_device *pfdev) +{ + ktime_t now = ktime_get(); + int i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + pfdev->devfreq.slot[i].busy_time = 0; + pfdev->devfreq.slot[i].idle_time = 0; + pfdev->devfreq.slot[i].time_last_update = now; + } +} + +static int panfrost_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *status) +{ + struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev)); + int i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + panfrost_devfreq_update_utilization(pfdev, i); + } + + status->current_frequency = clk_get_rate(pfdev->clock); + status->total_time = ktime_to_ns(ktime_add(pfdev->devfreq.slot[0].busy_time, + pfdev->devfreq.slot[0].idle_time)); + + status->busy_time = 0; + for (i = 0; i < NUM_JOB_SLOTS; i++) { + status->busy_time += ktime_to_ns(pfdev->devfreq.slot[i].busy_time); + } + + /* We're scheduling only to one core atm, so don't divide for now */ + /* status->busy_time /= NUM_JOB_SLOTS; */ + + panfrost_devfreq_reset(pfdev); + + dev_dbg(pfdev->dev, "busy %lu total %lu %lu %% freq %lu MHz\n", status->busy_time, + status->total_time, + status->busy_time / (status->total_time / 100), + status->current_frequency / 1000 / 
1000); + + return 0; +} + +static int panfrost_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev)); + + *freq = pfdev->devfreq.cur_freq; + + return 0; +} + +static struct devfreq_dev_profile panfrost_devfreq_profile = { + .polling_ms = 50, /* ~3 frames */ + .target = panfrost_devfreq_target, + .get_dev_status = panfrost_devfreq_get_dev_status, + .get_cur_freq = panfrost_devfreq_get_cur_freq, +}; + +int panfrost_devfreq_init(struct panfrost_device *pfdev) +{ + int ret; + struct dev_pm_opp *opp; + + if (!pfdev->regulator) + return 0; + + ret = dev_pm_opp_of_add_table(&pfdev->pdev->dev); + if (ret) /* The OPP table is optional: -ENODEV means continue without devfreq */ + return ret == -ENODEV ? 0 : ret; + + panfrost_devfreq_reset(pfdev); + + pfdev->devfreq.cur_freq = clk_get_rate(pfdev->clock); + + opp = devfreq_recommended_opp(&pfdev->pdev->dev, &pfdev->devfreq.cur_freq, 0); + if (IS_ERR(opp)) + return PTR_ERR(opp); + + panfrost_devfreq_profile.initial_freq = pfdev->devfreq.cur_freq; + dev_pm_opp_put(opp); + + pfdev->devfreq.devfreq = devm_devfreq_add_device(&pfdev->pdev->dev, + &panfrost_devfreq_profile, "simple_ondemand", NULL); + if (IS_ERR(pfdev->devfreq.devfreq)) { + DRM_DEV_ERROR(&pfdev->pdev->dev, "Couldn't initialize GPU devfreq\n"); + ret = PTR_ERR(pfdev->devfreq.devfreq); + pfdev->devfreq.devfreq = NULL; + return ret; + } + + return 0; +} + +void panfrost_devfreq_resume(struct panfrost_device *pfdev) +{ + int i; + + if (!pfdev->devfreq.devfreq) + return; + + panfrost_devfreq_reset(pfdev); + for (i = 0; i < NUM_JOB_SLOTS; i++) + pfdev->devfreq.slot[i].busy = false; + + devfreq_resume_device(pfdev->devfreq.devfreq); +} + +void panfrost_devfreq_suspend(struct panfrost_device *pfdev) +{ + if (!pfdev->devfreq.devfreq) + return; + + devfreq_suspend_device(pfdev->devfreq.devfreq); +} + +static void panfrost_devfreq_update_utilization(struct panfrost_device *pfdev, int slot) +{ + struct panfrost_devfreq_slot 
*devfreq_slot = &pfdev->devfreq.slot[slot]; + ktime_t now; + ktime_t last; + + if (!pfdev->devfreq.devfreq) + return; + + now = ktime_get(); + last = pfdev->devfreq.slot[slot].time_last_update; + + /* If the slot was last recorded as busy, the elapsed time counts as busy, else as idle */ + if (devfreq_slot->busy) + pfdev->devfreq.slot[slot].busy_time += ktime_sub(now, last); + else + pfdev->devfreq.slot[slot].idle_time += ktime_sub(now, last); + + pfdev->devfreq.slot[slot].time_last_update = now; +} + +/* The job scheduler is expected to call this at every transition busy <-> idle */ +void panfrost_devfreq_record_transition(struct panfrost_device *pfdev, int slot) +{ + struct panfrost_devfreq_slot *devfreq_slot = &pfdev->devfreq.slot[slot]; + + panfrost_devfreq_update_utilization(pfdev, slot); + devfreq_slot->busy = !devfreq_slot->busy; +} diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.h b/drivers/gpu/drm/panfrost/panfrost_devfreq.h new file mode 100644 index 000000000000..eb999531ed90 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Collabora ltd. */ + +#ifndef __PANFROST_DEVFREQ_H__ +#define __PANFROST_DEVFREQ_H__ + +int panfrost_devfreq_init(struct panfrost_device *pfdev); + +void panfrost_devfreq_resume(struct panfrost_device *pfdev); +void panfrost_devfreq_suspend(struct panfrost_device *pfdev); + +void panfrost_devfreq_record_transition(struct panfrost_device *pfdev, int slot); + +#endif /* __PANFROST_DEVFREQ_H__ */ diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c new file mode 100644 index 000000000000..91e8fb0f2b25 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2018 Marty E. 
Plummer */ +/* Copyright 2019 Linaro, Ltd, Rob Herring */ + +#include +#include +#include +#include +#include + +#include "panfrost_device.h" +#include "panfrost_devfreq.h" +#include "panfrost_features.h" +#include "panfrost_gpu.h" +#include "panfrost_job.h" +#include "panfrost_mmu.h" + +static int panfrost_reset_init(struct panfrost_device *pfdev) +{ + int err; + + pfdev->rstc = devm_reset_control_array_get(pfdev->dev, false, true); + if (IS_ERR(pfdev->rstc)) { + dev_err(pfdev->dev, "get reset failed %ld\n", PTR_ERR(pfdev->rstc)); + return PTR_ERR(pfdev->rstc); + } + + err = reset_control_deassert(pfdev->rstc); + if (err) + return err; + + return 0; +} + +static void panfrost_reset_fini(struct panfrost_device *pfdev) +{ + reset_control_assert(pfdev->rstc); +} + +static int panfrost_clk_init(struct panfrost_device *pfdev) +{ + int err; + unsigned long rate; + + pfdev->clock = devm_clk_get(pfdev->dev, NULL); + if (IS_ERR(pfdev->clock)) { + dev_err(pfdev->dev, "get clock failed %ld\n", PTR_ERR(pfdev->clock)); + return PTR_ERR(pfdev->clock); + } + + rate = clk_get_rate(pfdev->clock); + dev_info(pfdev->dev, "clock rate = %lu\n", rate); + + err = clk_prepare_enable(pfdev->clock); + if (err) + return err; + + return 0; +} + +static void panfrost_clk_fini(struct panfrost_device *pfdev) +{ + clk_disable_unprepare(pfdev->clock); +} + +static int panfrost_regulator_init(struct panfrost_device *pfdev) +{ + int ret; + + pfdev->regulator = devm_regulator_get_optional(pfdev->dev, "mali"); + if (IS_ERR(pfdev->regulator)) { + ret = PTR_ERR(pfdev->regulator); + pfdev->regulator = NULL; + if (ret == -ENODEV) + return 0; + dev_err(pfdev->dev, "failed to get regulator: %d\n", ret); + return ret; + } + + ret = regulator_enable(pfdev->regulator); + if (ret < 0) { + dev_err(pfdev->dev, "failed to enable regulator: %d\n", ret); + return ret; + } + + return 0; +} + +static void panfrost_regulator_fini(struct panfrost_device *pfdev) +{ + if (pfdev->regulator) + 
regulator_disable(pfdev->regulator); +} + +int panfrost_device_init(struct panfrost_device *pfdev) +{ + int err; + struct resource *res; + + mutex_init(&pfdev->sched_lock); + INIT_LIST_HEAD(&pfdev->scheduled_jobs); + + spin_lock_init(&pfdev->hwaccess_lock); + + err = panfrost_clk_init(pfdev); + if (err) { + dev_err(pfdev->dev, "clk init failed %d\n", err); + return err; + } + + err = panfrost_regulator_init(pfdev); + if (err) { + dev_err(pfdev->dev, "regulator init failed %d\n", err); + goto err_out0; + } + + err = panfrost_reset_init(pfdev); + if (err) { + dev_err(pfdev->dev, "reset init failed %d\n", err); + goto err_out1; + } + + res = platform_get_resource(pfdev->pdev, IORESOURCE_MEM, 0); + pfdev->iomem = devm_ioremap_resource(pfdev->dev, res); + if (IS_ERR(pfdev->iomem)) { + dev_err(pfdev->dev, "failed to ioremap iomem\n"); + err = PTR_ERR(pfdev->iomem); + goto err_out2; + } + + err = panfrost_gpu_init(pfdev); + if (err) + goto err_out2; + + err = panfrost_mmu_init(pfdev); + if (err) + goto err_out3; + + err = panfrost_job_init(pfdev); + if (err) + goto err_out4; + + /* runtime PM will wake us up later */ + panfrost_gpu_power_off(pfdev); + + pm_runtime_set_active(pfdev->dev); + pm_runtime_get_sync(pfdev->dev); + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_put_autosuspend(pfdev->dev); + + return 0; +err_out4: + panfrost_mmu_fini(pfdev); +err_out3: + panfrost_gpu_fini(pfdev); +err_out2: + panfrost_reset_fini(pfdev); +err_out1: + panfrost_regulator_fini(pfdev); +err_out0: + panfrost_clk_fini(pfdev); + return err; +} + +void panfrost_device_fini(struct panfrost_device *pfdev) +{ + panfrost_regulator_fini(pfdev); + panfrost_clk_fini(pfdev); +} + +const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code) +{ + switch (exception_code) { + /* Non-Fault Status code */ + case 0x00: return "NOT_STARTED/IDLE/OK"; + case 0x01: return "DONE"; + case 0x02: return "INTERRUPTED"; + case 0x03: return "STOPPED"; + case 0x04: return 
"TERMINATED"; + case 0x08: return "ACTIVE"; + /* Job exceptions */ + case 0x40: return "JOB_CONFIG_FAULT"; + case 0x41: return "JOB_POWER_FAULT"; + case 0x42: return "JOB_READ_FAULT"; + case 0x43: return "JOB_WRITE_FAULT"; + case 0x44: return "JOB_AFFINITY_FAULT"; + case 0x48: return "JOB_BUS_FAULT"; + case 0x50: return "INSTR_INVALID_PC"; + case 0x51: return "INSTR_INVALID_ENC"; + case 0x52: return "INSTR_TYPE_MISMATCH"; + case 0x53: return "INSTR_OPERAND_FAULT"; + case 0x54: return "INSTR_TLS_FAULT"; + case 0x55: return "INSTR_BARRIER_FAULT"; + case 0x56: return "INSTR_ALIGN_FAULT"; + case 0x58: return "DATA_INVALID_FAULT"; + case 0x59: return "TILE_RANGE_FAULT"; + case 0x5A: return "ADDR_RANGE_FAULT"; + case 0x60: return "OUT_OF_MEMORY"; + /* GPU exceptions */ + case 0x80: return "DELAYED_BUS_FAULT"; + case 0x88: return "SHAREABILITY_FAULT"; + /* MMU exceptions */ + case 0xC1: return "TRANSLATION_FAULT_LEVEL1"; + case 0xC2: return "TRANSLATION_FAULT_LEVEL2"; + case 0xC3: return "TRANSLATION_FAULT_LEVEL3"; + case 0xC4: return "TRANSLATION_FAULT_LEVEL4"; + case 0xC8: return "PERMISSION_FAULT"; + case 0xC9 ... 0xCF: return "PERMISSION_FAULT"; + case 0xD1: return "TRANSTAB_BUS_FAULT_LEVEL1"; + case 0xD2: return "TRANSTAB_BUS_FAULT_LEVEL2"; + case 0xD3: return "TRANSTAB_BUS_FAULT_LEVEL3"; + case 0xD4: return "TRANSTAB_BUS_FAULT_LEVEL4"; + case 0xD8: return "ACCESS_FLAG"; + case 0xD9 ... 0xDF: return "ACCESS_FLAG"; + case 0xE0 ... 0xE7: return "ADDRESS_SIZE_FAULT"; + case 0xE8 ... 
0xEF: return "MEMORY_ATTRIBUTES_FAULT"; + } + + return "UNKNOWN"; +} + +#ifdef CONFIG_PM +int panfrost_device_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct panfrost_device *pfdev = platform_get_drvdata(pdev); + + panfrost_gpu_soft_reset(pfdev); + + /* TODO: Re-enable all other address spaces */ + panfrost_gpu_power_on(pfdev); + panfrost_mmu_enable(pfdev, 0); + panfrost_job_enable_interrupts(pfdev); + panfrost_devfreq_resume(pfdev); + + return 0; +} + +int panfrost_device_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct panfrost_device *pfdev = platform_get_drvdata(pdev); + + if (!panfrost_job_is_idle(pfdev)) + return -EBUSY; + + panfrost_devfreq_suspend(pfdev); + panfrost_gpu_power_off(pfdev); + + return 0; +} +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h new file mode 100644 index 000000000000..1ba48d105763 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2018 Marty E. 
Plummer */ +/* Copyright 2019 Linaro, Ltd, Rob Herring */ + +#ifndef __PANFROST_DEVICE_H__ +#define __PANFROST_DEVICE_H__ + +#include +#include +#include +#include + +struct panfrost_device; +struct panfrost_mmu; +struct panfrost_job_slot; +struct panfrost_job; + +#define NUM_JOB_SLOTS 3 + +struct panfrost_features { + u16 id; + u16 revision; + + u64 shader_present; + u64 tiler_present; + u64 l2_present; + u64 stack_present; + u32 as_present; + u32 js_present; + + u32 l2_features; + u32 core_features; + u32 tiler_features; + u32 mem_features; + u32 mmu_features; + u32 thread_features; + u32 max_threads; + u32 thread_max_workgroup_sz; + u32 thread_max_barrier_sz; + u32 coherency_features; + u32 texture_features[4]; + u32 js_features[16]; + + u32 nr_core_groups; + + unsigned long hw_features[64 / BITS_PER_LONG]; + unsigned long hw_issues[64 / BITS_PER_LONG]; +}; + +struct panfrost_devfreq_slot { + ktime_t busy_time; + ktime_t idle_time; + ktime_t time_last_update; + bool busy; +}; + +struct panfrost_device { + struct device *dev; + struct drm_device *ddev; + struct platform_device *pdev; + + spinlock_t hwaccess_lock; + + struct drm_mm mm; + spinlock_t mm_lock; + + void __iomem *iomem; + struct clk *clock; + struct regulator *regulator; + struct reset_control *rstc; + + struct panfrost_features features; + + struct panfrost_mmu *mmu; + struct panfrost_job_slot *js; + + struct panfrost_job *jobs[NUM_JOB_SLOTS]; + struct list_head scheduled_jobs; + + struct mutex sched_lock; + + struct { + struct devfreq *devfreq; + struct thermal_cooling_device *cooling; + unsigned long cur_freq; + unsigned long cur_volt; + struct panfrost_devfreq_slot slot[NUM_JOB_SLOTS]; + } devfreq; +}; + +struct panfrost_file_priv { + struct panfrost_device *pfdev; + + struct drm_sched_entity sched_entity[NUM_JOB_SLOTS]; +}; + +static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev) +{ + return ddev->dev_private; +} + +static inline int panfrost_model_cmp(struct 
panfrost_device *pfdev, s32 id) +{ + s32 match_id = pfdev->features.id; + + if (match_id & 0xf000) + match_id &= 0xf00f; + return match_id - id; +} + +static inline bool panfrost_model_eq(struct panfrost_device *pfdev, s32 id) +{ + return !panfrost_model_cmp(pfdev, id); +} + +int panfrost_device_init(struct panfrost_device *pfdev); +void panfrost_device_fini(struct panfrost_device *pfdev); + +int panfrost_device_resume(struct device *dev); +int panfrost_device_suspend(struct device *dev); + +const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c new file mode 100644 index 000000000000..c06af78ab833 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2018 Marty E. Plummer */ +/* Copyright 2019 Linaro, Ltd., Rob Herring */ +/* Copyright 2019 Collabora ltd. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "panfrost_device.h" +#include "panfrost_devfreq.h" +#include "panfrost_gem.h" +#include "panfrost_mmu.h" +#include "panfrost_job.h" +#include "panfrost_gpu.h" + +static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct drm_file *file) +{ + struct drm_panfrost_get_param *param = data; + struct panfrost_device *pfdev = ddev->dev_private; + + if (param->pad != 0) + return -EINVAL; + + switch (param->param) { + case DRM_PANFROST_PARAM_GPU_PROD_ID: + param->value = pfdev->features.id; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, + struct drm_file *file) +{ + int ret; + struct drm_gem_shmem_object *shmem; + struct drm_panfrost_create_bo *args = data; + + if (!args->size || args->flags || args->pad) + return -EINVAL; + + shmem = drm_gem_shmem_create_with_handle(file, dev, 
args->size, + &args->handle); + if (IS_ERR(shmem)) + return PTR_ERR(shmem); + + ret = panfrost_mmu_map(to_panfrost_bo(&shmem->base)); + if (ret) + goto err_free; + + args->offset = to_panfrost_bo(&shmem->base)->node.start << PAGE_SHIFT; + + return 0; + +err_free: + drm_gem_object_put_unlocked(&shmem->base); + return ret; +} + +/** + * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects + * referenced by the job. + * @dev: DRM device + * @file_priv: DRM file for this fd + * @args: IOCTL args + * @job: job being set up + * + * Resolve handles from userspace to BOs and attach them to job. + * + * Note that this function doesn't need to unreference the BOs on + * failure, because that will happen at panfrost_job_cleanup() time. + */ +static int +panfrost_lookup_bos(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_panfrost_submit *args, + struct panfrost_job *job) +{ + job->bo_count = args->bo_handle_count; + + if (!job->bo_count) + return 0; + + job->implicit_fences = kvmalloc_array(job->bo_count, + sizeof(struct dma_fence *), + GFP_KERNEL | __GFP_ZERO); + if (!job->implicit_fences) + return -ENOMEM; + + return drm_gem_objects_lookup(file_priv, + (void __user *)(uintptr_t)args->bo_handles, + job->bo_count, &job->bos); +} + +/** + * panfrost_copy_in_sync() - Sets up job->in_fences[] with the sync objects + * referenced by the job. + * @dev: DRM device + * @file_priv: DRM file for this fd + * @args: IOCTL args + * @job: job being set up + * + * Resolve syncobjs from userspace to fences and attach them to job. + * + * Note that this function doesn't need to unreference the fences on + * failure, because that will happen at panfrost_job_cleanup() time. 
+ */ +static int +panfrost_copy_in_sync(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_panfrost_submit *args, + struct panfrost_job *job) +{ + u32 *handles; + int ret = 0; + int i; + + job->in_fence_count = args->in_sync_count; + + if (!job->in_fence_count) + return 0; + + job->in_fences = kvmalloc_array(job->in_fence_count, + sizeof(struct dma_fence *), + GFP_KERNEL | __GFP_ZERO); + if (!job->in_fences) { + DRM_DEBUG("Failed to allocate job in fences\n"); + return -ENOMEM; + } + + handles = kvmalloc_array(job->in_fence_count, sizeof(u32), GFP_KERNEL); + if (!handles) { + ret = -ENOMEM; + DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); + goto fail; + } + + if (copy_from_user(handles, + (void __user *)(uintptr_t)args->in_syncs, + job->in_fence_count * sizeof(u32))) { + ret = -EFAULT; + DRM_DEBUG("Failed to copy in syncobj handles\n"); + goto fail; + } + + for (i = 0; i < job->in_fence_count; i++) { + ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, + &job->in_fences[i]); + if (ret == -EINVAL) + goto fail; + } + +fail: + kvfree(handles); + return ret; +} + +static int panfrost_ioctl_submit(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct panfrost_device *pfdev = dev->dev_private; + struct drm_panfrost_submit *args = data; + struct drm_syncobj *sync_out; + struct panfrost_job *job; + int ret = 0; + + job = kzalloc(sizeof(*job), GFP_KERNEL); + if (!job) + return -ENOMEM; + + kref_init(&job->refcount); + + job->pfdev = pfdev; + job->jc = args->jc; + job->requirements = args->requirements; + job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); + job->file_priv = file->driver_priv; + + ret = panfrost_copy_in_sync(dev, file, args, job); + if (ret) + goto fail; + + ret = panfrost_lookup_bos(dev, file, args, job); + if (ret) + goto fail; + + ret = panfrost_job_push(job); + if (ret) + goto fail; + + /* Update the return sync object for the job */ + sync_out = drm_syncobj_find(file, args->out_sync); + if 
(sync_out) { + drm_syncobj_replace_fence(sync_out, job->render_done_fence); + drm_syncobj_put(sync_out); + } + +fail: + panfrost_job_put(job); + + return ret; +} + +static int +panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + long ret; + struct drm_panfrost_wait_bo *args = data; + struct drm_gem_object *gem_obj; + unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + + if (args->pad) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) + return -ENOENT; + + ret = reservation_object_wait_timeout_rcu(gem_obj->resv, true, + true, timeout); + if (!ret) + ret = timeout ? -ETIMEDOUT : -EBUSY; + + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} + +static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_panfrost_mmap_bo *args = data; + struct drm_gem_object *gem_obj; + int ret; + + if (args->flags != 0) { + DRM_INFO("unknown mmap_bo flags: %d\n", args->flags); + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + ret = drm_gem_create_mmap_offset(gem_obj); + if (ret == 0) + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} + +static int panfrost_ioctl_get_bo_offset(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_panfrost_get_bo_offset *args = data; + struct drm_gem_object *gem_obj; + struct panfrost_gem_object *bo; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_panfrost_bo(gem_obj); + + args->offset = bo->node.start << PAGE_SHIFT; + + drm_gem_object_put_unlocked(gem_obj); + return 0; +} + +static int +panfrost_open(struct drm_device *dev, struct 
drm_file *file) +{ + struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_file_priv *panfrost_priv; + + panfrost_priv = kzalloc(sizeof(*panfrost_priv), GFP_KERNEL); + if (!panfrost_priv) + return -ENOMEM; + + panfrost_priv->pfdev = pfdev; + file->driver_priv = panfrost_priv; + + return panfrost_job_open(panfrost_priv); +} + +static void +panfrost_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct panfrost_file_priv *panfrost_priv = file->driver_priv; + + panfrost_job_close(panfrost_priv); + + kfree(panfrost_priv); +} + +/* DRM_AUTH is required on SUBMIT for now, while all clients share a single + * address space. Note that render nodes would be able to submit jobs that + * could access BOs from clients authenticated with the master node. + */ +static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { +#define PANFROST_IOCTL(n, func, flags) \ + DRM_IOCTL_DEF_DRV(PANFROST_##n, panfrost_ioctl_##func, flags) + + PANFROST_IOCTL(SUBMIT, submit, DRM_RENDER_ALLOW | DRM_AUTH), + PANFROST_IOCTL(WAIT_BO, wait_bo, DRM_RENDER_ALLOW), + PANFROST_IOCTL(CREATE_BO, create_bo, DRM_RENDER_ALLOW), + PANFROST_IOCTL(MMAP_BO, mmap_bo, DRM_RENDER_ALLOW), + PANFROST_IOCTL(GET_PARAM, get_param, DRM_RENDER_ALLOW), + PANFROST_IOCTL(GET_BO_OFFSET, get_bo_offset, DRM_RENDER_ALLOW), +}; + +DEFINE_DRM_GEM_SHMEM_FOPS(panfrost_drm_driver_fops); + +static struct drm_driver panfrost_drm_driver = { + .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME | + DRIVER_SYNCOBJ, + .open = panfrost_open, + .postclose = panfrost_postclose, + .ioctls = panfrost_drm_driver_ioctls, + .num_ioctls = ARRAY_SIZE(panfrost_drm_driver_ioctls), + .fops = &panfrost_drm_driver_fops, + .name = "panfrost", + .desc = "panfrost DRM", + .date = "20180908", + .major = 1, + .minor = 0, + + .gem_create_object = panfrost_gem_create_object, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import_sg_table = 
panfrost_gem_prime_import_sg_table, + .gem_prime_mmap = drm_gem_prime_mmap, +}; + +static int panfrost_probe(struct platform_device *pdev) +{ + struct panfrost_device *pfdev; + struct drm_device *ddev; + int err; + + pfdev = devm_kzalloc(&pdev->dev, sizeof(*pfdev), GFP_KERNEL); + if (!pfdev) + return -ENOMEM; + + pfdev->pdev = pdev; + pfdev->dev = &pdev->dev; + + platform_set_drvdata(pdev, pfdev); + + /* Allocate and initialze the DRM device. */ + ddev = drm_dev_alloc(&panfrost_drm_driver, &pdev->dev); + if (IS_ERR(ddev)) + return PTR_ERR(ddev); + + ddev->dev_private = pfdev; + pfdev->ddev = ddev; + + spin_lock_init(&pfdev->mm_lock); + + /* 4G enough for now. can be 48-bit */ + drm_mm_init(&pfdev->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); + + pm_runtime_use_autosuspend(pfdev->dev); + pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */ + pm_runtime_enable(pfdev->dev); + + err = panfrost_device_init(pfdev); + if (err) { + dev_err(&pdev->dev, "Fatal error during GPU init\n"); + goto err_out0; + } + + dma_set_mask_and_coherent(pfdev->dev, + DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features))); + + err = panfrost_devfreq_init(pfdev); + if (err) { + dev_err(&pdev->dev, "Fatal error during devfreq init\n"); + goto err_out1; + } + + /* + * Register the DRM device with the core and the connectors with + * sysfs + */ + err = drm_dev_register(ddev, 0); + if (err < 0) + goto err_out1; + + return 0; + +err_out1: + panfrost_device_fini(pfdev); +err_out0: + drm_dev_put(ddev); + return err; +} + +static int panfrost_remove(struct platform_device *pdev) +{ + struct panfrost_device *pfdev = platform_get_drvdata(pdev); + struct drm_device *ddev = pfdev->ddev; + + drm_dev_unregister(ddev); + pm_runtime_get_sync(pfdev->dev); + pm_runtime_put_sync_autosuspend(pfdev->dev); + pm_runtime_disable(pfdev->dev); + panfrost_device_fini(pfdev); + drm_dev_put(ddev); + return 0; +} + +static const struct of_device_id dt_match[] = { + { .compatible = 
"arm,mali-t604" }, + { .compatible = "arm,mali-t624" }, + { .compatible = "arm,mali-t628" }, + { .compatible = "arm,mali-t720" }, + { .compatible = "arm,mali-t760" }, + { .compatible = "arm,mali-t820" }, + { .compatible = "arm,mali-t830" }, + { .compatible = "arm,mali-t860" }, + { .compatible = "arm,mali-t880" }, + {} +}; +MODULE_DEVICE_TABLE(of, dt_match); + +static const struct dev_pm_ops panfrost_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(panfrost_device_suspend, panfrost_device_resume, NULL) +}; + +static struct platform_driver panfrost_driver = { + .probe = panfrost_probe, + .remove = panfrost_remove, + .driver = { + .name = "panfrost", + .pm = &panfrost_pm_ops, + .of_match_table = dt_match, + }, +}; +module_platform_driver(panfrost_driver); + +MODULE_AUTHOR("Panfrost Project Developers"); +MODULE_DESCRIPTION("Panfrost DRM Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panfrost/panfrost_features.h b/drivers/gpu/drm/panfrost/panfrost_features.h new file mode 100644 index 000000000000..5056777c7744 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_features.h @@ -0,0 +1,309 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
*/ +/* Copyright 2019 Linaro, Ltd., Rob Herring */ +#ifndef __PANFROST_FEATURES_H__ +#define __PANFROST_FEATURES_H__ + +#include + +#include "panfrost_device.h" + +enum panfrost_hw_feature { + HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + HW_FEATURE_XAFFINITY, + HW_FEATURE_OUT_OF_ORDER_EXEC, + HW_FEATURE_MRT, + HW_FEATURE_BRNDOUT_CC, + HW_FEATURE_INTERPIPE_REG_ALIASING, + HW_FEATURE_LD_ST_TILEBUFFER, + HW_FEATURE_MSAA_16X, + HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + HW_FEATURE_OPTIMIZED_COVERAGE_MASK, + HW_FEATURE_T7XX_PAIRING_RULES, + HW_FEATURE_LD_ST_LEA_TEX, + HW_FEATURE_LINEAR_FILTER_FLOAT, + HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, + HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS, + HW_FEATURE_TEST4_DATUM_MODE, + HW_FEATURE_NEXT_INSTRUCTION_TYPE, + HW_FEATURE_BRNDOUT_KILL, + HW_FEATURE_WARPING, + HW_FEATURE_V4, + HW_FEATURE_FLUSH_REDUCTION, + HW_FEATURE_PROTECTED_MODE, + HW_FEATURE_COHERENCY_REG, + HW_FEATURE_PROTECTED_DEBUG_MODE, + HW_FEATURE_AARCH64_MMU, + HW_FEATURE_TLS_HASHING, + HW_FEATURE_THREAD_GROUP_SPLIT, + HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, +}; + +#define hw_features_t600 (\ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_V4)) + +#define hw_features_t620 (\ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_V4)) + +#define hw_features_t720 (\ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_OPTIMIZED_COVERAGE_MASK) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_V4)) + 
+ +#define hw_features_t760 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) + +// T860 +#define hw_features_t860 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) + +#define hw_features_t880 hw_features_t860 + +#define hw_features_t830 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + 
BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) + +#define hw_features_t820 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) + +#define hw_features_g71 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + 
BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG)) + +#define hw_features_g72 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG)) + +#define hw_features_g51 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + 
BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG)) + +#define hw_features_g52 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG)) + +#define hw_features_g76 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + 
BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG) | \ + BIT_ULL(HW_FEATURE_AARCH64_MMU) | \ + BIT_ULL(HW_FEATURE_TLS_HASHING) | \ + BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) + +#define hw_features_g31 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG) | \ + BIT_ULL(HW_FEATURE_AARCH64_MMU) | \ + BIT_ULL(HW_FEATURE_TLS_HASHING) | \ + BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) + +static inline bool panfrost_has_hw_feature(struct panfrost_device *pfdev, + enum panfrost_hw_feature feat) +{ + return test_bit(feat, pfdev->features.hw_features); +} + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c new file mode 100644 index 000000000000..8a0376283a21 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Linaro, Ltd, Rob Herring */ + +#include +#include +#include +#include + +#include +#include 
"panfrost_device.h" +#include "panfrost_gem.h" +#include "panfrost_mmu.h" + +/* Called DRM core on the last userspace/kernel unreference of the + * BO. + */ +void panfrost_gem_free_object(struct drm_gem_object *obj) +{ + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + struct panfrost_device *pfdev = obj->dev->dev_private; + + panfrost_mmu_unmap(bo); + + spin_lock(&pfdev->mm_lock); + drm_mm_remove_node(&bo->node); + spin_unlock(&pfdev->mm_lock); + + drm_gem_shmem_free_object(obj); +} + +static const struct drm_gem_object_funcs panfrost_gem_funcs = { + .free = panfrost_gem_free_object, + .print_info = drm_gem_shmem_print_info, + .pin = drm_gem_shmem_pin, + .unpin = drm_gem_shmem_unpin, + .get_sg_table = drm_gem_shmem_get_sg_table, + .vmap = drm_gem_shmem_vmap, + .vunmap = drm_gem_shmem_vunmap, + .vm_ops = &drm_gem_shmem_vm_ops, +}; + +/** + * panfrost_gem_create_object - Implementation of driver->gem_create_object. + * @dev: DRM device + * @size: Size in bytes of the memory the object will reference + * + * This lets the GEM helpers allocate object structs for us, and keep + * our BO stats correct. 
+ */ +struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size) +{ + int ret; + struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_gem_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; + + obj->base.base.funcs = &panfrost_gem_funcs; + + spin_lock(&pfdev->mm_lock); + ret = drm_mm_insert_node(&pfdev->mm, &obj->node, + roundup(size, PAGE_SIZE) >> PAGE_SHIFT); + spin_unlock(&pfdev->mm_lock); + if (ret) + goto free_obj; + + return &obj->base.base; + +free_obj: + kfree(obj); + return ERR_PTR(ret); +} + +struct drm_gem_object * +panfrost_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct drm_gem_object *obj; + struct panfrost_gem_object *pobj; + + obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + pobj = to_panfrost_bo(obj); + + obj->resv = attach->dmabuf->resv; + + panfrost_mmu_map(pobj); + + return obj; +} diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h new file mode 100644 index 000000000000..045000eb5fcf --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Linaro, Ltd, Rob Herring */ + +#ifndef __PANFROST_GEM_H__ +#define __PANFROST_GEM_H__ + +#include +#include + +struct panfrost_gem_object { + struct drm_gem_shmem_object base; + + struct drm_mm_node node; +}; + +static inline +struct panfrost_gem_object *to_panfrost_bo(struct drm_gem_object *obj) +{ + return container_of(to_drm_gem_shmem_obj(obj), struct panfrost_gem_object, base); +} + +struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size); + +struct drm_gem_object * +panfrost_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); + +#endif /* __PANFROST_GEM_H__ */ diff --git 
a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c new file mode 100644 index 000000000000..aceaf6e44a09 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2018 Marty E. Plummer */ +/* Copyright 2019 Linaro, Ltd., Rob Herring */ +/* Copyright 2019 Collabora ltd. */ +#include +#include +#include +#include +#include +#include + +#include "panfrost_device.h" +#include "panfrost_features.h" +#include "panfrost_issues.h" +#include "panfrost_gpu.h" +#include "panfrost_regs.h" + +#define gpu_write(dev, reg, data) writel(data, dev->iomem + reg) +#define gpu_read(dev, reg) readl(dev->iomem + reg) + +static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data) +{ + struct panfrost_device *pfdev = data; + u32 state = gpu_read(pfdev, GPU_INT_STAT); + u32 fault_status = gpu_read(pfdev, GPU_FAULT_STATUS); + + if (!state) + return IRQ_NONE; + + if (state & GPU_IRQ_MASK_ERROR) { + u64 address = (u64) gpu_read(pfdev, GPU_FAULT_ADDRESS_HI) << 32; + address |= gpu_read(pfdev, GPU_FAULT_ADDRESS_LO); + + dev_warn(pfdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx\n", + fault_status & 0xFF, panfrost_exception_name(pfdev, fault_status), + address); + + if (state & GPU_IRQ_MULTIPLE_FAULT) + dev_warn(pfdev->dev, "There were multiple GPU faults - some have not been reported\n"); + + gpu_write(pfdev, GPU_INT_MASK, 0); + } + + gpu_write(pfdev, GPU_INT_CLEAR, state); + + return IRQ_HANDLED; +} + +int panfrost_gpu_soft_reset(struct panfrost_device *pfdev) +{ + int ret; + u32 val; + + gpu_write(pfdev, GPU_INT_MASK, 0); + gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED); + gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET); + + ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT, + val, val & GPU_IRQ_RESET_COMPLETED, 100, 10000); + + if (ret) { + dev_err(pfdev->dev, "gpu soft reset timed out\n"); + return ret; + } + + gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL); + 
gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL); + + return 0; +} + +static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev) +{ + u32 quirks = 0; + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8443) || + panfrost_has_hw_issue(pfdev, HW_ISSUE_11035)) + quirks |= SC_LS_PAUSEBUFFER_DISABLE; + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10327)) + quirks |= SC_SDC_DISABLE_OQ_DISCARD; + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10797)) + quirks |= SC_ENABLE_TEXGRD_FLAGS; + + if (!panfrost_has_hw_issue(pfdev, GPUCORE_1619)) { + if (panfrost_model_cmp(pfdev, 0x750) < 0) /* T60x, T62x, T72x */ + quirks |= SC_LS_ATTR_CHECK_DISABLE; + else if (panfrost_model_cmp(pfdev, 0x880) <= 0) /* T76x, T8xx */ + quirks |= SC_LS_ALLOW_ATTR_TYPES; + } + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_TLS_HASHING)) + quirks |= SC_TLS_HASH_ENABLE; + + if (quirks) + gpu_write(pfdev, GPU_SHADER_CONFIG, quirks); + + + quirks = gpu_read(pfdev, GPU_TILER_CONFIG); + + /* Set tiler clock gate override if required */ + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_T76X_3953)) + quirks |= TC_CLOCK_GATE_OVERRIDE; + + gpu_write(pfdev, GPU_TILER_CONFIG, quirks); + + + quirks = gpu_read(pfdev, GPU_L2_MMU_CONFIG); + + /* Limit read & write ID width for AXI */ + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) + quirks &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS | + L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES); + else + quirks &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS | + L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); + + gpu_write(pfdev, GPU_L2_MMU_CONFIG, quirks); + + quirks = 0; + if ((panfrost_model_eq(pfdev, 0x860) || panfrost_model_eq(pfdev, 0x880)) && + pfdev->features.revision >= 0x2000) + quirks |= JM_MAX_JOB_THROTTLE_LIMIT << JM_JOB_THROTTLE_LIMIT_SHIFT; + else if (panfrost_model_eq(pfdev, 0x6000) && + pfdev->features.coherency_features == COHERENCY_ACE) + quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << + JM_FORCE_COHERENCY_FEATURES_SHIFT; + + if (quirks) + 
gpu_write(pfdev, GPU_JM_CONFIG, quirks); +} + +#define MAX_HW_REVS 6 + +struct panfrost_model { + const char *name; + u32 id; + u32 id_mask; + u64 features; + u64 issues; + struct { + u32 revision; + u64 issues; + } revs[MAX_HW_REVS]; +}; + +#define GPU_MODEL(_name, _id, ...) \ +{\ + .name = __stringify(_name), \ + .id = _id, \ + .features = hw_features_##_name, \ + .issues = hw_issues_##_name, \ + .revs = { __VA_ARGS__ }, \ +} + +#define GPU_REV_EXT(name, _rev, _p, _s, stat) \ +{\ + .revision = (_rev) << 12 | (_p) << 4 | (_s), \ + .issues = hw_issues_##name##_r##_rev##p##_p##stat, \ +} +#define GPU_REV(name, r, p) GPU_REV_EXT(name, r, p, 0, ) + +static const struct panfrost_model gpu_models[] = { + /* T60x has an oddball version */ + GPU_MODEL(t600, 0x600, + GPU_REV_EXT(t600, 0, 0, 1, _15dev0)), + GPU_MODEL(t620, 0x620, + GPU_REV(t620, 0, 1), GPU_REV(t620, 1, 0)), + GPU_MODEL(t720, 0x720), + GPU_MODEL(t760, 0x750, + GPU_REV(t760, 0, 0), GPU_REV(t760, 0, 1), + GPU_REV_EXT(t760, 0, 1, 0, _50rel0), + GPU_REV(t760, 0, 2), GPU_REV(t760, 0, 3)), + GPU_MODEL(t820, 0x820), + GPU_MODEL(t830, 0x830), + GPU_MODEL(t860, 0x860), + GPU_MODEL(t880, 0x880), + + GPU_MODEL(g71, 0x6000, + GPU_REV_EXT(g71, 0, 0, 1, _05dev0)), + GPU_MODEL(g72, 0x6001), + GPU_MODEL(g51, 0x7000), + GPU_MODEL(g76, 0x7001), + GPU_MODEL(g52, 0x7002), + GPU_MODEL(g31, 0x7003, + GPU_REV(g31, 1, 0)), +}; + +static void panfrost_gpu_init_features(struct panfrost_device *pfdev) +{ + u32 gpu_id, num_js, major, minor, status, rev; + const char *name = "unknown"; + u64 hw_feat = 0; + u64 hw_issues = hw_issues_all; + const struct panfrost_model *model; + int i; + + pfdev->features.l2_features = gpu_read(pfdev, GPU_L2_FEATURES); + pfdev->features.core_features = gpu_read(pfdev, GPU_CORE_FEATURES); + pfdev->features.tiler_features = gpu_read(pfdev, GPU_TILER_FEATURES); + pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES); + pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES); + 
pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES); + pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + for (i = 0; i < 4; i++) + pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i)); + + pfdev->features.as_present = gpu_read(pfdev, GPU_AS_PRESENT); + + pfdev->features.js_present = gpu_read(pfdev, GPU_JS_PRESENT); + num_js = hweight32(pfdev->features.js_present); + for (i = 0; i < num_js; i++) + pfdev->features.js_features[i] = gpu_read(pfdev, GPU_JS_FEATURES(i)); + + pfdev->features.shader_present = gpu_read(pfdev, GPU_SHADER_PRESENT_LO); + pfdev->features.shader_present |= (u64)gpu_read(pfdev, GPU_SHADER_PRESENT_HI) << 32; + + pfdev->features.tiler_present = gpu_read(pfdev, GPU_TILER_PRESENT_LO); + pfdev->features.tiler_present |= (u64)gpu_read(pfdev, GPU_TILER_PRESENT_HI) << 32; + + pfdev->features.l2_present = gpu_read(pfdev, GPU_L2_PRESENT_LO); + pfdev->features.l2_present |= (u64)gpu_read(pfdev, GPU_L2_PRESENT_HI) << 32; + pfdev->features.nr_core_groups = hweight64(pfdev->features.l2_present); + + pfdev->features.stack_present = gpu_read(pfdev, GPU_STACK_PRESENT_LO); + pfdev->features.stack_present |= (u64)gpu_read(pfdev, GPU_STACK_PRESENT_HI) << 32; + + gpu_id = gpu_read(pfdev, GPU_ID); + pfdev->features.revision = gpu_id & 0xffff; + pfdev->features.id = gpu_id >> 16; + + /* The T60x has an oddball ID value. Fix it up to the standard Midgard + * format so we (and userspace) don't have to special case it. 
+ */ + if (pfdev->features.id == 0x6956) + pfdev->features.id = 0x0600; + + major = (pfdev->features.revision >> 12) & 0xf; + minor = (pfdev->features.revision >> 4) & 0xff; + status = pfdev->features.revision & 0xf; + rev = pfdev->features.revision; + + gpu_id = pfdev->features.id; + + for (model = gpu_models; model->name; model++) { + int best = -1; + + if (!panfrost_model_eq(pfdev, model->id)) + continue; + + name = model->name; + hw_feat = model->features; + hw_issues |= model->issues; + for (i = 0; i < MAX_HW_REVS; i++) { + if (model->revs[i].revision == rev) { + best = i; + break; + } else if (model->revs[i].revision == (rev & ~0xf)) + best = i; + } + + if (best >= 0) + hw_issues |= model->revs[best].issues; + + break; + } + + bitmap_from_u64(pfdev->features.hw_features, hw_feat); + bitmap_from_u64(pfdev->features.hw_issues, hw_issues); + + dev_info(pfdev->dev, "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x", + name, gpu_id, major, minor, status); + dev_info(pfdev->dev, "features: %64pb, issues: %64pb", + pfdev->features.hw_features, + pfdev->features.hw_issues); + + dev_info(pfdev->dev, "Features: L2:0x%08x Shader:0x%08x Tiler:0x%08x Mem:0x%0x MMU:0x%08x AS:0x%x JS:0x%x", + gpu_read(pfdev, GPU_L2_FEATURES), + gpu_read(pfdev, GPU_CORE_FEATURES), + gpu_read(pfdev, GPU_TILER_FEATURES), + gpu_read(pfdev, GPU_MEM_FEATURES), + gpu_read(pfdev, GPU_MMU_FEATURES), + gpu_read(pfdev, GPU_AS_PRESENT), + gpu_read(pfdev, GPU_JS_PRESENT)); + + dev_info(pfdev->dev, "shader_present=0x%0llx l2_present=0x%0llx", + pfdev->features.shader_present, pfdev->features.l2_present); +} + +void panfrost_gpu_power_on(struct panfrost_device *pfdev) +{ + int ret; + u32 val; + + /* Just turn on everything for now */ + gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO, + val, val == pfdev->features.l2_present, 100, 1000); + + gpu_write(pfdev, STACK_PWRON_LO, pfdev->features.stack_present); + ret |= 
readl_relaxed_poll_timeout(pfdev->iomem + STACK_READY_LO, + val, val == pfdev->features.stack_present, 100, 1000); + + gpu_write(pfdev, SHADER_PWRON_LO, pfdev->features.shader_present); + ret |= readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO, + val, val == pfdev->features.shader_present, 100, 1000); + + gpu_write(pfdev, TILER_PWRON_LO, pfdev->features.tiler_present); + ret |= readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO, + val, val == pfdev->features.tiler_present, 100, 1000); + + if (ret) + dev_err(pfdev->dev, "error powering up gpu"); +} + +void panfrost_gpu_power_off(struct panfrost_device *pfdev) +{ + gpu_write(pfdev, TILER_PWROFF_LO, 0); + gpu_write(pfdev, SHADER_PWROFF_LO, 0); + gpu_write(pfdev, STACK_PWROFF_LO, 0); + gpu_write(pfdev, L2_PWROFF_LO, 0); +} + +int panfrost_gpu_init(struct panfrost_device *pfdev) +{ + int err, irq; + + err = panfrost_gpu_soft_reset(pfdev); + if (err) + return err; + + panfrost_gpu_init_features(pfdev); + + irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu"); + if (irq <= 0) + return -ENODEV; + + err = devm_request_irq(pfdev->dev, irq, panfrost_gpu_irq_handler, + IRQF_SHARED, "gpu", pfdev); + if (err) { + dev_err(pfdev->dev, "failed to request gpu irq"); + return err; + } + + panfrost_gpu_init_quirks(pfdev); + panfrost_gpu_power_on(pfdev); + + return 0; +} + +void panfrost_gpu_fini(struct panfrost_device *pfdev) +{ + panfrost_gpu_power_off(pfdev); +} + +u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev) +{ + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) + return gpu_read(pfdev, GPU_LATEST_FLUSH_ID); + return 0; +} diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h new file mode 100644 index 000000000000..4112412087b2 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2018 Marty E. Plummer */ +/* Copyright 2019 Collabora ltd. 
*/ + +#ifndef __PANFROST_GPU_H__ +#define __PANFROST_GPU_H__ + +struct panfrost_device; + +int panfrost_gpu_init(struct panfrost_device *pfdev); +void panfrost_gpu_fini(struct panfrost_device *pfdev); + +u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev); + +int panfrost_gpu_soft_reset(struct panfrost_device *pfdev); +void panfrost_gpu_power_on(struct panfrost_device *pfdev); +void panfrost_gpu_power_off(struct panfrost_device *pfdev); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_issues.h b/drivers/gpu/drm/panfrost/panfrost_issues.h new file mode 100644 index 000000000000..cec6dcdadb5c --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_issues.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. */ +/* Copyright 2019 Linaro, Ltd., Rob Herring */ +#ifndef __PANFROST_ISSUES_H__ +#define __PANFROST_ISSUES_H__ + +#include + +#include "panfrost_device.h" + +/* + * This is not a complete list of issues, but only the ones the driver needs + * to care about. 
/*
 * Hardware issue (erratum) identifiers.
 *
 * Each enumerator is used as a bit index: the hw_issues_* masks in this
 * header are built with BIT_ULL(HW_ISSUE_x), and panfrost_has_hw_issue()
 * tests the same index in pfdev->features.hw_issues. Because the masks are
 * 64-bit values, every enumerator must stay below 64.
 */
enum panfrost_hw_issue {
	HW_ISSUE_6367,
	HW_ISSUE_6787,
	HW_ISSUE_8186,
	HW_ISSUE_8245,
	HW_ISSUE_8316,
	HW_ISSUE_8394,
	HW_ISSUE_8401,
	HW_ISSUE_8408,
	HW_ISSUE_8443,
	HW_ISSUE_8987,
	HW_ISSUE_9435,
	HW_ISSUE_9510,
	HW_ISSUE_9630,
	HW_ISSUE_10327,
	HW_ISSUE_10649,
	HW_ISSUE_10676,
	HW_ISSUE_10797,
	HW_ISSUE_10817,
	HW_ISSUE_10883,
	HW_ISSUE_10959,
	HW_ISSUE_10969,
	HW_ISSUE_11020,
	HW_ISSUE_11024,
	HW_ISSUE_11035,
	HW_ISSUE_11056,
	HW_ISSUE_T76X_3542,
	HW_ISSUE_T76X_3953,
	HW_ISSUE_TMIX_8463,
	GPUCORE_1619,
	HW_ISSUE_TMIX_8438,
	HW_ISSUE_TGOX_R1_1234,
	/* Last enumerator; presumably the number of issue bits - TODO confirm */
	HW_ISSUE_END
};
BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t760 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t760_r0p0 (\ + BIT_ULL(HW_ISSUE_11020) | \ + BIT_ULL(HW_ISSUE_11024) | \ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t760_r0p1 (\ + BIT_ULL(HW_ISSUE_11020) | \ + BIT_ULL(HW_ISSUE_11024) | \ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t760_r0p1_50rel0 (\ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t760_r0p2 (\ + BIT_ULL(HW_ISSUE_11020) | \ + BIT_ULL(HW_ISSUE_11024) | \ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t760_r0p3 (\ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t820 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t830 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t860 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t880 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_g31 0 + +#define hw_issues_g31_r1p0 (\ + BIT_ULL(HW_ISSUE_TGOX_R1_1234)) + +#define hw_issues_g51 0 + +#define hw_issues_g52 0 + +#define hw_issues_g71 (\ + BIT_ULL(HW_ISSUE_TMIX_8463) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_g71_r0p0_05dev0 (\ + BIT_ULL(HW_ISSUE_T76X_3953)) + +#define hw_issues_g72 0 + +#define hw_issues_g76 0 + +static inline bool panfrost_has_hw_issue(struct panfrost_device *pfdev, + enum panfrost_hw_issue issue) +{ + return test_bit(issue, pfdev->features.hw_issues); +} + +#endif /* __PANFROST_ISSUES_H__ */ diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c new file mode 100644 index 000000000000..0a7ed04f7d52 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -0,0 +1,560 @@ +// SPDX-License-Identifier: GPL-2.0 +/* 
Copyright 2019 Linaro, Ltd, Rob Herring */ +/* Copyright 2019 Collabora ltd. */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "panfrost_device.h" +#include "panfrost_devfreq.h" +#include "panfrost_job.h" +#include "panfrost_features.h" +#include "panfrost_issues.h" +#include "panfrost_gem.h" +#include "panfrost_regs.h" +#include "panfrost_gpu.h" +#include "panfrost_mmu.h" + +#define job_write(dev, reg, data) writel(data, dev->iomem + (reg)) +#define job_read(dev, reg) readl(dev->iomem + (reg)) + +struct panfrost_queue_state { + struct drm_gpu_scheduler sched; + + u64 fence_context; + u64 emit_seqno; +}; + +struct panfrost_job_slot { + struct panfrost_queue_state queue[NUM_JOB_SLOTS]; + spinlock_t job_lock; +}; + +static struct panfrost_job * +to_panfrost_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct panfrost_job, base); +} + +struct panfrost_fence { + struct dma_fence base; + struct drm_device *dev; + /* panfrost seqno for signaled() test */ + u64 seqno; + int queue; +}; + +static inline struct panfrost_fence * +to_panfrost_fence(struct dma_fence *fence) +{ + return (struct panfrost_fence *)fence; +} + +static const char *panfrost_fence_get_driver_name(struct dma_fence *fence) +{ + return "panfrost"; +} + +static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence) +{ + struct panfrost_fence *f = to_panfrost_fence(fence); + + switch (f->queue) { + case 0: + return "panfrost-js-0"; + case 1: + return "panfrost-js-1"; + case 2: + return "panfrost-js-2"; + default: + return NULL; + } +} + +static const struct dma_fence_ops panfrost_fence_ops = { + .get_driver_name = panfrost_fence_get_driver_name, + .get_timeline_name = panfrost_fence_get_timeline_name, +}; + +static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num) +{ + struct panfrost_fence *fence; + struct panfrost_job_slot *js = pfdev->js; + + fence = kzalloc(sizeof(*fence), 
GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->dev = pfdev->ddev; + fence->queue = js_num; + fence->seqno = ++js->queue[js_num].emit_seqno; + dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock, + js->queue[js_num].fence_context, fence->seqno); + + return &fence->base; +} + +static int panfrost_job_get_slot(struct panfrost_job *job) +{ + /* JS0: fragment jobs. + * JS1: vertex/tiler jobs + * JS2: compute jobs + */ + if (job->requirements & PANFROST_JD_REQ_FS) + return 0; + +/* Not exposed to userspace yet */ +#if 0 + if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) { + if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) && + (job->pfdev->features.nr_core_groups == 2)) + return 2; + if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987)) + return 2; + } +#endif + return 1; +} + +static void panfrost_job_write_affinity(struct panfrost_device *pfdev, + u32 requirements, + int js) +{ + u64 affinity; + + /* + * Use all cores for now. + * Eventually we may need to support tiler only jobs and h/w with + * multiple (2) coherent core groups + */ + affinity = pfdev->features.shader_present; + + job_write(pfdev, JS_AFFINITY_NEXT_LO(js), affinity & 0xFFFFFFFF); + job_write(pfdev, JS_AFFINITY_NEXT_HI(js), affinity >> 32); +} + +static void panfrost_job_hw_submit(struct panfrost_job *job, int js) +{ + struct panfrost_device *pfdev = job->pfdev; + unsigned long flags; + u32 cfg; + u64 jc_head = job->jc; + int ret; + + ret = pm_runtime_get_sync(pfdev->dev); + if (ret < 0) + return; + + if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) + goto end; + + panfrost_devfreq_record_transition(pfdev, js); + spin_lock_irqsave(&pfdev->hwaccess_lock, flags); + + job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF); + job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32); + + panfrost_job_write_affinity(pfdev, job->requirements, js); + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on + * start */ + /* TODO: different address 
spaces */ + cfg = JS_CONFIG_THREAD_PRI(8) | + JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE | + JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) + cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649)) + cfg |= JS_CONFIG_START_MMU; + + job_write(pfdev, JS_CONFIG_NEXT(js), cfg); + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) + job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id); + + /* GO ! */ + dev_dbg(pfdev->dev, "JS: Submitting atom %p to js[%d] with head=0x%llx", + job, js, jc_head); + + job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START); + + spin_unlock_irqrestore(&pfdev->hwaccess_lock, flags); + +end: + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_put_autosuspend(pfdev->dev); +} + +static void panfrost_acquire_object_fences(struct drm_gem_object **bos, + int bo_count, + struct dma_fence **implicit_fences) +{ + int i; + + for (i = 0; i < bo_count; i++) + implicit_fences[i] = reservation_object_get_excl_rcu(bos[i]->resv); +} + +static void panfrost_attach_object_fences(struct drm_gem_object **bos, + int bo_count, + struct dma_fence *fence) +{ + int i; + + for (i = 0; i < bo_count; i++) + reservation_object_add_excl_fence(bos[i]->resv, fence); +} + +int panfrost_job_push(struct panfrost_job *job) +{ + struct panfrost_device *pfdev = job->pfdev; + int slot = panfrost_job_get_slot(job); + struct drm_sched_entity *entity = &job->file_priv->sched_entity[slot]; + struct ww_acquire_ctx acquire_ctx; + int ret = 0; + + mutex_lock(&pfdev->sched_lock); + + ret = drm_gem_lock_reservations(job->bos, job->bo_count, + &acquire_ctx); + if (ret) { + mutex_unlock(&pfdev->sched_lock); + return ret; + } + + ret = drm_sched_job_init(&job->base, entity, NULL); + if (ret) { + mutex_unlock(&pfdev->sched_lock); + goto unlock; + } + + job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); + + kref_get(&job->refcount); /* put by scheduler job 
completion */ + + panfrost_acquire_object_fences(job->bos, job->bo_count, + job->implicit_fences); + + drm_sched_entity_push_job(&job->base, entity); + + mutex_unlock(&pfdev->sched_lock); + + panfrost_attach_object_fences(job->bos, job->bo_count, + job->render_done_fence); + +unlock: + drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx); + + return ret; +} + +static void panfrost_job_cleanup(struct kref *ref) +{ + struct panfrost_job *job = container_of(ref, struct panfrost_job, + refcount); + unsigned int i; + + if (job->in_fences) { + for (i = 0; i < job->in_fence_count; i++) + dma_fence_put(job->in_fences[i]); + kvfree(job->in_fences); + } + if (job->implicit_fences) { + for (i = 0; i < job->bo_count; i++) + dma_fence_put(job->implicit_fences[i]); + kvfree(job->implicit_fences); + } + dma_fence_put(job->done_fence); + dma_fence_put(job->render_done_fence); + + if (job->bos) { + for (i = 0; i < job->bo_count; i++) + drm_gem_object_put_unlocked(job->bos[i]); + kvfree(job->bos); + } + + kfree(job); +} + +void panfrost_job_put(struct panfrost_job *job) +{ + kref_put(&job->refcount, panfrost_job_cleanup); +} + +static void panfrost_job_free(struct drm_sched_job *sched_job) +{ + struct panfrost_job *job = to_panfrost_job(sched_job); + + drm_sched_job_cleanup(sched_job); + + panfrost_job_put(job); +} + +static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) +{ + struct panfrost_job *job = to_panfrost_job(sched_job); + struct dma_fence *fence; + unsigned int i; + + /* Explicit fences */ + for (i = 0; i < job->in_fence_count; i++) { + if (job->in_fences[i]) { + fence = job->in_fences[i]; + job->in_fences[i] = NULL; + return fence; + } + } + + /* Implicit fences, max. 
one per BO */ + for (i = 0; i < job->bo_count; i++) { + if (job->implicit_fences[i]) { + fence = job->implicit_fences[i]; + job->implicit_fences[i] = NULL; + return fence; + } + } + + return NULL; +} + +static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) +{ + struct panfrost_job *job = to_panfrost_job(sched_job); + struct panfrost_device *pfdev = job->pfdev; + int slot = panfrost_job_get_slot(job); + struct dma_fence *fence = NULL; + + if (unlikely(job->base.s_fence->finished.error)) + return NULL; + + pfdev->jobs[slot] = job; + + fence = panfrost_fence_create(pfdev, slot); + if (IS_ERR(fence)) + return NULL; + + if (job->done_fence) + dma_fence_put(job->done_fence); + job->done_fence = dma_fence_get(fence); + + panfrost_job_hw_submit(job, slot); + + return fence; +} + +void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) +{ + int j; + u32 irq_mask = 0; + + for (j = 0; j < NUM_JOB_SLOTS; j++) { + irq_mask |= MK_JS_MASK(j); + } + + job_write(pfdev, JOB_INT_CLEAR, irq_mask); + job_write(pfdev, JOB_INT_MASK, irq_mask); +} + +static void panfrost_job_timedout(struct drm_sched_job *sched_job) +{ + struct panfrost_job *job = to_panfrost_job(sched_job); + struct panfrost_device *pfdev = job->pfdev; + int js = panfrost_job_get_slot(job); + int i; + + /* + * If the GPU managed to complete this jobs fence, the timeout is + * spurious. Bail out. 
+ */ + if (dma_fence_is_signaled(job->done_fence)) + return; + + dev_err(pfdev->dev, "gpu sched timeout, js=%d, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", + js, + job_read(pfdev, JS_STATUS(js)), + job_read(pfdev, JS_HEAD_LO(js)), + job_read(pfdev, JS_TAIL_LO(js)), + sched_job); + + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_stop(&pfdev->js->queue[i].sched); + + if (sched_job) + drm_sched_increase_karma(sched_job); + + /* panfrost_core_dump(pfdev); */ + + panfrost_devfreq_record_transition(pfdev, js); + panfrost_gpu_soft_reset(pfdev); + + /* TODO: Re-enable all other address spaces */ + panfrost_mmu_enable(pfdev, 0); + panfrost_gpu_power_on(pfdev); + panfrost_job_enable_interrupts(pfdev); + + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched); + + /* restart scheduler after GPU is usable again */ + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_start(&pfdev->js->queue[i].sched, true); +} + +static const struct drm_sched_backend_ops panfrost_sched_ops = { + .dependency = panfrost_job_dependency, + .run_job = panfrost_job_run, + .timedout_job = panfrost_job_timedout, + .free_job = panfrost_job_free +}; + +static irqreturn_t panfrost_job_irq_handler(int irq, void *data) +{ + struct panfrost_device *pfdev = data; + u32 status = job_read(pfdev, JOB_INT_STAT); + int j; + + dev_dbg(pfdev->dev, "jobslot irq status=%x\n", status); + + if (!status) + return IRQ_NONE; + + pm_runtime_mark_last_busy(pfdev->dev); + + for (j = 0; status; j++) { + u32 mask = MK_JS_MASK(j); + + if (!(status & mask)) + continue; + + job_write(pfdev, JOB_INT_CLEAR, mask); + + if (status & JOB_INT_MASK_ERR(j)) { + job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); + + dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x", + j, + panfrost_exception_name(pfdev, job_read(pfdev, JS_STATUS(j))), + job_read(pfdev, JS_HEAD_LO(j)), + job_read(pfdev, JS_TAIL_LO(j))); + + drm_sched_fault(&pfdev->js->queue[j].sched); + } + + if (status & 
JOB_INT_MASK_DONE(j)) { + panfrost_devfreq_record_transition(pfdev, j); + dma_fence_signal(pfdev->jobs[j]->done_fence); + } + + status &= ~mask; + } + + return IRQ_HANDLED; +} + +int panfrost_job_init(struct panfrost_device *pfdev) +{ + struct panfrost_job_slot *js; + int ret, j, irq; + + pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL); + if (!js) + return -ENOMEM; + + spin_lock_init(&js->job_lock); + + irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job"); + if (irq <= 0) + return -ENODEV; + + ret = devm_request_irq(pfdev->dev, irq, panfrost_job_irq_handler, + IRQF_SHARED, "job", pfdev); + if (ret) { + dev_err(pfdev->dev, "failed to request job irq"); + return ret; + } + + for (j = 0; j < NUM_JOB_SLOTS; j++) { + js->queue[j].fence_context = dma_fence_context_alloc(1); + + ret = drm_sched_init(&js->queue[j].sched, + &panfrost_sched_ops, + 1, 0, msecs_to_jiffies(500), + "pan_js"); + if (ret) { + dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); + goto err_sched; + } + } + + panfrost_job_enable_interrupts(pfdev); + + return 0; + +err_sched: + for (j--; j >= 0; j--) + drm_sched_fini(&js->queue[j].sched); + + return ret; +} + +void panfrost_job_fini(struct panfrost_device *pfdev) +{ + struct panfrost_job_slot *js = pfdev->js; + int j; + + job_write(pfdev, JOB_INT_MASK, 0); + + for (j = 0; j < NUM_JOB_SLOTS; j++) + drm_sched_fini(&js->queue[j].sched); + +} + +int panfrost_job_open(struct panfrost_file_priv *panfrost_priv) +{ + struct panfrost_device *pfdev = panfrost_priv->pfdev; + struct panfrost_job_slot *js = pfdev->js; + struct drm_sched_rq *rq; + int ret, i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + rq = &js->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i], &rq, 1, NULL); + if (WARN_ON(ret)) + return ret; + } + return 0; +} + +void panfrost_job_close(struct panfrost_file_priv *panfrost_priv) +{ + int i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) + 
drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]); +} + +int panfrost_job_is_idle(struct panfrost_device *pfdev) +{ + struct panfrost_job_slot *js = pfdev->js; + int i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + /* If there are any jobs in the HW queue, we're not idle */ + if (atomic_read(&js->queue[i].sched.hw_rq_count)) + return false; + + /* Check whether the hardware is idle */ + if (pfdev->devfreq.slot[i].busy) + return false; + } + + return true; +} diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h new file mode 100644 index 000000000000..62454128a792 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_job.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Collabora ltd. */ + +#ifndef __PANFROST_JOB_H__ +#define __PANFROST_JOB_H__ + +#include +#include + +struct panfrost_device; +struct panfrost_gem_object; +struct panfrost_file_priv; + +struct panfrost_job { + struct drm_sched_job base; + + struct kref refcount; + + struct panfrost_device *pfdev; + struct panfrost_file_priv *file_priv; + + /* Optional fences userspace can pass in for the job to depend on. */ + struct dma_fence **in_fences; + u32 in_fence_count; + + /* Fence to be signaled by IRQ handler when the job is complete. 
*/ + struct dma_fence *done_fence; + + __u64 jc; + __u32 requirements; + __u32 flush_id; + + /* Exclusive fences we have taken from the BOs to wait for */ + struct dma_fence **implicit_fences; + struct drm_gem_object **bos; + u32 bo_count; + + /* Fence to be signaled by drm-sched once its done with the job */ + struct dma_fence *render_done_fence; +}; + +int panfrost_job_init(struct panfrost_device *pfdev); +void panfrost_job_fini(struct panfrost_device *pfdev); +int panfrost_job_open(struct panfrost_file_priv *panfrost_priv); +void panfrost_job_close(struct panfrost_file_priv *panfrost_priv); +int panfrost_job_push(struct panfrost_job *job); +void panfrost_job_put(struct panfrost_job *job); +void panfrost_job_enable_interrupts(struct panfrost_device *pfdev); +int panfrost_job_is_idle(struct panfrost_device *pfdev); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c new file mode 100644 index 000000000000..502af37d4b62 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Linaro, Ltd, Rob Herring */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "panfrost_device.h" +#include "panfrost_mmu.h" +#include "panfrost_gem.h" +#include "panfrost_features.h" +#include "panfrost_regs.h" + +#define mmu_write(dev, reg, data) writel(data, dev->iomem + reg) +#define mmu_read(dev, reg) readl(dev->iomem + reg) + +struct panfrost_mmu { + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable_ops *pgtbl_ops; + struct mutex lock; +}; + +static int wait_ready(struct panfrost_device *pfdev, u32 as_nr) +{ + int ret; + u32 val; + + /* Wait for the MMU status to indicate there is no active command, in + * case one is pending. 
*/ + ret = readl_relaxed_poll_timeout_atomic(pfdev->iomem + AS_STATUS(as_nr), + val, !(val & AS_STATUS_AS_ACTIVE), 10, 1000); + + if (ret) + dev_err(pfdev->dev, "AS_ACTIVE bit stuck\n"); + + return ret; +} + +static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd) +{ + int status; + + /* write AS_COMMAND when MMU is ready to accept another command */ + status = wait_ready(pfdev, as_nr); + if (!status) + mmu_write(pfdev, AS_COMMAND(as_nr), cmd); + + return status; +} + +static void lock_region(struct panfrost_device *pfdev, u32 as_nr, + u64 iova, size_t size) +{ + u8 region_width; + u64 region = iova & PAGE_MASK; + /* + * fls returns: + * 1 .. 32 + * + * 10 + fls(num_pages) + * results in the range (11 .. 42) + */ + + size = round_up(size, PAGE_SIZE); + + region_width = 10 + fls(size >> PAGE_SHIFT); + if ((size >> PAGE_SHIFT) != (1ul << (region_width - 11))) { + /* not pow2, so must go up to the next pow2 */ + region_width += 1; + } + region |= region_width; + + /* Lock the region that needs to be updated */ + mmu_write(pfdev, AS_LOCKADDR_LO(as_nr), region & 0xFFFFFFFFUL); + mmu_write(pfdev, AS_LOCKADDR_HI(as_nr), (region >> 32) & 0xFFFFFFFFUL); + write_cmd(pfdev, as_nr, AS_COMMAND_LOCK); +} + + +static int mmu_hw_do_operation(struct panfrost_device *pfdev, u32 as_nr, + u64 iova, size_t size, u32 op) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&pfdev->hwaccess_lock, flags); + + if (op != AS_COMMAND_UNLOCK) + lock_region(pfdev, as_nr, iova, size); + + /* Run the MMU operation */ + write_cmd(pfdev, as_nr, op); + + /* Wait for the flush to complete */ + ret = wait_ready(pfdev, as_nr); + + spin_unlock_irqrestore(&pfdev->hwaccess_lock, flags); + + return ret; +} + +void panfrost_mmu_enable(struct panfrost_device *pfdev, u32 as_nr) +{ + struct io_pgtable_cfg *cfg = &pfdev->mmu->pgtbl_cfg; + u64 transtab = cfg->arm_mali_lpae_cfg.transtab; + u64 memattr = cfg->arm_mali_lpae_cfg.memattr; + + mmu_write(pfdev, MMU_INT_CLEAR, ~0); + 
mmu_write(pfdev, MMU_INT_MASK, ~0); + + mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL); + mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32); + + /* Need to revisit mem attrs. + * NC is the default, Mali driver is inner WT. + */ + mmu_write(pfdev, AS_MEMATTR_LO(as_nr), memattr & 0xffffffffUL); + mmu_write(pfdev, AS_MEMATTR_HI(as_nr), memattr >> 32); + + write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); +} + +static void mmu_disable(struct panfrost_device *pfdev, u32 as_nr) +{ + mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0); + mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0); + + mmu_write(pfdev, AS_MEMATTR_LO(as_nr), 0); + mmu_write(pfdev, AS_MEMATTR_HI(as_nr), 0); + + write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); +} + +int panfrost_mmu_map(struct panfrost_gem_object *bo) +{ + struct drm_gem_object *obj = &bo->base.base; + struct panfrost_device *pfdev = to_panfrost_device(obj->dev); + struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops; + u64 iova = bo->node.start << PAGE_SHIFT; + unsigned int count; + struct scatterlist *sgl; + struct sg_table *sgt; + int ret; + + sgt = drm_gem_shmem_get_pages_sgt(obj); + if (WARN_ON(IS_ERR(sgt))) + return PTR_ERR(sgt); + + ret = pm_runtime_get_sync(pfdev->dev); + if (ret < 0) + return ret; + + mutex_lock(&pfdev->mmu->lock); + + for_each_sg(sgt->sgl, sgl, sgt->nents, count) { + unsigned long paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + dev_dbg(pfdev->dev, "map: iova=%llx, paddr=%lx, len=%zx", iova, paddr, len); + + while (len) { + ops->map(ops, iova, paddr, SZ_4K, IOMMU_WRITE | IOMMU_READ); + iova += SZ_4K; + paddr += SZ_4K; + len -= SZ_4K; + } + } + + mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT, + bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT); + + mutex_unlock(&pfdev->mmu->lock); + + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_put_autosuspend(pfdev->dev); + + return 0; +} + +void panfrost_mmu_unmap(struct panfrost_gem_object *bo) +{ + struct drm_gem_object *obj = 
&bo->base.base; + struct panfrost_device *pfdev = to_panfrost_device(obj->dev); + struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops; + u64 iova = bo->node.start << PAGE_SHIFT; + size_t len = bo->node.size << PAGE_SHIFT; + size_t unmapped_len = 0; + int ret; + + dev_dbg(pfdev->dev, "unmap: iova=%llx, len=%zx", iova, len); + + ret = pm_runtime_get_sync(pfdev->dev); + if (ret < 0) + return; + + mutex_lock(&pfdev->mmu->lock); + + while (unmapped_len < len) { + ops->unmap(ops, iova, SZ_4K); + iova += SZ_4K; + unmapped_len += SZ_4K; + } + + mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT, + bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT); + + mutex_unlock(&pfdev->mmu->lock); + + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_put_autosuspend(pfdev->dev); +} + +static void mmu_tlb_inv_context_s1(void *cookie) +{ + struct panfrost_device *pfdev = cookie; + + mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM); +} + +static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{} + +static void mmu_tlb_sync_context(void *cookie) +{ + //struct panfrost_device *pfdev = cookie; + // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X +} + +static const struct iommu_gather_ops mmu_tlb_ops = { + .tlb_flush_all = mmu_tlb_inv_context_s1, + .tlb_add_flush = mmu_tlb_inv_range_nosync, + .tlb_sync = mmu_tlb_sync_context, +}; + +static const char *access_type_name(struct panfrost_device *pfdev, + u32 fault_status) +{ + switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_AARCH64_MMU)) + return "ATOMIC"; + else + return "UNKNOWN"; + case AS_FAULTSTATUS_ACCESS_TYPE_READ: + return "READ"; + case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: + return "WRITE"; + case AS_FAULTSTATUS_ACCESS_TYPE_EX: + return "EXECUTE"; + default: + WARN_ON(1); + return NULL; + } +} + +static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data) +{ 
+ struct panfrost_device *pfdev = data; + u32 status = mmu_read(pfdev, MMU_INT_STAT); + int i; + + if (!status) + return IRQ_NONE; + + dev_err(pfdev->dev, "mmu irq status=%x\n", status); + + for (i = 0; status; i++) { + u32 mask = BIT(i) | BIT(i + 16); + u64 addr; + u32 fault_status; + u32 exception_type; + u32 access_type; + u32 source_id; + + if (!(status & mask)) + continue; + + fault_status = mmu_read(pfdev, AS_FAULTSTATUS(i)); + addr = mmu_read(pfdev, AS_FAULTADDRESS_LO(i)); + addr |= (u64)mmu_read(pfdev, AS_FAULTADDRESS_HI(i)) << 32; + + /* decode the fault status */ + exception_type = fault_status & 0xFF; + access_type = (fault_status >> 8) & 0x3; + source_id = (fault_status >> 16); + + /* terminal fault, print info about the fault */ + dev_err(pfdev->dev, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "decoded fault status: %s\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n", + i, addr, + "TODO", + fault_status, + (fault_status & (1 << 10) ? 
"DECODER FAULT" : "SLAVE FAULT"), + exception_type, panfrost_exception_name(pfdev, exception_type), + access_type, access_type_name(pfdev, fault_status), + source_id); + + mmu_write(pfdev, MMU_INT_CLEAR, mask); + + status &= ~mask; + } + + return IRQ_HANDLED; +}; + +int panfrost_mmu_init(struct panfrost_device *pfdev) +{ + struct io_pgtable_ops *pgtbl_ops; + int err, irq; + + pfdev->mmu = devm_kzalloc(pfdev->dev, sizeof(*pfdev->mmu), GFP_KERNEL); + if (!pfdev->mmu) + return -ENOMEM; + + mutex_init(&pfdev->mmu->lock); + + irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu"); + if (irq <= 0) + return -ENODEV; + + err = devm_request_irq(pfdev->dev, irq, panfrost_mmu_irq_handler, + IRQF_SHARED, "mmu", pfdev); + + if (err) { + dev_err(pfdev->dev, "failed to request mmu irq"); + return err; + } + mmu_write(pfdev, MMU_INT_CLEAR, ~0); + mmu_write(pfdev, MMU_INT_MASK, ~0); + + pfdev->mmu->pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = SZ_4K, // | SZ_2M | SZ_1G), + .ias = FIELD_GET(0xff, pfdev->features.mmu_features), + .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), + .tlb = &mmu_tlb_ops, + .iommu_dev = pfdev->dev, + }; + + pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &pfdev->mmu->pgtbl_cfg, + pfdev); + if (!pgtbl_ops) + return -ENOMEM; + + pfdev->mmu->pgtbl_ops = pgtbl_ops; + + panfrost_mmu_enable(pfdev, 0); + + return 0; +} + +void panfrost_mmu_fini(struct panfrost_device *pfdev) +{ + mmu_write(pfdev, MMU_INT_MASK, 0); + mmu_disable(pfdev, 0); + + free_io_pgtable_ops(pfdev->mmu->pgtbl_ops); +} diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h new file mode 100644 index 000000000000..f5878d86a5ce --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Linaro, Ltd, Rob Herring */ + +#ifndef __PANFROST_MMU_H__ +#define __PANFROST_MMU_H__ + +struct panfrost_gem_object; + +int panfrost_mmu_map(struct 
panfrost_gem_object *bo); +void panfrost_mmu_unmap(struct panfrost_gem_object *bo); + +int panfrost_mmu_init(struct panfrost_device *pfdev); +void panfrost_mmu_fini(struct panfrost_device *pfdev); + +void panfrost_mmu_enable(struct panfrost_device *pfdev, u32 as_nr); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h new file mode 100644 index 000000000000..578c5fc2188b --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -0,0 +1,298 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2018 Marty E. Plummer */ +/* Copyright 2019 Linaro, Ltd, Rob Herring */ +/* + * Register definitions based on mali_midg_regmap.h + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + */ +#ifndef __PANFROST_REGS_H__ +#define __PANFROST_REGS_H__ + +#define GPU_ID 0x00 +#define GPU_L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define GPU_CORE_FEATURES 0x008 /* (RO) Shader Core Features */ +#define GPU_TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define GPU_MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define GROUPS_L2_COHERENT BIT(0) /* Cores groups are l2 coherent */ + +#define GPU_MMU_FEATURES 0x014 /* (RO) MMU features */ +#define GPU_AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define GPU_JS_PRESENT 0x01C /* (RO) Job slots present */ + +#define GPU_INT_RAWSTAT 0x20 +#define GPU_INT_CLEAR 0x24 +#define GPU_INT_MASK 0x28 +#define GPU_INT_STAT 0x2c +#define GPU_IRQ_FAULT BIT(0) +#define GPU_IRQ_MULTIPLE_FAULT BIT(7) +#define GPU_IRQ_RESET_COMPLETED BIT(8) +#define GPU_IRQ_POWER_CHANGED BIT(9) +#define GPU_IRQ_POWER_CHANGED_ALL BIT(10) +#define GPU_IRQ_PERFCNT_SAMPLE_COMPLETED BIT(16) +#define GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17) +#define GPU_IRQ_MASK_ALL \ + (GPU_IRQ_FAULT |\ + GPU_IRQ_MULTIPLE_FAULT |\ + GPU_IRQ_RESET_COMPLETED |\ + GPU_IRQ_POWER_CHANGED |\ + GPU_IRQ_POWER_CHANGED_ALL |\ + GPU_IRQ_PERFCNT_SAMPLE_COMPLETED |\ + GPU_IRQ_CLEAN_CACHES_COMPLETED) +#define 
GPU_IRQ_MASK_ERROR \ + ( \ + GPU_IRQ_FAULT |\ + GPU_IRQ_MULTIPLE_FAULT) +#define GPU_CMD 0x30 +#define GPU_CMD_SOFT_RESET 0x01 +#define GPU_STATUS 0x34 +#define GPU_LATEST_FLUSH_ID 0x38 +#define GPU_FAULT_STATUS 0x3C +#define GPU_FAULT_ADDRESS_LO 0x40 +#define GPU_FAULT_ADDRESS_HI 0x44 + +#define GPU_THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define GPU_THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define GPU_THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define GPU_THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define GPU_THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that + * TLS must be allocated for */ + +#define GPU_TEXTURE_FEATURES(n) (0x0B0 + ((n) * 4)) +#define GPU_JS_FEATURES(n) (0x0C0 + ((n) * 4)) + +#define GPU_SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define GPU_SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ +#define GPU_TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define GPU_TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define GPU_L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define GPU_L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + +#define GPU_COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ACE_LITE BIT(0) +#define COHERENCY_ACE BIT(1) + +#define GPU_STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ +#define GPU_STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache 
ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ +#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ +#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ + + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ +#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ + + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition 
bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ +#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ + + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + +#define GPU_JM_CONFIG 0xF00 /* (RW) Job Manager configuration register (Implementation specific register) */ +#define GPU_SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ +#define GPU_TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */ +#define GPU_L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */ + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT 23 +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT 24 +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) + +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT 26 +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << 
L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) + +#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT 12 +#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) + +#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT 15 +#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) + +/* SHADER_CONFIG register */ +#define SC_ALT_COUNTERS BIT(3) +#define SC_OVERRIDE_FWD_PIXEL_KILL BIT(4) +#define SC_SDC_DISABLE_OQ_DISCARD BIT(6) +#define SC_LS_ALLOW_ATTR_TYPES BIT(16) +#define SC_LS_PAUSEBUFFER_DISABLE BIT(16) +#define SC_TLS_HASH_ENABLE BIT(17) +#define SC_LS_ATTR_CHECK_DISABLE BIT(18) +#define SC_ENABLE_TEXGRD_FLAGS BIT(25) +/* End SHADER_CONFIG register */ + +/* TILER_CONFIG register */ +#define TC_CLOCK_GATE_OVERRIDE BIT(0) + +/* JM_CONFIG register */ +#define JM_TIMESTAMP_OVERRIDE BIT(0) +#define JM_CLOCK_GATE_OVERRIDE BIT(1) +#define JM_JOB_THROTTLE_ENABLE BIT(2) +#define JM_JOB_THROTTLE_LIMIT_SHIFT 3 +#define JM_MAX_JOB_THROTTLE_LIMIT 0x3F +#define JM_FORCE_COHERENCY_FEATURES_SHIFT 2 +#define JM_IDVS_GROUP_SIZE_SHIFT 16 +#define JM_MAX_IDVS_GROUP_SIZE 0x3F + + +/* Job Control regs */ +#define JOB_INT_RAWSTAT 0x1000 +#define JOB_INT_CLEAR 0x1004 +#define JOB_INT_MASK 0x1008 +#define JOB_INT_STAT 0x100c +#define JOB_INT_JS_STATE 0x1010 +#define JOB_INT_THROTTLE 0x1014 + +#define MK_JS_MASK(j) (0x10001 << (j)) +#define JOB_INT_MASK_ERR(j) BIT((j) + 16) +#define JOB_INT_MASK_DONE(j) BIT(j) + +#define JS_BASE 0x1800 +#define JS_HEAD_LO(n) (JS_BASE + ((n) * 0x80) + 0x00) +#define JS_HEAD_HI(n) (JS_BASE + ((n) * 0x80) + 0x04) +#define JS_TAIL_LO(n) (JS_BASE + ((n) * 0x80) + 0x08) 
+#define JS_TAIL_HI(n) (JS_BASE + ((n) * 0x80) + 0x0c) +#define JS_AFFINITY_LO(n) (JS_BASE + ((n) * 0x80) + 0x10) +#define JS_AFFINITY_HI(n) (JS_BASE + ((n) * 0x80) + 0x14) +#define JS_CONFIG(n) (JS_BASE + ((n) * 0x80) + 0x18) +#define JS_XAFFINITY(n) (JS_BASE + ((n) * 0x80) + 0x1c) +#define JS_COMMAND(n) (JS_BASE + ((n) * 0x80) + 0x20) +#define JS_STATUS(n) (JS_BASE + ((n) * 0x80) + 0x24) +#define JS_HEAD_NEXT_LO(n) (JS_BASE + ((n) * 0x80) + 0x40) +#define JS_HEAD_NEXT_HI(n) (JS_BASE + ((n) * 0x80) + 0x44) +#define JS_AFFINITY_NEXT_LO(n) (JS_BASE + ((n) * 0x80) + 0x50) +#define JS_AFFINITY_NEXT_HI(n) (JS_BASE + ((n) * 0x80) + 0x54) +#define JS_CONFIG_NEXT(n) (JS_BASE + ((n) * 0x80) + 0x58) +#define JS_COMMAND_NEXT(n) (JS_BASE + ((n) * 0x80) + 0x60) +#define JS_FLUSH_ID_NEXT(n) (JS_BASE + ((n) * 0x80) + 0x70) + +/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ +#define JS_CONFIG_START_FLUSH_CLEAN BIT(8) +#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) +#define JS_CONFIG_START_MMU BIT(10) +#define JS_CONFIG_JOB_CHAIN_FLAG BIT(11) +#define JS_CONFIG_END_FLUSH_CLEAN BIT(12) +#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION BIT(14) +#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK BIT(15) +#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) + +#define JS_COMMAND_NOP 0x00 +#define JS_COMMAND_START 0x01 +#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +#define JS_STATUS_EVENT_ACTIVE 0x08 + + +/* MMU regs */ +#define MMU_INT_RAWSTAT 0x2000 +#define MMU_INT_CLEAR 0x2004 +#define 
MMU_INT_MASK 0x2008 +#define MMU_INT_STAT 0x200c + +/* AS_COMMAND register commands */ +#define AS_COMMAND_NOP 0x00 /* NOP Operation */ +#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ +#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ +#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs + (deprecated - only for use with T60x) */ +#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then + flush all L2 caches then issue a flush region command to all MMUs */ + +#define MMU_AS(as) (0x2400 + ((as) << 6)) + +#define AS_TRANSTAB_LO(as) (MMU_AS(as) + 0x00) /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x04) /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x08) /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI(as) (MMU_AS(as) + 0x0C) /* (RW) Memory attributes for address space n, high word. 
*/ +#define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10) /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14) /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND(as) (MMU_AS(as) + 0x18) /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS(as) (MMU_AS(as) + 0x1C) /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20) /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24) /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS(as) (MMU_AS(as) + 0x28) /* (RO) Status flags for address space n */ +/* Additional Bifrost AS registers */ +#define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) /* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) /* (RW) Translation table configuration for address space n, high word */ +#define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) /* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_HI(as) (MMU_AS(as) + 0x3C) /* (RO) Secondary fault address for address space n, high word */ + +/* + * Begin LPAE MMU TRANSTAB register values + */ +#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffffffffffff000 +#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY 0x2 +#define AS_TRANSTAB_LPAE_ADRMODE_TABLE 0x3 +#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x3 +#define AS_TRANSTAB_LPAE_READ_INNER BIT(2) +#define AS_TRANSTAB_LPAE_SHARE_OUTER BIT(4) + +#define AS_STATUS_AS_ACTIVE 0x01 + +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) + +#endif diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h new file mode 100644 index
000000000000..a52e0283b90d --- /dev/null +++ b/include/uapi/drm/panfrost_drm.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2018 Broadcom + * Copyright © 2019 Collabora ltd. + */ +#ifndef _PANFROST_DRM_H_ +#define _PANFROST_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_PANFROST_SUBMIT 0x00 +#define DRM_PANFROST_WAIT_BO 0x01 +#define DRM_PANFROST_CREATE_BO 0x02 +#define DRM_PANFROST_MMAP_BO 0x03 +#define DRM_PANFROST_GET_PARAM 0x04 +#define DRM_PANFROST_GET_BO_OFFSET 0x05 + +#define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) +#define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) +#define DRM_IOCTL_PANFROST_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_BO, struct drm_panfrost_create_bo) +#define DRM_IOCTL_PANFROST_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MMAP_BO, struct drm_panfrost_mmap_bo) +#define DRM_IOCTL_PANFROST_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_PARAM, struct drm_panfrost_get_param) +#define DRM_IOCTL_PANFROST_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_BO_OFFSET, struct drm_panfrost_get_bo_offset) + +#define PANFROST_JD_REQ_FS (1 << 0) +/** + * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D + * engine. + * + * This asks the kernel to have the GPU execute a render command list. + */ +struct drm_panfrost_submit { + + /** Address to GPU mapping of job descriptor */ + __u64 jc; + + /** An optional array of sync objects to wait on before starting this job. */ + __u64 in_syncs; + + /** Number of sync objects to wait on before starting this job. */ + __u32 in_sync_count; + + /** An optional sync object to place the completion fence in. */ + __u32 out_sync; + + /** Pointer to a u32 array of the BOs that are referenced by the job. 
*/ + __u64 bo_handles; + + /** Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + /** A combination of PANFROST_JD_REQ_* */ + __u32 requirements; +}; + +/** + * struct drm_panfrost_wait_bo - ioctl argument for waiting for + * completion of the last DRM_PANFROST_SUBMIT on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_panfrost_wait_bo { + __u32 handle; + __u32 pad; + __s64 timeout_ns; /* absolute */ +}; + +/** + * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_panfrost_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + /* Pad, must be zero-filled. */ + __u32 pad; + /** + * Returned offset for the BO in the GPU address space. This offset + * is private to the DRM fd and is valid for the lifetime of the GEM + * handle. + * + * This offset value will always be nonzero, since various HW + * units treat 0 specially. + */ + __u64 offset; +}; + +/** + * struct drm_panfrost_mmap_bo - ioctl argument for mapping Panfrost BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_panfrost_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. 
*/ + __u64 offset; +}; + +enum drm_panfrost_param { + DRM_PANFROST_PARAM_GPU_PROD_ID, +}; + +struct drm_panfrost_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + +/** + * Returns the offset for the BO in the GPU address space for this DRM fd. + * This is the same value returned by drm_panfrost_create_bo, if that was called + * from this DRM fd. + */ +struct drm_panfrost_get_bo_offset { + __u32 handle; + __u32 pad; + __u64 offset; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _PANFROST_DRM_H_ */ From 4dff47c7607a7ceb9916fec179dc88e7b90f3b7c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 11 Apr 2019 16:53:13 -0500 Subject: [PATCH 17/30] drm/panfrost: Add support for 2MB page entries Add support for 2MB sized pages. This will improve our map and unmap times and save a bit of memory by avoiding 3rd level page tables for contiguous allocations. As we use shmem for buffers and huge page allocations for shmem are off by default, there isn't an improvement out of the box and userspace must enable THP for shmem. It's not clear if the h/w can support 1GB page sizes which standard ARM long format descriptors support. In any case, it is unlikely we'll see any contiguous 1GB allocations on current h/w. 
Cc: Tomeu Vizoso Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Rob Herring Acked-by: Alyssa Rosenzweig Reviewed-by: Steven Price Acked-by: Tomeu Vizoso Link: https://patchwork.freedesktop.org/patch/msgid/20190411215313.1937-1-robh@kernel.org --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 32 ++++++++++++++++++------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 502af37d4b62..762b1bd2a8c2 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -137,6 +137,14 @@ static void mmu_disable(struct panfrost_device *pfdev, u32 as_nr) write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); } +static size_t get_pgsize(u64 addr, size_t size) +{ + if (addr & (SZ_2M - 1) || size < SZ_2M) + return SZ_4K; + + return SZ_2M; +} + int panfrost_mmu_map(struct panfrost_gem_object *bo) { struct drm_gem_object *obj = &bo->base.base; @@ -165,10 +173,12 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo) dev_dbg(pfdev->dev, "map: iova=%llx, paddr=%lx, len=%zx", iova, paddr, len); while (len) { - ops->map(ops, iova, paddr, SZ_4K, IOMMU_WRITE | IOMMU_READ); - iova += SZ_4K; - paddr += SZ_4K; - len -= SZ_4K; + size_t pgsize = get_pgsize(iova | paddr, len); + + ops->map(ops, iova, paddr, pgsize, IOMMU_WRITE | IOMMU_READ); + iova += pgsize; + paddr += pgsize; + len -= pgsize; } } @@ -202,9 +212,15 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo) mutex_lock(&pfdev->mmu->lock); while (unmapped_len < len) { - ops->unmap(ops, iova, SZ_4K); - iova += SZ_4K; - unmapped_len += SZ_4K; + size_t unmapped_page; + size_t pgsize = get_pgsize(iova, len - unmapped_len); + + unmapped_page = ops->unmap(ops, iova, pgsize); + if (!unmapped_page) + break; + + iova += unmapped_page; + unmapped_len += unmapped_page; } mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT, @@ -342,7 +358,7 @@ int panfrost_mmu_init(struct panfrost_device *pfdev) 
mmu_write(pfdev, MMU_INT_MASK, ~0); pfdev->mmu->pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = SZ_4K, // | SZ_2M | SZ_1G), + .pgsize_bitmap = SZ_4K | SZ_2M, .ias = FIELD_GET(0xff, pfdev->features.mmu_features), .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), .tlb = &mmu_tlb_ops, From 5e498abf14858945f1249d9cc4ff1e8715a307e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 15 Apr 2019 14:46:34 +0200 Subject: [PATCH 18/30] dma-buf: explicitly note that dma-fence-chains use 64bit seqno MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of checking the upper values of the sequence number use an explicit field in the dma_fence_ops structure to note if a sequence should be 32bit or 64bit. Signed-off-by: Christian König Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/299655/ --- drivers/dma-buf/dma-fence-chain.c | 3 ++- drivers/dma-buf/sw_sync.c | 2 +- drivers/dma-buf/sync_file.c | 3 ++- include/linux/dma-fence.h | 21 +++++++++++++++------ 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index c729f98a7bd3..93c42078cb57 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -193,6 +193,7 @@ static void dma_fence_chain_release(struct dma_fence *fence) } const struct dma_fence_ops dma_fence_chain_ops = { + .use_64bit_seqno = true, .get_driver_name = dma_fence_chain_get_driver_name, .get_timeline_name = dma_fence_chain_get_timeline_name, .enable_signaling = dma_fence_chain_enable_signaling, @@ -225,7 +226,7 @@ void dma_fence_chain_init(struct dma_fence_chain *chain, init_irq_work(&chain->work, dma_fence_chain_irq_work); /* Try to reuse the context of the previous chain node.
*/ - if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) { + if (prev_chain && __dma_fence_is_later(seqno, prev->seqno, prev->ops)) { context = prev->context; chain->prev_seqno = prev->seqno; } else { diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 32dcf7b4c935..119b2ffbc2c9 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -161,7 +161,7 @@ static bool timeline_fence_signaled(struct dma_fence *fence) { struct sync_timeline *parent = dma_fence_parent(fence); - return !__dma_fence_is_later(fence->seqno, parent->value); + return !__dma_fence_is_later(fence->seqno, parent->value, fence->ops); } static bool timeline_fence_enable_signaling(struct dma_fence *fence) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 4f6305ca52c8..ed3fb6e5224c 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -258,7 +258,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, i_b++; } else { - if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno)) + if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno, + pt_a->ops)) add_fence(fences, &i, pt_a); else add_fence(fences, &i, pt_b); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 6b788467b2e3..974717d6ac0c 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -111,6 +111,14 @@ struct dma_fence_cb { * */ struct dma_fence_ops { + /** + * @use_64bit_seqno: + * + * True if this dma_fence implementation uses 64bit seqno, false + * otherwise. + */ + bool use_64bit_seqno; + /** * @get_driver_name: * @@ -410,18 +418,19 @@ dma_fence_is_signaled(struct dma_fence *fence) * __dma_fence_is_later - return if f1 is chronologically later than f2 * @f1: the first fence's seqno * @f2: the second fence's seqno from the same context + * @ops: dma_fence_ops associated with the seqno * * Returns true if f1 is chronologically later than f2. 
Both fences must be * from the same context, since a seqno is not common across contexts. */ -static inline bool __dma_fence_is_later(u64 f1, u64 f2) +static inline bool __dma_fence_is_later(u64 f1, u64 f2, + const struct dma_fence_ops *ops) { /* This is for backward compatibility with drivers which can only handle - * 32bit sequence numbers. Use a 64bit compare when any of the higher - * bits are none zero, otherwise use a 32bit compare with wrap around - * handling. + * 32bit sequence numbers. Use a 64bit compare when the driver says to + * do so. */ - if (upper_32_bits(f1) || upper_32_bits(f2)) + if (ops->use_64bit_seqno) return f1 > f2; return (int)(lower_32_bits(f1) - lower_32_bits(f2)) > 0; @@ -441,7 +450,7 @@ static inline bool dma_fence_is_later(struct dma_fence *f1, if (WARN_ON(f1->context != f2->context)) return false; - return __dma_fence_is_later(f1->seqno, f2->seqno); + return __dma_fence_is_later(f1->seqno, f2->seqno, f1->ops); } /** From 76356a966e332fa0f70d10fd74a66d518af8ce0a Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 5 Apr 2019 18:41:17 +1030 Subject: [PATCH 19/30] drm: aspeed: Clean up Kconfig options The GFX IP is inside of the ASPEED BMC SoC so there is little use enabling it on a kernel that does not support ASPEED. When building with COMPILE_TEST the architecture may not have CMA support, so to avoid breaking the build we only select these options if the architecture supports the contiguous allocator. I suspect the DRM_PANEL came from a cut/paste error.
Fixes: 4f2a8f5898ec ("drm: Add ASPEED GFX driver") Reported-by: Stephen Rothwell Reported-by: kernel test robot Signed-off-by: Joel Stanley Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190405081117.27339-1-joel@jms.id.au --- drivers/gpu/drm/aspeed/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/aspeed/Kconfig b/drivers/gpu/drm/aspeed/Kconfig index 42b74d18a41b..cccab520e02f 100644 --- a/drivers/gpu/drm/aspeed/Kconfig +++ b/drivers/gpu/drm/aspeed/Kconfig @@ -1,11 +1,11 @@ config DRM_ASPEED_GFX tristate "ASPEED BMC Display Controller" depends on DRM && OF + depends on (COMPILE_TEST || ARCH_ASPEED) select DRM_KMS_HELPER select DRM_KMS_CMA_HELPER - select DRM_PANEL - select DMA_CMA - select CMA + select DMA_CMA if HAVE_DMA_CONTIGUOUS + select CMA if HAVE_DMA_CONTIGUOUS select MFD_SYSCON help Chose this option if you have an ASPEED AST2500 SOC Display From c8f005684c98f4d9942baec13ad98054dbf312a0 Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Mon, 15 Apr 2019 10:28:05 -0700 Subject: [PATCH 20/30] drm: Expose "FB_DAMAGE_CLIPS" property to atomic aware user-space only Plane property "FB_DAMAGE_CLIPS" can only be used by atomic aware user-space, so no point exposing it otherwise. 
Cc: Signed-off-by: Deepak Rawat Reviewed-by: Daniel Vetter Fixes: d3b21767821e ("drm: Add a new plane property to send damage during plane update") Link: https://patchwork.freedesktop.org/patch/msgid/20190415172814.9840-1-drawat@vmware.com --- drivers/gpu/drm/drm_mode_config.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index 4a1c2023ccf0..1a346ae1599d 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -297,8 +297,9 @@ static int drm_mode_create_standard_properties(struct drm_device *dev) return -ENOMEM; dev->mode_config.prop_crtc_id = prop; - prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "FB_DAMAGE_CLIPS", - 0); + prop = drm_property_create(dev, + DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB, + "FB_DAMAGE_CLIPS", 0); if (!prop) return -ENOMEM; dev->mode_config.prop_fb_damage_clips = prop; From 5d5a179d3e90a8385b115e4bd19826ea0bcc6d11 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 1 Apr 2019 15:26:33 -0700 Subject: [PATCH 21/30] drm: Add helpers for setting up an array of dma_fence dependencies. I needed to add implicit dependency support for v3d, and Rob Herring has been working on it for panfrost, and I had recently looked at the lima implementation so I think this will be a good intersection of what we all want and simplify our scheduler implementations. 
v2: Rebase on xa_limit_32b API change, and tiny checkpatch cleanups on the way in (unsigned int vs unsigned, extra return before EXPORT_SYMBOL_GPL) Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20190401222635.25013-6-eric@anholt.net Reviewed-and-tested-by: Qiang Yu (v1) --- drivers/gpu/drm/drm_gem.c | 93 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_gem.h | 5 +++ 2 files changed, 98 insertions(+) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index e93043b64c2d..fae4676707b6 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1367,3 +1367,96 @@ drm_gem_unlock_reservations(struct drm_gem_object **objs, int count, ww_acquire_fini(acquire_ctx); } EXPORT_SYMBOL(drm_gem_unlock_reservations); + +/** + * drm_gem_fence_array_add - Adds the fence to an array of fences to be + * waited on, deduplicating fences from the same context. + * + * @fence_array array of dma_fence * for the job to block on. + * @fence the dma_fence to add to the list of dependencies. + * + * Returns: + * 0 on success, or an error on failing to expand the array. + */ +int drm_gem_fence_array_add(struct xarray *fence_array, + struct dma_fence *fence) +{ + struct dma_fence *entry; + unsigned long index; + u32 id = 0; + int ret; + + if (!fence) + return 0; + + /* Deduplicate if we already depend on a fence from the same context. + * This lets the size of the array of deps scale with the number of + * engines involved, rather than the number of BOs. 
+ */ + xa_for_each(fence_array, index, entry) { + if (entry->context != fence->context) + continue; + + if (dma_fence_is_later(fence, entry)) { + dma_fence_put(entry); + xa_store(fence_array, index, fence, GFP_KERNEL); + } else { + dma_fence_put(fence); + } + return 0; + } + + ret = xa_alloc(fence_array, &id, fence, xa_limit_32b, GFP_KERNEL); + if (ret != 0) + dma_fence_put(fence); + + return ret; +} +EXPORT_SYMBOL(drm_gem_fence_array_add); + +/** + * drm_gem_fence_array_add_implicit - Adds the implicit dependencies tracked + * in the GEM object's reservation object to an array of dma_fences for use in + * scheduling a rendering job. + * + * This should be called after drm_gem_lock_reservations() on your array of + * GEM objects used in the job but before updating the reservations with your + * own fences. + * + * @fence_array array of dma_fence * for the job to block on. + * @obj the gem object to add new dependencies from. + * @write whether the job might write the object (so we need to depend on + * shared fences in the reservation object). 
+ */ +int drm_gem_fence_array_add_implicit(struct xarray *fence_array, + struct drm_gem_object *obj, + bool write) +{ + int ret; + struct dma_fence **fences; + unsigned int i, fence_count; + + if (!write) { + struct dma_fence *fence = + reservation_object_get_excl_rcu(obj->resv); + + return drm_gem_fence_array_add(fence_array, fence); + } + + ret = reservation_object_get_fences_rcu(obj->resv, NULL, + &fence_count, &fences); + if (ret || !fence_count) + return ret; + + for (i = 0; i < fence_count; i++) { + ret = drm_gem_fence_array_add(fence_array, fences[i]); + if (ret) + break; + } + + for (; i < fence_count; i++) + dma_fence_put(fences[i]); + kfree(fences); + return ret; +} +EXPORT_SYMBOL(drm_gem_fence_array_add_implicit); diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 5ee85c9eaa9d..5047c7ee25f5 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -390,6 +390,11 @@ int drm_gem_lock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx); void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx); +int drm_gem_fence_array_add(struct xarray *fence_array, + struct dma_fence *fence); +int drm_gem_fence_array_add_implicit(struct xarray *fence_array, + struct drm_gem_object *obj, + bool write); int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, u32 handle, u64 *offset); int drm_gem_dumb_destroy(struct drm_file *file, From f3fb20074e02865e1724c586eae46a77a7f2266b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 1 Apr 2019 15:26:35 -0700 Subject: [PATCH 22/30] drm/lima: Use the drm_gem_fence_array_add helpers for our deps. It's a pretty direct port of what I did for v3d. 
Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20190401222635.25013-8-eric@anholt.net Reviewed-and-tested-by: Qiang Yu --- drivers/gpu/drm/lima/lima_gem.c | 37 +---------------- drivers/gpu/drm/lima/lima_sched.c | 66 ++++++------------------------- drivers/gpu/drm/lima/lima_sched.h | 6 +-- 3 files changed, 16 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index 1d69498bc17e..477c0f766663 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -145,40 +145,7 @@ static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo, if (explicit) return 0; - /* implicit sync use bo fence in resv obj */ - if (write) { - unsigned nr_fences; - struct dma_fence **fences; - int i; - - err = reservation_object_get_fences_rcu( - bo->gem.resv, NULL, &nr_fences, &fences); - if (err || !nr_fences) - return err; - - for (i = 0; i < nr_fences; i++) { - err = lima_sched_task_add_dep(task, fences[i]); - if (err) - break; - } - - /* for error case free remaining fences */ - for ( ; i < nr_fences; i++) - dma_fence_put(fences[i]); - - kfree(fences); - } else { - struct dma_fence *fence; - - fence = reservation_object_get_excl_rcu(bo->gem.resv); - if (fence) { - err = lima_sched_task_add_dep(task, fence); - if (err) - dma_fence_put(fence); - } - } - - return err; + return drm_gem_fence_array_add_implicit(&task->deps, &bo->gem, write); } static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos, @@ -251,7 +218,7 @@ static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit) if (err) return err; - err = lima_sched_task_add_dep(submit->task, fence); + err = drm_gem_fence_array_add(&submit->task->deps, fence); if (err) { dma_fence_put(fence); return err; diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 97bd9c1deb87..e253d031fb3d 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ 
b/drivers/gpu/drm/lima/lima_sched.c @@ -3,6 +3,7 @@ #include #include +#include #include "lima_drv.h" #include "lima_sched.h" @@ -126,19 +127,24 @@ int lima_sched_task_init(struct lima_sched_task *task, task->num_bos = num_bos; task->vm = lima_vm_get(vm); + + xa_init_flags(&task->deps, XA_FLAGS_ALLOC); + return 0; } void lima_sched_task_fini(struct lima_sched_task *task) { + struct dma_fence *fence; + unsigned long index; int i; drm_sched_job_cleanup(&task->base); - for (i = 0; i < task->num_dep; i++) - dma_fence_put(task->dep[i]); - - kfree(task->dep); + xa_for_each(&task->deps, index, fence) { + dma_fence_put(fence); + } + xa_destroy(&task->deps); if (task->bos) { for (i = 0; i < task->num_bos; i++) @@ -149,42 +155,6 @@ void lima_sched_task_fini(struct lima_sched_task *task) lima_vm_put(task->vm); } -int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence) -{ - int i, new_dep = 4; - - /* same context's fence is definitly earlier then this task */ - if (fence->context == task->base.s_fence->finished.context) { - dma_fence_put(fence); - return 0; - } - - if (task->dep && task->num_dep == task->max_dep) - new_dep = task->max_dep * 2; - - if (task->max_dep < new_dep) { - void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL); - - if (!dep) - return -ENOMEM; - - task->max_dep = new_dep; - task->dep = dep; - } - - for (i = 0; i < task->num_dep; i++) { - if (task->dep[i]->context == fence->context && - dma_fence_is_later(fence, task->dep[i])) { - dma_fence_put(task->dep[i]); - task->dep[i] = fence; - return 0; - } - } - - task->dep[task->num_dep++] = fence; - return 0; -} - int lima_sched_context_init(struct lima_sched_pipe *pipe, struct lima_sched_context *context, atomic_t *guilty) @@ -213,21 +183,9 @@ static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job, struct drm_sched_entity *entity) { struct lima_sched_task *task = to_lima_task(job); - int i; - for (i = 0; i < task->num_dep; i++) { - struct 
dma_fence *fence = task->dep[i]; - - if (!task->dep[i]) - continue; - - task->dep[i] = NULL; - - if (!dma_fence_is_signaled(fence)) - return fence; - - dma_fence_put(fence); - } + if (!xa_empty(&task->deps)) + return xa_erase(&task->deps, task->last_dep++); return NULL; } diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h index b017cfa7e327..928af91c1118 100644 --- a/drivers/gpu/drm/lima/lima_sched.h +++ b/drivers/gpu/drm/lima/lima_sched.h @@ -14,9 +14,8 @@ struct lima_sched_task { struct lima_vm *vm; void *frame; - struct dma_fence **dep; - int num_dep; - int max_dep; + struct xarray deps; + unsigned long last_dep; struct lima_bo **bos; int num_bos; @@ -78,7 +77,6 @@ int lima_sched_task_init(struct lima_sched_task *task, struct lima_bo **bos, int num_bos, struct lima_vm *vm); void lima_sched_task_fini(struct lima_sched_task *task); -int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence); int lima_sched_context_init(struct lima_sched_pipe *pipe, struct lima_sched_context *context, From b7147e9a15718f10148435983838bf30c9943ef4 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 16 Apr 2019 11:05:33 +0200 Subject: [PATCH 23/30] drm: add drm_format_helper.c to kerneldoc Also drop the dstclip parameter sphinx has warned about (leftover from an earlier patch version). Signed-off-by: Gerd Hoffmann Acked-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20190416090533.28374-1-kraxel@redhat.com --- Documentation/gpu/drm-kms-helpers.rst | 6 ++++++ drivers/gpu/drm/drm_format_helper.c | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 58b375e47615..14102ae035dc 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -107,6 +107,12 @@ fbdev Helper Functions Reference .. 
kernel-doc:: drivers/gpu/drm/drm_fb_helper.c :export: +format Helper Functions Reference +================================= + +.. kernel-doc:: drivers/gpu/drm/drm_format_helper.c + :export: + Framebuffer CMA Helper Functions Reference ========================================== diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index da388012df2a..a18da35145b7 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -235,7 +235,6 @@ static void drm_fb_xrgb8888_to_rgb888_line(u8 *dbuf, u32 *sbuf, * @vaddr: XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy - * @dstclip: Clip destination too. * * Drivers can use this function for RGB888 devices that don't natively * support XRGB8888. From d48ae1f0532cef3f57793ea4df551f695e63f788 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 16 Apr 2019 22:43:53 +0800 Subject: [PATCH 24/30] drm/lima: Make lima_sched_ops static Fix sparse warning: drivers/gpu/drm/lima/lima_sched.c:356:36: warning: symbol 'lima_sched_ops' was not declared. Should it be static? 
Fixes: a1d2a6339961 ("drm/lima: driver for ARM Mali4xx GPUs") Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20190416144353.34024-1-yuehaibing@huawei.com --- drivers/gpu/drm/lima/lima_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index e253d031fb3d..d53bd45f8d96 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -311,7 +311,7 @@ static void lima_sched_free_job(struct drm_sched_job *job) kmem_cache_free(pipe->task_slab, task); } -const struct drm_sched_backend_ops lima_sched_ops = { +static const struct drm_sched_backend_ops lima_sched_ops = { .dependency = lima_sched_dependency, .run_job = lima_sched_run_job, .timedout_job = lima_sched_timedout_job, From 1f2f0599579cac371cf9db05a1b00e066db401fb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 16 Apr 2019 22:58:55 +0800 Subject: [PATCH 25/30] drm/sun4i: Make some symbols static Fix sparse warnings: drivers/gpu/drm/sun4i/sun8i_tcon_top.c:271:36: warning: symbol 'sun8i_r40_tcon_top_quirks' was not declared. Should it be static? drivers/gpu/drm/sun4i/sun8i_tcon_top.c:276:36: warning: symbol 'sun50i_h6_tcon_top_quirks' was not declared. Should it be static? drivers/gpu/drm/sun4i/sun4i_tcon.c:239:6: warning: symbol 'sun4i_tcon_set_mux' was not declared. Should it be static? 
Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190416145855.20852-1-yuehaibing@huawei.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 4 ++-- drivers/gpu/drm/sun4i/sun8i_tcon_top.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index fa92e992a282..9d8d8124b1f6 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -236,8 +236,8 @@ static struct sun4i_tcon *sun4i_get_tcon0(struct drm_device *drm) return NULL; } -void sun4i_tcon_set_mux(struct sun4i_tcon *tcon, int channel, - const struct drm_encoder *encoder) +static void sun4i_tcon_set_mux(struct sun4i_tcon *tcon, int channel, + const struct drm_encoder *encoder) { int ret = -ENOTSUPP; diff --git a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c index fc36e0c10a37..241904357d9e 100644 --- a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c +++ b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c @@ -268,12 +268,12 @@ static int sun8i_tcon_top_remove(struct platform_device *pdev) return 0; } -const struct sun8i_tcon_top_quirks sun8i_r40_tcon_top_quirks = { +static const struct sun8i_tcon_top_quirks sun8i_r40_tcon_top_quirks = { .has_tcon_tv1 = true, .has_dsi = true, }; -const struct sun8i_tcon_top_quirks sun50i_h6_tcon_top_quirks = { +static const struct sun8i_tcon_top_quirks sun50i_h6_tcon_top_quirks = { /* Nothing special */ }; From 2ccb8433ebe8dc99ed7cd0e3b8ff6976dcc05e3d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 13 Apr 2019 22:14:55 +0800 Subject: [PATCH 26/30] drm/meson: Make some functions static Fix sparse warnings: drivers/gpu/drm/meson/meson_viu.c:93:6: warning: symbol 'meson_viu_set_g12a_osd1_matrix' was not declared. Should it be static? drivers/gpu/drm/meson/meson_viu.c:121:6: warning: symbol 'meson_viu_set_osd_matrix' was not declared. Should it be static? 
drivers/gpu/drm/meson/meson_viu.c:190:6: warning: symbol 'meson_viu_set_osd_lut' was not declared. Should it be static? Signed-off-by: YueHaibing Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190413141455.34020-1-yuehaibing@huawei.com --- drivers/gpu/drm/meson/meson_viu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index 0169c98b01c9..b59072342cae 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -90,8 +90,8 @@ static int eotf_bypass_coeff[EOTF_COEFF_SIZE] = { EOTF_COEFF_RIGHTSHIFT /* right shift */ }; -void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv, int *m, - bool csc_on) +static void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv, + int *m, bool csc_on) { /* VPP WRAP OSD1 matrix */ writel(((m[0] & 0xfff) << 16) | (m[1] & 0xfff), @@ -118,8 +118,8 @@ void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv, int *m, priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_EN_CTRL)); } -void meson_viu_set_osd_matrix(struct meson_drm *priv, - enum viu_matrix_sel_e m_select, +static void meson_viu_set_osd_matrix(struct meson_drm *priv, + enum viu_matrix_sel_e m_select, int *m, bool csc_on) { if (m_select == VIU_MATRIX_OSD) { @@ -187,10 +187,10 @@ void meson_viu_set_osd_matrix(struct meson_drm *priv, #define OSD_EOTF_LUT_SIZE 33 #define OSD_OETF_LUT_SIZE 41 -void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel, - unsigned int *r_map, unsigned int *g_map, - unsigned int *b_map, - bool csc_on) +static void +meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel, + unsigned int *r_map, unsigned int *g_map, + unsigned int *b_map, bool csc_on) { unsigned int addr_port; unsigned int data_port; From 852ce7285c99e3f7b56e76511e1b33c645a2b648 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 8 Apr 2019 11:01:37 +0200 
Subject: [PATCH 27/30] drm/meson: add size and alignment requirements for dumb buffers The Amlogic SoCs Canvas buffers stride must be aligned on 64 bytes and overall size should be aligned on PAGE width. Add a custom dumb_create op to enforce these requirements. Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Suggested-by: Sky Zhou Signed-off-by: Neil Armstrong Reviewed-by: Sky Zhou Link: https://patchwork.freedesktop.org/patch/msgid/20190408090137.2402-1-narmstrong@baylibre.com --- drivers/gpu/drm/meson/meson_drv.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 937cfabb95df..568dff5c72bb 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -90,6 +90,18 @@ static irqreturn_t meson_irq(int irq, void *arg) return IRQ_HANDLED; } +static int meson_dumb_create(struct drm_file *file, struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + /* + * We need 64bytes aligned stride, and PAGE aligned size + */ + args->pitch = ALIGN(DIV_ROUND_UP(args->width * args->bpp, 8), SZ_64); + args->size = PAGE_ALIGN(args->pitch * args->height); + + return drm_gem_cma_dumb_create_internal(file, dev, args); +} + DEFINE_DRM_GEM_CMA_FOPS(fops); static struct drm_driver meson_driver = { @@ -112,7 +124,7 @@ static struct drm_driver meson_driver = { .gem_prime_mmap = drm_gem_cma_prime_mmap, /* GEM Ops */ - .dumb_create = drm_gem_cma_dumb_create, + .dumb_create = meson_dumb_create, .gem_free_object_unlocked = drm_gem_cma_free_object, .gem_vm_ops = &drm_gem_cma_vm_ops, From bd84995f062563d731bd00f5d017b71d2edd573c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 Apr 2019 16:28:44 +0200 Subject: [PATCH 28/30] drm/mcde: Add device tree bindings This adds the device tree bindings for the ST-Ericsson Multi Channel Display Engine MCDE as found in the U8500 SoCs. 
Cc: devicetree@vger.kernel.org Reviewed-by: Rob Herring Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20190416142844.12038-1-linus.walleij@linaro.org --- .../devicetree/bindings/display/ste,mcde.txt | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/ste,mcde.txt diff --git a/Documentation/devicetree/bindings/display/ste,mcde.txt b/Documentation/devicetree/bindings/display/ste,mcde.txt new file mode 100644 index 000000000000..4c33c692bd5f --- /dev/null +++ b/Documentation/devicetree/bindings/display/ste,mcde.txt @@ -0,0 +1,104 @@ +ST-Ericsson Multi Channel Display Engine MCDE + +The ST-Ericsson MCDE is a display controller with support for compositing +and displaying several channels memory resident graphics data on DSI or +LCD displays or bridges. It is used in the ST-Ericsson U8500 platform. + +Required properties: + +- compatible: must be: + "ste,mcde" +- reg: register base for the main MCDE control registers, should be + 0x1000 in size +- interrupts: the interrupt line for the MCDE +- epod-supply: a phandle to the EPOD regulator +- vana-supply: a phandle to the analog voltage regulator +- clocks: an array of the MCDE clocks in this strict order: + MCDECLK (main MCDE clock), LCDCLK (LCD clock), PLLDSI + (HDMI clock), DSI0ESCLK (DSI0 energy save clock), + DSI1ESCLK (DSI1 energy save clock), DSI2ESCLK (DSI2 energy + save clock) +- clock-names: must be the following array: + "mcde", "lcd", "hdmi" + to match the required clock inputs above. +- #address-cells: should be <1> (for the DSI hosts that will be children) +- #size-cells: should be <1> (for the DSI hosts that will be children) +- ranges: this should always be stated + +Required subnodes: + +The devicetree must specify subnodes for the DSI host adapters. 
+These must have the following characteristics: + +- compatible: must be: + "ste,mcde-dsi" +- reg: must specify the register range for the DSI host +- vana-supply: phandle to the VANA voltage regulator +- clocks: phandles to the high speed and low power (energy save) clocks + the high speed clock is not present on the third (dsi2) block, so it + should only have the "lp" clock +- clock-names: "hs" for the high speed clock and "lp" for the low power + (energy save) clock +- #address-cells: should be <1> +- #size-cells: should be <0> + +Display panels and bridges will appear as children on the DSI hosts, and +the displays are connected to the DSI hosts using the common binding +for video transmitter interfaces; see +Documentation/devicetree/bindings/media/video-interfaces.txt + +If a DSI host is unused (not connected) it will have no children defined. + +Example: + +mcde@a0350000 { + compatible = "ste,mcde"; + reg = <0xa0350000 0x1000>; + interrupts = ; + epod-supply = <&db8500_b2r2_mcde_reg>; + vana-supply = <&ab8500_ldo_ana_reg>; + clocks = <&prcmu_clk PRCMU_MCDECLK>, /* Main MCDE clock */ + <&prcmu_clk PRCMU_LCDCLK>, /* LCD clock */ + <&prcmu_clk PRCMU_PLLDSI>; /* HDMI clock */ + clock-names = "mcde", "lcd", "hdmi"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + dsi0: dsi@a0351000 { + compatible = "ste,mcde-dsi"; + reg = <0xa0351000 0x1000>; + vana-supply = <&ab8500_ldo_ana_reg>; + clocks = <&prcmu_clk PRCMU_DSI0CLK>, <&prcmu_clk PRCMU_DSI0ESCCLK>; + clock-names = "hs", "lp"; + #address-cells = <1>; + #size-cells = <0>; + + panel { + compatible = "samsung,s6d16d0"; + reg = <0>; + vdd1-supply = <&ab8500_ldo_aux1_reg>; + reset-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>; + }; + + }; + dsi1: dsi@a0352000 { + compatible = "ste,mcde-dsi"; + reg = <0xa0352000 0x1000>; + vana-supply = <&ab8500_ldo_ana_reg>; + clocks = <&prcmu_clk PRCMU_DSI1CLK>, <&prcmu_clk PRCMU_DSI1ESCCLK>; + clock-names = "hs", "lp"; + #address-cells = <1>; + #size-cells = <0>; + }; + dsi2: 
dsi@a0353000 { + compatible = "ste,mcde-dsi"; + reg = <0xa0353000 0x1000>; + vana-supply = <&ab8500_ldo_ana_reg>; + /* This DSI port only has the Low Power / Energy Save clock */ + clocks = <&prcmu_clk PRCMU_DSI2ESCCLK>; + clock-names = "lp"; + #address-cells = <1>; + #size-cells = <0>; + }; +}; From 9e0b597534b4c065e2c083c7478d6f3175088fdd Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Sat, 24 Nov 2018 15:06:27 -0500 Subject: [PATCH 29/30] dt-bindings: drm/panel: simple: add lg,acx467akm-7 panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add binding for the LG ACX467AKM-7 4.95" 1080×1920 LCD panel that is found on the LG Nexus 5 (hammerhead) phone. This appears to be a JDI panel based on some Internet searches, however a specific model number could not be found. I disassembled an old Nexus 5 with a broken screen and the LG part number is the only model number present on the back of the panel, so I think that is probably the best ID to use. Signed-off-by: Brian Masney Reviewed-by: Linus Walleij Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20181124200628.24393-1-masneyb@onstation.org --- .../devicetree/bindings/display/panel/lg,acx467akm-7.txt | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt diff --git a/Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt b/Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt new file mode 100644 index 000000000000..fc1e1b325e49 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt @@ -0,0 +1,7 @@ +LG ACX467AKM-7 4.95" 1080×1920 LCD Panel + +Required properties: +- compatible: must be "lg,acx467akm-7" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. 
From debcd8f954be2b1f643e76b2400bc7c3d12b4594 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Sat, 24 Nov 2018 15:06:28 -0500 Subject: [PATCH 30/30] drm/panel: simple: add lg,acx467akm-7 panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ACX467AKM-7 4.95" 1080×1920 LCD panel that is found on the LG Nexus 5 (hammerhead) phone. Signed-off-by: Jonathan Marek [masneyb@onstation.org: checkpatch fixes; rename jdi,1080p-hammerhead binding to lg,acx467akm-7.] Signed-off-by: Brian Masney Reviewed-by: Linus Walleij Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20181124200628.24393-2-masneyb@onstation.org --- drivers/gpu/drm/panel/panel-simple.c | 31 ++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 8fee7a8b29d9..569be4efd8d1 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3025,6 +3025,34 @@ static const struct panel_desc_dsi panasonic_vvx10f004b00 = { .lanes = 4, }; +static const struct drm_display_mode lg_acx467akm_7_mode = { + .clock = 150000, + .hdisplay = 1080, + .hsync_start = 1080 + 2, + .hsync_end = 1080 + 2 + 2, + .htotal = 1080 + 2 + 2 + 2, + .vdisplay = 1920, + .vsync_start = 1920 + 2, + .vsync_end = 1920 + 2 + 2, + .vtotal = 1920 + 2 + 2 + 2, + .vrefresh = 60, +}; + +static const struct panel_desc_dsi lg_acx467akm_7 = { + .desc = { + .modes = &lg_acx467akm_7_mode, + .num_modes = 1, + .bpc = 8, + .size = { + .width = 62, + .height = 110, + }, + }, + .flags = 0, + .format = MIPI_DSI_FMT_RGB888, + .lanes = 4, +}; + static const struct of_device_id dsi_of_match[] = { { .compatible = "auo,b080uan01", @@ -3041,6 +3069,9 @@ static const struct of_device_id dsi_of_match[] = { }, { .compatible = "panasonic,vvx10f004b00", .data = &panasonic_vvx10f004b00 + }, { + .compatible = "lg,acx467akm-7", + .data = &lg_acx467akm_7 }, { /* 
sentinel */ }