From 1d204ee10817aed6666e020ef1b4c643d68bef11 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 18 Mar 2022 13:43:32 -0400 Subject: [PATCH 01/10] drm/ttm: Fix a kernel oops due to an invalid read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The res is initialized here only if there's no errors so passing it to ttm_resource_fini in the error paths results in a kernel oops. In the error paths, instead of the unitialized res, we have to use to use node->base on which ttm_resource_init was called. Sample affected backtrace: Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d8 Mem abort info: ESR = 0x96000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000000106ac0000 [00000000000000d8] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 96000004 [#1] SMP Modules linked in: bnep vsock_loopback vmw_vsock_virtio_transport_common vsock snd_hda_codec_generic snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep > CPU: 0 PID: 1197 Comm: gnome-shell Tainted: G U 5.17.0-rc2-vmwgfx #2 Hardware name: VMware, Inc. VBSA/VBSA, BIOS VEFI 12/31/2020 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : ttm_resource_fini+0x5c/0xac [ttm] lr : ttm_range_man_alloc+0x128/0x1e0 [ttm] sp : ffff80000d783510 x29: ffff80000d783510 x28: 0000000000000000 x27: ffff000086514400 x26: 0000000000000300 x25: ffff0000809f9e78 x24: 0000000000000000 x23: ffff80000d783680 x22: ffff000086514400 x21: 00000000ffffffe4 x20: ffff80000d7836a0 x19: ffff0000809f9e00 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000800 x12: ffff0000f2600a00 x11: 000000000000fc96 x10: 0000000000000000 x9 : ffff800001295c18 x8 : 0000000000000000 x7 : 0000000000000300 x6 : 0000000000000000 x5 : 0000000000000000 x4 : ffff0000f1034e20 x3 : ffff0000f1034600 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000600000 Call trace: ttm_resource_fini+0x5c/0xac [ttm] ttm_range_man_alloc+0x128/0x1e0 [ttm] ttm_resource_alloc+0x58/0x90 [ttm] ttm_bo_mem_space+0xc8/0x3e4 [ttm] ttm_bo_validate+0xb4/0x134 [ttm] vmw_bo_pin_in_start_of_vram+0xbc/0x200 [vmwgfx] vmw_framebuffer_pin+0xc0/0x154 [vmwgfx] vmw_ldu_primary_plane_atomic_update+0x8c/0x6e0 [vmwgfx] drm_atomic_helper_commit_planes+0x11c/0x2e0 drm_atomic_helper_commit_tail+0x60/0xb0 commit_tail+0x1b0/0x210 drm_atomic_helper_commit+0x168/0x400 drm_atomic_commit+0x64/0x74 drm_atomic_helper_set_config+0xdc/0x11c drm_mode_setcrtc+0x1c4/0x780 drm_ioctl_kernel+0xd0/0x1a0 drm_ioctl+0x2c4/0x690 vmw_generic_ioctl+0xe0/0x174 [vmwgfx] vmw_unlocked_ioctl+0x24/0x30 [vmwgfx] __arm64_sys_ioctl+0xb4/0x100 invoke_syscall+0x78/0x100 el0_svc_common.constprop.0+0x54/0x184 do_el0_svc+0x34/0x9c el0_svc+0x48/0x1b0 el0t_64_sync_handler+0xa4/0x130 el0t_64_sync+0x1a4/0x1a8 Code: 35000260 f9401a81 52800002 f9403a60 (f9406c23) ---[ end trace 0000000000000000 ]--- Signed-off-by: Zack Rusin Fixes: de3688e469b0 ("drm/ttm: add ttm_resource_fini v2") Cc: Christian König Cc: Daniel Vetter Reviewed-by: Martin Krastev Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220318174332.440068-6-zack@kde.org --- drivers/gpu/drm/ttm/ttm_range_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c index 8cd4f3fb9f79..d91666721dc6 100644 --- a/drivers/gpu/drm/ttm/ttm_range_manager.c +++ b/drivers/gpu/drm/ttm/ttm_range_manager.c @@ -89,7 +89,7 @@ static int ttm_range_man_alloc(struct ttm_resource_manager *man, spin_unlock(&rman->lock); if (unlikely(ret)) { - ttm_resource_fini(man, *res); + ttm_resource_fini(man, &node->base); kfree(node); return ret; } From a860f266a0e19f271b839451d291a6acf6ddcfe8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 2 Mar 2022 16:59:09 -0700 Subject: [PATCH 02/10] drm/selftest: plane_helper: Put test structures in static storage Clang warns on certain 32-bit architectures: drivers/gpu/drm/selftests/test-drm_plane_helper.c:76:5: warning: stack frame size (1064) exceeds limit (1024) in 'igt_check_plane_state' [-Wframe-larger-than] int igt_check_plane_state(void *ignored) ^ 1 warning generated. The structures in igt_check_plane_state() total 1008 bytes, so any small amount of inlining will cause the stack frame to exceed the 32-bit limit of 1024, triggering the warning. Move these structures to static storage, which dramatically reduces the amount of stack space in igt_check_plane_state(). There is no testing impact, as igt_check_plane_state() is only called once in the driver. Fixes: 943e6a8beeac ("mock a drm_plane in igt_check_plane_state to make the test more robust") Link: https://github.com/ClangBuiltLinux/linux/issues/1600 Reported-by: kernel test robot Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220302235909.784935-1-nathan@kernel.org --- drivers/gpu/drm/selftests/test-drm_plane_helper.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/selftests/test-drm_plane_helper.c b/drivers/gpu/drm/selftests/test-drm_plane_helper.c index ceebeede55ea..b61273e9c403 100644 --- a/drivers/gpu/drm/selftests/test-drm_plane_helper.c +++ b/drivers/gpu/drm/selftests/test-drm_plane_helper.c @@ -77,7 +77,7 @@ int igt_check_plane_state(void *ignored) { int ret; - const struct drm_crtc_state crtc_state = { + static const struct drm_crtc_state crtc_state = { .crtc = ZERO_SIZE_PTR, .enable = true, .active = true, @@ -87,14 +87,14 @@ int igt_check_plane_state(void *ignored) DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, }; - struct drm_plane plane = { + static struct drm_plane plane = { .dev = NULL }; - struct drm_framebuffer fb = { + static struct drm_framebuffer fb = { .width = 2048, .height = 2048 }; - struct drm_plane_state plane_state = { + static struct drm_plane_state plane_state = { .plane = &plane, .crtc = ZERO_SIZE_PTR, .fb = &fb, From 9cddf03b2af07443bebdc73cba21acb360c079e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Mar 2022 23:28:45 +0200 Subject: [PATCH 03/10] drm/i915: Reject unsupported TMDS rates on ICL+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ICL+ PLLs can't genenerate certain frequencies. Running the PLL algorithms through for all frequencies 25-594MHz we see a gap just above 500 MHz. Specifically 500-522.8MHZ for TC PLLs, and 500-533.2 MHz for combo PHY PLLs. Reject those frequencies hdmi_port_clock_valid() so that we properly filter out unsupported modes and/or color depths for HDMI. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5247 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220311212845.32358-1-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola (cherry picked from commit e5086cb3f3d3f94091be29eec38cf13f8a75a778) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_hdmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 1aa5bdc7b0dc..6512f014cad4 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1836,6 +1836,7 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, bool has_hdmi_sink) { struct drm_i915_private *dev_priv = intel_hdmi_to_i915(hdmi); + enum phy phy = intel_port_to_phy(dev_priv, hdmi_to_dig_port(hdmi)->base.port); if (clock < 25000) return MODE_CLOCK_LOW; @@ -1856,6 +1857,14 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, if (IS_CHERRYVIEW(dev_priv) && clock > 216000 && clock < 240000) return MODE_CLOCK_RANGE; + /* ICL+ combo PHY PLL can't generate 500-533.2 MHz */ + if (intel_phy_is_combo(dev_priv, phy) && clock > 500000 && clock < 533200) + return MODE_CLOCK_RANGE; + + /* ICL+ TC PHY PLL can't generate 500-532.8 MHz */ + if (intel_phy_is_tc(dev_priv, phy) && clock > 500000 && clock < 532800) + return MODE_CLOCK_RANGE; + /* * SNPS PHYs' MPLLB table-based programming can only handle a fixed * set of link rates. From 1937f3feb0e84089ae4065e09c871b8ab4676f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 9 Mar 2022 18:49:41 +0200 Subject: [PATCH 04/10] drm/i915: Treat SAGV block time 0 as SAGV disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For modern platforms the spec explicitly states that a SAGV block time of zero means that SAGV is not supported. Let's extend that to all platforms. Supposedly there should be no systems where this isn't true, and it'll allow us to: - use the same code regardless of older vs. newer platform - wm latencies already treat 0 as disabled, so this fits well with other related code - make it a bit more clear when SAGV is used vs. not - avoid overflows from adding U32_MAX with a u16 wm0 latency value which could cause us to miscalculate the SAGV watermarks on tgl+ Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220309164948.10671-2-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit d8f5855b31c0523ea3b171db8dfb998830e8735d) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_pm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 71f7fba2c9e2..9333f732cda8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3698,8 +3698,7 @@ skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) MISSING_CASE(DISPLAY_VER(dev_priv)); } - /* Default to an unusable block time */ - dev_priv->sagv_block_time_us = -1; + dev_priv->sagv_block_time_us = 0; } /* @@ -5645,7 +5644,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, result->min_ddb_alloc = max(min_ddb_alloc, blocks) + 1; result->enable = true; - if (DISPLAY_VER(dev_priv) < 12) + if (DISPLAY_VER(dev_priv) < 12 && dev_priv->sagv_block_time_us) result->can_sagv = latency >= dev_priv->sagv_block_time_us; } @@ -5678,7 +5677,10 @@ static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); struct skl_wm_level *sagv_wm = &plane_wm->sagv.wm0; struct skl_wm_level *levels = plane_wm->wm; - unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us; + unsigned int latency = 0; + + if (dev_priv->sagv_block_time_us) + latency = dev_priv->sagv_block_time_us + dev_priv->wm.skl_latency[0]; skl_compute_plane_wm(crtc_state, plane, 0, latency, wm_params, &levels[0], From 3ef8b5e19ead5a79600ea55f9549658281415893 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 9 Mar 2022 18:49:46 +0200 Subject: [PATCH 05/10] drm/i915: Fix PSF GV point mask when SAGV is not possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't just mask off all the PSF GV points when SAGV gets disabled. This should in fact cause the Pcode to reject the request since at least one PSF point must remain enabled at all times. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: 192fbfb76744 ("drm/i915: Implement PSF GV point support") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220309164948.10671-7-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit 0fed4ddd18f064d2359b430c6e83ee60dd1f49b1) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_bw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index ad1564ca7269..adf58c58513b 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -992,7 +992,8 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) * cause. */ if (!intel_can_enable_sagv(dev_priv, new_bw_state)) { - allowed_points = BIT(max_bw_point); + allowed_points &= ADLS_PSF_PT_MASK; + allowed_points |= BIT(max_bw_point); drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n", max_bw_point); } From 00f4150d27d2c01eaeffe1091fc311a7c0872c69 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Mar 2022 16:45:37 -0700 Subject: [PATCH 06/10] drm/i915: Fix renamed struct field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier versions of commit a5b7ef27da60 ("drm/i915: Add struct to hold IP version") named "ver" as "arch" and then when it was renamed it missed the rename on MEDIA_VER_FULL() since it it's currently not used. Fixes: a5b7ef27da60 ("drm/i915: Add struct to hold IP version") Cc: José Roberto de Souza Cc: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220316234538.434357-1-lucas.demarchi@intel.com (cherry picked from commit b4ac33b973233dc08a56c8ef9d3c2edeab7a4370) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d134838b3458..fa14da84362e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -947,7 +947,7 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) #define MEDIA_VER(i915) (INTEL_INFO(i915)->media.ver) -#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.arch, \ +#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.ver, \ INTEL_INFO(i915)->media.rel) #define IS_MEDIA_VER(i915, from, until) \ (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) From 5662abf6e21338be6d085d6375d3732ac6147fd2 Mon Sep 17 00:00:00 2001 From: Cooper Chiou Date: Thu, 24 Mar 2022 14:12:18 +0800 Subject: [PATCH 07/10] drm/edid: check basic audio support on CEA extension block Tag code stored in bit7:5 for CTA block byte[3] is not the same as CEA extension block definition. Only check CEA block has basic audio support. v3: update commit message. Cc: stable@vger.kernel.org Cc: Jani Nikula Cc: Shawn C Lee Cc: intel-gfx Signed-off-by: Cooper Chiou Signed-off-by: Lee Shawn C Fixes: e28ad544f462 ("drm/edid: parse CEA blocks embedded in DisplayID") Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220324061218.32739-1-shawn.c.lee@intel.com --- drivers/gpu/drm/drm_edid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 561f53831e29..f07af6786cec 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4859,7 +4859,8 @@ bool drm_detect_monitor_audio(struct edid *edid) if (!edid_ext) goto end; - has_audio = ((edid_ext[3] & EDID_BASIC_AUDIO) != 0); + has_audio = (edid_ext[0] == CEA_EXT && + (edid_ext[3] & EDID_BASIC_AUDIO) != 0); if (has_audio) { DRM_DEBUG_KMS("Monitor has basic audio support\n"); From 7344bad7fb6daa4877a1c064b52c7d5f9182c41b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 23 Mar 2022 12:04:38 +0200 Subject: [PATCH 08/10] drm/edid: fix CEA extension byte #3 parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only an EDID CEA extension has byte #3, while the CTA DisplayID Data Block does not. Don't interpret bogus data for color formats. For most displays it's probably an unlikely scenario you'd have a CTA DisplayID Data Block without a CEA extension, but they do exist. Fixes: e28ad544f462 ("drm/edid: parse CEA blocks embedded in DisplayID") Cc: Cc: Shawn C Lee Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220323100438.1757295-1-jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index f07af6786cec..cc7bd58369df 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5188,10 +5188,14 @@ static void drm_parse_cea_ext(struct drm_connector *connector, /* The existence of a CEA block should imply RGB support */ info->color_formats = DRM_COLOR_FORMAT_RGB444; - if (edid_ext[3] & EDID_CEA_YCRCB444) - info->color_formats |= DRM_COLOR_FORMAT_YCBCR444; - if (edid_ext[3] & EDID_CEA_YCRCB422) - info->color_formats |= DRM_COLOR_FORMAT_YCBCR422; + + /* CTA DisplayID Data Block does not have byte #3 */ + if (edid_ext[0] == CEA_EXT) { + if (edid_ext[3] & EDID_CEA_YCRCB444) + info->color_formats |= DRM_COLOR_FORMAT_YCBCR444; + if (edid_ext[3] & EDID_CEA_YCRCB422) + info->color_formats |= DRM_COLOR_FORMAT_YCBCR422; + } if (cea_db_offsets(edid_ext, &start, &end)) return; From 40faaf80c463b2c99a0351d28bd74d75d64ed017 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Sun, 13 Mar 2022 20:29:51 +0100 Subject: [PATCH 09/10] fbdev: Fix sys_imageblit() for arbitrary image widths Commit 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()") broke sys_imageblit() for image width that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change. Signed-off-by: Thomas Zimmermann Fixes: 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()") Cc: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Sam Ravnborg Tested-by: Geert Uytterhoeven Reviewed-by: Javier Martinez Canillas Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20220313192952.12058-2-tzimmermann@suse.de --- drivers/video/fbdev/core/sysimgblt.c | 29 ++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/core/sysimgblt.c b/drivers/video/fbdev/core/sysimgblt.c index 722c327a381b..335e92b813fc 100644 --- a/drivers/video/fbdev/core/sysimgblt.c +++ b/drivers/video/fbdev/core/sysimgblt.c @@ -188,7 +188,7 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p, { u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; u32 ppw = 32/bpp, spitch = (image->width + 7)/8; - u32 bit_mask, eorx; + u32 bit_mask, eorx, shift; const char *s = image->data, *src; u32 *dst; const u32 *tab; @@ -229,17 +229,23 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p, for (i = image->height; i--; ) { dst = dst1; + shift = 8; src = s; + /* + * Manually unroll the per-line copying loop for better + * performance. This works until we processed the last + * completely filled source byte (inclusive). + */ switch (ppw) { case 4: /* 8 bpp */ - for (j = k; j; j -= 2, ++src) { + for (j = k; j >= 2; j -= 2, ++src) { *dst++ = colortab[(*src >> 4) & bit_mask]; *dst++ = colortab[(*src >> 0) & bit_mask]; } break; case 2: /* 16 bpp */ - for (j = k; j; j -= 4, ++src) { + for (j = k; j >= 4; j -= 4, ++src) { *dst++ = colortab[(*src >> 6) & bit_mask]; *dst++ = colortab[(*src >> 4) & bit_mask]; *dst++ = colortab[(*src >> 2) & bit_mask]; @@ -247,7 +253,7 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p, } break; case 1: /* 32 bpp */ - for (j = k; j; j -= 8, ++src) { + for (j = k; j >= 8; j -= 8, ++src) { *dst++ = colortab[(*src >> 7) & bit_mask]; *dst++ = colortab[(*src >> 6) & bit_mask]; *dst++ = colortab[(*src >> 5) & bit_mask]; @@ -259,6 +265,21 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p, } break; } + + /* + * For image widths that are not a multiple of 8, there + * are trailing pixels left on the current line. Print + * them as well. + */ + for (; j--; ) { + shift -= ppw; + *dst++ = colortab[(*src >> shift) & bit_mask]; + if (!shift) { + shift = 8; + ++src; + } + } + dst1 += p->fix.line_length; s += spitch; } From 2a81dba4b577099717cea86d429f053e85e74d96 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Sun, 13 Mar 2022 20:29:52 +0100 Subject: [PATCH 10/10] fbdev: Fix cfb_imageblit() for arbitrary image widths Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") broke cfb_imageblit() for image widths that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change. Signed-off-by: Thomas Zimmermann Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") Reported-by: Marek Szyprowski Cc: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Sam Ravnborg Tested-by: Marek Szyprowski Acked-by: Daniel Vetter Reviewed-by: Javier Martinez Canillas Tested-by: Guenter Roeck Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20220313192952.12058-3-tzimmermann@suse.de --- drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c index 7361cfabdd85..9ebda4e0dc7a 100644 --- a/drivers/video/fbdev/core/cfbimgblt.c +++ b/drivers/video/fbdev/core/cfbimgblt.c @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * { u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; u32 ppw = 32/bpp, spitch = (image->width + 7)/8; - u32 bit_mask, eorx; + u32 bit_mask, eorx, shift; const char *s = image->data, *src; u32 __iomem *dst; const u32 *tab = NULL; @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * for (i = image->height; i--; ) { dst = (u32 __iomem *)dst1; + shift = 8; src = s; + /* + * Manually unroll the per-line copying loop for better + * performance. This works until we processed the last + * completely filled source byte (inclusive). + */ switch (ppw) { case 4: /* 8 bpp */ - for (j = k; j; j -= 2, ++src) { + for (j = k; j >= 2; j -= 2, ++src) { FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++); } break; case 2: /* 16 bpp */ - for (j = k; j; j -= 4, ++src) { + for (j = k; j >= 4; j -= 4, ++src) { FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++); @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * } break; case 1: /* 32 bpp */ - for (j = k; j; j -= 8, ++src) { + for (j = k; j >= 8; j -= 8, ++src) { FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++); @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * break; } + /* + * For image widths that are not a multiple of 8, there + * are trailing pixels left on the current line. Print + * them as well. + */ + for (; j--; ) { + shift -= ppw; + FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++); + if (!shift) { + shift = 8; + ++src; + } + } + dst1 += p->fix.line_length; s += spitch; }