From f763c3b543d80ebcb94dd19a69324bf2b72b23ab Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 12 Jul 2024 16:17:46 +0530 Subject: [PATCH 001/447] drm/amdgpu: Add sdma_v5_2 ip dump for devcoredump Add ip dump for sdma_v5_2 for devcoredump for all instances of sdma. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 82 ++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index d3706a484870..087ce0f6fa07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -115,6 +115,7 @@ struct amdgpu_sdma { bool has_page_queue; struct ras_common_if *ras_if; struct amdgpu_sdma_ras *ras; + uint32_t *ip_dump; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index cc9e961f0078..0cc969689946 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -60,6 +60,55 @@ MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin"); #define SDMA0_HYP_DEC_REG_END 0x5893 #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_2[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, 
mmSDMA0_RLC0_IB_BASE_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+	SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL),
+	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2)
+};
+
 static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
 static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
@@ -1214,6 +1263,8 @@ static int sdma_v5_2_sw_init(void *handle)
 	struct amdgpu_ring *ring;
 	int r, i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+	uint32_t *ptr;
 
 	/* SDMA trap event */
 	for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1245,6 +1296,13 @@ static int sdma_v5_2_sw_init(void *handle)
 			return r;
 	}
 
+	/* Allocate memory for SDMA IP Dump buffer */
+	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+	if (ptr)
+		adev->sdma.ip_dump = ptr;
+	else
+		DRM_ERROR("Failed to allocate memory for SDMA IP Dump\n");
+
 	return r;
 }
 
@@ -1258,6 +1316,8 @@ static int sdma_v5_2_sw_fini(void *handle)
 
 	amdgpu_sdma_destroy_inst_ctx(adev, true);
 
+	kfree(adev->sdma.ip_dump);
+
 	return 0;
 }
 
@@ -1662,6 +1722,27 @@ static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
 	amdgpu_gfx_off_ctrl(adev, true);
 }
 
+static void sdma_v5_2_dump_ip_state(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int i, j;
+	uint32_t instance_offset;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	amdgpu_gfx_off_ctrl(adev, false);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		instance_offset = i * reg_count;
+		for (j = 0; j < reg_count; j++)
+			adev->sdma.ip_dump[instance_offset + j] =
+				RREG32(sdma_v5_2_get_reg_offset(adev, i,
+				       sdma_reg_list_5_2[j].reg_offset));
+	}
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
 const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
 	.name = "sdma_v5_2",
 	.early_init = sdma_v5_2_early_init,
@@ -1678,6 +1759,7 @@ const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
 	.set_clockgating_state = sdma_v5_2_set_clockgating_state,
 	.set_powergating_state = sdma_v5_2_set_powergating_state,
 	.get_clockgating_state = sdma_v5_2_get_clockgating_state,
+	.dump_ip_state = sdma_v5_2_dump_ip_state,
 };
 
 static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {

From 08bed7e4ff24f90e200defcd2c23e70b0a3cd710 Mon Sep 17 00:00:00 2001
From: Sunil Khatri
Date: Fri, 12 Jul 2024 16:44:36 +0530
Subject: [PATCH 002/447] drm/amdgpu: add print support for sdma_v_5_2 ip_dump

Add print support for ip dump for sdma_v_5_2 in devcoredump.
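Both the dump and the print callbacks in this pair address one flat snapshot
buffer: reg_count words per SDMA instance, instances laid out back to back.
A minimal sketch of that indexing contract, for readability only
(ip_dump_entry is an illustrative helper, not something the driver defines):

	/* Illustrative only: locate one register snapshot in the flat
	 * ip_dump buffer that sdma_v5_2_dump_ip_state() fills above.
	 */
	static uint32_t ip_dump_entry(const uint32_t *ip_dump, uint32_t reg_count,
				      uint32_t instance, uint32_t reg)
	{
		return ip_dump[instance * reg_count + reg];
	}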
Signed-off-by: Sunil Khatri
Reviewed-by: Alex Deucher
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 0cc969689946..630b03f2ce3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1722,6 +1722,26 @@ static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
 	amdgpu_gfx_off_ctrl(adev, true);
 }
 
+static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int i, j;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+	uint32_t instance_offset;
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		instance_offset = i * reg_count;
+		drm_printf(p, "\nInstance:%d\n", i);
+
+		for (j = 0; j < reg_count; j++)
+			drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_2[j].reg_name,
+				   adev->sdma.ip_dump[instance_offset + j]);
+	}
+}
 static void sdma_v5_2_dump_ip_state(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1760,6 +1780,7 @@ const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
 	.set_powergating_state = sdma_v5_2_set_powergating_state,
 	.get_clockgating_state = sdma_v5_2_get_clockgating_state,
 	.dump_ip_state = sdma_v5_2_dump_ip_state,
+	.print_ip_state = sdma_v5_2_print_ip_state,
 };
 
 static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {

From 43796955a89572c63f7f96e271a2849c27d18f2d Mon Sep 17 00:00:00 2001
From: Sunil Khatri
Date: Tue, 16 Jul 2024 15:50:49 +0530
Subject: [PATCH 003/447] drm/amdgpu: fix the extra space between two functions

Fix the line spacing between two functions: add the missing blank line
between sdma_v5_2_print_ip_state() and sdma_v5_2_dump_ip_state().

Signed-off-by: Sunil Khatri
Reviewed-by: Alex Deucher
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 630b03f2ce3d..66bb85955fa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1742,6 +1742,7 @@ static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p)
 			   adev->sdma.ip_dump[instance_offset + j]);
 	}
 }
+
 static void sdma_v5_2_dump_ip_state(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

From 00bb3223bf7cfed3c9c714e994cbd454cc3e6b73 Mon Sep 17 00:00:00 2001
From: Sunil Khatri
Date: Fri, 12 Jul 2024 17:55:33 +0530
Subject: [PATCH 004/447] drm/amdgpu: fix the print message in devcoredump

Fix the label for the logged GTT memory size, which was wrongly printed
as the visible VRAM size.
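With the label corrected, the SOC memory block of a devcoredump reads along
these lines (the byte counts here are made up for illustration):

	SOC Memory Information
	real vram size: 8589934592
	visible vram size: 268435456
	gtt size: 8589934592

Before this fix the third line reused the "visible vram size" label, so the
GTT size could not be told apart from the visible VRAM size in a report.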
Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index f0a44d0dec27..f6806ae1c061 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -236,7 +236,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, drm_printf(&p, "\nSOC Memory Information\n"); drm_printf(&p, "real vram size: %llu\n", coredump->adev->gmc.real_vram_size); drm_printf(&p, "visible vram size: %llu\n", coredump->adev->gmc.visible_vram_size); - drm_printf(&p, "visible vram size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size); + drm_printf(&p, "gtt size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size); /* GDS Config */ drm_printf(&p, "\nGDS Config\n"); From 1eba165aa40c79f65f487678c8ea8e77b1c6a5a4 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Jul 2024 15:22:56 +0530 Subject: [PATCH 005/447] drm/amdgpu: Add sdma_v6_0 ip dump for devcoredump Add ip dump for sdma_v6_0 for devcoredump for all instances of sdma. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 90 ++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index dab4c2db8c9d..102de209f120 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -57,6 +57,63 @@ MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin"); #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_6_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, 
regSDMA0_QUEUE1_RB_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS),
+};
+
 static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
 static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
@@ -1239,6 +1296,8 @@ static int sdma_v6_0_sw_init(void *handle)
 	struct amdgpu_ring *ring;
 	int r, i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+	uint32_t *ptr;
 
 	/* SDMA trap event */
 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
@@ -1274,6 +1333,13 @@ static int sdma_v6_0_sw_init(void *handle)
 		return -EINVAL;
 	}
 
+	/* Allocate memory for SDMA IP Dump buffer */
+	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+	if (ptr)
+		adev->sdma.ip_dump = ptr;
+	else
+		DRM_ERROR("Failed to allocate memory for SDMA IP Dump\n");
+
 	return r;
 }
 
@@ -1287,6 +1353,8 @@ static int sdma_v6_0_sw_fini(void *handle)
 
 	amdgpu_sdma_destroy_inst_ctx(adev, true);
 
+	kfree(adev->sdma.ip_dump);
+
 	return 0;
 }
 
@@ -1488,6 +1556,27 @@ static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags)
 {
 }
 
+static void sdma_v6_0_dump_ip_state(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int i, j;
+	uint32_t instance_offset;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	amdgpu_gfx_off_ctrl(adev, false);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		instance_offset = i * reg_count;
+		for (j = 0; j < reg_count; j++)
+			adev->sdma.ip_dump[instance_offset + j] =
+				RREG32(sdma_v6_0_get_reg_offset(adev, i,
+				       sdma_reg_list_6_0[j].reg_offset));
+	}
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
 const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
 	.name = "sdma_v6_0",
 	.early_init = sdma_v6_0_early_init,
@@ -1505,6 +1594,7 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
 	.set_clockgating_state = sdma_v6_0_set_clockgating_state,
 	.set_powergating_state = sdma_v6_0_set_powergating_state,
 	.get_clockgating_state = sdma_v6_0_get_clockgating_state,
+	.dump_ip_state = sdma_v6_0_dump_ip_state,
 };
 
 static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {

From ccb54d7d91d256485cfe5403a12abb0175ce4539 Mon Sep 17 00:00:00 2001
From: Sunil Khatri
Date:
Tue, 16 Jul 2024 15:43:44 +0530 Subject: [PATCH 006/447] drm/amdgpu: add print support for sdma_v_6_0 ip_dump Add print support for ip dump for sdma_v_6_0 in devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 102de209f120..208a1fa9d4e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1556,6 +1556,27 @@ static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v6_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_6_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + static void sdma_v6_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1595,6 +1616,7 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = { .set_powergating_state = sdma_v6_0_set_powergating_state, .get_clockgating_state = sdma_v6_0_get_clockgating_state, .dump_ip_state = sdma_v6_0_dump_ip_state, + .print_ip_state = sdma_v6_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { From 0f1a93704a5cf53ce819a7c544125442666d61ce Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Jul 2024 16:10:26 +0530 Subject: [PATCH 007/447] drm/amdgpu: Add sdma_v5_0 ip dump for devcoredump Add ip dump for sdma_v5_0 for devcoredump for all instances of sdma. 
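This follows the same shape as the sdma_v5_2 and sdma_v6_0 patches earlier in
the series; only the register list and the per-version offset helper differ.
A condensed sketch of that shared shape, under the assumption that one generic
helper could serve all three (the driver actually keeps one copy per IP
version; sdma_dump_common is a hypothetical name):

	static void sdma_dump_common(struct amdgpu_device *adev,
				     const struct amdgpu_hwip_reg_entry *regs,
				     uint32_t reg_count,
				     u32 (*get_reg_offset)(struct amdgpu_device *adev,
							   u32 instance, u32 offset))
	{
		uint32_t i, j;

		if (!adev->sdma.ip_dump)
			return;

		/* keep GFXOFF disengaged so the reads do not hit a gated block */
		amdgpu_gfx_off_ctrl(adev, false);
		for (i = 0; i < adev->sdma.num_instances; i++)
			for (j = 0; j < reg_count; j++)
				adev->sdma.ip_dump[i * reg_count + j] =
					RREG32(get_reg_offset(adev, i, regs[j].reg_offset));
		amdgpu_gfx_off_ctrl(adev, true);
	}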
Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 82 ++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b7d33d78bce0..cb324a90b310 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -59,6 +59,55 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin"); #define SDMA0_HYP_DEC_REG_END 0x5893 #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2) +}; + static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1341,6 +1390,8 @@ static int sdma_v5_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + uint32_t *ptr; /* SDMA trap event */ r = 
amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
@@ -1378,6 +1429,13 @@ static int sdma_v5_0_sw_init(void *handle)
 			return r;
 	}
 
+	/* Allocate memory for SDMA IP Dump buffer */
+	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+	if (ptr)
+		adev->sdma.ip_dump = ptr;
+	else
+		DRM_ERROR("Failed to allocate memory for SDMA IP Dump\n");
+
 	return r;
 }
 
@@ -1391,6 +1449,8 @@ static int sdma_v5_0_sw_fini(void *handle)
 
 	amdgpu_sdma_destroy_inst_ctx(adev, false);
 
+	kfree(adev->sdma.ip_dump);
+
 	return 0;
 }
 
@@ -1718,6 +1778,27 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
 		*flags |= AMD_CG_SUPPORT_SDMA_LS;
 }
 
+static void sdma_v5_0_dump_ip_state(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int i, j;
+	uint32_t instance_offset;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	amdgpu_gfx_off_ctrl(adev, false);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		instance_offset = i * reg_count;
+		for (j = 0; j < reg_count; j++)
+			adev->sdma.ip_dump[instance_offset + j] =
+				RREG32(sdma_v5_0_get_reg_offset(adev, i,
+				       sdma_reg_list_5_0[j].reg_offset));
+	}
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
 const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
 	.name = "sdma_v5_0",
 	.early_init = sdma_v5_0_early_init,
@@ -1734,6 +1815,7 @@ const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
 	.set_clockgating_state = sdma_v5_0_set_clockgating_state,
 	.set_powergating_state = sdma_v5_0_set_powergating_state,
 	.get_clockgating_state = sdma_v5_0_get_clockgating_state,
+	.dump_ip_state = sdma_v5_0_dump_ip_state,
 };
 
 static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {

From e84f798a93881062cd14ce316a68068edd50bfb4 Mon Sep 17 00:00:00 2001
From: Sunil Khatri
Date: Tue, 16 Jul 2024 16:15:57 +0530
Subject: [PATCH 008/447] drm/amdgpu: add print support for sdma_v_5_0 ip_dump

Add print support for ip dump for sdma_v_5_0 in devcoredump.
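The printer added below walks the same buffer and emits one block per
instance, so a dump from a two-instance part comes out roughly like this
(register values invented for illustration, the list elided after the first
entries):

	num_instances:2

	Instance:0
	mmSDMA0_STATUS_REG                                 	 0x46dee557
	mmSDMA0_STATUS1_REG                                	 0x000003ff
	...

	Instance:1
	mmSDMA0_STATUS_REG                                 	 0x46dee557
	...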
Signed-off-by: Sunil Khatri
Reviewed-by: Alex Deucher
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index cb324a90b310..d5f0dc132a47 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1778,6 +1778,27 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
 		*flags |= AMD_CG_SUPPORT_SDMA_LS;
 }
 
+static void sdma_v5_0_print_ip_state(void *handle, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int i, j;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
+	uint32_t instance_offset;
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		instance_offset = i * reg_count;
+		drm_printf(p, "\nInstance:%d\n", i);
+
+		for (j = 0; j < reg_count; j++)
+			drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_0[j].reg_name,
+				   adev->sdma.ip_dump[instance_offset + j]);
+	}
+}
+
 static void sdma_v5_0_dump_ip_state(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1816,6 +1837,7 @@ const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
 	.set_powergating_state = sdma_v5_0_set_powergating_state,
 	.get_clockgating_state = sdma_v5_0_get_clockgating_state,
 	.dump_ip_state = sdma_v5_0_dump_ip_state,
+	.print_ip_state = sdma_v5_0_print_ip_state,
 };
 
 static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {

From b68417613d4134b9e39fff95e72ca726268b47db Mon Sep 17 00:00:00 2001
From: Tom Chung
Date: Wed, 26 Jun 2024 16:14:24 +0800
Subject: [PATCH 009/447] drm/amd/display: Disable replay if VRR capability is false

[Why]
VRR must be supported for the panel replay feature. If the VRR
capability is false, the panel replay capability also needs to be
disabled.

[How]
After updating the VRR capability, re-check the panel replay capability
and disable it if needed.

Reviewed-by: Wayne Lin
Signed-off-by: Jerry Zuo
Signed-off-by: Tom Chung
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7e7929f24ae4..ea1e2d8dcd8c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -12145,6 +12145,12 @@ update:
 	if (dm_con_state)
 		dm_con_state->freesync_capable = freesync_capable;
 
+	if (connector->state && amdgpu_dm_connector->dc_link && !freesync_capable &&
+	    amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported) {
+		amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported = false;
+		amdgpu_dm_connector->dc_link->replay_settings.replay_feature_enabled = false;
+	}
+
 	if (connector->vrr_capable_property)
 		drm_connector_set_vrr_capable_property(connector, freesync_capable);

From 4ccc8fdcca670edd76d8bfd6389f04c448cff6f6 Mon Sep 17 00:00:00 2001
From: Alvin Lee
Date: Tue, 12 Sep 2023 14:51:47 -0400
Subject: [PATCH 010/447] drm/amd/display: Disable HBR audio for DP2 for certain ASICs

[Description]
Due to a HW bug, HBR audio is not supported for DP2 encoders for
certain ASICs.
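The guard this patch adds to dce110_apply_single_controller_ctx_to_hw()
composes three conditions; restated flat here for readability (logically the
same as the nested form in the hunk below):

	/* Strip HBR capability from the audio block only when the ASIC
	 * opted in via disable_hbr_audio_dp2, the audio block implements
	 * the hook, and the stream really runs as DP2 (128b/132b).
	 */
	if (dc->config.disable_hbr_audio_dp2 &&
	    pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio &&
	    dc->link_srv->dp_is_128b_132b_signal(pipe_ctx))
		pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio(
				pipe_ctx->stream_res.audio);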
Reviewed-by: Alvin Lee Signed-off-by: Jerry Zuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dce/dce_audio.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dce/dce_audio.h | 1 + drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 5 +++++ drivers/gpu/drm/amd/display/dc/inc/hw/audio.h | 2 ++ .../gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c | 1 + .../gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 1 + .../drm/amd/display/dc/resource/dcn321/dcn321_resource.c | 1 + .../gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 + 9 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 73cdebcd9f37..4c9bb913125d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -466,6 +466,7 @@ struct dc_config { bool use_assr_psp_message; bool support_edp0_on_dp1; unsigned int enable_fpo_flicker_detection; + bool disable_hbr_audio_dp2; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index cf5f84fb9c69..eeed840073fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -630,6 +630,11 @@ void dce_aud_az_enable(struct audio *audio) audio->inst, value); } +void dce_aud_az_disable_hbr_audio(struct audio *audio) +{ + set_high_bit_rate_capable(audio, false); +} + void dce_aud_az_disable(struct audio *audio) { uint32_t value; @@ -1293,6 +1298,7 @@ static const struct audio_funcs funcs = { .az_enable = dce_aud_az_enable, .az_disable = dce_aud_az_disable, .az_configure = dce_aud_az_configure, + .az_disable_hbr_audio = dce_aud_az_disable_hbr_audio, .destroy = dce_aud_destroy, }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h index 539f881928d1..1b7b8b079af4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h @@ -166,6 +166,7 @@ void dce_aud_hw_init(struct audio *audio); void dce_aud_az_enable(struct audio *audio); void dce_aud_az_disable(struct audio *audio); +void dce_aud_az_disable_hbr_audio(struct audio *audio); void dce_aud_az_configure(struct audio *audio, enum signal_type signal, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 1f2eb2f727dc..51c5195f8325 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1597,6 +1597,11 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( &audio_output.crtc_info, &pipe_ctx->stream->audio_info, &audio_output.dp_link_info); + + if (dc->config.disable_hbr_audio_dp2) + if (pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio && + dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) + pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio(pipe_ctx->stream_res.audio); } /* make sure no pipes syncd to the pipe being enabled */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h index b6203253111c..8c18efc2aa70 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h @@ -46,6 +46,8 @@ struct audio_funcs { const struct audio_info *audio_info, const struct audio_dp_link_info *dp_link_info); + void 
(*az_disable_hbr_audio)(struct audio *audio); + void (*wall_dto_setup)(struct audio *audio, enum signal_type signal, const struct audio_crtc_info *crtc_info, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 5d1801dce273..ac8cb20e2e3b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1948,6 +1948,7 @@ static bool dcn31_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 969658313fd6..3ed6d1fa0c44 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2220,6 +2220,7 @@ static bool dcn32_resource_construct( dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 9a3cc0514a36..a414ed60a724 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1780,6 +1780,7 @@ static bool dcn321_resource_construct( dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index ddf251901fb3..5f3705f97bd7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1899,6 +1899,7 @@ static bool dcn35_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { From 5f30ee493044e9ea3a46167e5597a96f5c302adb Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Thu, 20 Jun 2024 15:42:45 -0400 Subject: [PATCH 011/447] drm/amd/display: quality improvements for EASF and ISHARP [Why] Update coefficients and LUT tables for scaler and sharpener to improve quality and support different use cases (SDR/HDR) [How] Move scaler coefficients to new file dc_spl_scl_easf_filters.c Remove older coefficients file dc_sp_scl_filters_old.c Update default taps for EASF support Update LLS policy for DON'T CARE case Update cositing offset from 0.5 to 0.25 Add support to adjust sharpness based on level, use case, and scaling ratio ( using discrete levels ) Apply sharpness to all RGB surfaces and both NV12 and P010 video ( in fullscreen only ). 
Upscale and 1:1 ratios only.
Enable scaler when sharpening 1:1 ratios.
Add support for coefficients that are in S1.10 format (convert to S1.12 format).

Reviewed-by: Jun Lei
Signed-off-by: Jerry Zuo
Signed-off-by: Samson Tam
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |   12 +
 drivers/gpu/drm/amd/display/dc/dc_hw_types.h  |    7 -
 .../gpu/drm/amd/display/dc/dc_spl_translate.c |    5 +
 .../gpu/drm/amd/display/dc/dc_spl_translate.h |    1 +
 drivers/gpu/drm/amd/display/dc/dm_helpers.h   |    3 +
 .../dc/dml2/dml21/dml21_translation_helper.c  |    8 +
 .../display/dc/dpp/dcn401/dcn401_dpp_dscl.c   |  618 +++---
 .../dc/resource/dcn401/dcn401_resource.c      |    7 +
 drivers/gpu/drm/amd/display/dc/spl/Makefile   |    2 +-
 drivers/gpu/drm/amd/display/dc/spl/dc_spl.c   |  958 +++++----
 .../drm/amd/display/dc/spl/dc_spl_filters.c   |   15 +
 .../drm/amd/display/dc/spl/dc_spl_filters.h   |   15 +
 .../display/dc/spl/dc_spl_isharp_filters.c    |  427 +++-
 .../display/dc/spl/dc_spl_isharp_filters.h    |   33 +-
 .../display/dc/spl/dc_spl_scl_easf_filters.c  | 1725 +++++++++++++++++
 .../display/dc/spl/dc_spl_scl_easf_filters.h  |   38 +
 .../amd/display/dc/spl/dc_spl_scl_filters.c   |   26 +
 .../amd/display/dc/spl/dc_spl_scl_filters.h   |   39 +-
 .../gpu/drm/amd/display/dc/spl/dc_spl_types.h |   12 +
 19 files changed, 3201 insertions(+), 750 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index b490ae67b6be..165e010fe69c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -1286,3 +1286,15 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link)
 
 	return as_type;
 }
+
+bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream)
+{
+	// TODO
+	return false;
+}
+
+bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream)
+{
+	// TODO
+	return false;
+}
\ No newline at end of file

diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index 959ae0df1e56..c10567ec1c81 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -763,13 +763,6 @@ enum scanning_type {
 	SCANNING_TYPE_UNDEFINED
 };
 
-enum chroma_cositing {
-	CHROMA_COSITING_NONE,
-	CHROMA_COSITING_LEFT,
-	CHROMA_COSITING_TOPLEFT,
-	CHROMA_COSITING_COUNT
-};
-
 struct dc_crtc_timing_flags {
 	uint32_t INTERLACE :1;
 	uint32_t HSYNC_POSITIVE_POLARITY :1; /* when set to 1,

diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
index 582606319764..49ff59258c8d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
@@ -170,6 +170,11 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl
 	/* Translate transfer function */
 	spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type;
 	spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf;
+	/* Check if the stream is in fullscreen and if it's HDR.
+ * Use this to determine sharpness levels + */ + spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream); + spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h index c73d640c3632..eaa5c5373b28 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h @@ -6,6 +6,7 @@ #define __DC_SPL_TRANSLATE_H__ #include "dc.h" #include "resource.h" +#include "dm_helpers.h" /* Map SPL input parameters to pipe context * @pipe_ctx: pipe context diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 34adae7ab6e8..2e4a46f1b499 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -210,4 +210,7 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link); enum dc_edid_status dm_helpers_get_sbios_edid(struct dc_link *link, struct dc_edid *edid); +bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream); +bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream); + #endif /* __DM_HELPERS__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 06387b8b0aee..a50fe3ec79c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -788,6 +788,14 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm * certain cases. Hence do corrective active and disable scaling. */ plane->composition.scaler_info.enabled = false; + } else if ((plane_state->ctx->dc->config.use_spl == true) && + (plane->composition.scaler_info.enabled == false)) { + /* To enable sharpener for 1:1, scaler must be enabled. 
If use_spl is set, then + * allow case where ratio is 1 but taps > 1 + */ + if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) || + (scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1)) + plane->composition.scaler_info.enabled = true; } /* always_scale is only used for debug purposes not used in production but has to be diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 505929800426..27cbda1cf8cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -655,6 +655,226 @@ static void dpp401_dscl_set_recout(struct dcn401_dpp *dpp, /* Number of RECOUT vertical lines */ RECOUT_HEIGHT, recout->height); } +/** + * dpp401_dscl_program_easf_v - Program EASF_V + * + * @dpp_base: High level DPP struct + * @scl_data: scalaer_data info + * + * This is the primary function to program vertical EASF registers + * + */ +static void dpp401_dscl_program_easf_v(struct dpp *dpp_base, const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + PERF_TRACE(); + /* DSCL_EASF_V_MODE */ + REG_SET_3(DSCL_EASF_V_MODE, 0, + SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en, + SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor, + SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); + + if (!scl_data->dscl_prog_data.easf_v_en) { + PERF_TRACE(); + return; + } + + /* DSCL_EASF_V_BF_CNTL */ + REG_SET_6(DSCL_EASF_V_BF_CNTL, 0, + SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en, + SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode, + SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode, + SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain, + SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain, + SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain); + /* DSCL_EASF_V_RINGEST_3TAP_CNTLn */ + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL1, 0, + SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt, + SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL2, 0, + SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope, + SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL3, 0, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); + /* DSCL_EASF_V_RINGEST_EVENTAP_REDUCE */ + REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, 0, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); + /* DSCL_EASF_V_RINGEST_EVENTAP_GAIN */ + REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, 0, + SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1, + SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); + /* DSCL_EASF_V_BF_FINAL_MAX_MIN */ + REG_SET_4(DSCL_EASF_V_BF_FINAL_MAX_MIN, 0, + SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa, + SCL_EASF_V_BF_MAXB, 
scl_data->dscl_prog_data.easf_v_bf_maxb, + SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina, + SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); + /* DSCL_EASF_V_BF1_PWL_SEGn */ + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG0, 0, + SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0, + SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0, + SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG1, 0, + SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1, + SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1, + SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG2, 0, + SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2, + SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2, + SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG3, 0, + SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3, + SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3, + SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG4, 0, + SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4, + SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4, + SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG5, 0, + SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5, + SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5, + SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG6, 0, + SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6, + SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6, + SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6); + REG_SET_2(DSCL_EASF_V_BF1_PWL_SEG7, 0, + SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7, + SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7); + /* DSCL_EASF_V_BF3_PWL_SEGn */ + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG0, 0, + SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0, + SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0, + SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0); + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG1, 0, + SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1, + SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1, + SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1); + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG2, 0, + SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2, + SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2, + SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2); + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG3, 0, + SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3, + SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3, + 
SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3); + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG4, 0, + SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4, + SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4, + SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4); + REG_SET_2(DSCL_EASF_V_BF3_PWL_SEG5, 0, + SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5, + SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5); + PERF_TRACE(); +} +/** + * dpp401_dscl_program_easf_h - Program EASF_H + * + * @dpp_base: High level DPP struct + * @scl_data: scalaer_data info + * + * This is the primary function to program horizontal EASF registers + * + */ +static void dpp401_dscl_program_easf_h(struct dpp *dpp_base, const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + PERF_TRACE(); + /* DSCL_EASF_H_MODE */ + REG_SET_3(DSCL_EASF_H_MODE, 0, + SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en, + SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor, + SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); + + if (!scl_data->dscl_prog_data.easf_h_en) { + PERF_TRACE(); + return; + } + + /* DSCL_EASF_H_BF_CNTL */ + REG_SET_6(DSCL_EASF_H_BF_CNTL, 0, + SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en, + SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode, + SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode, + SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain, + SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain, + SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); + /* DSCL_EASF_H_RINGEST_EVENTAP_REDUCE */ + REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, 0, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); + /* DSCL_EASF_H_RINGEST_EVENTAP_GAIN */ + REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, 0, + SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1, + SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); + /* DSCL_EASF_H_BF_FINAL_MAX_MIN */ + REG_SET_4(DSCL_EASF_H_BF_FINAL_MAX_MIN, 0, + SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa, + SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb, + SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina, + SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); + /* DSCL_EASF_H_BF1_PWL_SEGn */ + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG0, 0, + SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0, + SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0, + SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG1, 0, + SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1, + SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1, + SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG2, 0, + SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2, + SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2, + SCL_EASF_H_BF1_PWL_SLOPE_SEG2, 
scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG3, 0, + SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3, + SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3, + SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG4, 0, + SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4, + SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4, + SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG5, 0, + SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5, + SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5, + SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG6, 0, + SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6, + SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6, + SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6); + REG_SET_2(DSCL_EASF_H_BF1_PWL_SEG7, 0, + SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7, + SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7); + /* DSCL_EASF_H_BF3_PWL_SEGn */ + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG0, 0, + SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0, + SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0, + SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0); + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG1, 0, + SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1, + SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1, + SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1); + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG2, 0, + SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2, + SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2, + SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2); + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG3, 0, + SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3, + SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3, + SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3); + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG4, 0, + SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4, + SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4, + SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4); + REG_SET_2(DSCL_EASF_H_BF3_PWL_SEG5, 0, + SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5, + SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5); + PERF_TRACE(); +} /** * dpp401_dscl_program_easf - Program EASF * @@ -669,261 +889,19 @@ static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_d struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); PERF_TRACE(); - REG_UPDATE(DSCL_SC_MODE, - SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode); - REG_UPDATE(DSCL_SC_MODE, + /* DSCL_SC_MODE */ + REG_SET_2(DSCL_SC_MODE, 0, + SCL_SC_MATRIX_MODE, 
scl_data->dscl_prog_data.easf_matrix_mode, SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en); - /* DSCL_EASF_V_MODE */ - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en); - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor); - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, - SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, - SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, - SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, - SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, - SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, - SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1); - 
REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, - SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, - SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3); - 
REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, - SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, - SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5); - /* DSCL_EASF_H_MODE */ - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en); - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor); - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, - SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, - SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2); - 
REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, - SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, - SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4); - 
REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, - SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, - SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5); /* DSCL_EASF_SC_MATRIX_C0C1, DSCL_EASF_SC_MATRIX_C2C3 */ - REG_UPDATE(DSCL_SC_MATRIX_C0C1, - SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0); - REG_UPDATE(DSCL_SC_MATRIX_C0C1, + REG_SET_2(DSCL_SC_MATRIX_C0C1, 0, + SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0, SCL_SC_MATRIX_C1, scl_data->dscl_prog_data.easf_matrix_c1); - REG_UPDATE(DSCL_SC_MATRIX_C2C3, - SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2); - REG_UPDATE(DSCL_SC_MATRIX_C2C3, + REG_SET_2(DSCL_SC_MATRIX_C2C3, 0, + SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2, SCL_SC_MATRIX_C3, scl_data->dscl_prog_data.easf_matrix_c3); + dpp401_dscl_program_easf_v(dpp_base, scl_data); + dpp401_dscl_program_easf_h(dpp_base, scl_data); PERF_TRACE(); } /** @@ -958,10 +936,11 @@ static void dpp401_dscl_set_isharp_filter( REG_UPDATE(ISHARP_DELTA_CTRL, ISHARP_DELTA_LUT_HOST_SELECT, 0); + /* LUT data write is auto-indexed. Write index once */ + REG_SET(ISHARP_DELTA_INDEX, 0, + ISHARP_DELTA_INDEX, 0); for (level = 0; level < NUM_LEVELS; level++) { filter_data = filter[level]; - REG_SET(ISHARP_DELTA_INDEX, 0, - ISHARP_DELTA_INDEX, level); REG_SET(ISHARP_DELTA_DATA, 0, ISHARP_DELTA_DATA, filter_data); } @@ -981,102 +960,67 @@ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); PERF_TRACE(); - /* ISHARP_EN */ - REG_UPDATE(ISHARP_MODE, - ISHARP_EN, scl_data->dscl_prog_data.isharp_en); - /* ISHARP_NOISEDET_EN */ - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable); - /* ISHARP_NOISEDET_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); - /* ISHARP_NOISEDET_UTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); - /* ISHARP_NOISEDET_DTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, + /* ISHARP_MODE */ + REG_SET_6(ISHARP_MODE, 0, + ISHARP_EN, scl_data->dscl_prog_data.isharp_en, + ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable, + ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode, + ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode, + ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode, + ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); + + /* Skip remaining register programming if ISHARP is disabled */ + if (!scl_data->dscl_prog_data.isharp_en) { + PERF_TRACE(); + return; + } + + /* ISHARP_NOISEDET_THRESHOLD */ + REG_SET_2(ISHARP_NOISEDET_THRESHOLD, 0, + ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold, ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); - /* ISHARP_NOISEDET_UTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); - /* ISHARP_NOISEDET_DTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_DTHRE, 
scl_data->dscl_prog_data.isharp_noise_det.dthreshold); - /* ISHARP_NOISEDET_PWL_START_IN */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, - ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in); - /* ISHARP_NOISEDET_PWL_END_IN */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, - ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in); - /* ISHARP_NOISEDET_PWL_SLOPE */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, + + /* ISHARP_NOISE_GAIN_PWL */ + REG_SET_3(ISHARP_NOISE_GAIN_PWL, 0, + ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in, + ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in, ISHARP_NOISEDET_PWL_SLOPE, scl_data->dscl_prog_data.isharp_noise_det.pwl_slope); - /* ISHARP_LBA_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode); + /* ISHARP_LBA: IN_SEG, BASE_SEG, SLOPE_SEG */ - REG_UPDATE(ISHARP_LBA_PWL_SEG0, - ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG0, - ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG0, + REG_SET_3(ISHARP_LBA_PWL_SEG0, 0, + ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0], + ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0], ISHARP_LBA_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.isharp_lba.slope_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, - ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, - ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, + REG_SET_3(ISHARP_LBA_PWL_SEG1, 0, + ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1], + ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1], ISHARP_LBA_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.isharp_lba.slope_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, - ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, - ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, + REG_SET_3(ISHARP_LBA_PWL_SEG2, 0, + ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2], + ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2], ISHARP_LBA_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.isharp_lba.slope_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, - ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, - ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, + REG_SET_3(ISHARP_LBA_PWL_SEG3, 0, + ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3], + ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3], ISHARP_LBA_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.isharp_lba.slope_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, - ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, - ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, + REG_SET_3(ISHARP_LBA_PWL_SEG4, 0, + ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4], + ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4], ISHARP_LBA_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.isharp_lba.slope_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG5, - 
ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5]); - REG_UPDATE(ISHARP_LBA_PWL_SEG5, + REG_SET_2(ISHARP_LBA_PWL_SEG5, 0, + ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5], ISHARP_LBA_PWL_BASE_SEG5, scl_data->dscl_prog_data.isharp_lba.base_seg[5]); - /* ISHARP_FMT_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode); - /* ISHARP_FMT_NORM */ - REG_UPDATE(ISHARP_MODE, - ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); /* ISHARP_DELTA_LUT */ dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); - /* ISHARP_NLDELTA_SCLIP_EN_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p); - /* ISHARP_NLDELTA_SCLIP_PIVOT_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p); - /* ISHARP_NLDELTA_SCLIP_SLOPE_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p); - /* ISHARP_NLDELTA_SCLIP_EN_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n); - /* ISHARP_NLDELTA_SCLIP_PIVOT_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n); - /* ISHARP_NLDELTA_SCLIP_SLOPE_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, + + /* ISHARP_NLDELTA_SOFT_CLIP */ + REG_SET_6(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p, + ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p, + ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p, + ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n, + ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n, ISHARP_NLDELTA_SCLIP_SLOPE_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_n); /* Blur and Scale Coefficients - SCL_COEF_RAM_TAP_SELECT */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index a05a2209a44e..d3808c49d298 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -76,6 +76,9 @@ #include "dml2/dml2_wrapper.h" +#include "spl/dc_spl_scl_easf_filters.h" +#include "spl/dc_spl_isharp_filters.h" + #define DC_LOGGER_INIT(logger) enum dcn401_clk_src_array_id { @@ -2123,6 +2126,10 @@ static bool dcn401_resource_construct( dc->dml2_options.max_segments_per_hubp = 20; dc->dml2_options.det_segment_size = DCN4_01_CRB_SEGMENT_SIZE_KB; + /* SPL */ + spl_init_easf_filter_coeffs(); + spl_init_blur_scale_coeffs(); + return true; create_fail: diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile index 89cad60b1a10..af7eaf839970 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/Makefile +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'spl' sub-component of DAL. # It provides the scaling library interface. 
-SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_isharp_filters.o
+SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o
 
 AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL))
 
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
index e3e20cd86af6..dad38960d34d 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
@@ -5,6 +5,7 @@
 #include "dc_spl.h"
 #include "dc_spl_scl_filters.h"
 #include "dc_spl_isharp_filters.h"
+#include "dc_spl_scl_easf_filters.h"
 
 #define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19))
 #define MIN_VIEWPORT_SIZE 12
@@ -352,6 +353,7 @@ static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out)
 		memset(&spl_out->scl_data.recout, 0,
 			sizeof(struct spl_rect));
 }
+
 /* Calculate scaling ratios */
 static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *spl_out)
 {
@@ -399,7 +401,22 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *
 		spl_out->scl_data.ratios.horz_c, 19);
 	spl_out->scl_data.ratios.vert_c = dc_fixpt_truncate(
 		spl_out->scl_data.ratios.vert_c, 19);
+
+	/*
+	 * Coefficient table and some registers are different based on ratio
+	 * that is output/input. Currently we calculate input/output.
+	 * Store 1/ratio in recip_ratio for those lookups
+	 */
+	spl_out->scl_data.recip_ratios.horz = dc_fixpt_recip(
+		spl_out->scl_data.ratios.horz);
+	spl_out->scl_data.recip_ratios.vert = dc_fixpt_recip(
+		spl_out->scl_data.ratios.vert);
+	spl_out->scl_data.recip_ratios.horz_c = dc_fixpt_recip(
+		spl_out->scl_data.ratios.horz_c);
+	spl_out->scl_data.recip_ratios.vert_c = dc_fixpt_recip(
+		spl_out->scl_data.ratios.vert_c);
 }
+
 /* Calculate Viewport size */
 static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *spl_out)
 {
@@ -417,6 +434,7 @@ static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *s
 			swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height);
 	}
 }
+
 static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation,
 				      bool horizontal_mirror,
 				      bool *orthogonal_rotation,
@@ -440,6 +458,7 @@ static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation,
 	if (horizontal_mirror)
 		*flip_horz_scan_dir = !*flip_horz_scan_dir;
 }
+
 /*
  * We completely calculate vp offset, size and inits here based entirely on scaling
  * ratios and recout for pixel perfect pipe combine.
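The reciprocal ratios stored above feed the EASF lookups added later in this patch, which are keyed by destination/source rather than SPL's source/destination convention. A minimal worked example under that reading, using the fixed-point helpers this file already relies on; the wrapper function is hypothetical and the values are illustrative:

/* Illustrative only: how a 2:1 downscale looks in both conventions. */
static void example_recip_ratio(void)
{
	struct fixed31_32 ratio = dc_fixpt_from_fraction(2, 1); /* src/dst = 2.0 */
	struct fixed31_32 recip = dc_fixpt_recip(ratio);        /* dst/src = 0.5 */

	/*
	 * recip (0.5 here) is what spl_calculate_scaling_ratios() stores in
	 * scl_data.recip_ratios and what lookups such as spl_get_v_bf3_mode()
	 * consume.
	 */
	(void)recip;
}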
@@ -509,8 +528,8 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir,
 
 static bool spl_is_yuv420(enum spl_pixel_format format)
 {
-	if ((format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN) &&
-		(format <= SPL_PIXEL_FORMAT_VIDEO_END))
+	if ((format >= SPL_PIXEL_FORMAT_420BPP8) &&
+		(format <= SPL_PIXEL_FORMAT_420BPP10))
 		return true;
 
 	return false;
@@ -569,11 +588,11 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_
 
 	case CHROMA_COSITING_LEFT:
 		init_adj_h = dc_fixpt_zero;
-		init_adj_v = dc_fixpt_from_fraction(sign, 2);
+		init_adj_v = dc_fixpt_from_fraction(sign, 4);
 		break;
 	case CHROMA_COSITING_NONE:
-		init_adj_h = dc_fixpt_from_fraction(sign, 2);
-		init_adj_v = dc_fixpt_from_fraction(sign, 2);
+		init_adj_h = dc_fixpt_from_fraction(sign, 4);
+		init_adj_v = dc_fixpt_from_fraction(sign, 4);
 		break;
 	case CHROMA_COSITING_TOPLEFT:
 	default:
@@ -639,6 +658,7 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_
 		spl_out->scl_data.viewport_c.x += src.x / vpc_div;
 		spl_out->scl_data.viewport_c.y += src.y / vpc_div;
 	}
+
 static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout)
 {
 	/*
@@ -665,6 +685,7 @@ static void spl_clamp_viewport(struct spl_rect *viewport)
 	if (viewport->width < MIN_VIEWPORT_SIZE)
 		viewport->width = MIN_VIEWPORT_SIZE;
 }
+
 static bool spl_dscl_is_420_format(enum spl_pixel_format format)
 {
 	if (format == SPL_PIXEL_FORMAT_420BPP8 ||
@@ -673,6 +694,7 @@ static bool spl_dscl_is_420_format(enum spl_pixel_format format)
 	else
 		return false;
 }
+
 static bool spl_dscl_is_video_format(enum spl_pixel_format format)
 {
 	if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN
@@ -681,17 +703,21 @@ static bool spl_dscl_is_video_format(enum spl_pixel_format format)
 	else
 		return false;
 }
+
 static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in,
-		const struct spl_scaler_data *data)
+		const struct spl_scaler_data *data,
+		bool enable_isharp, bool enable_easf)
 {
 	const long long one = dc_fixpt_one.value;
 	enum spl_pixel_format pixel_format = spl_in->basic_in.format;
 
+	/* Bypass if ratio is 1:1 with no ISHARP or force scale on */
 	if (data->ratios.horz.value == one
 			&& data->ratios.vert.value == one
 			&& data->ratios.horz_c.value == one
 			&& data->ratios.vert_c.value == one
-			&& !spl_in->basic_out.always_scale)
+			&& !spl_in->basic_out.always_scale
+			&& !enable_isharp)
 		return SCL_MODE_SCALING_444_BYPASS;
 
 	if (!spl_dscl_is_420_format(pixel_format)) {
@@ -700,65 +726,200 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in,
 		else
 			return SCL_MODE_SCALING_444_RGB_ENABLE;
 	}
-	if (data->ratios.horz.value == one && data->ratios.vert.value == one)
-		return SCL_MODE_SCALING_420_LUMA_BYPASS;
-	if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
-		return SCL_MODE_SCALING_420_CHROMA_BYPASS;
+
+	/* Bypass YUV if at 1:1 with no ISHARP or if doing 2:1 YUV
+	 * downscale without EASF
+	 */
+	if ((!enable_isharp) && (!enable_easf)) {
+		if (data->ratios.horz.value == one && data->ratios.vert.value == one)
+			return SCL_MODE_SCALING_420_LUMA_BYPASS;
+		if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
+			return SCL_MODE_SCALING_420_CHROMA_BYPASS;
+	}
 
 	return SCL_MODE_SCALING_420_YCBCR_ENABLE;
 }
+
+static bool spl_choose_lls_policy(enum spl_pixel_format format,
+	enum spl_transfer_func_type tf_type,
+	enum spl_transfer_func_predefined tf_predefined_type,
+	enum linear_light_scaling *lls_pref)
+{
+	if (spl_is_yuv420(format)) {
+		*lls_pref = LLS_PREF_NO;
+		if ((tf_type == SPL_TF_TYPE_PREDEFINED) ||
+			(tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS))
+			return true;
+	} else { /* RGB or YUV444 */
+		if ((tf_type == SPL_TF_TYPE_PREDEFINED) ||
+			(tf_type == SPL_TF_TYPE_BYPASS)) {
+			*lls_pref = LLS_PREF_YES;
+			return true;
+		}
+	}
+	*lls_pref = LLS_PREF_NO;
+	return false;
+}
+
+/* Enable EASF? */
+static bool enable_easf(struct spl_in *spl_in, struct spl_out *spl_out)
+{
+	int vratio = 0;
+	int hratio = 0;
+	bool skip_easf = false;
+	bool lls_enable_easf = true;
+
+	/*
+	 * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer
+	 * function to determine whether to use LINEAR or NONLINEAR scaling
+	 */
+	if (spl_in->lls_pref == LLS_PREF_DONT_CARE)
+		lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format,
+			spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type,
+			&spl_in->lls_pref);
+
+	vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert);
+	hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz);
+
+	if (!lls_enable_easf || spl_in->disable_easf)
+		skip_easf = true;
+
+	/*
+	 * No EASF support for downscaling > 2:1
+	 * EASF support for upscaling or downscaling up to 2:1
+	 */
+	if ((vratio > 2) || (hratio > 2))
+		skip_easf = true;
+
+	/* Check for linear scaling or EASF preferred */
+	if (spl_in->lls_pref != LLS_PREF_YES && !spl_in->prefer_easf)
+		skip_easf = true;
+
+	return skip_easf;
+}
+
+/* Check if video is in fullscreen mode */
+static bool spl_is_video_fullscreen(struct spl_in *spl_in, struct spl_out *spl_out)
+{
+	if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen)
+		return true;
+	return false;
+}
+
+static bool spl_get_isharp_en(struct spl_in *spl_in,
+	struct spl_out *spl_out)
+{
+	bool enable_isharp = false;
+	int vratio = 0;
+	int hratio = 0;
+	struct spl_taps taps = spl_out->scl_data.taps;
+	bool fullscreen = spl_is_video_fullscreen(spl_in, spl_out);
+
+	vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert);
+	hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz);
+
+	/* Return if adaptive sharpness is disabled */
+	if (spl_in->adaptive_sharpness.enable == false)
+		return enable_isharp;
+
+	/* No iSHARP support for downscaling */
+	if (vratio > 1 || hratio > 1)
+		return enable_isharp;
+
+	// Scaling is up to 1:1 (no scaling) or upscaling
+
+	/*
+	 * Apply sharpness to all RGB surfaces and to
+	 * NV12/P010 surfaces if in fullscreen
+	 */
+	if (spl_is_yuv420(spl_in->basic_in.format) && !fullscreen)
+		return enable_isharp;
+
+	/*
+	 * Apply sharpness if supports horizontal taps 4,6 AND
+	 * vertical taps 3, 4, 6
+	 */
+	if ((taps.h_taps == 4 || taps.h_taps == 6) &&
+	    (taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6))
+		enable_isharp = true;
+
+	return enable_isharp;
+}
+
 /* Calculate optimal number of taps */
 static bool spl_get_optimal_number_of_taps(
 	int max_downscale_src_width, struct spl_in *spl_in, struct spl_out *spl_out,
-	const struct spl_taps *in_taps)
+	const struct spl_taps *in_taps, bool *enable_easf_v, bool *enable_easf_h,
+	bool *enable_isharp)
 {
 	int num_part_y, num_part_c;
 	int max_taps_y, max_taps_c;
 	int min_taps_y, min_taps_c;
 	enum lb_memory_config lb_config;
+	bool skip_easf = false;
 
 	if (spl_out->scl_data.viewport.width > spl_out->scl_data.h_active &&
 		max_downscale_src_width != 0 &&
 		spl_out->scl_data.viewport.width > max_downscale_src_width)
 		return false;
+
+	/* Check if we are using EASF or not */
+	skip_easf = enable_easf(spl_in, spl_out);
+
 	/*
 	 * Set default taps if none are provided
 	 * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling
	 * taps = 4 for upscaling
 	 */
-	if (in_taps->h_taps == 0) {
-		if
(dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) - spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz), 8); - else - spl_out->scl_data.taps.h_taps = 4; - } else - spl_out->scl_data.taps.h_taps = in_taps->h_taps; - if (in_taps->v_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) - spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert, 2)), 8); - else - spl_out->scl_data.taps.v_taps = 4; - } else - spl_out->scl_data.taps.v_taps = in_taps->v_taps; - if (in_taps->v_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) - spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert_c, 2)), 8); - else - spl_out->scl_data.taps.v_taps_c = 4; - } else - spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; - if (in_taps->h_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) - spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c), 8); + if (skip_easf) { + if (in_taps->h_taps == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) + spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil( + spl_out->scl_data.ratios.horz), 8); + else + spl_out->scl_data.taps.h_taps = 4; + } else + spl_out->scl_data.taps.h_taps = in_taps->h_taps; + if (in_taps->v_taps == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) + spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( + spl_out->scl_data.ratios.vert, 2)), 8); + else + spl_out->scl_data.taps.v_taps = 4; + } else + spl_out->scl_data.taps.v_taps = in_taps->v_taps; + if (in_taps->v_taps_c == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) + spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( + spl_out->scl_data.ratios.vert_c, 2)), 8); + else + spl_out->scl_data.taps.v_taps_c = 4; + } else + spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; + if (in_taps->h_taps_c == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) + spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil( + spl_out->scl_data.ratios.horz_c), 8); + else + spl_out->scl_data.taps.h_taps_c = 4; + } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) + /* Only 1 and even h_taps_c are supported by hw */ + spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; else + spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; + } else { + if (spl_is_yuv420(spl_in->basic_in.format)) { + spl_out->scl_data.taps.h_taps = 6; + spl_out->scl_data.taps.v_taps = 6; spl_out->scl_data.taps.h_taps_c = 4; - } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) - /* Only 1 and even h_taps_c are supported by hw */ - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; - else - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; + spl_out->scl_data.taps.v_taps_c = 4; + } else { /* RGB */ + spl_out->scl_data.taps.h_taps = 6; + spl_out->scl_data.taps.v_taps = 6; + spl_out->scl_data.taps.h_taps_c = 6; + spl_out->scl_data.taps.v_taps_c = 6; + } + } /*Ensure we can support the requested number of vtaps*/ min_taps_y = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); @@ -794,43 +955,103 @@ static bool spl_get_optimal_number_of_taps( if (spl_out->scl_data.taps.v_taps_c > max_taps_c) spl_out->scl_data.taps.v_taps_c = max_taps_c; - if (spl_in->prefer_easf) { - // EASF can be enabled only for taps 3,4,6 - // If optimal no of taps is 5, then set it to 4 - // If optimal no of taps is 7 or 8, then set it to 6 + + if 
(!skip_easf) { + /* + * RGB ( L + NL ) and Linear HDR support 6x6, 6x4, 6x3, 4x4, 4x3 + * NL YUV420 only supports 6x6, 6x4 for Y and 4x4 for UV + * + * If LB does not support 3, 4, or 6 taps, then disable EASF_V + * and only enable EASF_H. So for RGB, support 6x2, 4x2 + * and for NL YUV420, support 6x2 for Y and 4x2 for UV + * + * All other cases, have to disable EASF_V and EASF_H + * + * If optimal no of taps is 5, then set it to 4 + * If optimal no of taps is 7 or 8, then fine since max tap is 6 + * + */ if (spl_out->scl_data.taps.v_taps == 5) spl_out->scl_data.taps.v_taps = 4; - if (spl_out->scl_data.taps.v_taps == 7 || spl_out->scl_data.taps.v_taps == 8) - spl_out->scl_data.taps.v_taps = 6; if (spl_out->scl_data.taps.v_taps_c == 5) spl_out->scl_data.taps.v_taps_c = 4; - if (spl_out->scl_data.taps.v_taps_c == 7 || spl_out->scl_data.taps.v_taps_c == 8) - spl_out->scl_data.taps.v_taps_c = 6; if (spl_out->scl_data.taps.h_taps == 5) spl_out->scl_data.taps.h_taps = 4; - if (spl_out->scl_data.taps.h_taps == 7 || spl_out->scl_data.taps.h_taps == 8) - spl_out->scl_data.taps.h_taps = 6; if (spl_out->scl_data.taps.h_taps_c == 5) spl_out->scl_data.taps.h_taps_c = 4; - if (spl_out->scl_data.taps.h_taps_c == 7 || spl_out->scl_data.taps.h_taps_c == 8) - spl_out->scl_data.taps.h_taps_c = 6; + if (spl_is_yuv420(spl_in->basic_in.format)) { + if ((spl_out->scl_data.taps.h_taps <= 4) || + (spl_out->scl_data.taps.h_taps_c <= 3)) { + *enable_easf_v = false; + *enable_easf_h = false; + } else if ((spl_out->scl_data.taps.v_taps <= 3) || + (spl_out->scl_data.taps.v_taps_c <= 3)) { + *enable_easf_v = false; + *enable_easf_h = true; + } else { + *enable_easf_v = true; + *enable_easf_h = true; + } + ASSERT((spl_out->scl_data.taps.v_taps > 1) && + (spl_out->scl_data.taps.v_taps_c > 1)); + } else { /* RGB */ + if (spl_out->scl_data.taps.h_taps <= 3) { + *enable_easf_v = false; + *enable_easf_h = false; + } else if (spl_out->scl_data.taps.v_taps < 3) { + *enable_easf_v = false; + *enable_easf_h = true; + } else { + *enable_easf_v = true; + *enable_easf_h = true; + } + ASSERT(spl_out->scl_data.taps.v_taps > 1); + } + } else { + *enable_easf_v = false; + *enable_easf_h = false; } // end of if prefer_easf - if (!spl_in->basic_out.always_scale) { - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) + + /* Sharpener requires scaler to be enabled, including for 1:1 + * Check if ISHARP can be enabled + * If ISHARP is not enabled, for 1:1, set taps to 1 and disable + * EASF + * For case of 2:1 YUV where chroma is 1:1, set taps to 1 if + * EASF is not enabled + */ + + *enable_isharp = spl_get_isharp_en(spl_in, spl_out); + if (!*enable_isharp && !spl_in->basic_out.always_scale) { + if ((IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) && + (IDENTITY_RATIO(spl_out->scl_data.ratios.vert))) { spl_out->scl_data.taps.h_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert)) spl_out->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) - spl_out->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) - spl_out->scl_data.taps.v_taps_c = 1; + + if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) + spl_out->scl_data.taps.h_taps_c = 1; + + if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) + spl_out->scl_data.taps.v_taps_c = 1; + + *enable_easf_v = false; + *enable_easf_h = false; + } else { + if ((!*enable_easf_h) && + (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c))) + spl_out->scl_data.taps.h_taps_c = 1; + + if ((!*enable_easf_v) && + 
(IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c))) + spl_out->scl_data.taps.v_taps_c = 1; + } } return true; } + static void spl_set_black_color_data(enum spl_pixel_format format, struct scl_black_color *scl_black_color) { @@ -890,62 +1111,10 @@ static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->taps.v_taps_c = scl_data->taps.v_taps_c - 1; dscl_prog_data->taps.h_taps_c = scl_data->taps.h_taps_c - 1; } -static const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) -{ - if (taps == 8) - return spl_get_filter_8tap_64p(ratio); - else if (taps == 7) - return spl_get_filter_7tap_64p(ratio); - else if (taps == 6) - return spl_get_filter_6tap_64p(ratio); - else if (taps == 5) - return spl_get_filter_5tap_64p(ratio); - else if (taps == 4) - return spl_get_filter_4tap_64p(ratio); - else if (taps == 3) - return spl_get_filter_3tap_64p(ratio); - else if (taps == 2) - return spl_get_filter_2tap_64p(); - else if (taps == 1) - return NULL; - else { - /* should never happen, bug */ - return NULL; - } -} -static void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps, data->ratios.horz); - dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps, data->ratios.vert); - dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps_c, data->ratios.horz_c); - dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps_c, data->ratios.vert_c); -} - -static const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) -{ - if ((taps == 3) || (taps == 4) || (taps == 6)) - return spl_get_filter_isharp_bs_4tap_64p(); - else { - /* should never happen, bug */ - return NULL; - } -} -static void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_blur_scale_h = spl_dscl_get_blur_scale_coeffs_64p( - data->taps.h_taps); - dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p( - data->taps.v_taps); -} /* Populate dscl prog data structure from scaler data calculated by SPL */ -static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out, + bool enable_easf_v, bool enable_easf_h, bool enable_isharp) { struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; @@ -953,6 +1122,8 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou struct scl_black_color *scl_black_color = &dscl_prog_data->scl_black_color; + bool enable_easf = enable_easf_v || enable_easf_h; + // Set values for recout dscl_prog_data->recout = spl_out->scl_data.recout; // Set values for MPC Size @@ -960,7 +1131,8 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou dscl_prog_data->mpc_size.height = spl_out->scl_data.v_active; // SCL_MODE - Set SCL_MODE data - dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data); + dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data, enable_isharp, + enable_easf); // SCL_BLACK_COLOR spl_set_black_color_data(spl_in->basic_in.format, scl_black_color); @@ -975,99 +1147,97 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou // Set viewport_c dscl_prog_data->viewport_c = spl_out->scl_data.viewport_c; // Set filters data - spl_set_filters_data(dscl_prog_data, data); + 
spl_set_filters_data(dscl_prog_data, data, enable_easf_v, enable_easf_h); } -/* Enable EASF ?*/ -static bool enable_easf(int scale_ratio, int taps, - enum linear_light_scaling lls_pref, bool prefer_easf) -{ - // Is downscaling > 6:1 ? - if (scale_ratio > 6) { - // END - No EASF support for downscaling > 6:1 - return false; - } - // Is upscaling or downscaling up to 2:1? - if (scale_ratio <= 2) { - // Is linear scaling or EASF preferred? - if (lls_pref == LLS_PREF_YES || prefer_easf) { - // LB support taps 3, 4, 6 - if (taps == 3 || taps == 4 || taps == 6) { - // END - EASF supported - return true; - } - } - } - // END - EASF not supported - return false; -} -/* Set EASF data */ -static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, - bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, - enum spl_pixel_format format) -{ - if (spl_is_yuv420(format)) /* TODO: 0 = RGB, 1 = YUV */ - dscl_prog_data->easf_matrix_mode = 1; - else - dscl_prog_data->easf_matrix_mode = 0; +/* Set EASF data */ +static void spl_set_easf_data(struct spl_out *spl_out, bool enable_easf_v, + bool enable_easf_h, enum linear_light_scaling lls_pref, + enum spl_pixel_format format, enum system_setup setup) +{ + struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; if (enable_easf_v) { dscl_prog_data->easf_v_en = true; dscl_prog_data->easf_v_ring = 0; - dscl_prog_data->easf_v_sharp_factor = 1; + dscl_prog_data->easf_v_sharp_factor = 0; dscl_prog_data->easf_v_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - dscl_prog_data->easf_v_bf3_mode = 2; // 2-bit, BF3 chroma mode correction calculation mode - dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control + /* 2-bit, BF3 chroma mode correction calculation mode */ + dscl_prog_data->easf_v_bf3_mode = spl_get_v_bf3_mode( + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ minCoef ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt = - 0x9F00;// FP1.5.10 [minCoef] (-0.036109167214271) + spl_get_3tap_dntilt_uptilt_offset(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTiltMaxVal ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt_max = - 0x24FE; // FP1.5.10 [upTiltMaxVal] ( 0.904556445553545) + spl_get_3tap_uptilt_maxval(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ dnTiltSlope ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_slope = - 0x3940; // FP1.5.10 [dnTiltSlope] ( 0.910488988173371) + spl_get_3tap_dntilt_slope(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt1Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope = - 0x359C; // FP1.5.10 [upTilt1Slope] ( 0.125620179040899) + spl_get_3tap_uptilt1_slope(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt2Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope = - 0x359C; // FP1.5.10 [upTilt2Slope] ( 0.006786817723568) + spl_get_3tap_uptilt2_slope(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt2Offset ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset = - 0x9F00; // FP1.5.10 [upTilt2Offset] (-0.006139059716651) + spl_get_3tap_uptilt2_offset(spl_out->scl_data.taps.v_taps, + 
spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_v_ringest_eventap_reduceg1 = - 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + spl_get_reducer_gain4(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_v_ringest_eventap_reduceg2 = - 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + spl_get_reducer_gain6(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain1 = - 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + spl_get_gainRing4(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain2 = - 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + spl_get_gainRing6(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 1 dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_minb = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + 
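/*
 * A note on the hex immediates used throughout these tables: each signed
 * fixed-point field is stored as the two's complement of its value in the
 * field's bit width, which follows from the formats named in the comments
 * (S0.10 and S7.3 are 11-bit fields, S0.6 is 7 bits). For example,
 * -512 -> 0x800 - 0x200 = 0x600 and -20 -> 0x7EC in 11 bits, and
 * -50 -> 0x4E in 7 bits. A hypothetical helper (not part of this patch,
 * assuming the usual fixed-width integer types) showing the encoding:
 */
static inline uint32_t encode_signed_field(int32_t value, unsigned int width)
{
	/* Keep only the low 'width' bits of the two's-complement value. */
	return (uint32_t)value & ((1u << width) - 1);
}
/*
 * encode_signed_field(-512, 11) == 0x600   (S0.10 PWL input point)
 * encode_signed_field(-56,  11) == 0x7C8   (S7.3  PWL slope)
 * encode_signed_field(-50,  7)  == 0x4E    (S0.6  BF3 PWL base)
 */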
dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control + + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1088,13 +1258,41 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } else { + dscl_prog_data->easf_v_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 14; // U2.2, Rate Of Change control + + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL 
Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1113,11 +1311,11 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } } else dscl_prog_data->easf_v_en = false; @@ -1125,52 +1323,63 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, if (enable_easf_h) { dscl_prog_data->easf_h_en = true; dscl_prog_data->easf_h_ring = 0; - dscl_prog_data->easf_h_sharp_factor = 1; + dscl_prog_data->easf_h_sharp_factor = 0; dscl_prog_data->easf_h_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_h_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - dscl_prog_data->easf_h_bf3_mode = - 2; // 2-bit, BF3 chroma mode correction calculation mode - dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control + /* 2-bit, BF3 chroma mode correction calculation mode */ + dscl_prog_data->easf_h_bf3_mode 
= spl_get_h_bf3_mode( + spl_out->scl_data.recip_ratios.horz); + /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_h_ringest_eventap_reduceg1 = - 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + spl_get_reducer_gain4(spl_out->scl_data.taps.h_taps, + spl_out->scl_data.recip_ratios.horz); + /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_h_ringest_eventap_reduceg2 = - 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + spl_get_reducer_gain6(spl_out->scl_data.taps.h_taps, + spl_out->scl_data.recip_ratios.horz); + /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain1 = - 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + spl_get_gainRing4(spl_out->scl_data.taps.h_taps, + spl_out->scl_data.recip_ratios.horz); + /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain2 = - 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + spl_get_gainRing6(spl_out->scl_data.taps.h_taps, + spl_out->scl_data.recip_ratios.horz); dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1 dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_minb = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==1 - dscl_prog_data->easf_h_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + 
dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control + + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1188,12 +1397,40 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } else { + dscl_prog_data->easf_h_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 14; // U2.2, Rate Of Change control + + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + 
dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1211,25 +1448,36 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } // if (lls_pref == LLS_PREF_YES) } else dscl_prog_data->easf_h_en = false; if (lls_pref == LLS_PREF_YES) { dscl_prog_data->easf_ltonl_en = 1; // Linear input - dscl_prog_data->easf_matrix_c0 = - 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) - dscl_prog_data->easf_matrix_c1 = - 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) - dscl_prog_data->easf_matrix_c2 = - 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) - dscl_prog_data->easf_matrix_c3 = - 0x0; // fp1.5.10, C3 coefficient + if (setup == HDR_L) { + dscl_prog_data->easf_matrix_c0 = + 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) + dscl_prog_data->easf_matrix_c1 = + 
0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) + dscl_prog_data->easf_matrix_c2 = + 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient + } else { // SDR_L + dscl_prog_data->easf_matrix_c0 = + 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720) + dscl_prog_data->easf_matrix_c1 = + 0x55DC; // fp1.5.10, C1 coefficient (LN_rec709: 0.7152 * (2^14)/125 = 93.74269440) + dscl_prog_data->easf_matrix_c2 = + 0x48BB; // fp1.5.10, C2 coefficient (LN_rec709: 0.0722 * (2^14)/125 = 9.46339840) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient + } } else { dscl_prog_data->easf_ltonl_en = 0; // Non-Linear input dscl_prog_data->easf_matrix_c0 = @@ -1241,27 +1489,43 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_matrix_c3 = 0x0; // fp1.5.10, C3 coefficient } + + if (spl_is_yuv420(format)) { /* TODO: 0 = RGB, 1 = YUV */ + dscl_prog_data->easf_matrix_mode = 1; + /* + * 2-bit, BF3 chroma mode correction calculation mode + * Needs to be disabled for YUV420 mode + * Override lookup value + */ + dscl_prog_data->easf_v_bf3_mode = 0; + dscl_prog_data->easf_h_bf3_mode = 0; + } else + dscl_prog_data->easf_matrix_mode = 0; + } + /*Set isharp noise detection */ -static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data) +static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) { // ISHARP_NOISEDET_MODE // 0: 3x5 as VxH // 1: 4x5 as VxH // 2: // 3: 5x5 as VxH - if (dscl_prog_data->taps.v_taps == 6) - dscl_prog_data->isharp_noise_det.mode = 3; // ISHARP_NOISEDET_MODE - else if (dscl_prog_data->taps.h_taps == 4) - dscl_prog_data->isharp_noise_det.mode = 1; // ISHARP_NOISEDET_MODE - else if (dscl_prog_data->taps.h_taps == 3) - dscl_prog_data->isharp_noise_det.mode = 0; // ISHARP_NOISEDET_MODE + if (data->taps.v_taps == 6) + dscl_prog_data->isharp_noise_det.mode = 3; + else if (data->taps.v_taps == 4) + dscl_prog_data->isharp_noise_det.mode = 1; + else if (data->taps.v_taps == 3) + dscl_prog_data->isharp_noise_det.mode = 0; }; /* Set Sharpener data */ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, - const struct spl_scaler_data *data) + const struct spl_scaler_data *data, struct fixed31_32 ratio, + enum system_setup setup) { /* Turn off sharpener if not required */ if (!enable_isharp) { @@ -1270,10 +1534,12 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } dscl_prog_data->isharp_en = 1; // ISHARP_EN - dscl_prog_data->isharp_noise_det.enable = 1; // ISHARP_NOISEDET_EN // Set ISHARP_NOISEDET_MODE if htaps = 6-tap - if (dscl_prog_data->taps.h_taps == 6) - spl_set_isharp_noise_det_mode(dscl_prog_data); // ISHARP_NOISEDET_MODE + if (data->taps.h_taps == 6) { + dscl_prog_data->isharp_noise_det.enable = 1; /* ISHARP_NOISEDET_EN */ + spl_set_isharp_noise_det_mode(dscl_prog_data, data); /* ISHARP_NOISEDET_MODE */ + } else + dscl_prog_data->isharp_noise_det.enable = 0; // ISHARP_NOISEDET_EN // Program noise detection threshold dscl_prog_data->isharp_noise_det.uthreshold = 24; // ISHARP_NOISEDET_UTHRE dscl_prog_data->isharp_noise_det.dthreshold = 4; // ISHARP_NOISEDET_DTHRE @@ -1282,50 +1548,67 @@ static void spl_set_isharp_data(struct dscl_prog_data 
*dscl_prog_data, dscl_prog_data->isharp_noise_det.pwl_end_in = 13; // ISHARP_NOISEDET_PWL_END_IN dscl_prog_data->isharp_noise_det.pwl_slope = 1623; // ISHARP_NOISEDET_PWL_SLOPE - if ((lls_pref == LLS_PREF_NO) && !spl_is_yuv420(format)) /* ISHARP_FMT_MODE */ + if (lls_pref == LLS_PREF_NO) /* ISHARP_FMT_MODE */ dscl_prog_data->isharp_fmt.mode = 1; else dscl_prog_data->isharp_fmt.mode = 0; dscl_prog_data->isharp_fmt.norm = 0x3C00; // ISHARP_FMT_NORM dscl_prog_data->isharp_lba.mode = 0; // ISHARP_LBA_MODE - // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 - dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 - dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 - dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[2] = -20; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 - dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 - dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 - dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format - switch (adp_sharpness.sharpness) { - case SHARPNESS_LOW: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_0p5x(); - break; - case SHARPNESS_MID: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_1p0x(); - break; - case SHARPNESS_HIGH: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_2p0x(); - break; - default: - BREAK_TO_DEBUGGER(); + if (setup == SDR_L) { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 62; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 130; // ISHARP LBA PWL for Seg 1. 
INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 312; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x1D9; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -39 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 520; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 520; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 520; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + } else { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x1EC; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -20 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. 
SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format } + spl_build_isharp_1dlut_from_reference_curve(ratio, setup); + dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut( + adp_sharpness.sharpness); + // Program the nldelta soft clip values if (lls_pref == LLS_PREF_YES) { dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */ @@ -1346,59 +1629,6 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, // Set the values as per lookup table spl_set_blur_scale_data(dscl_prog_data, data); } -static bool spl_get_isharp_en(struct adaptive_sharpness adp_sharpness, - int vscale_ratio, int hscale_ratio, struct spl_taps taps, - enum spl_pixel_format format) -{ - bool enable_isharp = false; - - if (adp_sharpness.enable == false) - return enable_isharp; // Return if adaptive sharpness is disabled - // Is downscaling ? - if (vscale_ratio > 1 || hscale_ratio > 1) { - // END - No iSHARP support for downscaling - return enable_isharp; - } - // Scaling is up to 1:1 (no scaling) or upscaling - - /* Only apply sharpness to NV12 and not P010 */ - if (format != SPL_PIXEL_FORMAT_420BPP8) - return enable_isharp; - - // LB support horizontal taps 4,6 or vertical taps 3, 4, 6 - if (taps.h_taps == 4 || taps.h_taps == 6 || - taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6) { - // END - iSHARP supported - enable_isharp = true; - } - return enable_isharp; -} - -static bool spl_choose_lls_policy(enum spl_pixel_format format, - enum spl_transfer_func_type tf_type, - enum spl_transfer_func_predefined tf_predefined_type, - enum linear_light_scaling *lls_pref) -{ - if (spl_is_yuv420(format)) { - *lls_pref = LLS_PREF_NO; - if ((tf_type == SPL_TF_TYPE_PREDEFINED) || (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) - return true; - } else { /* RGB or YUV444 */ - if (tf_type == SPL_TF_TYPE_PREDEFINED) { - if ((tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG) || - (tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG12)) - *lls_pref = LLS_PREF_NO; - else - *lls_pref = LLS_PREF_YES; - return true; - } else if (tf_type == SPL_TF_TYPE_BYPASS) { - *lls_pref = LLS_PREF_YES; - return true; - } - } - *lls_pref = LLS_PREF_NO; - return false; -} /* Calculate scaler parameters */ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) @@ -1406,8 +1636,13 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool res = false; bool enable_easf_v = false; bool enable_easf_h = false; - bool lls_enable_easf = true; + int vratio = 0; + int hratio = 0; const struct spl_scaler_data *data = &spl_out->scl_data; + struct fixed31_32 isharp_scale_ratio; + enum system_setup setup; + bool enable_isharp = false; + // All SPL calls /* recout calculation */ /* depends on h_active */ @@ -1419,7 +1654,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) res = spl_get_optimal_number_of_taps( spl_in->basic_out.max_downscale_src_width, spl_in, - spl_out, &spl_in->scaling_quality); + spl_out, &spl_in->scaling_quality, &enable_easf_v, + &enable_easf_h, &enable_isharp); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario @@ -1434,37 +1670,33 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out 
*spl_out) if (!res) return res; - /* - * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer - * function to determine whether to use LINEAR or NONLINEAR scaling - */ - if (spl_in->lls_pref == LLS_PREF_DONT_CARE) - lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, - spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, - &spl_in->lls_pref); - // Save all calculated parameters in dscl_prog_data structure to program hw registers - spl_set_dscl_prog_data(spl_in, spl_out); + spl_set_dscl_prog_data(spl_in, spl_out, enable_easf_v, enable_easf_h, enable_isharp); - int vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - int hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); - if (!lls_enable_easf || spl_in->disable_easf) { - enable_easf_v = false; - enable_easf_h = false; + if (spl_in->lls_pref == LLS_PREF_YES) { + if (spl_in->is_hdr_on) + setup = HDR_L; + else + setup = SDR_L; } else { - /* Enable EASF on vertical? */ - enable_easf_v = enable_easf(vratio, spl_out->scl_data.taps.v_taps, spl_in->lls_pref, spl_in->prefer_easf); - /* Enable EASF on horizontal? */ - enable_easf_h = enable_easf(hratio, spl_out->scl_data.taps.h_taps, spl_in->lls_pref, spl_in->prefer_easf); + if (spl_in->is_hdr_on) + setup = HDR_NL; + else + setup = SDR_NL; } // Set EASF - spl_set_easf_data(spl_out->dscl_prog_data, enable_easf_v, enable_easf_h, spl_in->lls_pref, - spl_in->basic_in.format); + spl_set_easf_data(spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, + spl_in->basic_in.format, setup); // Set iSHARP - bool enable_isharp = spl_get_isharp_en(spl_in->adaptive_sharpness, vratio, hratio, - spl_out->scl_data.taps, spl_in->basic_in.format); + vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + if (vratio <= hratio) + isharp_scale_ratio = spl_out->scl_data.recip_ratios.vert; + else + isharp_scale_ratio = spl_out->scl_data.recip_ratios.horz; + spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, - spl_in->lls_pref, spl_in->basic_in.format, data); + spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c new file mode 100644 index 000000000000..99238644e0a1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dc_spl_filters.h" + +void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, + uint16_t *s1_12_filter, int num_taps) +{ + int num_entries = NUM_PHASES_COEFF * num_taps; + int i; + + for (i = 0; i < num_entries; i++) + *(s1_12_filter + i) = *(s1_10_filter + i) * 4; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h new file mode 100644 index 000000000000..20439cdbdb10 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. 
*/ + +#ifndef __DC_SPL_FILTERS_H__ +#define __DC_SPL_FILTERS_H__ + +#include "dc_spl_types.h" + +#define NUM_PHASES_COEFF 33 + +void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, + uint16_t *s1_12_filter, int num_taps); + +#endif /* __DC_SPL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index 8bc838c7c3c5..a5e544406e91 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -3,6 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. #include "dc_spl_types.h" +#include "dc_spl_filters.h" #include "dc_spl_isharp_filters.h" //======================================== @@ -231,6 +232,53 @@ static const uint32_t filter_isharp_1D_lut_2p0x[32] = { 0x080B0D0E, 0x00020406, }; +//======================================== +// Delta Gain 1DLUT +// LUT content is packed as 4-bytes into one DWORD/entry +// A_start = 0.000000 +// A_end = 10.000000 +// A_gain = 3.000000 +// B_start = 11.000000 +// B_end = 127.000000 +// C_start = 40.000000 +// C_end = 127.000000 +//======================================== +static const uint32_t filter_isharp_1D_lut_3p0x[32] = { +0x03010000, +0x0F0B0805, +0x211E1813, +0x2B292624, +0x3533302E, +0x3E3C3A37, +0x46444240, +0x4D4B4A48, +0x5352504F, +0x59575655, +0x5D5C5B5A, +0x61605F5E, +0x64646362, +0x66666565, +0x68686767, +0x68686868, +0x68686868, +0x67676868, +0x65656666, +0x62636464, +0x5E5F6061, +0x5A5B5C5D, +0x55565759, +0x4F505253, +0x484A4B4D, +0x40424446, +0x373A3C3E, +0x2E303335, +0x2426292B, +0x191B1E21, +0x0D101316, +0x0003060A, +}; + +//======================================== // Wide scaler coefficients //======================================================== // gen_scaler_coeffs.m @@ -285,7 +333,7 @@ static const uint16_t filter_isharp_wide_6tap_64p[198] = { // Blur & Scale LPF // S1.10 //======================================================== -static const uint16_t filter_isharp_bs_4tap_64p[198] = { +static const uint16_t filter_isharp_bs_4tap_in_6_64p[198] = { 0x0000, 0x00E5, 0x0237, 0x00E4, 0x0000, 0x0000, 0x0000, 0x00DE, 0x0237, 0x00EB, 0x0000, 0x0000, 0x0000, 0x00D7, 0x0236, 0x00F2, 0x0001, 0x0000, @@ -320,6 +368,228 @@ static const uint16_t filter_isharp_bs_4tap_64p[198] = { 0x0000, 0x003B, 0x01CF, 0x01C2, 0x0034, 0x0000, 0x0000, 0x0037, 0x01C9, 0x01C9, 0x0037, 0x0000 }; +//======================================================== +// gen_BlurScale_coeffs.m +// 25-Apr-2022 +// 4 +// 64 +// Blur & Scale LPF +// S1.10 +//======================================================== +static const uint16_t filter_isharp_bs_4tap_64p[132] = { +0x00E5, 0x0237, 0x00E4, 0x0000, +0x00DE, 0x0237, 0x00EB, 0x0000, +0x00D7, 0x0236, 0x00F2, 0x0001, +0x00D0, 0x0235, 0x00FA, 0x0001, +0x00C9, 0x0234, 0x0101, 0x0002, +0x00C2, 0x0233, 0x0108, 0x0003, +0x00BB, 0x0232, 0x0110, 0x0003, +0x00B5, 0x0230, 0x0117, 0x0004, +0x00AE, 0x022E, 0x011F, 0x0005, +0x00A8, 0x022C, 0x0126, 0x0006, +0x00A2, 0x022A, 0x012D, 0x0007, +0x009C, 0x0228, 0x0134, 0x0008, +0x0096, 0x0225, 0x013C, 0x0009, +0x0090, 0x0222, 0x0143, 0x000B, +0x008A, 0x021F, 0x014B, 0x000C, +0x0085, 0x021C, 0x0151, 0x000E, +0x007F, 0x0218, 0x015A, 0x000F, +0x007A, 0x0215, 0x0160, 0x0011, +0x0074, 0x0211, 0x0168, 0x0013, +0x006F, 0x020D, 0x016F, 0x0015, +0x006A, 0x0209, 0x0176, 0x0017, +0x0065, 0x0204, 0x017E, 0x0019, +0x0060, 0x0200, 0x0185, 0x001B, +0x005C, 0x01FB, 0x018C, 0x001D, +0x0057, 0x01F6, 0x0193, 0x0020, +0x0053, 0x01F1, 
0x019A, 0x0022, +0x004E, 0x01EC, 0x01A1, 0x0025, +0x004A, 0x01E6, 0x01A8, 0x0028, +0x0046, 0x01E1, 0x01AF, 0x002A, +0x0042, 0x01DB, 0x01B6, 0x002D, +0x003F, 0x01D5, 0x01BB, 0x0031, +0x003B, 0x01CF, 0x01C2, 0x0034, +0x0037, 0x01C9, 0x01C9, 0x0037, +}; +//======================================================== +// gen_BlurScale_coeffs.m +// 09-Jun-2022 +// 3 +// 64 +// Blur & Scale LPF +// S1.10 +//======================================================== +static const uint16_t filter_isharp_bs_3tap_64p[99] = { +0x0200, 0x0200, 0x0000, +0x01F6, 0x0206, 0x0004, +0x01EC, 0x020B, 0x0009, +0x01E2, 0x0211, 0x000D, +0x01D8, 0x0216, 0x0012, +0x01CE, 0x021C, 0x0016, +0x01C4, 0x0221, 0x001B, +0x01BA, 0x0226, 0x0020, +0x01B0, 0x022A, 0x0026, +0x01A6, 0x022F, 0x002B, +0x019C, 0x0233, 0x0031, +0x0192, 0x0238, 0x0036, +0x0188, 0x023C, 0x003C, +0x017E, 0x0240, 0x0042, +0x0174, 0x0244, 0x0048, +0x016A, 0x0248, 0x004E, +0x0161, 0x024A, 0x0055, +0x0157, 0x024E, 0x005B, +0x014D, 0x0251, 0x0062, +0x0144, 0x0253, 0x0069, +0x013A, 0x0256, 0x0070, +0x0131, 0x0258, 0x0077, +0x0127, 0x025B, 0x007E, +0x011E, 0x025C, 0x0086, +0x0115, 0x025E, 0x008D, +0x010B, 0x0260, 0x0095, +0x0102, 0x0262, 0x009C, +0x00F9, 0x0263, 0x00A4, +0x00F0, 0x0264, 0x00AC, +0x00E7, 0x0265, 0x00B4, +0x00DF, 0x0264, 0x00BD, +0x00D6, 0x0265, 0x00C5, +0x00CD, 0x0266, 0x00CD, +}; + +/* Converted Blur & Scale coeff tables from S1.10 to S1.12 */ +static uint16_t filter_isharp_bs_4tap_in_6_64p_s1_12[198]; +static uint16_t filter_isharp_bs_4tap_64p_s1_12[132]; +static uint16_t filter_isharp_bs_3tap_64p_s1_12[99]; + +struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_sdr_nl[3][6] = { + { /* LOW */ + {1125, 1000, 75, 100}, + {11, 10, 6, 10}, + {1075, 1000, 45, 100}, + {105, 100, 3, 10}, + {1025, 1000, 15, 100}, + {1, 1, 0, 1}, + }, + { /* MID */ + {1125, 1000, 2, 1}, + {11, 10, 175, 100}, + {1075, 1000, 15, 10}, + {105, 100, 125, 100}, + {1025, 1000, 1, 1}, + {1, 1, 75, 100}, + }, + { /* HIGH */ + {1125, 1000, 35, 10}, + {11, 10, 32, 10}, + {1075, 1000, 29, 10}, + {105, 100, 26, 10}, + {1025, 1000, 23, 10}, + {1, 1, 2, 1}, + }, +}; + +struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_sdr_l[3][6] = { + { /* LOW */ + {1125, 1000, 75, 100}, + {11, 10, 6, 10}, + {1075, 1000, 45, 100}, + {105, 100, 3, 10}, + {1025, 1000, 15, 100}, + {1, 1, 0, 1}, + }, + { /* MID */ + {1125, 1000, 15, 10}, + {11, 10, 135, 100}, + {1075, 1000, 12, 10}, + {105, 100, 105, 100}, + {1025, 1000, 9, 10}, + {1, 1, 75, 100}, + }, + { /* HIGH */ + {1125, 1000, 25, 10}, + {11, 10, 23, 10}, + {1075, 1000, 21, 10}, + {105, 100, 19, 10}, + {1025, 1000, 17, 10}, + {1, 1, 15, 10}, + }, +}; + +struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_hdr_nl[3][6] = { + { /* LOW */ + {1125, 1000, 5, 10}, + {11, 10, 4, 10}, + {1075, 1000, 3, 10}, + {105, 100, 2, 10}, + {1025, 1000, 1, 10}, + {1, 1, 0, 1}, + }, + { /* MID */ + {1125, 1000, 1, 1}, + {11, 10, 9, 10}, + {1075, 1000, 8, 10}, + {105, 100, 7, 10}, + {1025, 1000, 6, 10}, + {1, 1, 5, 10}, + }, + { /* HIGH */ + {1125, 1000, 15, 10}, + {11, 10, 14, 10}, + {1075, 1000, 13, 10}, + {105, 100, 12, 10}, + {1025, 1000, 11, 10}, + {1, 1, 1, 1}, + }, +}; + +struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_hdr_l[3][6] = { + { /* LOW */ + {1125, 1000, 75, 100}, + {11, 10, 6, 10}, + {1075, 1000, 45, 100}, + {105, 100, 3, 10}, + {1025, 1000, 15, 100}, + {1, 1, 0, 1}, + }, + { /* MID */ + {1125, 1000, 15, 10}, + {11, 10, 135, 100}, + {1075, 1000, 12, 10}, + {105, 100, 105, 100}, + {1025, 1000, 9, 10}, + {1, 1, 75, 100}, 
+ }, + { /* HIGH */ + {1125, 1000, 25, 10}, + {11, 10, 23, 10}, + {1075, 1000, 21, 10}, + {105, 100, 19, 10}, + {1025, 1000, 17, 10}, + {1, 1, 15, 10}, + }, +}; + +/* Pre-generated 1DLUT for LOW for given setup and sharpness level */ +uint32_t filter_isharp_1D_lut_pregen[3][32] = { + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, +}; + const uint32_t *spl_get_filter_isharp_1D_lut_0(void) { return filter_isharp_1D_lut_0; @@ -340,11 +610,162 @@ const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void) { return filter_isharp_1D_lut_2p0x; } +const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void) +{ + return filter_isharp_1D_lut_3p0x; +} const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void) { return filter_isharp_wide_6tap_64p; } -const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) +uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void) { - return filter_isharp_bs_4tap_64p; + return filter_isharp_bs_4tap_in_6_64p_s1_12; } +uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) +{ + return filter_isharp_bs_4tap_64p_s1_12; +} +uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) +{ + return filter_isharp_bs_3tap_64p_s1_12; +} + +void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum system_setup setup) +{ + uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; + struct fixed31_32 sharp_base, sharp_calc, sharp_level, ratio_level; + int i, j; + struct scale_ratio_to_sharpness_level_lookup *setup_lookup_ptr; + int num_sharp_ramp_levels; + int size_1dlut; + int sharp_calc_int; + uint32_t filter_pregen_store[32]; + + /* + * Given scaling ratio and current system setup, build pregenerated + * 1DLUT tables for three sharpness levels - LOW, MID, HIGH + */ + for (i = 0; i < 3; i++) { + /* + * Based on setup ( HDR/SDR, L/NL ), get base scale ratio to + * sharpness curve + */ + switch (setup) { + case HDR_L: + setup_lookup_ptr = scale_to_sharp_hdr_l[i]; + num_sharp_ramp_levels = sizeof(scale_to_sharp_hdr_l[i])/ + sizeof(struct scale_ratio_to_sharpness_level_lookup); + break; + case HDR_NL: + setup_lookup_ptr = scale_to_sharp_hdr_nl[i]; + num_sharp_ramp_levels = sizeof(scale_to_sharp_hdr_nl[i])/ + sizeof(struct scale_ratio_to_sharpness_level_lookup); + break; + case SDR_L: + setup_lookup_ptr = scale_to_sharp_sdr_l[i]; + num_sharp_ramp_levels = sizeof(scale_to_sharp_sdr_l[i])/ + sizeof(struct scale_ratio_to_sharpness_level_lookup); + break; + case SDR_NL: + default: + setup_lookup_ptr = scale_to_sharp_sdr_nl[i]; + num_sharp_ramp_levels = sizeof(scale_to_sharp_sdr_nl[i])/ + sizeof(struct scale_ratio_to_sharpness_level_lookup); + break; + } + + /* + * Compare desired scaling ratio and find adjusted sharpness from + * base scale ratio to sharpness curve + */ + j = 0; + sharp_level = dc_fixpt_zero; + while (j < num_sharp_ramp_levels) { + ratio_level = dc_fixpt_from_fraction(setup_lookup_ptr->ratio_numer, + setup_lookup_ptr->ratio_denom); + if (ratio.value >= ratio_level.value) { + sharp_level = dc_fixpt_from_fraction(setup_lookup_ptr->sharpness_numer, + setup_lookup_ptr->sharpness_denom); + break; + } + setup_lookup_ptr++; + j++; + } + + /* + * Calculate LUT_128_gained with this equation: + * + * LUT_128_gained[i] = (uint8)(0.5 + min(255,(double)(LUT_128[i])*sharpLevel/iGain)) + * where LUT_128[i] is contents 
of 3p0x isharp 1dlut + * where sharpLevel is desired sharpness level + * where iGain is base sharpness level 3.0 + * where LUT_128_gained[i] is adjusted 1dlut value based on desired sharpness level + */ + byte_ptr_1dlut_src = (uint8_t *)filter_isharp_1D_lut_3p0x; + byte_ptr_1dlut_dst = (uint8_t *)filter_pregen_store; + size_1dlut = sizeof(filter_isharp_1D_lut_3p0x); + memset(byte_ptr_1dlut_dst, 0, size_1dlut); + for (j = 0; j < size_1dlut; j++) { + sharp_base = dc_fixpt_from_int((int)*byte_ptr_1dlut_src); + sharp_calc = dc_fixpt_mul(sharp_base, sharp_level); + sharp_calc = dc_fixpt_div(sharp_calc, dc_fixpt_from_int(3)); + sharp_calc = dc_fixpt_min(dc_fixpt_from_int(255), sharp_calc); + sharp_calc = dc_fixpt_add(sharp_calc, dc_fixpt_from_fraction(1, 2)); + sharp_calc_int = dc_fixpt_floor(sharp_calc); + if (sharp_calc_int > 255) + sharp_calc_int = 255; + *byte_ptr_1dlut_dst = (uint8_t)sharp_calc_int; + + byte_ptr_1dlut_src++; + byte_ptr_1dlut_dst++; + } + + /* Compare if filter has change, if so update */ + if (memcmp((void *)filter_isharp_1D_lut_pregen[i], (void *)filter_pregen_store, size_1dlut) != 0) + memcpy((void *)filter_isharp_1D_lut_pregen[i], (void *)filter_pregen_store, size_1dlut); + } +} + +uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness) +{ + return filter_isharp_1D_lut_pregen[sharpness]; +} + +void spl_init_blur_scale_coeffs(void) +{ + convert_filter_s1_10_to_s1_12(filter_isharp_bs_3tap_64p, + filter_isharp_bs_3tap_64p_s1_12, 3); + convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_64p, + filter_isharp_bs_4tap_64p_s1_12, 4); + convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_in_6_64p, + filter_isharp_bs_4tap_in_6_64p_s1_12, 6); +} + +#ifdef CONFIG_DRM_AMD_DC_FP +uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) +{ + if (taps == 3) + return spl_get_filter_isharp_bs_3tap_64p(); + else if (taps == 4) + return spl_get_filter_isharp_bs_4tap_64p(); + else if (taps == 6) + return spl_get_filter_isharp_bs_4tap_in_6_64p(); + else { + /* should never happen, bug */ + BREAK_TO_DEBUGGER(); + return NULL; + } +} + +void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) +{ + dscl_prog_data->filter_blur_scale_h = + spl_dscl_get_blur_scale_coeffs_64p(data->taps.h_taps); + + dscl_prog_data->filter_blur_scale_v = + spl_dscl_get_blur_scale_coeffs_64p(data->taps.v_taps); +} +#endif + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index 1aaf4c50c1bc..c8b7cd6404dd 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -12,6 +12,37 @@ const uint32_t *spl_get_filter_isharp_1D_lut_0p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p0x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void); -const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); +const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void); +uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void); +uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); +uint16_t *spl_get_filter_isharp_bs_3tap_64p(void); const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void); +uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps); + +struct scale_ratio_to_sharpness_level_lookup { + unsigned int ratio_numer; + unsigned int ratio_denom; + unsigned int sharpness_numer; + unsigned int sharpness_denom; +}; + +struct 
sharpness_level_mapping { + unsigned int level; + unsigned int level_numer; + unsigned int level_denom; +}; + +enum system_setup { + SDR_NL = 0, + SDR_L, + HDR_NL, + HDR_L +}; + +void spl_init_blur_scale_coeffs(void); +void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data); + +void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum system_setup setup); +uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c new file mode 100644 index 000000000000..83dd3435ebcc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c @@ -0,0 +1,1725 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dc_spl_filters.h" +#include "dc_spl_scl_filters.h" +#include "dc_spl_scl_easf_filters.h" + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.3_p_10qb_ +// 3 +// 64 +// input/output = 0.300000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_30[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F6, 0x0206, 0x0004, + 0x01EC, 0x020B, 0x0009, + 0x01E2, 0x0211, 0x000D, + 0x01D8, 0x0216, 0x0012, + 0x01CE, 0x021C, 0x0016, + 0x01C4, 0x0221, 0x001B, + 0x01BA, 0x0226, 0x0020, + 0x01B0, 0x022A, 0x0026, + 0x01A6, 0x022F, 0x002B, + 0x019C, 0x0233, 0x0031, + 0x0192, 0x0238, 0x0036, + 0x0188, 0x023C, 0x003C, + 0x017E, 0x0240, 0x0042, + 0x0174, 0x0244, 0x0048, + 0x016A, 0x0248, 0x004E, + 0x0161, 0x024A, 0x0055, + 0x0157, 0x024E, 0x005B, + 0x014D, 0x0251, 0x0062, + 0x0144, 0x0253, 0x0069, + 0x013A, 0x0256, 0x0070, + 0x0131, 0x0258, 0x0077, + 0x0127, 0x025B, 0x007E, + 0x011E, 0x025C, 0x0086, + 0x0115, 0x025E, 0x008D, + 0x010B, 0x0260, 0x0095, + 0x0102, 0x0262, 0x009C, + 0x00F9, 0x0263, 0x00A4, + 0x00F0, 0x0264, 0x00AC, + 0x00E7, 0x0265, 0x00B4, + 0x00DF, 0x0264, 0x00BD, + 0x00D6, 0x0265, 0x00C5, + 0x00CD, 0x0266, 0x00CD, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.4_p_10qb_ +// 3 +// 64 +// input/output = 0.400000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_40[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F6, 0x0206, 0x0004, + 0x01EB, 0x020E, 0x0007, + 0x01E1, 0x0214, 0x000B, + 0x01D7, 0x021A, 0x000F, + 0x01CD, 0x0220, 0x0013, + 0x01C2, 0x0226, 0x0018, + 0x01B8, 0x022C, 0x001C, + 0x01AE, 0x0231, 0x0021, + 0x01A3, 0x0237, 0x0026, + 0x0199, 0x023C, 0x002B, + 0x018F, 0x0240, 0x0031, + 0x0185, 0x0245, 0x0036, + 0x017A, 0x024A, 0x003C, + 0x0170, 0x024F, 0x0041, + 0x0166, 0x0253, 0x0047, + 0x015C, 0x0257, 0x004D, + 0x0152, 0x025A, 0x0054, + 0x0148, 0x025E, 0x005A, + 0x013E, 0x0261, 0x0061, + 0x0134, 0x0264, 0x0068, + 0x012B, 0x0266, 0x006F, + 0x0121, 0x0269, 0x0076, + 0x0117, 0x026C, 0x007D, + 0x010E, 0x026E, 0x0084, + 0x0104, 0x0270, 0x008C, + 0x00FB, 0x0271, 0x0094, + 0x00F2, 0x0272, 0x009C, + 0x00E9, 0x0273, 0x00A4, + 0x00E0, 0x0274, 0x00AC, + 0x00D7, 0x0275, 0x00B4, + 0x00CE, 0x0275, 0x00BD, + 0x00C5, 0x0276, 0x00C5, +}; + +//======================================================== +// 
gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.5_p_10qb_ +// 3 +// 64 +// input/output = 0.500000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_50[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F5, 0x0209, 0x0002, + 0x01EA, 0x0211, 0x0005, + 0x01DF, 0x021A, 0x0007, + 0x01D4, 0x0222, 0x000A, + 0x01C9, 0x022A, 0x000D, + 0x01BE, 0x0232, 0x0010, + 0x01B3, 0x0239, 0x0014, + 0x01A8, 0x0241, 0x0017, + 0x019D, 0x0248, 0x001B, + 0x0192, 0x024F, 0x001F, + 0x0187, 0x0255, 0x0024, + 0x017C, 0x025C, 0x0028, + 0x0171, 0x0262, 0x002D, + 0x0166, 0x0268, 0x0032, + 0x015B, 0x026E, 0x0037, + 0x0150, 0x0273, 0x003D, + 0x0146, 0x0278, 0x0042, + 0x013B, 0x027D, 0x0048, + 0x0130, 0x0282, 0x004E, + 0x0126, 0x0286, 0x0054, + 0x011B, 0x028A, 0x005B, + 0x0111, 0x028D, 0x0062, + 0x0107, 0x0290, 0x0069, + 0x00FD, 0x0293, 0x0070, + 0x00F3, 0x0296, 0x0077, + 0x00E9, 0x0298, 0x007F, + 0x00DF, 0x029A, 0x0087, + 0x00D5, 0x029C, 0x008F, + 0x00CC, 0x029D, 0x0097, + 0x00C3, 0x029E, 0x009F, + 0x00BA, 0x029E, 0x00A8, + 0x00B1, 0x029E, 0x00B1, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.6_p_10qb_ +// 3 +// 64 +// input/output = 0.600000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_60[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F4, 0x020B, 0x0001, + 0x01E8, 0x0216, 0x0002, + 0x01DC, 0x0221, 0x0003, + 0x01D0, 0x022B, 0x0005, + 0x01C4, 0x0235, 0x0007, + 0x01B8, 0x0240, 0x0008, + 0x01AC, 0x0249, 0x000B, + 0x01A0, 0x0253, 0x000D, + 0x0194, 0x025C, 0x0010, + 0x0188, 0x0265, 0x0013, + 0x017C, 0x026E, 0x0016, + 0x0170, 0x0277, 0x0019, + 0x0164, 0x027F, 0x001D, + 0x0158, 0x0287, 0x0021, + 0x014C, 0x028F, 0x0025, + 0x0140, 0x0297, 0x0029, + 0x0135, 0x029D, 0x002E, + 0x0129, 0x02A4, 0x0033, + 0x011D, 0x02AB, 0x0038, + 0x0112, 0x02B0, 0x003E, + 0x0107, 0x02B5, 0x0044, + 0x00FC, 0x02BA, 0x004A, + 0x00F1, 0x02BF, 0x0050, + 0x00E6, 0x02C3, 0x0057, + 0x00DB, 0x02C7, 0x005E, + 0x00D1, 0x02CA, 0x0065, + 0x00C7, 0x02CC, 0x006D, + 0x00BD, 0x02CE, 0x0075, + 0x00B3, 0x02D0, 0x007D, + 0x00A9, 0x02D2, 0x0085, + 0x00A0, 0x02D2, 0x008E, + 0x0097, 0x02D2, 0x0097, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.7_p_10qb_ +// 3 +// 64 +// input/output = 0.700000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_70[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F3, 0x020D, 0x0000, + 0x01E5, 0x021B, 0x0000, + 0x01D8, 0x0228, 0x0000, + 0x01CB, 0x0235, 0x0000, + 0x01BD, 0x0243, 0x0000, + 0x01B0, 0x024F, 0x0001, + 0x01A2, 0x025C, 0x0002, + 0x0195, 0x0268, 0x0003, + 0x0187, 0x0275, 0x0004, + 0x017A, 0x0280, 0x0006, + 0x016D, 0x028C, 0x0007, + 0x015F, 0x0298, 0x0009, + 0x0152, 0x02A2, 0x000C, + 0x0145, 0x02AD, 0x000E, + 0x0138, 0x02B7, 0x0011, + 0x012B, 0x02C0, 0x0015, + 0x011E, 0x02CA, 0x0018, + 0x0111, 0x02D3, 0x001C, + 0x0105, 0x02DB, 0x0020, + 0x00F8, 0x02E3, 0x0025, + 0x00EC, 0x02EA, 0x002A, + 0x00E0, 0x02F1, 0x002F, + 0x00D5, 0x02F6, 0x0035, + 0x00C9, 0x02FC, 0x003B, + 0x00BE, 0x0301, 0x0041, + 0x00B3, 0x0305, 0x0048, + 0x00A8, 0x0309, 0x004F, + 0x009E, 0x030C, 0x0056, + 0x0094, 0x030E, 0x005E, + 0x008A, 0x0310, 0x0066, + 
0x0081, 0x0310, 0x006F, + 0x0077, 0x0312, 0x0077, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.8_p_10qb_ +// 3 +// 64 +// input/output = 0.800000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_80[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F1, 0x0210, 0x0FFF, + 0x01E2, 0x0220, 0x0FFE, + 0x01D2, 0x0232, 0x0FFC, + 0x01C3, 0x0241, 0x0FFC, + 0x01B4, 0x0251, 0x0FFB, + 0x01A4, 0x0262, 0x0FFA, + 0x0195, 0x0271, 0x0FFA, + 0x0186, 0x0281, 0x0FF9, + 0x0176, 0x0291, 0x0FF9, + 0x0167, 0x02A0, 0x0FF9, + 0x0158, 0x02AE, 0x0FFA, + 0x0149, 0x02BD, 0x0FFA, + 0x013A, 0x02CB, 0x0FFB, + 0x012C, 0x02D7, 0x0FFD, + 0x011D, 0x02E5, 0x0FFE, + 0x010F, 0x02F1, 0x0000, + 0x0101, 0x02FD, 0x0002, + 0x00F3, 0x0308, 0x0005, + 0x00E5, 0x0313, 0x0008, + 0x00D8, 0x031D, 0x000B, + 0x00CB, 0x0326, 0x000F, + 0x00BE, 0x032F, 0x0013, + 0x00B2, 0x0337, 0x0017, + 0x00A6, 0x033E, 0x001C, + 0x009A, 0x0345, 0x0021, + 0x008F, 0x034A, 0x0027, + 0x0084, 0x034F, 0x002D, + 0x0079, 0x0353, 0x0034, + 0x006F, 0x0356, 0x003B, + 0x0065, 0x0358, 0x0043, + 0x005C, 0x0359, 0x004B, + 0x0053, 0x035A, 0x0053, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.9_p_10qb_ +// 3 +// 64 +// input/output = 0.900000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_90[99] = { + 0x0200, 0x0200, 0x0000, + 0x01EE, 0x0214, 0x0FFE, + 0x01DC, 0x0228, 0x0FFC, + 0x01CA, 0x023C, 0x0FFA, + 0x01B9, 0x024F, 0x0FF8, + 0x01A7, 0x0262, 0x0FF7, + 0x0195, 0x0276, 0x0FF5, + 0x0183, 0x028A, 0x0FF3, + 0x0172, 0x029C, 0x0FF2, + 0x0160, 0x02AF, 0x0FF1, + 0x014F, 0x02C2, 0x0FEF, + 0x013E, 0x02D4, 0x0FEE, + 0x012D, 0x02E5, 0x0FEE, + 0x011C, 0x02F7, 0x0FED, + 0x010C, 0x0307, 0x0FED, + 0x00FB, 0x0318, 0x0FED, + 0x00EC, 0x0327, 0x0FED, + 0x00DC, 0x0336, 0x0FEE, + 0x00CD, 0x0344, 0x0FEF, + 0x00BE, 0x0352, 0x0FF0, + 0x00B0, 0x035E, 0x0FF2, + 0x00A2, 0x036A, 0x0FF4, + 0x0095, 0x0375, 0x0FF6, + 0x0088, 0x037F, 0x0FF9, + 0x007B, 0x0388, 0x0FFD, + 0x006F, 0x0391, 0x0000, + 0x0064, 0x0397, 0x0005, + 0x0059, 0x039D, 0x000A, + 0x004E, 0x03A3, 0x000F, + 0x0045, 0x03A6, 0x0015, + 0x003B, 0x03A9, 0x001C, + 0x0033, 0x03AA, 0x0023, + 0x002A, 0x03AC, 0x002A, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_1_p_10qb_ +// 3 +// 64 +// input/output = 1.000000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_1_00[99] = { + 0x0200, 0x0200, 0x0000, + 0x01EB, 0x0217, 0x0FFE, + 0x01D5, 0x022F, 0x0FFC, + 0x01C0, 0x0247, 0x0FF9, + 0x01AB, 0x025E, 0x0FF7, + 0x0196, 0x0276, 0x0FF4, + 0x0181, 0x028D, 0x0FF2, + 0x016C, 0x02A5, 0x0FEF, + 0x0158, 0x02BB, 0x0FED, + 0x0144, 0x02D1, 0x0FEB, + 0x0130, 0x02E8, 0x0FE8, + 0x011C, 0x02FE, 0x0FE6, + 0x0109, 0x0313, 0x0FE4, + 0x00F6, 0x0328, 0x0FE2, + 0x00E4, 0x033C, 0x0FE0, + 0x00D2, 0x034F, 0x0FDF, + 0x00C0, 0x0363, 0x0FDD, + 0x00B0, 0x0374, 0x0FDC, + 0x009F, 0x0385, 0x0FDC, + 0x0090, 0x0395, 0x0FDB, + 0x0081, 0x03A4, 0x0FDB, + 0x0072, 0x03B3, 0x0FDB, + 0x0064, 0x03C0, 0x0FDC, + 0x0057, 0x03CC, 0x0FDD, + 0x004B, 0x03D6, 0x0FDF, + 0x003F, 0x03E0, 0x0FE1, + 0x0034, 
0x03E8, 0x0FE4, + 0x002A, 0x03EF, 0x0FE7, + 0x0020, 0x03F5, 0x0FEB, + 0x0017, 0x03FA, 0x0FEF, + 0x000F, 0x03FD, 0x0FF4, + 0x0007, 0x03FF, 0x0FFA, + 0x0000, 0x0400, 0x0000, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.3_p_10qb_ +// 4 +// 64 +// input/output = 0.300000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_30[132] = { + 0x0104, 0x01F8, 0x0104, 0x0000, + 0x00FE, 0x01F7, 0x010A, 0x0001, + 0x00F8, 0x01F6, 0x010F, 0x0003, + 0x00F2, 0x01F5, 0x0114, 0x0005, + 0x00EB, 0x01F4, 0x011B, 0x0006, + 0x00E5, 0x01F3, 0x0120, 0x0008, + 0x00DF, 0x01F2, 0x0125, 0x000A, + 0x00DA, 0x01F0, 0x012A, 0x000C, + 0x00D4, 0x01EE, 0x0130, 0x000E, + 0x00CE, 0x01ED, 0x0135, 0x0010, + 0x00C8, 0x01EB, 0x013A, 0x0013, + 0x00C2, 0x01E9, 0x0140, 0x0015, + 0x00BD, 0x01E7, 0x0145, 0x0017, + 0x00B7, 0x01E5, 0x014A, 0x001A, + 0x00B1, 0x01E2, 0x0151, 0x001C, + 0x00AC, 0x01E0, 0x0155, 0x001F, + 0x00A7, 0x01DD, 0x015A, 0x0022, + 0x00A1, 0x01DB, 0x015F, 0x0025, + 0x009C, 0x01D8, 0x0165, 0x0027, + 0x0097, 0x01D5, 0x016A, 0x002A, + 0x0092, 0x01D2, 0x016E, 0x002E, + 0x008C, 0x01CF, 0x0174, 0x0031, + 0x0087, 0x01CC, 0x0179, 0x0034, + 0x0083, 0x01C9, 0x017D, 0x0037, + 0x007E, 0x01C5, 0x0182, 0x003B, + 0x0079, 0x01C2, 0x0187, 0x003E, + 0x0074, 0x01BE, 0x018C, 0x0042, + 0x0070, 0x01BA, 0x0190, 0x0046, + 0x006B, 0x01B7, 0x0195, 0x0049, + 0x0066, 0x01B3, 0x019A, 0x004D, + 0x0062, 0x01AF, 0x019E, 0x0051, + 0x005E, 0x01AB, 0x01A2, 0x0055, + 0x005A, 0x01A6, 0x01A6, 0x005A, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.4_p_10qb_ +// 4 +// 64 +// input/output = 0.400000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_40[132] = { + 0x00FB, 0x0209, 0x00FC, 0x0000, + 0x00F5, 0x0209, 0x0101, 0x0001, + 0x00EE, 0x0208, 0x0108, 0x0002, + 0x00E8, 0x0207, 0x010E, 0x0003, + 0x00E2, 0x0206, 0x0114, 0x0004, + 0x00DB, 0x0205, 0x011A, 0x0006, + 0x00D5, 0x0204, 0x0120, 0x0007, + 0x00CF, 0x0203, 0x0125, 0x0009, + 0x00C9, 0x0201, 0x012C, 0x000A, + 0x00C3, 0x01FF, 0x0132, 0x000C, + 0x00BD, 0x01FD, 0x0138, 0x000E, + 0x00B7, 0x01FB, 0x013E, 0x0010, + 0x00B1, 0x01F9, 0x0144, 0x0012, + 0x00AC, 0x01F7, 0x0149, 0x0014, + 0x00A6, 0x01F4, 0x0150, 0x0016, + 0x00A0, 0x01F2, 0x0156, 0x0018, + 0x009B, 0x01EF, 0x015C, 0x001A, + 0x0095, 0x01EC, 0x0162, 0x001D, + 0x0090, 0x01E9, 0x0168, 0x001F, + 0x008B, 0x01E6, 0x016D, 0x0022, + 0x0085, 0x01E3, 0x0173, 0x0025, + 0x0080, 0x01DF, 0x0179, 0x0028, + 0x007B, 0x01DC, 0x017E, 0x002B, + 0x0076, 0x01D8, 0x0184, 0x002E, + 0x0071, 0x01D4, 0x018A, 0x0031, + 0x006D, 0x01D1, 0x018E, 0x0034, + 0x0068, 0x01CD, 0x0193, 0x0038, + 0x0063, 0x01C8, 0x019A, 0x003B, + 0x005F, 0x01C4, 0x019E, 0x003F, + 0x005B, 0x01C0, 0x01A3, 0x0042, + 0x0056, 0x01BB, 0x01A9, 0x0046, + 0x0052, 0x01B7, 0x01AD, 0x004A, + 0x004E, 0x01B2, 0x01B2, 0x004E, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.5_p_10qb_ +// 4 +// 64 +// input/output = 0.500000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_50[132] = { + 0x00E5, 0x0236, 0x00E5, 0x0000, 
+ 0x00DE, 0x0235, 0x00ED, 0x0000, + 0x00D7, 0x0235, 0x00F4, 0x0000, + 0x00D0, 0x0235, 0x00FB, 0x0000, + 0x00C9, 0x0234, 0x0102, 0x0001, + 0x00C2, 0x0233, 0x010A, 0x0001, + 0x00BC, 0x0232, 0x0111, 0x0001, + 0x00B5, 0x0230, 0x0119, 0x0002, + 0x00AE, 0x022F, 0x0121, 0x0002, + 0x00A8, 0x022D, 0x0128, 0x0003, + 0x00A2, 0x022B, 0x012F, 0x0004, + 0x009B, 0x0229, 0x0137, 0x0005, + 0x0095, 0x0226, 0x013F, 0x0006, + 0x008F, 0x0224, 0x0146, 0x0007, + 0x0089, 0x0221, 0x014E, 0x0008, + 0x0083, 0x021E, 0x0155, 0x000A, + 0x007E, 0x021B, 0x015C, 0x000B, + 0x0078, 0x0217, 0x0164, 0x000D, + 0x0072, 0x0213, 0x016D, 0x000E, + 0x006D, 0x0210, 0x0173, 0x0010, + 0x0068, 0x020C, 0x017A, 0x0012, + 0x0063, 0x0207, 0x0182, 0x0014, + 0x005E, 0x0203, 0x0189, 0x0016, + 0x0059, 0x01FE, 0x0191, 0x0018, + 0x0054, 0x01F9, 0x0198, 0x001B, + 0x0050, 0x01F4, 0x019F, 0x001D, + 0x004B, 0x01EF, 0x01A6, 0x0020, + 0x0047, 0x01EA, 0x01AC, 0x0023, + 0x0043, 0x01E4, 0x01B3, 0x0026, + 0x003F, 0x01DF, 0x01B9, 0x0029, + 0x003B, 0x01D9, 0x01C0, 0x002C, + 0x0037, 0x01D3, 0x01C6, 0x0030, + 0x0033, 0x01CD, 0x01CD, 0x0033, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.6_p_10qb_ +// 4 +// 64 +// input/output = 0.600000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_60[132] = { + 0x00C8, 0x026F, 0x00C9, 0x0000, + 0x00C0, 0x0270, 0x00D1, 0x0FFF, + 0x00B8, 0x0270, 0x00D9, 0x0FFF, + 0x00B1, 0x0270, 0x00E1, 0x0FFE, + 0x00A9, 0x026F, 0x00EB, 0x0FFD, + 0x00A2, 0x026E, 0x00F3, 0x0FFD, + 0x009A, 0x026D, 0x00FD, 0x0FFC, + 0x0093, 0x026C, 0x0105, 0x0FFC, + 0x008C, 0x026A, 0x010F, 0x0FFB, + 0x0085, 0x0268, 0x0118, 0x0FFB, + 0x007E, 0x0265, 0x0122, 0x0FFB, + 0x0078, 0x0263, 0x012A, 0x0FFB, + 0x0071, 0x0260, 0x0134, 0x0FFB, + 0x006B, 0x025C, 0x013E, 0x0FFB, + 0x0065, 0x0259, 0x0147, 0x0FFB, + 0x005F, 0x0255, 0x0151, 0x0FFB, + 0x0059, 0x0251, 0x015A, 0x0FFC, + 0x0054, 0x024D, 0x0163, 0x0FFC, + 0x004E, 0x0248, 0x016D, 0x0FFD, + 0x0049, 0x0243, 0x0176, 0x0FFE, + 0x0044, 0x023E, 0x017F, 0x0FFF, + 0x003F, 0x0238, 0x0189, 0x0000, + 0x003A, 0x0232, 0x0193, 0x0001, + 0x0036, 0x022C, 0x019C, 0x0002, + 0x0031, 0x0226, 0x01A5, 0x0004, + 0x002D, 0x021F, 0x01AF, 0x0005, + 0x0029, 0x0218, 0x01B8, 0x0007, + 0x0025, 0x0211, 0x01C1, 0x0009, + 0x0022, 0x020A, 0x01C9, 0x000B, + 0x001E, 0x0203, 0x01D2, 0x000D, + 0x001B, 0x01FB, 0x01DA, 0x0010, + 0x0018, 0x01F3, 0x01E3, 0x0012, + 0x0015, 0x01EB, 0x01EB, 0x0015, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.7_p_10qb_ +// 4 +// 64 +// input/output = 0.700000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_70[132] = { + 0x00A3, 0x02B9, 0x00A4, 0x0000, + 0x009A, 0x02BA, 0x00AD, 0x0FFF, + 0x0092, 0x02BA, 0x00B6, 0x0FFE, + 0x0089, 0x02BA, 0x00C1, 0x0FFC, + 0x0081, 0x02B9, 0x00CB, 0x0FFB, + 0x0079, 0x02B8, 0x00D5, 0x0FFA, + 0x0071, 0x02B7, 0x00DF, 0x0FF9, + 0x0069, 0x02B5, 0x00EA, 0x0FF8, + 0x0062, 0x02B3, 0x00F4, 0x0FF7, + 0x005B, 0x02B0, 0x00FF, 0x0FF6, + 0x0054, 0x02AD, 0x010B, 0x0FF4, + 0x004D, 0x02A9, 0x0117, 0x0FF3, + 0x0046, 0x02A5, 0x0123, 0x0FF2, + 0x0040, 0x02A1, 0x012D, 0x0FF2, + 0x003A, 0x029C, 0x0139, 0x0FF1, + 0x0034, 0x0297, 0x0145, 0x0FF0, + 0x002F, 0x0292, 0x0150, 0x0FEF, + 0x0029, 0x028C, 0x015C, 
0x0FEF, + 0x0024, 0x0285, 0x0169, 0x0FEE, + 0x001F, 0x027F, 0x0174, 0x0FEE, + 0x001B, 0x0278, 0x017F, 0x0FEE, + 0x0016, 0x0270, 0x018D, 0x0FED, + 0x0012, 0x0268, 0x0199, 0x0FED, + 0x000E, 0x0260, 0x01A4, 0x0FEE, + 0x000B, 0x0258, 0x01AF, 0x0FEE, + 0x0007, 0x024F, 0x01BC, 0x0FEE, + 0x0004, 0x0246, 0x01C7, 0x0FEF, + 0x0001, 0x023D, 0x01D3, 0x0FEF, + 0x0FFE, 0x0233, 0x01DF, 0x0FF0, + 0x0FFC, 0x0229, 0x01EA, 0x0FF1, + 0x0FFA, 0x021F, 0x01F4, 0x0FF3, + 0x0FF8, 0x0215, 0x01FF, 0x0FF4, + 0x0FF6, 0x020A, 0x020A, 0x0FF6, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.8_p_10qb_ +// 4 +// 64 +// input/output = 0.800000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_80[132] = { + 0x0075, 0x0315, 0x0076, 0x0000, + 0x006C, 0x0316, 0x007F, 0x0FFF, + 0x0062, 0x0316, 0x008A, 0x0FFE, + 0x0059, 0x0315, 0x0096, 0x0FFC, + 0x0050, 0x0314, 0x00A1, 0x0FFB, + 0x0048, 0x0312, 0x00AD, 0x0FF9, + 0x0040, 0x0310, 0x00B8, 0x0FF8, + 0x0038, 0x030D, 0x00C5, 0x0FF6, + 0x0030, 0x030A, 0x00D1, 0x0FF5, + 0x0029, 0x0306, 0x00DE, 0x0FF3, + 0x0022, 0x0301, 0x00EB, 0x0FF2, + 0x001C, 0x02FC, 0x00F8, 0x0FF0, + 0x0015, 0x02F7, 0x0106, 0x0FEE, + 0x0010, 0x02F1, 0x0112, 0x0FED, + 0x000A, 0x02EA, 0x0121, 0x0FEB, + 0x0005, 0x02E3, 0x012F, 0x0FE9, + 0x0000, 0x02DB, 0x013D, 0x0FE8, + 0x0FFB, 0x02D3, 0x014C, 0x0FE6, + 0x0FF7, 0x02CA, 0x015A, 0x0FE5, + 0x0FF3, 0x02C1, 0x0169, 0x0FE3, + 0x0FF0, 0x02B7, 0x0177, 0x0FE2, + 0x0FEC, 0x02AD, 0x0186, 0x0FE1, + 0x0FE9, 0x02A2, 0x0196, 0x0FDF, + 0x0FE7, 0x0297, 0x01A4, 0x0FDE, + 0x0FE4, 0x028C, 0x01B3, 0x0FDD, + 0x0FE2, 0x0280, 0x01C2, 0x0FDC, + 0x0FE0, 0x0274, 0x01D0, 0x0FDC, + 0x0FDF, 0x0268, 0x01DE, 0x0FDB, + 0x0FDD, 0x025B, 0x01EE, 0x0FDA, + 0x0FDC, 0x024E, 0x01FC, 0x0FDA, + 0x0FDB, 0x0241, 0x020A, 0x0FDA, + 0x0FDB, 0x0233, 0x0218, 0x0FDA, + 0x0FDA, 0x0226, 0x0226, 0x0FDA, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.9_p_10qb_ +// 4 +// 64 +// input/output = 0.900000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_90[132] = { + 0x003F, 0x0383, 0x003E, 0x0000, + 0x0034, 0x0383, 0x004A, 0x0FFF, + 0x002B, 0x0383, 0x0054, 0x0FFE, + 0x0021, 0x0381, 0x0061, 0x0FFD, + 0x0019, 0x037F, 0x006C, 0x0FFC, + 0x0010, 0x037C, 0x0079, 0x0FFB, + 0x0008, 0x0378, 0x0086, 0x0FFA, + 0x0001, 0x0374, 0x0093, 0x0FF8, + 0x0FFA, 0x036E, 0x00A1, 0x0FF7, + 0x0FF3, 0x0368, 0x00B0, 0x0FF5, + 0x0FED, 0x0361, 0x00BF, 0x0FF3, + 0x0FE8, 0x035A, 0x00CD, 0x0FF1, + 0x0FE2, 0x0352, 0x00DC, 0x0FF0, + 0x0FDE, 0x0349, 0x00EB, 0x0FEE, + 0x0FD9, 0x033F, 0x00FC, 0x0FEC, + 0x0FD5, 0x0335, 0x010D, 0x0FE9, + 0x0FD2, 0x032A, 0x011D, 0x0FE7, + 0x0FCF, 0x031E, 0x012E, 0x0FE5, + 0x0FCC, 0x0312, 0x013F, 0x0FE3, + 0x0FCA, 0x0305, 0x0150, 0x0FE1, + 0x0FC8, 0x02F8, 0x0162, 0x0FDE, + 0x0FC6, 0x02EA, 0x0174, 0x0FDC, + 0x0FC5, 0x02DC, 0x0185, 0x0FDA, + 0x0FC4, 0x02CD, 0x0197, 0x0FD8, + 0x0FC3, 0x02BE, 0x01AA, 0x0FD5, + 0x0FC3, 0x02AF, 0x01BB, 0x0FD3, + 0x0FC3, 0x029F, 0x01CD, 0x0FD1, + 0x0FC3, 0x028E, 0x01E0, 0x0FCF, + 0x0FC3, 0x027E, 0x01F2, 0x0FCD, + 0x0FC4, 0x026D, 0x0203, 0x0FCC, + 0x0FC5, 0x025C, 0x0215, 0x0FCA, + 0x0FC6, 0x024B, 0x0227, 0x0FC8, + 0x0FC7, 0x0239, 0x0239, 0x0FC7, +}; + 
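For reference, the coefficient tables in this file are S1.10 fixed point: each entry is a 12-bit two's-complement field with 10 fractional bits, so codes at or above 0x800 are negative (0x0FFF is -1/1024) and the taps of each 64-phase row sum to 0x400, i.e. 1.0. The standalone sketch below is not part of this patch; it decodes one phase hand-copied from easf_filter_4tap_64p_ratio_0_80 above and checks that normalization.

#include <stdint.h>
#include <stdio.h>

/* Decode one 12-bit two's-complement S1.10 coefficient field. */
static int s1_10_to_int(uint16_t c)
{
	return (c & 0x800) ? (int)c - 0x1000 : (int)c;
}

int main(void)
{
	/* One phase hand-copied from easf_filter_4tap_64p_ratio_0_80. */
	static const uint16_t phase[4] = { 0x0FFB, 0x02D3, 0x014C, 0x0FE6 };
	int i, sum = 0;

	for (i = 0; i < 4; i++) {
		int v = s1_10_to_int(phase[i]);

		printf("tap %d: 0x%04x -> %d/1024 = %f\n",
		       i, (unsigned int)phase[i], v, v / 1024.0);
		sum += v;
	}
	/* A properly normalized phase sums to 0x400 (1.0 in S1.10). */
	printf("sum = %d (expect 1024)\n", sum);
	return 0;
}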
+//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_1_p_10qb_ +// 4 +// 64 +// input/output = 1.000000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_1_00[132] = { + 0x0000, 0x0400, 0x0000, 0x0000, + 0x0FF6, 0x03FF, 0x000B, 0x0000, + 0x0FED, 0x03FE, 0x0015, 0x0000, + 0x0FE4, 0x03FB, 0x0022, 0x0FFF, + 0x0FDC, 0x03F7, 0x002E, 0x0FFF, + 0x0FD5, 0x03F2, 0x003B, 0x0FFE, + 0x0FCE, 0x03EC, 0x0048, 0x0FFE, + 0x0FC8, 0x03E5, 0x0056, 0x0FFD, + 0x0FC3, 0x03DC, 0x0065, 0x0FFC, + 0x0FBE, 0x03D3, 0x0075, 0x0FFA, + 0x0FB9, 0x03C9, 0x0085, 0x0FF9, + 0x0FB6, 0x03BE, 0x0094, 0x0FF8, + 0x0FB2, 0x03B2, 0x00A6, 0x0FF6, + 0x0FB0, 0x03A5, 0x00B7, 0x0FF4, + 0x0FAD, 0x0397, 0x00CA, 0x0FF2, + 0x0FAB, 0x0389, 0x00DC, 0x0FF0, + 0x0FAA, 0x0379, 0x00EF, 0x0FEE, + 0x0FA9, 0x0369, 0x0102, 0x0FEC, + 0x0FA9, 0x0359, 0x0115, 0x0FE9, + 0x0FA9, 0x0348, 0x0129, 0x0FE6, + 0x0FA9, 0x0336, 0x013D, 0x0FE4, + 0x0FA9, 0x0323, 0x0153, 0x0FE1, + 0x0FAA, 0x0310, 0x0168, 0x0FDE, + 0x0FAC, 0x02FD, 0x017C, 0x0FDB, + 0x0FAD, 0x02E9, 0x0192, 0x0FD8, + 0x0FAF, 0x02D5, 0x01A7, 0x0FD5, + 0x0FB1, 0x02C0, 0x01BD, 0x0FD2, + 0x0FB3, 0x02AC, 0x01D2, 0x0FCF, + 0x0FB5, 0x0296, 0x01E9, 0x0FCC, + 0x0FB8, 0x0281, 0x01FE, 0x0FC9, + 0x0FBA, 0x026C, 0x0214, 0x0FC6, + 0x0FBD, 0x0256, 0x022A, 0x0FC3, + 0x0FC0, 0x0240, 0x0240, 0x0FC0, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.3_p_10qb_ +// 6 +// 64 +// input/output = 0.300000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_30[198] = { + 0x004B, 0x0100, 0x0169, 0x0101, 0x004B, 0x0000, + 0x0049, 0x00FD, 0x0169, 0x0103, 0x004E, 0x0000, + 0x0047, 0x00FA, 0x0169, 0x0106, 0x0050, 0x0000, + 0x0045, 0x00F7, 0x0168, 0x0109, 0x0052, 0x0001, + 0x0043, 0x00F5, 0x0168, 0x010B, 0x0054, 0x0001, + 0x0040, 0x00F2, 0x0168, 0x010E, 0x0057, 0x0001, + 0x003E, 0x00EF, 0x0168, 0x0110, 0x0059, 0x0002, + 0x003C, 0x00EC, 0x0167, 0x0113, 0x005C, 0x0002, + 0x003A, 0x00E9, 0x0167, 0x0116, 0x005E, 0x0002, + 0x0038, 0x00E6, 0x0166, 0x0118, 0x0061, 0x0003, + 0x0036, 0x00E3, 0x0165, 0x011C, 0x0063, 0x0003, + 0x0034, 0x00E0, 0x0165, 0x011D, 0x0066, 0x0004, + 0x0033, 0x00DD, 0x0164, 0x0120, 0x0068, 0x0004, + 0x0031, 0x00DA, 0x0163, 0x0122, 0x006B, 0x0005, + 0x002F, 0x00D7, 0x0163, 0x0125, 0x006D, 0x0005, + 0x002D, 0x00D3, 0x0162, 0x0128, 0x0070, 0x0006, + 0x002B, 0x00D0, 0x0161, 0x012A, 0x0073, 0x0007, + 0x002A, 0x00CD, 0x0160, 0x012D, 0x0075, 0x0007, + 0x0028, 0x00CA, 0x015F, 0x012F, 0x0078, 0x0008, + 0x0026, 0x00C7, 0x015E, 0x0131, 0x007B, 0x0009, + 0x0025, 0x00C4, 0x015D, 0x0133, 0x007E, 0x0009, + 0x0023, 0x00C1, 0x015C, 0x0136, 0x0080, 0x000A, + 0x0022, 0x00BE, 0x015A, 0x0138, 0x0083, 0x000B, + 0x0020, 0x00BB, 0x0159, 0x013A, 0x0086, 0x000C, + 0x001F, 0x00B8, 0x0158, 0x013B, 0x0089, 0x000D, + 0x001E, 0x00B5, 0x0156, 0x013E, 0x008C, 0x000D, + 0x001C, 0x00B2, 0x0155, 0x0140, 0x008F, 0x000E, + 0x001B, 0x00AF, 0x0153, 0x0143, 0x0091, 0x000F, + 0x0019, 0x00AC, 0x0152, 0x0145, 0x0094, 0x0010, + 0x0018, 0x00A9, 0x0150, 0x0147, 0x0097, 0x0011, + 0x0017, 0x00A6, 0x014F, 0x0148, 0x009A, 0x0012, + 0x0016, 0x00A3, 0x014D, 0x0149, 0x009D, 0x0014, + 0x0015, 0x00A0, 0x014B, 0x014B, 0x00A0, 0x0015, +}; + 
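The "S1.10" tag in each table header gives the coefficient format: a 12-bit two's-complement fixed-point value with one sign bit, one integer bit, and ten fraction bits, so 0x0400 is +1.0 and a value such as 0x0FF6 decodes to -10/1024. As the rows above bear out, the taps of each phase sum to 0x0400 (unity gain). A minimal decoder and unity-gain check (a sketch for sanity-testing the tables, not part of the patch; the *_s1_12 variants filled in later by convert_filter_s1_10_to_s1_12() presumably just gain two more fraction bits, but that helper is defined elsewhere):

#include <stdint.h>
#include <stdio.h>

/* Decode one S1.10 coefficient: the value lives in the low 12 bits. */
static double s1_10_to_double(uint16_t c)
{
	int v = c & 0x0fff;

	if (v & 0x0800)		/* sign bit of the 12-bit field */
		v -= 0x1000;
	return v / 1024.0;	/* ten fraction bits */
}

int main(void)
{
	/* First row of easf_filter_6tap_64p_ratio_0_30 above:
	 * 75 + 256 + 361 + 257 + 75 + 0 = 1024 = 1.0 in S1.10. */
	const uint16_t row[6] = { 0x004B, 0x0100, 0x0169, 0x0101, 0x004B, 0x0000 };
	double sum = 0.0;

	for (int t = 0; t < 6; t++)
		sum += s1_10_to_double(row[t]);
	printf("phase sum = %f\n", sum);	/* prints 1.000000 */
	return 0;
}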
+//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.4_p_10qb_ +// 6 +// 64 +// input/output = 0.400000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_40[198] = { + 0x0028, 0x0106, 0x01A3, 0x0107, 0x0028, 0x0000, + 0x0026, 0x0102, 0x01A3, 0x010A, 0x002B, 0x0000, + 0x0024, 0x00FE, 0x01A3, 0x010F, 0x002D, 0x0FFF, + 0x0022, 0x00FA, 0x01A3, 0x0113, 0x002F, 0x0FFF, + 0x0021, 0x00F6, 0x01A3, 0x0116, 0x0031, 0x0FFF, + 0x001F, 0x00F2, 0x01A2, 0x011B, 0x0034, 0x0FFE, + 0x001D, 0x00EE, 0x01A2, 0x011F, 0x0036, 0x0FFE, + 0x001B, 0x00EA, 0x01A1, 0x0123, 0x0039, 0x0FFE, + 0x0019, 0x00E6, 0x01A1, 0x0127, 0x003B, 0x0FFE, + 0x0018, 0x00E2, 0x01A0, 0x012A, 0x003E, 0x0FFE, + 0x0016, 0x00DE, 0x01A0, 0x012E, 0x0041, 0x0FFD, + 0x0015, 0x00DA, 0x019F, 0x0132, 0x0043, 0x0FFD, + 0x0013, 0x00D6, 0x019E, 0x0136, 0x0046, 0x0FFD, + 0x0012, 0x00D2, 0x019D, 0x0139, 0x0049, 0x0FFD, + 0x0010, 0x00CE, 0x019C, 0x013D, 0x004C, 0x0FFD, + 0x000F, 0x00CA, 0x019A, 0x0141, 0x004F, 0x0FFD, + 0x000E, 0x00C6, 0x0199, 0x0144, 0x0052, 0x0FFD, + 0x000D, 0x00C2, 0x0197, 0x0148, 0x0055, 0x0FFD, + 0x000B, 0x00BE, 0x0196, 0x014C, 0x0058, 0x0FFD, + 0x000A, 0x00BA, 0x0195, 0x014F, 0x005B, 0x0FFD, + 0x0009, 0x00B6, 0x0193, 0x0153, 0x005E, 0x0FFD, + 0x0008, 0x00B2, 0x0191, 0x0157, 0x0061, 0x0FFD, + 0x0007, 0x00AE, 0x0190, 0x015A, 0x0064, 0x0FFD, + 0x0006, 0x00AA, 0x018E, 0x015D, 0x0068, 0x0FFD, + 0x0005, 0x00A6, 0x018C, 0x0161, 0x006B, 0x0FFD, + 0x0005, 0x00A2, 0x0189, 0x0164, 0x006F, 0x0FFD, + 0x0004, 0x009E, 0x0187, 0x0167, 0x0072, 0x0FFE, + 0x0003, 0x009A, 0x0185, 0x016B, 0x0075, 0x0FFE, + 0x0002, 0x0096, 0x0183, 0x016E, 0x0079, 0x0FFE, + 0x0002, 0x0093, 0x0180, 0x016F, 0x007D, 0x0FFF, + 0x0001, 0x008F, 0x017E, 0x0173, 0x0080, 0x0FFF, + 0x0001, 0x008B, 0x017B, 0x0175, 0x0084, 0x0000, + 0x0000, 0x0087, 0x0179, 0x0179, 0x0087, 0x0000, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.5_p_10qb_ +// 6 +// 64 +// input/output = 0.500000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_50[198] = { + 0x0000, 0x0107, 0x01F3, 0x0106, 0x0000, 0x0000, + 0x0FFE, 0x0101, 0x01F3, 0x010D, 0x0002, 0x0FFF, + 0x0FFD, 0x00FB, 0x01F3, 0x0113, 0x0003, 0x0FFF, + 0x0FFC, 0x00F6, 0x01F3, 0x0118, 0x0005, 0x0FFE, + 0x0FFA, 0x00F0, 0x01F3, 0x011E, 0x0007, 0x0FFE, + 0x0FF9, 0x00EB, 0x01F2, 0x0124, 0x0009, 0x0FFD, + 0x0FF8, 0x00E5, 0x01F2, 0x0129, 0x000B, 0x0FFD, + 0x0FF7, 0x00E0, 0x01F1, 0x012F, 0x000D, 0x0FFC, + 0x0FF6, 0x00DA, 0x01F0, 0x0135, 0x0010, 0x0FFB, + 0x0FF5, 0x00D4, 0x01EF, 0x013B, 0x0012, 0x0FFB, + 0x0FF4, 0x00CF, 0x01EE, 0x0141, 0x0014, 0x0FFA, + 0x0FF3, 0x00C9, 0x01ED, 0x0147, 0x0017, 0x0FF9, + 0x0FF2, 0x00C4, 0x01EB, 0x014C, 0x001A, 0x0FF9, + 0x0FF1, 0x00BF, 0x01EA, 0x0152, 0x001C, 0x0FF8, + 0x0FF1, 0x00B9, 0x01E8, 0x0157, 0x001F, 0x0FF8, + 0x0FF0, 0x00B4, 0x01E6, 0x015D, 0x0022, 0x0FF7, + 0x0FF0, 0x00AE, 0x01E4, 0x0163, 0x0025, 0x0FF6, + 0x0FEF, 0x00A9, 0x01E2, 0x0168, 0x0028, 0x0FF6, + 0x0FEF, 0x00A4, 0x01DF, 0x016E, 0x002B, 0x0FF5, + 0x0FEF, 0x009F, 0x01DD, 0x0172, 0x002E, 0x0FF5, + 0x0FEE, 0x009A, 0x01DA, 0x0178, 0x0032, 0x0FF4, + 0x0FEE, 0x0094, 0x01D8, 0x017E, 0x0035, 0x0FF3, + 0x0FEE, 0x008F, 0x01D5, 0x0182, 0x0039, 0x0FF3, + 0x0FEE, 0x008A, 
0x01D2, 0x0188, 0x003C, 0x0FF2, + 0x0FEE, 0x0085, 0x01CF, 0x018C, 0x0040, 0x0FF2, + 0x0FEE, 0x0081, 0x01CB, 0x0191, 0x0044, 0x0FF1, + 0x0FEE, 0x007C, 0x01C8, 0x0196, 0x0047, 0x0FF1, + 0x0FEE, 0x0077, 0x01C4, 0x019C, 0x004B, 0x0FF0, + 0x0FEE, 0x0072, 0x01C1, 0x01A0, 0x004F, 0x0FF0, + 0x0FEE, 0x006E, 0x01BD, 0x01A4, 0x0053, 0x0FF0, + 0x0FEE, 0x0069, 0x01B9, 0x01A9, 0x0058, 0x0FEF, + 0x0FEE, 0x0065, 0x01B5, 0x01AD, 0x005C, 0x0FEF, + 0x0FEF, 0x0060, 0x01B1, 0x01B1, 0x0060, 0x0FEF, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.6_p_10qb_ +// 6 +// 64 +// input/output = 0.600000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_60[198] = { + 0x0FD9, 0x00FB, 0x0258, 0x00FB, 0x0FD9, 0x0000, + 0x0FD9, 0x00F3, 0x0258, 0x0102, 0x0FDA, 0x0000, + 0x0FD8, 0x00EB, 0x0258, 0x010B, 0x0FDB, 0x0FFF, + 0x0FD8, 0x00E3, 0x0258, 0x0112, 0x0FDC, 0x0FFF, + 0x0FD8, 0x00DC, 0x0257, 0x011B, 0x0FDC, 0x0FFE, + 0x0FD7, 0x00D4, 0x0256, 0x0123, 0x0FDE, 0x0FFE, + 0x0FD7, 0x00CD, 0x0255, 0x012B, 0x0FDF, 0x0FFD, + 0x0FD7, 0x00C5, 0x0254, 0x0133, 0x0FE0, 0x0FFD, + 0x0FD7, 0x00BE, 0x0252, 0x013C, 0x0FE1, 0x0FFC, + 0x0FD7, 0x00B6, 0x0251, 0x0143, 0x0FE3, 0x0FFC, + 0x0FD8, 0x00AF, 0x024F, 0x014B, 0x0FE4, 0x0FFB, + 0x0FD8, 0x00A8, 0x024C, 0x0154, 0x0FE6, 0x0FFA, + 0x0FD8, 0x00A1, 0x024A, 0x015B, 0x0FE8, 0x0FFA, + 0x0FD9, 0x009A, 0x0247, 0x0163, 0x0FEA, 0x0FF9, + 0x0FD9, 0x0093, 0x0244, 0x016C, 0x0FEC, 0x0FF8, + 0x0FD9, 0x008C, 0x0241, 0x0174, 0x0FEF, 0x0FF7, + 0x0FDA, 0x0085, 0x023E, 0x017B, 0x0FF1, 0x0FF7, + 0x0FDB, 0x007F, 0x023A, 0x0183, 0x0FF3, 0x0FF6, + 0x0FDB, 0x0078, 0x0237, 0x018B, 0x0FF6, 0x0FF5, + 0x0FDC, 0x0072, 0x0233, 0x0192, 0x0FF9, 0x0FF4, + 0x0FDD, 0x006C, 0x022F, 0x0199, 0x0FFC, 0x0FF3, + 0x0FDD, 0x0065, 0x022A, 0x01A3, 0x0FFF, 0x0FF2, + 0x0FDE, 0x005F, 0x0226, 0x01AA, 0x0002, 0x0FF1, + 0x0FDF, 0x005A, 0x0221, 0x01B0, 0x0006, 0x0FF0, + 0x0FE0, 0x0054, 0x021C, 0x01B7, 0x0009, 0x0FF0, + 0x0FE1, 0x004E, 0x0217, 0x01BE, 0x000D, 0x0FEF, + 0x0FE2, 0x0048, 0x0212, 0x01C6, 0x0010, 0x0FEE, + 0x0FE3, 0x0043, 0x020C, 0x01CD, 0x0014, 0x0FED, + 0x0FE4, 0x003E, 0x0207, 0x01D3, 0x0018, 0x0FEC, + 0x0FE5, 0x0039, 0x0200, 0x01DA, 0x001D, 0x0FEB, + 0x0FE6, 0x0034, 0x01FA, 0x01E1, 0x0021, 0x0FEA, + 0x0FE7, 0x002F, 0x01F5, 0x01E7, 0x0025, 0x0FE9, + 0x0FE8, 0x002A, 0x01EE, 0x01EE, 0x002A, 0x0FE8, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.7_p_10qb_ +// 6 +// 64 +// input/output = 0.700000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_70[198] = { + 0x0FC0, 0x00DA, 0x02CC, 0x00DA, 0x0FC0, 0x0000, + 0x0FC1, 0x00D0, 0x02CC, 0x00E4, 0x0FBF, 0x0000, + 0x0FC2, 0x00C6, 0x02CB, 0x00EF, 0x0FBE, 0x0000, + 0x0FC3, 0x00BC, 0x02CA, 0x00F9, 0x0FBE, 0x0000, + 0x0FC4, 0x00B2, 0x02C9, 0x0104, 0x0FBD, 0x0000, + 0x0FC5, 0x00A8, 0x02C7, 0x010F, 0x0FBD, 0x0000, + 0x0FC7, 0x009F, 0x02C5, 0x0119, 0x0FBC, 0x0000, + 0x0FC8, 0x0095, 0x02C3, 0x0124, 0x0FBC, 0x0000, + 0x0FC9, 0x008C, 0x02C0, 0x012F, 0x0FBC, 0x0000, + 0x0FCB, 0x0083, 0x02BD, 0x0139, 0x0FBC, 0x0000, + 0x0FCC, 0x007A, 0x02BA, 0x0144, 0x0FBC, 0x0000, + 0x0FCE, 0x0072, 0x02B6, 0x014D, 0x0FBD, 0x0000, + 0x0FD0, 0x0069, 0x02B2, 0x0159, 0x0FBD, 0x0FFF, + 0x0FD1, 0x0061, 0x02AD, 0x0164, 
0x0FBE, 0x0FFF, + 0x0FD3, 0x0059, 0x02A9, 0x016E, 0x0FBF, 0x0FFE, + 0x0FD4, 0x0051, 0x02A4, 0x017A, 0x0FBF, 0x0FFE, + 0x0FD6, 0x0049, 0x029E, 0x0184, 0x0FC1, 0x0FFE, + 0x0FD8, 0x0042, 0x0299, 0x018E, 0x0FC2, 0x0FFD, + 0x0FD9, 0x003A, 0x0293, 0x019B, 0x0FC3, 0x0FFC, + 0x0FDB, 0x0033, 0x028D, 0x01A4, 0x0FC5, 0x0FFC, + 0x0FDC, 0x002D, 0x0286, 0x01AF, 0x0FC7, 0x0FFB, + 0x0FDE, 0x0026, 0x0280, 0x01BA, 0x0FC8, 0x0FFA, + 0x0FE0, 0x001F, 0x0279, 0x01C4, 0x0FCB, 0x0FF9, + 0x0FE1, 0x0019, 0x0272, 0x01CE, 0x0FCD, 0x0FF9, + 0x0FE3, 0x0013, 0x026A, 0x01D9, 0x0FCF, 0x0FF8, + 0x0FE4, 0x000D, 0x0263, 0x01E3, 0x0FD2, 0x0FF7, + 0x0FE6, 0x0008, 0x025B, 0x01EC, 0x0FD5, 0x0FF6, + 0x0FE7, 0x0002, 0x0253, 0x01F7, 0x0FD8, 0x0FF5, + 0x0FE9, 0x0FFD, 0x024A, 0x0202, 0x0FDB, 0x0FF3, + 0x0FEA, 0x0FF8, 0x0242, 0x020B, 0x0FDF, 0x0FF2, + 0x0FEC, 0x0FF3, 0x0239, 0x0215, 0x0FE2, 0x0FF1, + 0x0FED, 0x0FEF, 0x0230, 0x021E, 0x0FE6, 0x0FF0, + 0x0FEF, 0x0FEB, 0x0226, 0x0226, 0x0FEB, 0x0FEF, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.8_p_10qb_ +// 6 +// 64 +// input/output = 0.800000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_80[198] = { + 0x0FBF, 0x00A1, 0x0340, 0x00A1, 0x0FBF, 0x0000, + 0x0FC1, 0x0095, 0x0340, 0x00AD, 0x0FBC, 0x0001, + 0x0FC4, 0x0089, 0x033E, 0x00BA, 0x0FBA, 0x0001, + 0x0FC6, 0x007D, 0x033D, 0x00C6, 0x0FB8, 0x0002, + 0x0FC9, 0x0072, 0x033A, 0x00D3, 0x0FB6, 0x0002, + 0x0FCC, 0x0067, 0x0338, 0x00DF, 0x0FB3, 0x0003, + 0x0FCE, 0x005C, 0x0334, 0x00EE, 0x0FB1, 0x0003, + 0x0FD1, 0x0051, 0x0331, 0x00FA, 0x0FAF, 0x0004, + 0x0FD3, 0x0047, 0x032D, 0x0108, 0x0FAD, 0x0004, + 0x0FD6, 0x003D, 0x0328, 0x0116, 0x0FAB, 0x0004, + 0x0FD8, 0x0033, 0x0323, 0x0123, 0x0FAA, 0x0005, + 0x0FDB, 0x002A, 0x031D, 0x0131, 0x0FA8, 0x0005, + 0x0FDD, 0x0021, 0x0317, 0x013F, 0x0FA7, 0x0005, + 0x0FDF, 0x0018, 0x0311, 0x014D, 0x0FA5, 0x0006, + 0x0FE2, 0x0010, 0x030A, 0x015A, 0x0FA4, 0x0006, + 0x0FE4, 0x0008, 0x0302, 0x0169, 0x0FA3, 0x0006, + 0x0FE6, 0x0000, 0x02FB, 0x0177, 0x0FA2, 0x0006, + 0x0FE8, 0x0FF9, 0x02F3, 0x0185, 0x0FA1, 0x0006, + 0x0FEB, 0x0FF1, 0x02EA, 0x0193, 0x0FA1, 0x0006, + 0x0FED, 0x0FEB, 0x02E1, 0x01A1, 0x0FA0, 0x0006, + 0x0FEE, 0x0FE4, 0x02D8, 0x01B0, 0x0FA0, 0x0006, + 0x0FF0, 0x0FDE, 0x02CE, 0x01BE, 0x0FA0, 0x0006, + 0x0FF2, 0x0FD8, 0x02C5, 0x01CB, 0x0FA0, 0x0006, + 0x0FF4, 0x0FD3, 0x02BA, 0x01D8, 0x0FA1, 0x0006, + 0x0FF6, 0x0FCD, 0x02B0, 0x01E7, 0x0FA1, 0x0005, + 0x0FF7, 0x0FC8, 0x02A5, 0x01F5, 0x0FA2, 0x0005, + 0x0FF9, 0x0FC4, 0x029A, 0x0202, 0x0FA3, 0x0004, + 0x0FFA, 0x0FC0, 0x028E, 0x0210, 0x0FA4, 0x0004, + 0x0FFB, 0x0FBC, 0x0283, 0x021D, 0x0FA6, 0x0003, + 0x0FFD, 0x0FB8, 0x0276, 0x022A, 0x0FA8, 0x0003, + 0x0FFE, 0x0FB4, 0x026B, 0x0237, 0x0FAA, 0x0002, + 0x0FFF, 0x0FB1, 0x025E, 0x0245, 0x0FAC, 0x0001, + 0x0000, 0x0FAE, 0x0252, 0x0252, 0x0FAE, 0x0000, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.9_p_10qb_ +// 6 +// 64 +// input/output = 0.900000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_90[198] = { + 0x0FD8, 0x0055, 0x03A7, 0x0054, 0x0FD8, 0x0000, + 0x0FDB, 0x0047, 0x03A7, 0x0063, 0x0FD4, 0x0000, + 0x0FDF, 0x003B, 0x03A5, 0x006F, 0x0FD1, 0x0001, + 0x0FE2, 0x002E, 0x03A3, 0x007E, 0x0FCD, 0x0002, + 
0x0FE5, 0x0022, 0x03A0, 0x008D, 0x0FCA, 0x0002, + 0x0FE8, 0x0017, 0x039D, 0x009B, 0x0FC6, 0x0003, + 0x0FEB, 0x000C, 0x0398, 0x00AC, 0x0FC2, 0x0003, + 0x0FEE, 0x0001, 0x0394, 0x00BA, 0x0FBF, 0x0004, + 0x0FF1, 0x0FF7, 0x038E, 0x00CA, 0x0FBB, 0x0005, + 0x0FF4, 0x0FED, 0x0388, 0x00DA, 0x0FB8, 0x0005, + 0x0FF6, 0x0FE4, 0x0381, 0x00EB, 0x0FB4, 0x0006, + 0x0FF9, 0x0FDB, 0x037A, 0x00FA, 0x0FB1, 0x0007, + 0x0FFB, 0x0FD3, 0x0372, 0x010B, 0x0FAD, 0x0008, + 0x0FFD, 0x0FCB, 0x0369, 0x011D, 0x0FAA, 0x0008, + 0x0000, 0x0FC3, 0x0360, 0x012E, 0x0FA6, 0x0009, + 0x0002, 0x0FBC, 0x0356, 0x013F, 0x0FA3, 0x000A, + 0x0003, 0x0FB6, 0x034C, 0x0150, 0x0FA0, 0x000B, + 0x0005, 0x0FB0, 0x0341, 0x0162, 0x0F9D, 0x000B, + 0x0007, 0x0FAA, 0x0336, 0x0173, 0x0F9A, 0x000C, + 0x0008, 0x0FA5, 0x032A, 0x0185, 0x0F97, 0x000D, + 0x000A, 0x0FA0, 0x031E, 0x0197, 0x0F94, 0x000D, + 0x000B, 0x0F9B, 0x0311, 0x01A9, 0x0F92, 0x000E, + 0x000C, 0x0F97, 0x0303, 0x01BC, 0x0F8F, 0x000F, + 0x000D, 0x0F94, 0x02F6, 0x01CD, 0x0F8D, 0x000F, + 0x000E, 0x0F91, 0x02E8, 0x01DE, 0x0F8B, 0x0010, + 0x000F, 0x0F8E, 0x02D9, 0x01F1, 0x0F89, 0x0010, + 0x0010, 0x0F8B, 0x02CA, 0x0202, 0x0F88, 0x0011, + 0x0010, 0x0F89, 0x02BB, 0x0214, 0x0F87, 0x0011, + 0x0011, 0x0F87, 0x02AB, 0x0226, 0x0F86, 0x0011, + 0x0011, 0x0F86, 0x029C, 0x0236, 0x0F85, 0x0012, + 0x0011, 0x0F85, 0x028B, 0x0249, 0x0F84, 0x0012, + 0x0012, 0x0F84, 0x027B, 0x0259, 0x0F84, 0x0012, + 0x0012, 0x0F84, 0x026A, 0x026A, 0x0F84, 0x0012, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_1_p_10qb_ +// 6 +// 64 +// input/output = 1.000000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_1_00[198] = { + 0x0000, 0x0000, 0x0400, 0x0000, 0x0000, 0x0000, + 0x0003, 0x0FF3, 0x0400, 0x000D, 0x0FFD, 0x0000, + 0x0006, 0x0FE7, 0x03FE, 0x001C, 0x0FF9, 0x0000, + 0x0009, 0x0FDB, 0x03FC, 0x002B, 0x0FF5, 0x0000, + 0x000C, 0x0FD0, 0x03F9, 0x003A, 0x0FF1, 0x0000, + 0x000E, 0x0FC5, 0x03F5, 0x004A, 0x0FED, 0x0001, + 0x0011, 0x0FBB, 0x03F0, 0x005A, 0x0FE9, 0x0001, + 0x0013, 0x0FB2, 0x03EB, 0x006A, 0x0FE5, 0x0001, + 0x0015, 0x0FA9, 0x03E4, 0x007B, 0x0FE1, 0x0002, + 0x0017, 0x0FA1, 0x03DD, 0x008D, 0x0FDC, 0x0002, + 0x0018, 0x0F99, 0x03D4, 0x00A0, 0x0FD8, 0x0003, + 0x001A, 0x0F92, 0x03CB, 0x00B2, 0x0FD3, 0x0004, + 0x001B, 0x0F8C, 0x03C1, 0x00C6, 0x0FCE, 0x0004, + 0x001C, 0x0F86, 0x03B7, 0x00D9, 0x0FC9, 0x0005, + 0x001D, 0x0F80, 0x03AB, 0x00EE, 0x0FC4, 0x0006, + 0x001E, 0x0F7C, 0x039F, 0x0101, 0x0FBF, 0x0007, + 0x001F, 0x0F78, 0x0392, 0x0115, 0x0FBA, 0x0008, + 0x001F, 0x0F74, 0x0385, 0x012B, 0x0FB5, 0x0008, + 0x0020, 0x0F71, 0x0376, 0x0140, 0x0FB0, 0x0009, + 0x0020, 0x0F6E, 0x0367, 0x0155, 0x0FAB, 0x000B, + 0x0020, 0x0F6C, 0x0357, 0x016B, 0x0FA6, 0x000C, + 0x0020, 0x0F6A, 0x0347, 0x0180, 0x0FA2, 0x000D, + 0x0020, 0x0F69, 0x0336, 0x0196, 0x0F9D, 0x000E, + 0x0020, 0x0F69, 0x0325, 0x01AB, 0x0F98, 0x000F, + 0x001F, 0x0F68, 0x0313, 0x01C3, 0x0F93, 0x0010, + 0x001F, 0x0F69, 0x0300, 0x01D8, 0x0F8F, 0x0011, + 0x001E, 0x0F69, 0x02ED, 0x01EF, 0x0F8B, 0x0012, + 0x001D, 0x0F6A, 0x02D9, 0x0205, 0x0F87, 0x0014, + 0x001D, 0x0F6C, 0x02C5, 0x021A, 0x0F83, 0x0015, + 0x001C, 0x0F6E, 0x02B1, 0x0230, 0x0F7F, 0x0016, + 0x001B, 0x0F70, 0x029C, 0x0247, 0x0F7B, 0x0017, + 0x001A, 0x0F72, 0x0287, 0x025D, 0x0F78, 0x0018, + 0x0019, 0x0F75, 0x0272, 0x0272, 0x0F75, 0x0019, +}; + +/* Converted scaler coeff tables from S1.10 to S1.12 */ +static uint16_t 
easf_filter_3tap_64p_ratio_0_30_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_40_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_50_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_60_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_70_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_80_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_90_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_1_00_s1_12[99]; +static uint16_t easf_filter_4tap_64p_ratio_0_30_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_40_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_50_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_60_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_70_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_80_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_90_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_1_00_s1_12[132]; +static uint16_t easf_filter_6tap_64p_ratio_0_30_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_40_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_50_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_60_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_70_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_80_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_90_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_1_00_s1_12[198]; + +struct scale_ratio_to_reg_value_lookup easf_v_bf3_mode_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0002}, +}; + +struct scale_ratio_to_reg_value_lookup easf_h_bf3_mode_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0002}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_6tap_lookup[] = { + {3, 10, 0x4100}, + {4, 10, 0x4100}, + {5, 10, 0x4100}, + {6, 10, 0x4100}, + {7, 10, 0x4100}, + {8, 10, 0x4100}, + {9, 10, 0x4100}, + {1, 1, 0x4100}, + {-1, -1, 0x4100}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_6tap_lookup[] = { + {3, 10, 0x4000}, + {4, 10, 0x4000}, + {5, 10, 0x4000}, + {6, 10, 0x4000}, + {7, 10, 0x4000}, + {8, 10, 0x4000}, + {9, 10, 0x4000}, + {1, 1, 0x4000}, + {-1, -1, 0x4000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring6_6tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x251F}, + {5, 10, 0x291F}, + {6, 10, 0xA51F}, + {7, 10, 0xA51F}, + {8, 10, 0xAA66}, + {9, 10, 0xA51F}, + {1, 1, 0xA640}, + {-1, -1, 0xA640}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring4_6tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x9600}, + {5, 10, 0xA460}, + {6, 10, 0xA8E0}, + {7, 10, 0xAC00}, + {8, 10, 0xAD20}, + {9, 10, 0xAFC0}, + {1, 1, 0xB058}, + {-1, -1, 0xB058}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_4tap_lookup[] = { + {3, 10, 0x4100}, + {4, 10, 0x4100}, + {5, 10, 0x4100}, + {6, 10, 0x4100}, + {7, 10, 0x4100}, + {8, 10, 0x4100}, + {9, 10, 0x4100}, + {1, 1, 0x4100}, + {-1, -1, 0x4100}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_4tap_lookup[] = { + {3, 10, 0x4000}, + {4, 10, 0x4000}, + {5, 10, 0x4000}, + {6, 10, 0x4000}, + {7, 10, 0x4000}, + {8, 10, 0x4000}, + {9, 10, 0x4000}, + {1, 1, 0x4000}, + {-1, -1, 0x4000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring6_4tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 
0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring4_4tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x9900}, + {7, 10, 0xA100}, + {8, 10, 0xA8C0}, + {9, 10, 0xAB20}, + {1, 1, 0xAC00}, + {-1, -1, 0xAC00}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_uptilt_offset_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x4100}, + {9, 10, 0x9F00}, + {1, 1, 0xA4C0}, + {-1, -1, 0xA8D8}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt_maxval_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x4000}, + {9, 10, 0x24FE}, + {1, 1, 0x2D64}, + {-1, -1, 0x3ADB}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_slope_lookup[] = { + {3, 10, 0x3800}, + {4, 10, 0x3800}, + {5, 10, 0x3800}, + {6, 10, 0x3800}, + {7, 10, 0x3800}, + {8, 10, 0x3886}, + {9, 10, 0x3940}, + {1, 1, 0x3A4E}, + {-1, -1, 0x3B66}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt1_slope_lookup[] = { + {3, 10, 0x3800}, + {4, 10, 0x3800}, + {5, 10, 0x3800}, + {6, 10, 0x3800}, + {7, 10, 0x3800}, + {8, 10, 0x36F4}, + {9, 10, 0x359C}, + {1, 1, 0x3360}, + {-1, -1, 0x2F20}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_slope_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x359C}, + {1, 1, 0x31F0}, + {-1, -1, 0x1F00}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_offset_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x9F00}, + {1, 1, 0xA400}, + {-1, -1, 0x9E00}, +}; + +void spl_init_easf_filter_coeffs(void) +{ + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_30, + easf_filter_3tap_64p_ratio_0_30_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_40, + easf_filter_3tap_64p_ratio_0_40_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_50, + easf_filter_3tap_64p_ratio_0_50_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_60, + easf_filter_3tap_64p_ratio_0_60_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_70, + easf_filter_3tap_64p_ratio_0_70_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_80, + easf_filter_3tap_64p_ratio_0_80_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_90, + easf_filter_3tap_64p_ratio_0_90_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_1_00, + easf_filter_3tap_64p_ratio_1_00_s1_12, 3); + + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_30, + easf_filter_4tap_64p_ratio_0_30_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_40, + easf_filter_4tap_64p_ratio_0_40_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_50, + easf_filter_4tap_64p_ratio_0_50_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_60, + easf_filter_4tap_64p_ratio_0_60_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_70, + easf_filter_4tap_64p_ratio_0_70_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_80, + easf_filter_4tap_64p_ratio_0_80_s1_12, 4); + 
convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_90, + easf_filter_4tap_64p_ratio_0_90_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_1_00, + easf_filter_4tap_64p_ratio_1_00_s1_12, 4); + + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_30, + easf_filter_6tap_64p_ratio_0_30_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_40, + easf_filter_6tap_64p_ratio_0_40_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_50, + easf_filter_6tap_64p_ratio_0_50_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_60, + easf_filter_6tap_64p_ratio_0_60_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_70, + easf_filter_6tap_64p_ratio_0_70_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_80, + easf_filter_6tap_64p_ratio_0_80_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_90, + easf_filter_6tap_64p_ratio_0_90_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_1_00, + easf_filter_6tap_64p_ratio_1_00_s1_12, 6); +} + +uint16_t *spl_get_easf_filter_3tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + return easf_filter_3tap_64p_ratio_0_30_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + return easf_filter_3tap_64p_ratio_0_40_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + return easf_filter_3tap_64p_ratio_0_50_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + return easf_filter_3tap_64p_ratio_0_60_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + return easf_filter_3tap_64p_ratio_0_70_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + return easf_filter_3tap_64p_ratio_0_80_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + return easf_filter_3tap_64p_ratio_0_90_s1_12; + else + return easf_filter_3tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_get_easf_filter_4tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + return easf_filter_4tap_64p_ratio_0_30_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + return easf_filter_4tap_64p_ratio_0_40_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + return easf_filter_4tap_64p_ratio_0_50_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + return easf_filter_4tap_64p_ratio_0_60_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + return easf_filter_4tap_64p_ratio_0_70_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + return easf_filter_4tap_64p_ratio_0_80_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + return easf_filter_4tap_64p_ratio_0_90_s1_12; + else + return easf_filter_4tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_get_easf_filter_6tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + return easf_filter_6tap_64p_ratio_0_30_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + return easf_filter_6tap_64p_ratio_0_40_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + return easf_filter_6tap_64p_ratio_0_50_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + return easf_filter_6tap_64p_ratio_0_60_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + return easf_filter_6tap_64p_ratio_0_70_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(8, 
10).value) + return easf_filter_6tap_64p_ratio_0_80_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + return easf_filter_6tap_64p_ratio_0_90_s1_12; + else + return easf_filter_6tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +{ + if (taps == 6) + return spl_get_easf_filter_6tap_64p(ratio); + else if (taps == 4) + return spl_get_easf_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_easf_filter_3tap_64p(ratio); + else { + /* should never happen, bug */ + BREAK_TO_DEBUGGER(); + return NULL; + } +} + +void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data, bool enable_easf_v, + bool enable_easf_h) +{ + /* + * Old coefficients calculated scaling ratio = input / output + * New coefficients are calculated based on = output / input + */ + if (enable_easf_h) { + dscl_prog_data->filter_h = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.h_taps, data->recip_ratios.horz); + + dscl_prog_data->filter_h_c = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.h_taps_c, data->recip_ratios.horz_c); + } else { + dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps, data->ratios.horz); + + dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps_c, data->ratios.horz_c); + } + if (enable_easf_v) { + dscl_prog_data->filter_v = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.v_taps, data->recip_ratios.vert); + + dscl_prog_data->filter_v_c = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.v_taps_c, data->recip_ratios.vert_c); + } else { + dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps, data->ratios.vert); + + dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps_c, data->ratios.vert_c); + } +} + +static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct fixed31_32 ratio, + struct scale_ratio_to_reg_value_lookup *lookup_table_base_ptr, + unsigned int num_entries) +{ + unsigned int count = 0; + uint32_t value = 0; + struct scale_ratio_to_reg_value_lookup *lookup_table_index_ptr; + + lookup_table_index_ptr = (lookup_table_base_ptr + num_entries - 1); + value = lookup_table_index_ptr->reg_value; + + while (count < num_entries) { + + lookup_table_index_ptr = (lookup_table_base_ptr + count); + if (lookup_table_index_ptr->numer < 0) + break; + + if (ratio.value < dc_fixpt_from_fraction( + lookup_table_index_ptr->numer, + lookup_table_index_ptr->denom).value) { + value = lookup_table_index_ptr->reg_value; + break; + } + + count++; + } + return value; +} +uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries = sizeof(easf_v_bf3_mode_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_v_bf3_mode_lookup, num_entries); + return value; +} +uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries = sizeof(easf_h_bf3_mode_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_h_bf3_mode_lookup, num_entries); + return value; +} +uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_reducer_gain6_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + 
easf_reducer_gain6_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_reducer_gain6_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain6_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_reducer_gain4_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain4_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_reducer_gain4_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain4_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_gainRing6(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_gain_ring6_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring6_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_gain_ring6_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring6_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_gain_ring4_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring4_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_gain_ring4_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring4_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_dntilt_uptilt_offset_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_dntilt_uptilt_offset_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt_maxval_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt_maxval_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_dntilt_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_dntilt_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = 
sizeof(easf_3tap_uptilt1_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt1_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt2_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt2_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt2_offset_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt2_offset_lookup, num_entries); + } else + value = 0; + return value; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h new file mode 100644 index 000000000000..542b5ce1a385 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef __DC_SPL_SCL_EASF_FILTERS_H__ +#define __DC_SPL_SCL_EASF_FILTERS_H__ + +#include "dc_spl_types.h" + +struct scale_ratio_to_reg_value_lookup { + int numer; + int denom; + const uint32_t reg_value; +}; + +void spl_init_easf_filter_coeffs(void); +uint16_t *spl_get_easf_filter_3tap_64p(struct fixed31_32 ratio); +uint16_t *spl_get_easf_filter_4tap_64p(struct fixed31_32 ratio); +uint16_t *spl_get_easf_filter_6tap_64p(struct fixed31_32 ratio); +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio); +void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data, bool enable_easf_v, + bool enable_easf_h); + +uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio); +uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio); +uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio); +uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio); +uint32_t spl_get_gainRing6(int taps, struct fixed31_32 ratio); +uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct fixed31_32 ratio); + +#endif /* __DC_SPL_SCL_EASF_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index c174b2e8a150..156f8171e44f 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -1423,3 +1423,29 @@ const uint16_t *spl_get_filter_2tap_64p(void) { return filter_2tap_64p; } + +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +{ + if (taps == 8) + return spl_get_filter_8tap_64p(ratio); + else if (taps == 7) + return spl_get_filter_7tap_64p(ratio); + else 
if (taps == 6) + return spl_get_filter_6tap_64p(ratio); + else if (taps == 5) + return spl_get_filter_5tap_64p(ratio); + else if (taps == 4) + return spl_get_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_filter_3tap_64p(ratio); + else if (taps == 2) + return spl_get_filter_2tap_64p(); + else if (taps == 1) + return NULL; + else { + /* should never happen, bug */ + BREAK_TO_DEBUGGER(); + return NULL; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h index 6d96aca53b24..27590846d92a 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h @@ -17,43 +17,6 @@ const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio); const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio); const uint16_t *spl_get_filter_2tap_16p(void); const uint16_t *spl_get_filter_2tap_64p(void); -const uint16_t *spl_get_filter_3tap_16p_upscale(void); -const uint16_t *spl_get_filter_3tap_16p_116(void); -const uint16_t *spl_get_filter_3tap_16p_149(void); -const uint16_t *spl_get_filter_3tap_16p_183(void); +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p_upscale(void); -const uint16_t *spl_get_filter_4tap_16p_116(void); -const uint16_t *spl_get_filter_4tap_16p_149(void); -const uint16_t *spl_get_filter_4tap_16p_183(void); - -const uint16_t *spl_get_filter_3tap_64p_upscale(void); -const uint16_t *spl_get_filter_3tap_64p_116(void); -const uint16_t *spl_get_filter_3tap_64p_149(void); -const uint16_t *spl_get_filter_3tap_64p_183(void); - -const uint16_t *spl_get_filter_4tap_64p_upscale(void); -const uint16_t *spl_get_filter_4tap_64p_116(void); -const uint16_t *spl_get_filter_4tap_64p_149(void); -const uint16_t *spl_get_filter_4tap_64p_183(void); - -const uint16_t *spl_get_filter_5tap_64p_upscale(void); -const uint16_t *spl_get_filter_5tap_64p_116(void); -const uint16_t *spl_get_filter_5tap_64p_149(void); -const uint16_t *spl_get_filter_5tap_64p_183(void); - -const uint16_t *spl_get_filter_6tap_64p_upscale(void); -const uint16_t *spl_get_filter_6tap_64p_116(void); -const uint16_t *spl_get_filter_6tap_64p_149(void); -const uint16_t *spl_get_filter_6tap_64p_183(void); - -const uint16_t *spl_get_filter_7tap_64p_upscale(void); -const uint16_t *spl_get_filter_7tap_64p_116(void); -const uint16_t *spl_get_filter_7tap_64p_149(void); -const uint16_t *spl_get_filter_7tap_64p_183(void); - -const uint16_t *spl_get_filter_8tap_64p_upscale(void); -const uint16_t *spl_get_filter_8tap_64p_116(void); -const uint16_t *spl_get_filter_8tap_64p_149(void); -const uint16_t *spl_get_filter_8tap_64p_183(void); #endif /* __DC_SPL_SCL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 201201d3f55b..e54da5ea4ae8 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -81,6 +81,8 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_420BPP10, /*end of pixel format definition*/ SPL_PIXEL_FORMAT_INVALID, + SPL_PIXEL_FORMAT_422BPP8, + SPL_PIXEL_FORMAT_422BPP10, SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16, SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, @@ -120,6 +122,13 @@ enum spl_color_space { SPL_COLOR_SPACE_YCBCR709_BLACK, }; +enum chroma_cositing { + CHROMA_COSITING_NONE, + 
CHROMA_COSITING_LEFT,
+	CHROMA_COSITING_TOPLEFT,
+	CHROMA_COSITING_COUNT
+};
+
 // Scratch space for calculating scaler params
 struct spl_scaler_data {
 	int h_active;
@@ -129,6 +138,7 @@ struct spl_scaler_data {
 	struct spl_rect viewport_c;
 	struct spl_rect recout;
 	struct spl_ratios ratios;
+	struct spl_ratios recip_ratios;
 	struct spl_inits inits;
 };
 
@@ -485,6 +495,8 @@ struct spl_in {
 	bool prefer_easf;
 	bool disable_easf;
 	struct spl_debug debug;
+	bool is_fullscreen;
+	bool is_hdr_on;
 };	// end of SPL inputs

From 0961367cb5208850a123d235d147f92f586b2491 Mon Sep 17 00:00:00 2001
From: Alvin Lee 
Date: Tue, 25 Jun 2024 16:40:51 -0400
Subject: [PATCH 012/447] drm/amd/display: Don't consider cursor for no plane
 case in DML1

[Description]
For no plane scenarios we should not consider cursor as there cannot be
any cursor if there are no planes. This fixes an issue where
dc_commit_streams fails due to prefetch bandwidth requirements (the
display config + dummy planes + cursor causes the prefetch bandwidth to
exceed what is possible).

Reviewed-by: Chaitanya Dhere 
Signed-off-by: Jerry Zuo 
Signed-off-by: Alvin Lee 
Tested-by: Daniel Wheeler 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 8a8efe408a9d..efe337ebf7c8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -1562,6 +1562,8 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc,
 			pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width;
 			pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256;
 			pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
+			pipes[pipe_cnt].pipe.src.cur0_src_width = 0;
+			pipes[pipe_cnt].pipe.src.cur1_src_width = 0;
 			pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/
 			pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/
 			pipes[pipe_cnt].pipe.dest.full_recout_width = pipes[pipe_cnt].pipe.dest.recout_width; /*when is_hsplit != 1*/

From 98579743c4561acc3b1c7d2f3fcd46b2160db5ba Mon Sep 17 00:00:00 2001
From: Ryan Seto 
Date: Wed, 26 Jun 2024 14:53:26 -0400
Subject: [PATCH 013/447] drm/amd/display: Added logging for automated DPM
 testing

[Why]
Added clock logs to automate DPM testing

[How]
Added logs and helper functions to output clocks

Co-authored-by: Ryan Seto 
Reviewed-by: Alvin Lee 
Signed-off-by: Jerry Zuo 
Signed-off-by: Ryan Seto 
Tested-by: Daniel Wheeler 
Signed-off-by: Alex Deucher 
---
 .../dc/clk_mgr/dcn401/dcn401_clk_mgr.c        | 250 ++++++++++++++----
 drivers/gpu/drm/amd/display/dc/core/dc.c      |   9 +-
 .../dc/dml2/dml21/dml21_translation_helper.c  |  27 ++
 .../dc/dml2/dml21/dml21_translation_helper.h  |   1 +
 .../amd/display/dc/dml2/dml21/dml21_utils.c   |   2 +
 .../gpu/drm/amd/display/dc/inc/hw/clk_mgr.h   |   1 +
 .../amd/display/dc/inc/hw/clk_mgr_internal.h  |   4 +-
 7 files changed, 237 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
index 45fe17a46890..c453c5f15ce7 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
@@ -14,6 +14,7 @@
 #include "core_types.h"
 #include "dm_helpers.h"
 #include
"link.h" +#include "dc_state_priv.h" #include "atomfirmware.h" #include "dcn401_smu14_driver_if.h" @@ -29,6 +30,7 @@ #define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E6F #define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E72 #define mmCLK01_CLK0_CLK4_DFS_CNTL 0x16E75 +#define mmCLK20_CLK2_CLK2_DFS_CNTL 0x1B051 #define CLK0_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffUL #define CLK0_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000UL @@ -302,6 +304,197 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_build_wm_range_table(clk_mgr_base); } +static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + uint32_t dprefclk_did = 0; + uint32_t dcfclk_did = 0; + uint32_t dtbclk_did = 0; + uint32_t dispclk_did = 0; + uint32_t dppclk_did = 0; + uint32_t fclk_did = 0; + uint32_t target_div = 0; + + /* DFS Slice 0 is used for DISPCLK */ + dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL); + /* DFS Slice 1 is used for DPPCLK */ + dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL); + /* DFS Slice 2 is used for DPREFCLK */ + dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL); + /* DFS Slice 3 is used for DCFCLK */ + dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL); + /* DFS Slice 4 is used for DTBCLK */ + dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL); + /* DFS Slice _ is used for FCLK */ + fclk_did = REG_READ(CLK2_CLK2_DFS_CNTL); + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dispclk_did); + //Get dispclk in khz + regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dppclk_did); + //Get dppclk in khz + regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DPREFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dprefclk_did); + //Get dprefclk in khz + regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DCFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dcfclk_did); + //Get dcfclk in khz + regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DTBCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dtbclk_did); + //Get dtbclk in khz + regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DTBCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(fclk_did); + //Get fclk in khz + regs_and_bypass->fclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; +} + +static bool dcn401_check_native_scaling(struct pipe_ctx *pipe) +{ + bool is_native_scaling = false; + int width = pipe->plane_state->src_rect.width; + int height = pipe->plane_state->src_rect.height; + + if (pipe->stream->timing.h_addressable == width && + pipe->stream->timing.v_addressable == height && + pipe->plane_state->dst_rect.width == width && + pipe->plane_state->dst_rect.height == height) + is_native_scaling = true; + + return is_native_scaling; +} + +static void dcn401_auto_dpm_test_log( + struct dc_clocks *new_clocks, + struct clk_mgr_internal *clk_mgr, + struct dc_state *context) +{ + unsigned 
int mall_ss_size_bytes; + int dramclk_khz_override, fclk_khz_override, num_fclk_levels; + + struct pipe_ctx *pipe_ctx_list[MAX_PIPES]; + int active_pipe_count = 0; + + for (int i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { + pipe_ctx_list[active_pipe_count] = pipe_ctx; + active_pipe_count++; + } + } + + msleep(5); + + mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes; + + struct clk_log_info log_info = {0}; + struct clk_state_registers_and_bypass clk_register_dump; + + dcn401_dump_clk_registers(&clk_register_dump, &clk_mgr->base, &log_info); + + // Overrides for these clocks in case there is no p_state change support + dramclk_khz_override = new_clocks->dramclk_khz; + fclk_khz_override = new_clocks->fclk_khz; + + num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1; + + if (!new_clocks->p_state_change_support) + dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000; + + if (!new_clocks->fclk_p_state_change_support) + fclk_khz_override = clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels].fclk_mhz * 1000; + + + //////////////////////////////////////////////////////////////////////////// + // IMPORTANT: When adding more clocks to these logs, do NOT put a newline + // anywhere other than at the very end of the string. + // + // Formatting example (make sure to have " - " between each entry): + // + // AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n" + //////////////////////////////////////////////////////////////////////////// + if (active_pipe_count > 0 && + new_clocks->dramclk_khz > 0 && + new_clocks->fclk_khz > 0 && + new_clocks->dcfclk_khz > 0 && + new_clocks->dppclk_khz > 0) { + + uint32_t pix_clk_list[MAX_PIPES] = {0}; + int p_state_list[MAX_PIPES] = {0}; + int disp_src_width_list[MAX_PIPES] = {0}; + int disp_src_height_list[MAX_PIPES] = {0}; + uint64_t disp_src_refresh_list[MAX_PIPES] = {0}; + bool is_scaled_list[MAX_PIPES] = {0}; + + for (int i = 0; i < active_pipe_count; i++) { + struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i]; + uint64_t refresh_rate; + + pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz; + p_state_list[i] = curr_pipe_ctx->p_state_type; + + refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 + + curr_pipe_ctx->stream->timing.v_total + * (uint64_t) curr_pipe_ctx->stream->timing.h_total - (uint64_t)1); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total); + disp_src_refresh_list[i] = refresh_rate; + + if (curr_pipe_ctx->plane_state) { + is_scaled_list[i] = !(dcn401_check_native_scaling(curr_pipe_ctx)); + disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width; + disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height; + } + } + + DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - " + "dcfclk:%d - dppclk:%d - dispclk_hw:%d - " + "dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - " + "dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - " + "pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - " + "p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - " + "pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - " + "pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - " + "pix_width_2:%d - pix_height_2:%d - 
refresh_rate_2:%lld - is_scaled_2:%d - "
+			"pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n",
+			dramclk_khz_override,
+			fclk_khz_override,
+			new_clocks->dcfclk_khz,
+			new_clocks->dppclk_khz,
+			clk_register_dump.dispclk,
+			clk_register_dump.dppclk,
+			clk_register_dump.dprefclk,
+			clk_register_dump.dcfclk,
+			clk_register_dump.dtbclk,
+			clk_register_dump.fclk,
+			pix_clk_list[0], pix_clk_list[1], pix_clk_list[2], pix_clk_list[3],
+			mall_ss_size_bytes,
+			p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3],
+			disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0],
+			disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1],
+			disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2],
+			disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]);
+	}
+}
+
 static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,
 		struct dc_state *context,
 		int ref_dtbclk_khz)
@@ -1194,6 +1387,10 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base,
 
 	/* execute sequence */
 	dcn401_execute_block_sequence(clk_mgr_base, num_steps);
+
+	if (dc->config.enable_auto_dpm_test_logs)
+		dcn401_auto_dpm_test_log(&context->bw_ctx.bw.dcn.clk, TO_CLK_MGR_INTERNAL(clk_mgr_base), context);
+
 }
 
@@ -1218,59 +1415,6 @@ static uint32_t dcn401_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_m
 	return dc_fixpt_floor(pll_req);
 }
 
-static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
-		struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
-{
-	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
-	uint32_t dprefclk_did = 0;
-	uint32_t dcfclk_did = 0;
-	uint32_t dtbclk_did = 0;
-	uint32_t dispclk_did = 0;
-	uint32_t dppclk_did = 0;
-	uint32_t target_div = 0;
-
-	/* DFS Slice 0 is used for DISPCLK */
-	dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL);
-	/* DFS Slice 1 is used for DPPCLK */
-	dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL);
-	/* DFS Slice 2 is used for DPREFCLK */
-	dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL);
-	/* DFS Slice 3 is used for DCFCLK */
-	dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL);
-	/* DFS Slice 4 is used for DTBCLK */
-	dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL);
-
-	/* Convert DISPCLK DFS Slice DID to divider*/
-	target_div = dentist_get_divider_from_did(dispclk_did);
-	//Get dispclk in khz
-	regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-		* clk_mgr->base.dentist_vco_freq_khz) / target_div;
-
-	/* Convert DISPCLK DFS Slice DID to divider*/
-	target_div = dentist_get_divider_from_did(dppclk_did);
-	//Get dppclk in khz
-	regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-		* clk_mgr->base.dentist_vco_freq_khz) / target_div;
-
-	/* Convert DPREFCLK DFS Slice DID to divider*/
-	target_div = dentist_get_divider_from_did(dprefclk_did);
-	//Get dprefclk in khz
-	regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-		* clk_mgr->base.dentist_vco_freq_khz) / target_div;
-
-	/* Convert DCFCLK DFS Slice DID to divider*/
-	target_div = dentist_get_divider_from_did(dcfclk_did);
-	//Get dcfclk in khz
-	regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-		* clk_mgr->base.dentist_vco_freq_khz) / target_div;
-
-	/* Convert DTBCLK DFS Slice DID to divider*/
-	target_div = dentist_get_divider_from_did(dtbclk_did);
-	//Get dtbclk in khz
-	regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-		*
clk_mgr->base.dentist_vco_freq_khz) / target_div; -} - static void dcn401_clock_read_ss_info(struct clk_mgr_internal *clk_mgr) { struct dc_bios *bp = clk_mgr->base.ctx->dc_bios; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 85a2ef82afa5..387b392f4c0d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1254,7 +1254,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); dc_update_visual_confirm_color(dc, context, pipe); } @@ -3704,7 +3705,8 @@ static void commit_planes_for_stream_fast(struct dc *dc, struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); if (dc->debug.visual_confirm) dc_update_visual_confirm_color(dc, context, pipe); @@ -3839,7 +3841,8 @@ static void commit_planes_for_stream(struct dc *dc, struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); if (dc->debug.visual_confirm) dc_update_visual_confirm_color(dc, context, pipe); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index a50fe3ec79c1..7c73efe19525 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1172,3 +1172,30 @@ void dml21_get_pipe_mcache_config( mcache_pipe_config->plane1_enabled = dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format); } + +void dml21_set_dc_p_state_type( + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming) +{ + switch (stream_programming->uclk_pstate_method) { + case dml2_uclk_pstate_support_method_vactive: + case dml2_uclk_pstate_support_method_fw_vactive_drr: + pipe_ctx->p_state_type = P_STATE_V_ACTIVE; + break; + case dml2_uclk_pstate_support_method_vblank: + case dml2_uclk_pstate_support_method_fw_vblank_drr: + pipe_ctx->p_state_type = P_STATE_V_BLANK; + break; + case dml2_uclk_pstate_support_method_fw_subvp_phantom: + case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + pipe_ctx->p_state_type = P_STATE_SUB_VP; + break; + case dml2_uclk_pstate_support_method_fw_drr: + pipe_ctx->p_state_type = P_STATE_FPO; + break; + default: + pipe_ctx->p_state_type = P_STATE_UNKNOWN; + break; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h index 4cc0a1fbb93d..97a8f51b7780 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h @@ -26,4 +26,5 @@ void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_water void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); void dml21_map_hw_resources(struct dml2_context *dml_ctx); void 
dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); +void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index d276458e50fd..622c98f4b7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -316,6 +316,8 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[pipe_ctx->pipe_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation)); + + dml21_set_dc_p_state_type(pipe_ctx, stream_prog); } static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index d5fefce3e74b..c55d7279fe51 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -180,6 +180,7 @@ struct clk_state_registers_and_bypass { uint32_t dispclk; uint32_t dppclk; uint32_t dtbclk; + uint32_t fclk; uint32_t dppclk_bypass; uint32_t dcfclk_bypass; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 12282f96dfe1..c2dd061892f4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -191,7 +191,8 @@ enum dentist_divider_range { CLK_SR_DCN401(CLK0_CLK1_DFS_CNTL, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK2_DFS_CNTL, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK3_DFS_CNTL, CLK01, 0), \ - CLK_SR_DCN401(CLK0_CLK4_DFS_CNTL, CLK01, 0) + CLK_SR_DCN401(CLK0_CLK4_DFS_CNTL, CLK01, 0), \ + CLK_SR_DCN401(CLK2_CLK2_DFS_CNTL, CLK20, 0) #define CLK_COMMON_MASK_SH_LIST_DCN401(mask_sh) \ CLK_COMMON_MASK_SH_LIST_DCN321(mask_sh) @@ -235,6 +236,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_DFS_CNTL; uint32_t CLK1_CLK3_DFS_CNTL; uint32_t CLK1_CLK4_DFS_CNTL; + uint32_t CLK2_CLK2_DFS_CNTL; uint32_t CLK1_CLK0_CURRENT_CNT; uint32_t CLK1_CLK1_CURRENT_CNT; From e8d77cfdd2698039667432b8569bac34d13508c1 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 25 Jun 2024 13:23:36 -0400 Subject: [PATCH 014/447] drm/amd/display: Replace assert with error message in dp_retrieve_lttpr_cap() [Why] When assert in dp_retrieve_lttpr_cap() is hit, dmesg has traces like: RIP: 0010:dp_retrieve_lttpr_cap+0xcc/0x1a0 [amdgpu] Call Trace: dp_retrieve_lttpr_cap+0xcc/0x1a0 [amdgpu] report_bug+0x1e8/0x240 handle_bug+0x46/0x80 link_detect+0x35/0x580 [amdgpu] It happens when LTTPRs fail to increment dpcd repeater count. We have a recovery action in place for such cases. Assert is misleading, an indicative error in dmesg is more useful. [How] Remove ASSERT and use DC_LOG_ERROR instead. 
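For context, the DPCD PHY_REPEATER_CNT field being validated here is one-hot encoded, so a raw value of 0 can never describe a valid repeater chain. A minimal sketch of the parsing this check relies on (assuming the one-hot encoding from the DP spec; this mirrors the dp_parse_lttpr_repeater_count() helper referenced in the diff below):

    /* PHY_REPEATER_CNT (DPCD 0xF0002) is one-hot encoded: 0x80 = 1 LTTPR,
     * 0x40 = 2, ..., 0x01 = 8. Any other raw value, including 0, is an
     * invalid encoding; the error path below recovers by forcing 0x80. */
    static uint8_t parse_lttpr_repeater_count(uint8_t raw_cnt)
    {
        switch (raw_cnt) {
        case 0x80: return 1;
        case 0x40: return 2;
        case 0x20: return 3;
        case 0x10: return 4;
        case 0x08: return 5;
        case 0x04: return 6;
        case 0x02: return 7;
        case 0x01: return 8;
        default: return 0; /* invalid encoding */
        }
    }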
Reviewed-by: Michael Strauss Signed-off-by: Jerry Zuo Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/protocols/link_dp_capability.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 46bb7a855bc2..c257e733044a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1541,7 +1541,11 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) * Override count to 1 if we receive a known bad count (0 or an invalid value) */ if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { - ASSERT(0); + /* If you see this message consistently, either the host platform has FIXED_VS flag + * incorrectly configured or the sink device is returning an invalid count. + */ + DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.", + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80; DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); } From bc2fe69f16c7122b5dabc294aa2d6065d8da2169 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 25 Jun 2024 13:06:43 -0600 Subject: [PATCH 015/447] drm/amd/display: Revert "Check HDCP returned status" This reverts commit 5d93060d430b359e16e7c555c8f151ead1ac614b due to a power consumption regression. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/modules/hdcp/hdcp1_execution.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 1e495e884484..8bc377560787 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -432,18 +432,18 @@ static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp, goto out; } - if (!mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, + mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, &input->bstatus_read, &status, - hdcp, "bstatus_read")) - goto out; - if (!mod_hdcp_execute_and_set(check_link_integrity_dp, + hdcp, "bstatus_read"); + + mod_hdcp_execute_and_set(check_link_integrity_dp, &input->link_integrity_check, &status, - hdcp, "link_integrity_check")) - goto out; - if (!mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, + hdcp, "link_integrity_check"); + + mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, &input->reauth_request_check, &status, - hdcp, "reauth_request_check")) - goto out; + hdcp, "reauth_request_check"); + out: return status; } From 3c9154310af71802de2f40c5689b66009a407f37 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Tue, 25 Jun 2024 15:22:25 -0400 Subject: [PATCH 016/447] drm/amd/display: fix dscclk programming sequence on DCN401 [why] The mux to switch between refclk and dto_dsc_clk is non double buffered. However dto dsc clk's phase and modulo divider registers are currently configured as double buffered update. This causes a problem when we switch to use dto dsc clk and program phase and modulo in the same sequence. 
In this sequence dsc clk is switched to dto but the clock divider programming doesn't take effect until next frame. When we try to program DSCC registers, SMN bus will hang because dto dsc clk divider phase is set to 0. [how] Configure phase and modulo to take effect immediately. Always switch to dto dsc clk before DSC clock is ungated. Switch back to refclk after DSC clock is gated. Acked-by: Rodrigo Siqueira Reviewed-by: Jerry Zuo Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn20/dcn20_dccg.h | 6 +-- .../amd/display/dc/dccg/dcn401/dcn401_dccg.c | 32 ++++++++++----- .../amd/display/dc/dccg/dcn401/dcn401_dccg.h | 4 -- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 2 +- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 21 +++------- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 6 +-- drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 5 +-- .../gpu/drm/amd/display/dc/link/link_dpms.c | 41 ++++++++++++------- 8 files changed, 58 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h index 1e0292861244..6ac2bd86c4db 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h @@ -346,11 +346,7 @@ type SYMCLK32_LE3_SRC_SEL;\ type SYMCLK32_LE2_EN;\ type SYMCLK32_LE3_EN;\ - type DP_DTO_ENABLE[MAX_PIPES];\ - type DSCCLK0_DTO_DB_EN;\ - type DSCCLK1_DTO_DB_EN;\ - type DSCCLK2_DTO_DB_EN;\ - type DSCCLK3_DTO_DB_EN; + type DP_DTO_ENABLE[MAX_PIPES]; struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c index 07f1f396ba52..0b889004509a 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c @@ -730,35 +730,35 @@ void dccg401_init(struct dccg *dccg) } } -static void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, bool enable) +static void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - uint32_t phase = enable ?
1 : 0; switch (inst) { case 0: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1, DSCCLK0_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK0_DTO_PARAM, - DSCCLK0_DTO_PHASE, phase, + DSCCLK0_DTO_PHASE, 1, DSCCLK0_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1); + break; case 1: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1, DSCCLK1_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK1_DTO_PARAM, - DSCCLK1_DTO_PHASE, phase, + DSCCLK1_DTO_PHASE, 1, DSCCLK1_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1); break; case 2: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1, DSCCLK2_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK2_DTO_PARAM, - DSCCLK2_DTO_PHASE, phase, + DSCCLK2_DTO_PHASE, 1, DSCCLK2_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1); break; case 3: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1, DSCCLK3_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK3_DTO_PARAM, - DSCCLK3_DTO_PHASE, phase, + DSCCLK3_DTO_PHASE, 1, DSCCLK3_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1); break; default: BREAK_TO_DEBUGGER(); @@ -774,15 +774,27 @@ static void dccg401_set_ref_dscclk(struct dccg *dccg, switch (dsc_inst) { case 0: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0); + REG_UPDATE_2(DSCCLK0_DTO_PARAM, + DSCCLK0_DTO_PHASE, 0, + DSCCLK0_DTO_MODULO, 0); break; case 1: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 0); + REG_UPDATE_2(DSCCLK1_DTO_PARAM, + DSCCLK1_DTO_PHASE, 0, + DSCCLK1_DTO_MODULO, 0); break; case 2: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 0); + REG_UPDATE_2(DSCCLK2_DTO_PARAM, + DSCCLK2_DTO_PHASE, 0, + DSCCLK2_DTO_MODULO, 0); break; case 3: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 0); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 0, + DSCCLK3_DTO_MODULO, 0); break; default: return; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h index 8bcddc836347..a196ce9e8127 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h @@ -117,10 +117,6 @@ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_DTO_DB_EN, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 2532ad410cb5..ea9bedf65d84 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2186,9 +2186,9 @@ static void post_unlock_reset_opp(struct dc *dc, * yet power gated. 
*/ dsc->funcs->dsc_wait_disconnect_pending_clear(dsc); + dsc->funcs->dsc_disable(dsc); if (dccg->funcs->set_ref_dscclk) dccg->funcs->set_ref_dscclk(dccg, dsc->inst); - dsc->funcs->dsc_disable(dsc); } } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 05d8f81daa06..4534843ba66a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1029,24 +1029,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, true); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; ASSERT(odm_dsc); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, true); } - dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; - dsc_cfg.pic_width *= opp_cnt; - optc_dsc_mode = dsc_optc_cfg.is_pixel_format_444 ? OPTC_DSC_ENABLED_444 : OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED; - /* Enable DSC in OPTC */ DC_LOG_DSC("Setting optc DSC config for tg instance %d:", pipe_ctx->stream_res.tg->inst); pipe_ctx->stream_res.tg->funcs->set_dsc_config(pipe_ctx->stream_res.tg, @@ -1060,13 +1056,9 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) OPTC_DSC_DISABLED, 0, 0); /* only disconnect DSC block, DSC is disabled when OPP head pipe is reset */ - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, pipe_ctx->stream_res.dsc->inst, false); - dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); + dsc->funcs->dsc_disconnect(pipe_ctx->stream_res.dsc); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { ASSERT(odm_pipe->stream_res.dsc); - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_pipe->stream_res.dsc->inst, false); odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc); } } @@ -1137,10 +1129,7 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe->stream_res.dsc) { struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc; - struct dccg *dccg = dc->res_pool->dccg; - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, false); /* disconnect DSC block from stream */ dsc->funcs->dsc_disconnect(dsc); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 2c50c0f745a0..b9378f18c020 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1542,7 +1542,6 @@ static void update_dsc_for_odm_change(struct dc *dc, struct dc_state *context, struct pipe_ctx *old_pipe; struct pipe_ctx *new_pipe; struct pipe_ctx 
*old_opp_heads[MAX_PIPES]; - struct dccg *dccg = dc->res_pool->dccg; struct pipe_ctx *old_otg_master; int old_opp_head_count = 0; @@ -1568,12 +1567,9 @@ static void update_dsc_for_odm_change(struct dc *dc, struct dc_state *context, for (i = 0; i < old_opp_head_count; i++) { old_pipe = old_opp_heads[i]; new_pipe = &context->res_ctx.pipe_ctx[old_pipe->pipe_idx]; - if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc) { - dccg->funcs->set_dto_dscclk(dccg, - old_pipe->stream_res.dsc->inst, false); + if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc) old_pipe->stream_res.dsc->funcs->dsc_disconnect( old_pipe->stream_res.dsc); - } } } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 4fb1aacee894..d619eb229a62 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -211,10 +211,7 @@ struct dccg_funcs { struct dccg *dccg, enum streamclk_source src, uint32_t otg_inst); - void (*set_dto_dscclk)( - struct dccg *dccg, - uint32_t dsc_inst, - bool enable); + void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst); void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst); }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 65607589495f..d6550b904b16 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -817,17 +817,17 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, true); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, true); } dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; dsc_cfg.pic_width *= opp_cnt; @@ -879,19 +879,32 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) } /* disable DSC block */ - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, pipe_ctx->stream_res.dsc->inst, false); - pipe_ctx->stream_res.dsc->funcs->dsc_disconnect(pipe_ctx->stream_res.dsc); - if (dccg->funcs->set_ref_dscclk) - dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst); - pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_pipe->stream_res.dsc->inst, false); + for (odm_pipe = pipe_ctx; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc); + /* + * TODO - dsc_disconnect is a double buffered register. + * by the time we call dsc_disable, dsc may still remain + * connected to OPP. In this case OPTC will no longer + * get correct pixel data because DSCC is off. 
However + * we also can't wait for the disconnect pending + * complete, because this function can be called + * with/without OTG master lock acquired. When the lock + * is acquired we will never get pending complete until + * we release the lock later. So there is no easy way to + * solve this problem especially when the lock is + * acquired. DSC is a front end hw block it should be + * programmed as part of front end sequence, where the + * commit sequence without lock and update sequence + * with lock are completely separated. However because + * we are programming dsc as part of back end link + * programming sequence, we don't know if front end OPTC + * master lock is acquired. The back end should be + * agnostic to front end lock. DSC programming shouldn't + * belong to this sequence. + */ + odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); if (dccg->funcs->set_ref_dscclk) dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst); - odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); } } } From 5fc77c26970d443f1c020ee8a5d475ad6b81e15f Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 31 May 2024 11:37:15 -0400 Subject: [PATCH 017/447] drm/amd/display: apply vmin optimization even if it doesn't reach vmin level [why] Based on power measurement result, in most cases when display clock is higher than Vmin display clock, lowering display clock using dynamic ODM will improve overall power consumption by 0 to 4 watts even if we can't reach Vmin. [how] Allow vmin optimization applied even if dispclk can't reach Vmin. Reviewed-by: Jun Lei Signed-off-by: Jerry Zuo Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 14 +++++++++----- .../display/dc/dml2/dml21/src/dml2_top/dml_top.c | 13 +++++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 603036df68ba..60a9faf81d3d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -591,6 +591,8 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) &in_out->base_display_config->display_config; const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; + struct dml2_optimization_stage4_state *state = + &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -611,28 +613,30 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) */ if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; for (i = 0; i < display_config->num_streams; i++) { if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; else if 
(in_out->base_display_config->stage3.stream_svp_meta[i].valid && in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * ODM Combine requires horizontal timing divisible by 2 so each * ODM segment has the same size. */ else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * Our hardware support seamless ODM transitions for DP encoders * only. */ else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; } + state->performed = true; + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index 2fb3e2f45e07..b25e9230adea 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -268,9 +268,18 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); - if (vmin_success) { + if (l->optimized_display_config_with_meta.stage4.performed) { + /* + * when performed is true, optimization has applied to + * optimized_display_config_with_meta and it has passed mode + * support. However it may or may not pass the test function to + * reach actual Vmin. As long as voltage is optimized even if it + * doesn't reach Vmin level, there is still power benefit so in + * this case we will still copy this optimization into base + * display config. + */ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = true; + l->base_display_config_with_meta.stage4.success = vmin_success; } /* From c83ecc0bee94995117329fccbfa6e8b43ce56044 Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Thu, 27 Jun 2024 17:41:36 -0400 Subject: [PATCH 018/447] drm/amd/display: Implement bias and scale pre scl why: New scaler needs the input to be full range color space. 
This will also fix issues that come up due to not having a predefined limited color space matrix for certain color spaces how: Use bias and scale HW to expand the range of limited color spaces to full before the scaler Reviewed-by: Krunoslav Kovac Signed-off-by: Jerry Zuo Signed-off-by: Relja Vojvodic Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/core/dc_hw_sequencer.c | 6 ++--- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- .../drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c | 27 ++++++++++++++++++- .../drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h | 3 +++ .../amd/display/dc/dpp/dcn401/dcn401_dpp.c | 3 ++- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 3 +-- .../gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 13 ++++----- 7 files changed, 43 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 87e36d51c56d..9e42a0128baa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -901,12 +901,12 @@ void hwss_program_bias_and_scale(union block_sequence_params *params) struct pipe_ctx *pipe_ctx = params->program_bias_and_scale_params.pipe_ctx; struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_plane_state *plane_state = pipe_ctx->plane_state; - struct dc_bias_and_scale bns_params = {0}; + struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; //TODO :for CNVC set scale and bias registers if necessary - build_prescale_params(&bns_params, plane_state); - if (dpp->funcs->dpp_program_bias_and_scale) + if (dpp->funcs->dpp_program_bias_and_scale) { dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); + } } void hwss_power_on_mpc_mem_pwr(union block_sequence_params *params) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4c9bb913125d..83fe13f5a367 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1292,7 +1292,7 @@ struct dc_plane_state { struct dc_gamma gamma_correction; struct dc_transfer_func in_transfer_func; - struct dc_bias_and_scale *bias_and_scale; + struct dc_bias_and_scale bias_and_scale; struct dc_csc_transform input_csc_color_matrix; struct fixed31_32 coeff_reduction_factor; struct fixed31_32 hdr_mult; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c index e16274fee31d..8473c694bfdc 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c @@ -59,6 +59,31 @@ void dpp35_dppclk_control( DISPCLK_R_GATE_DISABLE, 0); } +void dpp35_program_bias_and_scale_fcnv( + struct dpp *dpp_base, + struct dc_bias_and_scale *params) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + + if (!params->bias_and_scale_valid) { + REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, 0); + REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, 0); + REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, 0); + + REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, 0x1F000); + REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, 0x1F000); + REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, 0x1F000); + } else { + REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, params->bias_red); + REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, params->bias_green); + REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, params->bias_blue); + + REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, params->scale_red); + REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, params->scale_green); 
+ REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, params->scale_blue); + } +} + static struct dpp_funcs dcn35_dpp_funcs = { .dpp_program_gamcor_lut = dpp3_program_gamcor_lut, .dpp_read_state = dpp30_read_state, @@ -81,7 +106,7 @@ static struct dpp_funcs dcn35_dpp_funcs = { .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) - .dpp_program_bias_and_scale = NULL, + .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv, .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer, .set_cursor_attributes = dpp3_set_cursor_attributes, .set_cursor_position = dpp1_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h index 135872d88219..3ca339a16e5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h @@ -61,4 +61,7 @@ bool dpp35_construct(struct dcn3_dpp *dpp3, struct dc_context *ctx, void dpp35_set_fgcg(struct dcn3_dpp *dpp, bool enable); +void dpp35_program_bias_and_scale_fcnv(struct dpp *dpp_base, + struct dc_bias_and_scale *bias_and_scale); + #endif // __DCN35_DPP_H diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c index 7cae18fd7be9..97bf26fa3573 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c @@ -30,6 +30,7 @@ #include "basics/conversion.h" #include "dcn30/dcn30_cm_common.h" #include "dcn32/dcn32_dpp.h" +#include "dcn35/dcn35_dpp.h" #define REG(reg)\ dpp->tf_regs->reg @@ -240,7 +241,7 @@ static struct dpp_funcs dcn401_dpp_funcs = { .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) - .dpp_program_bias_and_scale = NULL, + .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv, .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer, .set_cursor_attributes = dpp401_set_cursor_attributes, .set_cursor_position = dpp401_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index ea9bedf65d84..9a00479f0417 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1698,7 +1698,7 @@ static void dcn20_update_dchubp_dpp( plane_state->update_flags.bits.input_csc_change || plane_state->update_flags.bits.color_space_change || plane_state->update_flags.bits.coeff_reduction_change) { - struct dc_bias_and_scale bns_params = {0}; + struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; // program the input csc dpp->funcs->dpp_setup(dpp, @@ -1715,7 +1715,6 @@ static void dcn20_update_dchubp_dpp( } if (dpp->funcs->dpp_program_bias_and_scale) { //TODO :for CNVC set scale and bias registers if necessary - build_prescale_params(&bns_params, plane_state); dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 27bba47186e9..41c76ba9ba56 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ 
b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -217,12 +217,13 @@ enum optc_dsc_mode { }; struct dc_bias_and_scale { - uint16_t scale_red; - uint16_t bias_red; - uint16_t scale_green; - uint16_t bias_green; - uint16_t scale_blue; - uint16_t bias_blue; + uint32_t scale_red; + uint32_t bias_red; + uint32_t scale_green; + uint32_t bias_green; + uint32_t scale_blue; + uint32_t bias_blue; + bool bias_and_scale_valid; }; enum test_pattern_dyn_range { From 218784049f4b90834dd7b854857df0b7e0f76567 Mon Sep 17 00:00:00 2001 From: Jingwen Zhu Date: Tue, 25 Jun 2024 16:20:58 +0800 Subject: [PATCH 019/447] drm/amd/display: avoid disable otg when dig was disabled [Why] This is a workaround for a dcn3.1 hang that happens if otg dispclk is ramped while otg is on and stream enc is off. But this w/a should not trigger when we have a dig active. [How] Avoid disabling otg when the dig FE/BE FIFO was not switched. Acked-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Jingwen Zhu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 18 +++++++++++++++--- .../dc/dio/dcn35/dcn35_dio_stream_encoder.c | 9 +++++++++ .../drm/amd/display/dc/inc/hw/stream_encoder.h | 1 + 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 70ee0089a20d..248d22b23a6d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -120,7 +120,6 @@ static int dcn35_get_active_display_cnt_wa( return display_count; } - static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower, bool disable) { @@ -128,14 +127,27 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; struct pipe_ctx *pipe = safe_to_lower ?
&context->res_ctx.pipe_ctx[i] : &dc->current_state->res_ctx.pipe_ctx[i]; - + bool stream_changed_otg_dig_on = false; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; + stream_changed_otg_dig_on = old_pipe->stream && new_pipe->stream && + old_pipe->stream != new_pipe->stream && + old_pipe->stream_res.tg == new_pipe->stream_res.tg && + new_pipe->stream->link_enc && !new_pipe->stream->dpms_off && + new_pipe->stream->link->link_enc->funcs->is_dig_enabled && + new_pipe->stream->link->link_enc->funcs->is_dig_enabled( + new_pipe->stream->link->link_enc) && + new_pipe->stream_res.stream_enc && + new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && + new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || - !pipe->stream->link_enc)) { + !pipe->stream->link_enc) && !stream_changed_otg_dig_on) { + /* This w/a should not trigger when we have a dig active */ if (disable) { if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->disable_crtc) pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index 6a179e5ab417..fcc88ef83e6a 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -392,6 +392,14 @@ static void enc35_reset_fifo(struct stream_encoder *enc, bool reset) udelay(10); } +static bool enc35_is_fifo_enabled(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t reset_val; + + REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &reset_val); + return (reset_val == 0) ? false : true; +} void enc35_disable_fifo(struct stream_encoder *enc) { struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); @@ -465,6 +473,7 @@ static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .set_input_mode = enc314_set_dig_input_mode, .enable_fifo = enc35_enable_fifo, .disable_fifo = enc35_disable_fifo, + .is_fifo_enabled = enc35_is_fifo_enabled, .map_stream_to_link = enc35_stream_encoder_map_to_link, }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index e5e11c84e9e2..6fe42120738d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -271,6 +271,7 @@ struct stream_encoder_funcs { struct stream_encoder *enc, unsigned int pix_per_container); void (*enable_fifo)(struct stream_encoder *enc); void (*disable_fifo)(struct stream_encoder *enc); + bool (*is_fifo_enabled)(struct stream_encoder *enc); void (*map_stream_to_link)(struct stream_encoder *enc, uint32_t stream_enc_inst, uint32_t link_enc_inst); }; From f5c783868855bbecfc4311050dbaca4cb28ada5e Mon Sep 17 00:00:00 2001 From: Sridevi Arvindekar Date: Thu, 27 Jun 2024 09:39:33 -0400 Subject: [PATCH 020/447] drm/amd/display: Add option to allow transition when odm is forced Added option to allow transition for forced odm. Add the variation to the nightly run. 
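As a hypothetical usage sketch, a test that forces ODM combine could opt in to the new behavior like this (the two debug fields come from the diff below; the surrounding setup is illustrative only):

    /* Force 2:1 ODM combine for this stream, but let dc temporarily fall
     * back to ODM bypass during a minimal transition to avoid glitches. */
    stream->debug.force_odm_combine_segments = 2;
    stream->debug.allow_transition_for_forced_odm = 1;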
Reviewed-by: Wenjing Liu Signed-off-by: Jerry Zuo Signed-off-by: Sridevi Arvindekar Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- drivers/gpu/drm/amd/display/dc/dc_stream.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 387b392f4c0d..c35029c65223 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4338,7 +4338,8 @@ static void backup_and_set_minimal_pipe_split_policy(struct dc *dc, dc->debug.force_disable_subvp = true; for (i = 0; i < context->stream_count; i++) { policy->force_odm[i] = context->streams[i]->debug.force_odm_combine_segments; - context->streams[i]->debug.force_odm_combine_segments = 0; + if (context->streams[i]->debug.allow_transition_for_forced_odm) + context->streams[i]->debug.force_odm_combine_segments = 0; } } diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 8ebd7e9e776e..3d9ee4da7056 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -159,6 +159,12 @@ struct test_pattern { struct dc_stream_debug_options { char force_odm_combine_segments; + /* + * When force_odm_combine_segments is non zero, allow dc to + * temporarily transition to ODM bypass when minimal transition state + * is required to prevent visual glitches showing on the screen + */ + char allow_transition_for_forced_odm; }; #define LUMINANCE_DATA_TABLE_SIZE 10 From b3c9c9affd71f1f9b136534d6f56f392edb78620 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 27 Jun 2024 10:42:26 -0400 Subject: [PATCH 021/447] drm/amd/display: When resync fifo ensure to use correct pipe ctx We resync the FIFO after each pipe update in apply_ctx_to_hw. However, this means that some pipes (in hardware) are based on the new context and some are based on the current_state (since the pipes are updated one at a time). In this case we must ensure to use the pipe_ctx that's currently still configured in hardware when turning off / on OTG's and reconfiguring ODM during the resync.
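The selection rule introduced here reduces to the following sketch (simplified from the hunks below; the helper name is hypothetical):

    /* Pipes up to and including current_pipe_idx have already been
     * programmed from the new context in apply_ctx_to_hw; later pipes
     * still reflect what is currently configured in hardware. */
    static struct pipe_ctx *get_hw_pipe_ctx(struct dc *dc, struct dc_state *context,
            unsigned int current_pipe_idx, unsigned int i)
    {
        if (i <= current_pipe_idx)
            return &context->res_ctx.pipe_ctx[i];
        return &dc->current_state->res_ctx.pipe_ctx[i];
    }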
Reviewed-by: Nicholas Kazlauskas Signed-off-by: Jerry Zuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 2 +- .../amd/display/dc/hwss/dcn314/dcn314_hwseq.c | 13 +++++++++--- .../amd/display/dc/hwss/dcn314/dcn314_hwseq.h | 2 +- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 20 ++++++++++++++----- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.h | 2 +- .../display/dc/hwss/hw_sequencer_private.h | 3 ++- 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 51c5195f8325..982b2d5bfb5f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2443,7 +2443,7 @@ enum dc_status dce110_apply_ctx_to_hw( #ifdef CONFIG_DRM_AMD_DC_FP if (hws->funcs.resync_fifo_dccg_dio) - hws->funcs.resync_fifo_dccg_dio(hws, dc, context); + hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 388404cdeeaa..4e93eeedfc1b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -355,14 +355,18 @@ void dcn314_calculate_pix_rate_divider( } } -void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context) +void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx) { unsigned int i; struct pipe_ctx *pipe = NULL; bool otg_disabled[MAX_PIPES] = {false}; for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) { + pipe = &context->res_ctx.pipe_ctx[i]; + } else { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + } if (pipe->top_pipe || pipe->prev_odm_pipe) continue; @@ -377,7 +381,10 @@ void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg); for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) + pipe = &context->res_ctx.pipe_ctx[i]; + else + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (otg_disabled[i]) { int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h index fb4f90f61b22..2305ad282f21 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h @@ -41,7 +41,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig void dcn314_calculate_pix_rate_divider(struct dc *dc, struct dc_state *context, const struct dc_stream_state *stream); -void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); +void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx); void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool clock_on); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 4534843ba66a..7f41eccefe02 100644 
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1201,20 +1201,27 @@ void dcn32_calculate_pix_rate_divider( } } -void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context) +void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx) { unsigned int i; struct pipe_ctx *pipe = NULL; bool otg_disabled[MAX_PIPES] = {false}; + struct dc_state *dc_state = NULL; for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) { + pipe = &context->res_ctx.pipe_ctx[i]; + dc_state = context; + } else { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + dc_state = dc->current_state; + } if (!resource_is_pipe_type(pipe, OTG_MASTER)) continue; if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal)) - && dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_PHANTOM) { + && dc_state_get_pipe_subvp_type(dc_state, pipe) != SUBVP_PHANTOM) { pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); otg_disabled[i] = true; @@ -1224,7 +1231,10 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_ hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg); for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) + pipe = &context->res_ctx.pipe_ctx[i]; + else + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (otg_disabled[i]) { int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst }; @@ -1572,7 +1582,7 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) #ifdef CONFIG_DRM_AMD_DC_FP if (hws->funcs.resync_fifo_dccg_dio) - hws->funcs.resync_fifo_dccg_dio(hws, dc, context); + hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i); #endif } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h index db562e45d6ff..b1563e2c0491 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h @@ -75,7 +75,7 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable); unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); -void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); +void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx); void dcn32_subvp_pipe_control_lock(struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 7ac3f2a09487..7a75ff320511 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -170,7 +170,8 @@ struct hwseq_private_funcs { unsigned int *k1_div, unsigned int *k2_div); void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc, - struct dc_state *context); + struct dc_state *context, + unsigned int current_pipe_idx); enum dc_status (*apply_single_controller_ctx_to_hw)( struct pipe_ctx *pipe_ctx, struct dc_state *context, From c18fa08e6fd8952e88fa9ad108371c2bd5b82564 Mon Sep 17 
00:00:00 2001 From: Alvin Lee Date: Thu, 27 Jun 2024 11:56:47 -0400 Subject: [PATCH 022/447] drm/amd/display: Disable subvp based on HW cursor requirement [Description] - There are situations where HW cursor is required - In these scenarios we should disable subvp based on the HW cursor requirement Reviewed-by: Dillon Varone Signed-off-by: Jerry Zuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 7 +++++++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 3 +++ .../amd/display/dc/dml2/dml21/dml21_translation_helper.c | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c35029c65223..9897e322e2d5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2717,6 +2717,10 @@ static enum surface_update_type check_update_surfaces_for_stream( overall_type = UPDATE_TYPE_FULL; } + if (stream_update && stream_update->hw_cursor_req) { + overall_type = UPDATE_TYPE_FULL; + } + /* some stream updates require passive update */ if (stream_update) { union stream_update_flags *su_flags = &stream_update->stream->update_flags; @@ -3012,6 +3016,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_infopacket) stream->vrr_infopacket = *update->vrr_infopacket; + if (update->hw_cursor_req) + stream->hw_cursor_req = *update->hw_cursor_req; + if (update->allow_freesync) stream->allow_freesync = *update->allow_freesync; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 3d9ee4da7056..de9bd72ca514 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -266,6 +266,8 @@ struct dc_stream_state { struct dc_cursor_attributes cursor_attributes; struct dc_cursor_position cursor_position; + bool hw_cursor_req; + uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode /* from stream struct */ @@ -350,6 +352,7 @@ struct dc_stream_update { struct dc_cursor_attributes *cursor_attributes; struct dc_cursor_position *cursor_position; + bool *hw_cursor_req; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 7c73efe19525..405544920f3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -516,7 +516,7 @@ static void populate_dml21_stream_overrides_from_stream_state( if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy || stream->debug.force_odm_combine_segments > 0) stream_desc->overrides.disable_dynamic_odm = true; - stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp; + stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || stream->hw_cursor_req; } static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) From d94df7cad26b4c5958fbfa550d65794c89aa90ba Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Fri, 28 Jun 2024 16:05:36 -0400 Subject: [PATCH 023/447] drm/amd/display: Fix DP-DVI dongle hotplug [why] Hotplugging with a DVI-DP dongle on a pre-rdna embedded platform works only about half the time. The regression was found to be the setting of link->type here. [what] Reverts the fix, keeping only the logging that was added.
Reviewed-by: Wenjing Liu Signed-off-by: Jerry Zuo Signed-off-by: Gabe Teeger Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_detection.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index bba644024780..391dbe81534d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -863,7 +863,6 @@ static bool detect_link_and_local_sink(struct dc_link *link, struct dc_sink *prev_sink = NULL; struct dpcd_caps prev_dpcd_caps; enum dc_connection_type new_connection_type = dc_connection_none; - enum dc_connection_type pre_connection_type = link->type; const uint32_t post_oui_delay = 30; // 30ms DC_LOGGER_INIT(link->ctx->logger); @@ -965,7 +964,6 @@ static bool detect_link_and_local_sink(struct dc_link *link, } if (!detect_dp(link, &sink_caps, reason)) { - link->type = pre_connection_type; if (prev_sink) dc_sink_release(prev_sink); @@ -1299,8 +1297,7 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason) link->dpcd_caps.is_mst_capable) is_delegated_to_mst_top_mgr = discover_dp_mst_topology(link, reason); - if (is_local_sink_detect_success && - pre_link_type == dc_connection_mst_branch && + if (pre_link_type == dc_connection_mst_branch && link->type != dc_connection_mst_branch) is_delegated_to_mst_top_mgr = link_reset_cur_dp_mst_topology(link); From f60881ca126cf825b89b4118e93dbd82ea9bcf33 Mon Sep 17 00:00:00 2001 From: Revalla Hari Krishna Date: Wed, 26 Jun 2024 18:03:16 +0530 Subject: [PATCH 024/447] drm/amd/display: Refactoring OPP [Why] To refactor OPP files [How] Moved opp related files to specific opp folder and updated Makefiles. Acked-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Revalla Hari Krishna Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 1 - drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/opp/Makefile | 16 ++++++++++++++++ .../amd/display/dc/{ => opp}/dcn10/dcn10_opp.c | 0 .../amd/display/dc/{ => opp}/dcn10/dcn10_opp.h | 0 .../amd/display/dc/{ => opp}/dcn20/dcn20_opp.c | 0 .../amd/display/dc/{ => opp}/dcn20/dcn20_opp.h | 0 7 files changed, 17 insertions(+), 2 deletions(-) rename drivers/gpu/drm/amd/display/dc/{ => opp}/dcn10/dcn10_opp.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => opp}/dcn10/dcn10_opp.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => opp}/dcn20/dcn20_opp.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => opp}/dcn20/dcn20_opp.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 9923d0d620d4..75e088b479ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -24,7 +24,6 @@ DCN10 = dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ - dcn10_opp.o \ dcn10_mpc.o \ dcn10_cm_common.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index b3aeabc4d605..744a6c4ac816 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved. 
-DCN20 = dcn20_mpc.o dcn20_opp.o dcn20_mmhubbub.o \ +DCN20 = dcn20_mpc.o dcn20_mmhubbub.o \ dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) diff --git a/drivers/gpu/drm/amd/display/dc/opp/Makefile b/drivers/gpu/drm/amd/display/dc/opp/Makefile index fbfb3c3ad819..1be76754db30 100644 --- a/drivers/gpu/drm/amd/display/dc/opp/Makefile +++ b/drivers/gpu/drm/amd/display/dc/opp/Makefile @@ -25,6 +25,22 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN10 +############################################################################### +OPP_DCN10 = dcn10_opp.o + +AMD_DAL_OPP_DCN10 = $(addprefix $(AMDDALPATH)/dc/opp/dcn10/,$(OPP_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN10) +############################################################################### +# DCN20 +############################################################################### +OPP_DCN20 = dcn20_opp.o + +AMD_DAL_OPP_DCN20 = $(addprefix $(AMDDALPATH)/dc/opp/dcn20/,$(OPP_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN20) +############################################################################### # DCN35 ############################################################################### OPP_DCN35 = dcn35_opp.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c rename to drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h rename to drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c rename to drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h rename to drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h From b995c0a6de6c74656a0c39cd57a0626351b13e3c Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 18 Jun 2024 14:05:08 -0600 Subject: [PATCH 025/447] drm/amd/display: Initialize denominators' default to 1 [WHAT & HOW] Variables used as denominators that might never be assigned another value must not be 0. Change their default to 1 so they are never 0. This fixes 10 DIVIDE_BY_ZERO issues reported by Coverity.
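The failure mode being defended against, in a minimal sketch (the call site and variable names here are illustrative, not taken from the driver):

    unsigned int bpe = get_bytes_per_element(source_format, is_chroma);

    /* Before this change an unhandled source_format left bpe at 0, making
     * the next division undefined behavior; with a default of 1 the result
     * is merely inaccurate instead of a potential crash. */
    unsigned int elements_per_line = bytes_per_line / bpe;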
Reviewed-by: Harry Wentland Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c | 2 +- drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c | 2 +- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 7c56ad0f8812..e7019c95ba79 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index dae13f202220..d8bfc85e5dcd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -39,7 +39,7 @@ static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index 81f0a6f19f87..679b20031903 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -9386,8 +9386,8 @@ static void CalculateVMGroupAndRequestTimes( double TimePerVMRequestVBlank[], double TimePerVMRequestFlip[]) { - unsigned int num_group_per_lower_vm_stage = 0; - unsigned int num_req_per_lower_vm_stage = 0; + unsigned int num_group_per_lower_vm_stage = 1; + unsigned int num_req_per_lower_vm_stage = 1; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); From 367cd9ceba1933b63bc1d87d967baf6d9fd241d2 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 17:34:18 -0600 Subject: [PATCH 026/447] drm/amd/display: Check null-initialized variables [WHAT & HOW] drr_timing and subvp_pipe are initialized to null and they are not always assigned new values. It is necessary to check for null before dereferencing. This fixes 2 FORWARD_NULL issues reported by Coverity. 
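The pattern being fixed is sketched below (condensed from the subvp_drr_schedulable() hunk that follows): drr_timing starts out NULL and is assigned only when a matching pipe is found, so it must be re-checked at the point of use:

    /* drr_timing may still be NULL if no DRR pipe was found */
    if (drr_timing &&
        stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 &&
        subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0)
        schedulable = true;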
Reviewed-by: Nevenko Stupar Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9d399c4ce957..4cb0227bdd27 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -871,8 +871,9 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, * and the max of (VBLANK blanking time, MALL region)). */ - if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) + if (drr_timing && + stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && + subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) schedulable = true; return schedulable; @@ -937,7 +938,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) subvp_pipe = pipe; } - if (found) { + if (found && subvp_pipe) { phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); main_timing = &subvp_pipe->stream->timing; phantom_timing = &phantom_stream->timing; From 3718a619a8c0a53152e76bb6769b6c414e1e83f4 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 20 Jun 2024 20:23:41 -0600 Subject: [PATCH 027/447] drm/amd/display: Check phantom_stream before it is used dcn32_enable_phantom_stream can return null, so the returned value must be checked before it is used. This fixes 1 NULL_RETURNS issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 3ed6d1fa0c44..ee009716d39b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1717,6 +1717,9 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't // already have phantom pipe assigned, etc.) by previous checks. phantom_stream = dcn32_enable_phantom_stream(dc, context, pipes, pipe_cnt, index); + if (!phantom_stream) + return; + dcn32_enable_phantom_plane(dc, context, phantom_stream, index); for (i = 0; i < dc->res_pool->pipe_count; i++) { From 5559598742fb4538e4c51c48ef70563c49c2af23 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 11:51:27 -0600 Subject: [PATCH 028/447] drm/amd/display: Pass non-null to dcn20_validate_apply_pipe_split_flags [WHAT & HOW] "dcn20_validate_apply_pipe_split_flags" dereferences merge, and thus it cannot be a null pointer. Let's pass a valid pointer to avoid a null dereference. This fixes 2 FORWARD_NULL issues reported by Coverity.
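In both call sites the fix is simply to hand the callee real storage instead of NULL (lines taken from the hunks below; MAX_PIPES is the existing dc constant):

    bool merge[MAX_PIPES] = { false };      /* storage the callee can write into */

    vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);

Because the callee dereferences merge, the old NULL argument meant a null dereference whenever this path executed.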
Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c | 3 ++- drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 5e7cfa8e8ec9..eea2b3b307cd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2040,6 +2040,7 @@ bool dcn20_fast_validate_bw( { bool out = false; int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; int pipe_cnt, i, pipe_idx, vlevel; ASSERT(pipes); @@ -2064,7 +2065,7 @@ bool dcn20_fast_validate_bw( if (vlevel > context->bw_ctx.dml.soc.num_states) goto validate_fail; - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); /*initialize pipe_just_split_from to invalid idx*/ for (i = 0; i < MAX_PIPES; i++) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 8663cbc3d1cf..347e6aaea582 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -774,6 +774,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, { bool out = false; int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; int pipe_cnt, i, pipe_idx, vlevel; ASSERT(pipes); @@ -816,7 +817,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, goto validate_fail; } - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; From 1ff12bcd7deaeed25efb5120433c6a45dd5504a8 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 17:38:16 -0600 Subject: [PATCH 029/447] drm/amd/display: Check null pointers before using them [WHAT & HOW] These pointers are null-checked previously in the same function, indicating they might be null, as reported by Coverity. As a result, they need to be checked again when used later. This fixes 3 FORWARD_NULL issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ea1e2d8dcd8c..92774a871e98 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7195,6 +7195,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, int requested_bpc = drm_state ?
drm_state->max_requested_bpc : 8; enum dc_status dc_result = DC_OK; + if (!dm_state) + return NULL; + do { stream = create_stream_for_sink(connector, drm_mode, dm_state, old_stream, @@ -9302,7 +9305,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, if (acrtc) old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); - if (!acrtc->wb_enabled) + if (!acrtc || !acrtc->wb_enabled) continue; dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); @@ -9706,9 +9709,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) DRM_INFO("[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); - hdcp_update_display( - adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector, - new_con_state->hdcp_content_type, enable_encryption); + if (aconnector->dc_link) + hdcp_update_display( + adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector, + new_con_state->hdcp_content_type, enable_encryption); } } From 35ff747c86767937ee1e0ca987545b7eed7a0810 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 20:05:14 -0600 Subject: [PATCH 030/447] drm/amd/display: Check stream before comparing them [WHAT & HOW] amdgpu_dm can pass a null stream to dc_is_stream_unchanged. It is necessary to check both streams for null before dereferencing them. This fixes 1 FORWARD_NULL issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index bcb5267b5a6b..83aa3d8a997a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3241,6 +3241,8 @@ static bool are_stream_backends_same( bool dc_is_stream_unchanged( struct dc_stream_state *old_stream, struct dc_stream_state *stream) { + if (!old_stream || !stream) + return false; if (!are_stream_backends_same(old_stream, stream)) return false; From 892abca6877a96c9123bb1c010cafccdf8ca1b75 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Fri, 28 Jun 2024 15:09:06 -0400 Subject: [PATCH 031/447] drm/amd/display: Deallocate DML memory if allocation fails [Why] When DML memory allocation fails during DC state creation, the partially created state is not deallocated, leaving an uninitialized structure that is not NULL. [How] Deallocate memory if DML memory allocation fails.
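The shape of the fix, taken from the dc_state.c hunk below: each fallible dml2_create() call now releases the partially built state and returns NULL instead of handing back a state whose DML pointer was never initialized.

    if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2)) {
            dc_state_release(state);        /* free the partially built state */
            return NULL;                    /* caller sees NULL, not garbage */
    }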
Reviewed-by: Joshua Aberback Signed-off-by: Jerry Zuo Signed-off-by: Chris Park Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_state.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index e990346e51f6..665157f8d4cb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -211,10 +211,16 @@ struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *p #ifdef CONFIG_DRM_AMD_DC_FP if (dc->debug.using_dml2) { dml2_opt->use_clock_dc_limits = false; - dml2_create(dc, dml2_opt, &state->bw_ctx.dml2); + if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2)) { + dc_state_release(state); + return NULL; + } dml2_opt->use_clock_dc_limits = true; - dml2_create(dc, dml2_opt, &state->bw_ctx.dml2_dc_power_source); + if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2_dc_power_source)) { + dc_state_release(state); + return NULL; + } } #endif From 906fd46a65383cd639e5eec72a047efc33045d86 Mon Sep 17 00:00:00 2001 From: Revalla Hari Krishna Date: Tue, 2 Jul 2024 17:17:40 +0530 Subject: [PATCH 032/447] drm/amd/display: Refactoring MMHUBBUB [Why] MMHUBBUB files should be refactored into their own folder. [How] Moved mmhubbub files from dcn20 to the /mmhubbub/ folder and updated the makefiles to fix compilation. Reviewed-by: Martin Leung Signed-off-by: Jerry Zuo Signed-off-by: Revalla Hari Krishna Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile | 9 +++++++++ .../amd/display/dc/{ => mmhubbub}/dcn20/dcn20_mmhubbub.c | 0 .../amd/display/dc/{ => mmhubbub}/dcn20/dcn20_mmhubbub.h | 0 4 files changed, 10 insertions(+), 1 deletion(-) rename drivers/gpu/drm/amd/display/dc/{ => mmhubbub}/dcn20/dcn20_mmhubbub.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => mmhubbub}/dcn20/dcn20_mmhubbub.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 744a6c4ac816..d92d2b4ee015 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
-DCN20 = dcn20_mpc.o dcn20_mmhubbub.o \ +DCN20 = dcn20_mpc.o \ dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) diff --git a/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile index 505bc0517e08..eab196c57c6c 100644 --- a/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile +++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile @@ -24,6 +24,15 @@ # ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN20 +############################################################################### +MMHUBBUB_DCN20 = dcn20_mmhubbub.o + +AMD_DAL_MMHUBBUB_DCN20 = $(addprefix $(AMDDALPATH)/dc/mmhubbub/dcn20/,$(MMHUBBUB_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MMHUBBUB_DCN20) + ############################################################################### # DCN32 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c rename to drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h rename to drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h From 5d2c102deff63ff8980dfa848ee41858d255c291 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Tue, 2 Jul 2024 13:30:36 -0400 Subject: [PATCH 033/447] drm/amd/display: Do 1-to-1 mapping between OPP and DSC in DML2 [why] To determine which block instance to power-gate, we look at the available pipe resource for both plane and stream. On MPO, DSC3 was incorrectly powered on even though only one stream path was enabled, because the resource mapping was not done correctly. [how] Acquire the correct DSC instance to power on / off based on the instance of OPP, which determines the backend pipe index.
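The change is a single argument in dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy (shown in the hunk below); the DSC instance now follows the OPP instance instead of the frontend pipe index:

    /* before: frontend pipe index, which diverges from the backend pipe on MPO */
    dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, pipe_idx);

    /* after: 1-to-1 with the OPP instance that defines the backend pipe */
    dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc,
                      sec_pipe->stream_res.opp->inst);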
Reviewed-by: Swapnil Patel Signed-off-by: Jerry Zuo Signed-off-by: Sung Joon Kim Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 83aa3d8a997a..4f5b23520365 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -5166,7 +5166,7 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy( sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; if (sec_pipe->stream->timing.flags.DSC == 1) { #if defined(CONFIG_DRM_AMD_DC_FP) - dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, sec_pipe->stream_res.opp->inst); #endif ASSERT(sec_pipe->stream_res.dsc); if (sec_pipe->stream_res.dsc == NULL) From 2563391e57b5a9c1d83fd36c05ac4cbafeb5efe6 Mon Sep 17 00:00:00 2001 From: Chaitanya Dhere Date: Tue, 2 Jul 2024 10:50:04 -0400 Subject: [PATCH 034/447] drm/amd/display: DML2.1 resynchronization July update for DML2.1 library from hardware team targeting DCN401 Reviewed-by: Aurabindo Pillai Signed-off-by: Jerry Zuo Signed-off-by: Chaitanya Dhere Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 3 - .../dml21/inc/bounding_boxes/dcn4_soc_bb.h | 13 +- .../amd/display/dc/dml2/dml21/inc/dml_top.h | 1 - .../dml2/dml21/inc/dml_top_dchub_registers.h | 1 - .../dml21/inc/dml_top_display_cfg_types.h | 2 +- .../dc/dml2/dml21/inc/dml_top_policy_types.h | 1 - .../dml21/inc/dml_top_soc_parameter_types.h | 6 +- .../display/dc/dml2/dml21/inc/dml_top_types.h | 2 + .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 45 +- .../dml2/dml21/src/dml2_core/dml2_core_dcn4.h | 1 - .../src/dml2_core/dml2_core_dcn4_calcs.c | 180 ++- .../src/dml2_core/dml2_core_dcn4_calcs.h | 2 +- .../dml21/src/dml2_core/dml2_core_factory.c | 1 - .../dml21/src/dml2_core/dml2_core_factory.h | 1 - .../dml21/src/dml2_core/dml2_core_shared.c | 40 +- .../dml21/src/dml2_core/dml2_core_shared.h | 1 - .../src/dml2_core/dml2_core_shared_types.h | 6 +- .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 3 +- .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h | 1 - .../dml21/src/dml2_dpmm/dml2_dpmm_factory.c | 1 - .../dml21/src/dml2_dpmm/dml2_dpmm_factory.h | 1 - .../dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c | 1 - .../dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h | 1 - .../dml21/src/dml2_mcg/dml2_mcg_factory.c | 1 - .../dml21/src/dml2_mcg/dml2_mcg_factory.h | 1 - .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c | 1 - .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h | 1 - .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c | 1250 ----------------- .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h | 25 - .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 545 ++++--- .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h | 1 - .../dml21/src/dml2_pmo/dml2_pmo_factory.c | 6 +- .../dml21/src/dml2_pmo/dml2_pmo_factory.h | 1 - .../lib_float_math.c | 1 - .../lib_float_math.h | 1 - .../src/dml2_top/dml2_top_optimization.c | 1 - .../src/dml2_top/dml2_top_optimization.h | 1 - .../dc/dml2/dml21/src/dml2_top/dml_top.c | 30 +- .../dml2/dml21/src/dml2_top/dml_top_mcache.c | 1 - .../dml2/dml21/src/dml2_top/dml_top_mcache.h | 1 - .../dc/dml2/dml21/src/inc/dml2_debug.c | 1 - .../dc/dml2/dml21/src/inc/dml2_debug.h | 1 - .../src/inc/dml2_internal_shared_types.h | 27 +- 43 files changed, 554 
insertions(+), 1657 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index fea857214c0f..dcd01fe04296 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -87,7 +87,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_c CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags) @@ -110,7 +109,6 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_rcflags) @@ -132,7 +130,6 @@ DML21 += src/dml2_dpmm/dml2_dpmm_factory.o DML21 += src/dml2_mcg/dml2_mcg_dcn4.o DML21 += src/dml2_mcg/dml2_mcg_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn3.o -DML21 += src/dml2_pmo/dml2_pmo_dcn4.o DML21 += src/dml2_pmo/dml2_pmo_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o DML21 += src/dml2_standalone_libraries/lib_float_math.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index fe07fcc3d0d5..5af94f06c667 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -344,6 +344,9 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .config_return_buffer_segment_size_in_kbytes = 64, .meta_fifo_size_in_kentries = 22, .compressed_buffer_segment_size_in_kbytes = 64, + .max_flip_time_us = 80, + .max_flip_time_lines = 32, + .hostvm_mode = 0, .subvp_drr_scheduling_margin_us = 100, .subvp_prefetch_end_to_mall_start_us = 15, .subvp_fw_processing_delay = 15, @@ -351,14 +354,18 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .fams2 = { .max_allow_delay_us = 100 * 1000, - .scheduling_delay_us = 50, + .scheduling_delay_us = 125, .vertical_interrupt_ack_delay_us = 18, .allow_programming_delay_us = 18, .min_allow_width_us = 20, .subvp_df_throttle_delay_us = 100, - .subvp_programming_delay_us = 18, + .subvp_programming_delay_us = 200, .subvp_prefetch_to_mall_delay_us = 18, - .drr_programming_delay_us = 18, + 
.drr_programming_delay_us = 35, + + .lock_timeout_us = 5000, + .recovery_timeout_us = 5000, + .flip_programming_delay_us = 300, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h index a25f4e5977cf..a64ec4dcf11a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_H__ #define __DML_TOP_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 8247289ce7d3..83fc15bf13cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __dml2_TOP_DCHUB_REGISTERS_H__ #define __dml2_TOP_DCHUB_REGISTERS_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index daae77f2672b..fe153f4edaf5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__ #define __DML_TOP_DISPLAY_CFG_TYPES_H__ @@ -478,6 +477,7 @@ struct dml2_display_cfg { bool max_outstanding_when_urgent_expected_disable; bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming unsigned int best_effort_min_active_latency_hiding_us; + bool all_streams_blanked; } overrides; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h index 2f444f448770..8f624a912e78 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_POLICY_TYPES_H__ #define __DML_TOP_POLICY_TYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 065b2afab6fb..99d775adc3e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
- #ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__ #define __DML_TOP_SOC_PARAMETER_TYPES_H__ @@ -173,6 +172,7 @@ struct dml2_ip_capabilities { unsigned int meta_fifo_size_in_kentries; unsigned int compressed_buffer_segment_size_in_kbytes; unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; unsigned int hostvm_mode; unsigned int subvp_drr_scheduling_margin_us; unsigned int subvp_prefetch_end_to_mall_start_us; @@ -190,6 +190,10 @@ struct dml2_ip_capabilities { unsigned int subvp_programming_delay_us; unsigned int subvp_prefetch_to_mall_delay_us; unsigned int drr_programming_delay_us; + + unsigned int lock_timeout_us; + unsigned int recovery_timeout_us; + unsigned int flip_programming_delay_us; } fams2; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 8aa77bb190ea..c47a07f473e5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -262,6 +262,7 @@ union dml2_global_sync_programming { unsigned int vupdate_offset_pixels; unsigned int vupdate_vupdate_width_pixels; unsigned int vready_offset_pixels; + unsigned int pstate_keepout_start_lines; } dcn4; }; @@ -411,6 +412,7 @@ struct dml2_display_cfg_programming { /* indicates this configuration requires FW to support */ bool fams2_required; + struct dmub_cmd_fams2_global_config fams2_global_config; struct { bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 04edcde423a9..b7a6f7f4c342 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
- #include "dml2_internal_shared_types.h" #include "dml2_core_shared_types.h" #include "dml2_core_dcn4.h" @@ -70,6 +69,7 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = { .max_num_dp2p0_streams = 4, .imall_supported = 1, .max_flip_time_us = 80, + .max_flip_time_lines = 32, .words_per_channel = 16, .subvp_fw_processing_delay_us = 15, @@ -169,6 +169,7 @@ static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *i ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries; ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes; ip_caps->max_flip_time_us = ip_params->max_flip_time_us; + ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines; ip_caps->hostvm_mode = ip_params->hostvm_mode; // FIXME_STAGE2: cleanup after adding all dv override to ip_caps @@ -192,6 +193,7 @@ static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries; ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes; ip_params->max_flip_time_us = ip_caps->max_flip_time_us; + ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines; ip_params->hostvm_mode = ip_caps->hostvm_mode; } @@ -222,6 +224,7 @@ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out) } memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb)); + memcpy(&core->clean_me_up.mode_lib.ip_caps, in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); return true; } @@ -246,10 +249,12 @@ static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *p phantom->stream_index = phantom_stream_index; phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; - phantom->composition.viewport.plane0.height = (long int unsigned) math_ceil2( - (double)phantom->composition.viewport.plane0.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); - phantom->composition.viewport.plane1.height = (long int unsigned) math_ceil2( - (double)phantom->composition.viewport.plane1.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); phantom->immediate_flip = false; phantom->dynamic_meta_data.enable = false; phantom->cursor.num_cursors = 0; @@ -344,6 +349,8 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in // Check if FAMS2 is required if (display_cfg->stage3.performed && display_cfg->stage3.success) { programming->fams2_required = display_cfg->stage3.fams2_required; + + dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); } // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) @@ -641,20 +648,20 @@ bool core_dcn4_mode_programming(struct 
dml2_core_mode_programming_in_out *in_out for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) { in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; - if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else { - if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; - else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; - else - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; - } + if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else { + if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; + else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; + else + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + } dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); diff 
--git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h index 235280c6dcf5..e62b2d3eeee6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_DCN4_H__ #define __DML2_CORE_DCN4_H__ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 6f4026e396e0..45e43a915fd6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -10,10 +10,7 @@ #include "dml_top_types.h" #include "dml2_core_shared.h" -#define DML_VM_PTE_ADL_PATCH_EN //#define DML_TVM_UPDATE_EN -#define DML_TDLUT_ROW_BYTES_FIX_EN -#define DML_REG_LIMIT_CLAMP_EN #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 @@ -235,6 +232,7 @@ dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStart dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix); dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix); dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix); +dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines); dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY); dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC); dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte); @@ -2343,16 +2341,16 @@ static void calculate_mcache_row_bytes( } if (p->gpuvm_enable) { - meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic if (p->surf_vert && vmpg_bytes > blk_bytes) { - meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans; } *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom } else { - meta_per_mvmpg_per_channel = (float) blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans; if (!p->surf_vert) *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; @@ -3841,7 +3839,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; if (*p->UnboundedRequestEnabled) { *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, - (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / 64)), 1.0); + (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / 
(p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); @@ -3852,21 +3850,20 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch #endif *p->hw_debug5 = false; - if (!p->mrq_present) { - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!(*p->UnboundedRequestEnabled) - && p->display_cfg->plane_descriptors[k].surface.dcc.enable - && ((p->rob_buffer_size_kbytes * 1024 + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) - *p->hw_debug5 = true; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!(p->mrq_present) && (!p->UnboundedRequestEnabled) && (TotalActiveDPP == 1) + && p->display_cfg->plane_descriptors[k].surface.dcc.enable + && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) + + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) + *p->hw_debug5 = true; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); - dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); - dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); + dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif - } } } @@ -4616,7 +4613,7 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; - tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); @@ -4980,11 +4977,14 @@ static void CalculateExtraLatency( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, 
max_oustanding_when_urgent_expected); dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); @@ -5258,10 +5258,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; -#ifdef DML_TDLUT_ROW_BYTES_FIX_EN s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; -#endif s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; @@ -5274,16 +5272,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (p->setup_for_tdlut) vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); -#ifdef DML_TDLUT_ROW_BYTES_FIX_EN tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); -#else - tdlut_row_bytes = p->tdlut_pte_bytes_per_frame; -#endif -#ifdef DML_REG_LIMIT_CLAMP_EN s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); -#endif s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; if (p->display_cfg->gpuvm_enable == true) { @@ -5542,11 +5534,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw_equ = s->prefetch_bw4; } -#ifdef DML_REG_LIMIT_CLAMP_EN s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); -#endif #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK); dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK); @@ -6169,6 +6159,7 @@ static void CalculateFlipSchedule( unsigned int dpte_row_height_chroma, bool use_one_row_for_frame_flip, unsigned int max_flip_time_us, + unsigned int max_flip_time_lines, unsigned int per_pipe_flip_bytes, unsigned int meta_row_bytes, unsigned int meta_row_height, @@ -6189,6 +6180,7 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); dml2_printf("DML::%s: 
TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); @@ -6239,7 +6231,8 @@ static void CalculateFlipSchedule( if (use_lb_flip_bw) { // For mode check, calculation the flip bw requirement with worst case flip time - l->max_flip_time = math_min2(l->min_row_time, math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); + l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio), + math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); //The lower bound on flip bandwidth // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required @@ -6541,7 +6534,8 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported; if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { - if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + if (p->display_cfg->overrides.all_streams_blanked || + (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)) p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive; else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; @@ -6921,6 +6915,28 @@ static double get_g6_temp_read_blackout_us( return (double)blackout_us; } +static void calculate_pstate_keepout_dst_lines( + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_watermarks *watermarks, + unsigned int pstate_keepout_dst_lines[]) +{ + const struct dml2_stream_parameters *stream_descriptor; + unsigned int i; + + for (i = 0; i < display_cfg->num_planes; i++) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) { + stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index]; + + pstate_keepout_dst_lines[i] = + (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz)); + + if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) { + pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1; + } + } + } +} + static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) { struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; @@ -7126,7 +7142,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.WritebackLatencySupport = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && - (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024.0 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { mode_lib->ms.support.WritebackLatencySupport = false; } } @@ -7466,6 +7482,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out &mode_lib->ms.OutputRate[k], 
&mode_lib->ms.RequiredSlots[k]); + if (s->OutputBpp[k] == 0.0) { + s->OutputBpp[k] = mode_lib->ms.OutputBpp[k]; + } + if (mode_lib->ms.RequiresDSC[k] == false) { mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC; mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC; @@ -7819,7 +7839,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], mode_lib->ms.ODMMode[k], mode_lib->ip.maximum_dsc_bits_per_component, - mode_lib->ms.OutputBpp[k], + s->OutputBpp[k], display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ms.support.NumberOfDSCSlices[k], @@ -8247,7 +8267,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); #endif mode_lib->ms.support.OutstandingRequestsSupport = true; @@ -8501,10 +8521,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -8845,6 +8870,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dpte_row_height_chroma[k], mode_lib->ms.use_one_row_for_frame_flip[k], mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, s->per_pipe_flip_bytes[k], mode_lib->ms.meta_row_bytes[k], s->meta_row_height_luma[k], @@ -8979,6 +9005,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); } // End of Prefetch Check @@ -9373,11 +9401,9 @@ static void CalculateMetaAndPTETimes(struct 
dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); } -#ifdef DML_VM_PTE_ADL_PATCH_EN if (dpte_groups_per_row_luma_ub <= 2) { dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; } -#endif dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); @@ -9406,11 +9432,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); } -#ifdef DML_VM_PTE_ADL_PATCH_EN if (dpte_groups_per_row_chroma_ub <= 2) { dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; } -#endif dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); @@ -9535,17 +9559,16 @@ static void CalculateVMGroupAndRequestTimes( line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; -#ifdef DML_VM_PTE_ADL_PATCH_EN - if (num_group_per_lower_vm_stage_flip <= 2) { - num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1; - } + if (num_group_per_lower_vm_stage_pref > 0) + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; + else + TimePerVMGroupVBlank[k] = 0; + + if (num_group_per_lower_vm_stage_flip > 0) + TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; + else + TimePerVMGroupFlip[k] = 0; - if (num_group_per_lower_vm_stage_pref <= 2) { - num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1; - } -#endif - TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; - TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; if (num_req_per_lower_vm_stage_pref > 0) TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; else @@ -9599,10 +9622,6 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc bool FoundCriticalSurface = false; double LastZ8StutterPeriod = 0; - unsigned int SwathSizeCriticalSurface; - unsigned int LastChunkOfSwathSize; - unsigned int MissingPartOfLastSwathOfDETSize; - memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { @@ -9777,7 +9796,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) - / math_max2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 
1 : l->AverageDCCCompressionRate)) + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); @@ -9871,19 +9890,11 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif - SwathSizeCriticalSurface = (unsigned int)(l->BytePerPixelYCriticalSurface * l->SwathHeightYCriticalSurface * math_ceil2(l->SwathWidthYCriticalSurface, l->BlockWidth256BytesYCriticalSurface)); - LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024); - MissingPartOfLastSwathOfDETSize = (unsigned int)(math_ceil2(l->DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - l->DETBufferSizeYCriticalSurface); - - *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) && - (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); + *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface); dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); - dml2_printf("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize); - dml2_printf("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize); dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); #endif } @@ -10462,11 +10473,16 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); } + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderBytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, mode_lib->ip.pixel_chunk_size_kbytes, @@ -10994,6 +11010,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.dpte_row_height_chroma[k], mode_lib->mp.use_one_row_for_frame_flip[k], mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, s->per_pipe_flip_bytes[k], mode_lib->mp.meta_row_bytes[k], mode_lib->mp.meta_row_height[k], @@ -11203,6 +11220,8 @@ static bool dml_core_mode_programming(struct 
dml2_core_calcs_mode_programming_ex } } + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); + dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); @@ -12190,6 +12209,7 @@ void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal out->dcn4.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); out->dcn4.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); out->dcn4.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); + out->dcn4.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); } void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) @@ -12197,6 +12217,18 @@ void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_disp dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index); } +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, + const struct display_configuation_with_meta *display_cfg, + struct dmub_cmd_fams2_global_config *fams2_global_config) +{ + fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; + fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; + fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; + fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + + fams2_global_config->num_streams = display_cfg->display_config.num_streams; +} + void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, @@ -12209,6 +12241,11 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna unsigned int i; + if (display_cfg->display_config.overrides.all_streams_blanked) { + /* stream is blanked, so do nothing */ + return; + } + /* from display configuration */ fams2_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; fams2_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; @@ -12368,6 +12405,7 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp { double phantom_processing_delay_pix; unsigned int phantom_processing_delay_lines; + unsigned int phantom_min_v_active_lines; unsigned int phantom_v_active_lines; unsigned int phantom_v_startup_lines; unsigned int phantom_v_blank_lines; @@ -12384,7 +12422,9 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp phantom_processing_delay_lines++; phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); - phantom_v_active_lines = phantom_processing_delay_lines + dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) + mode_lib->ip.subvp_swath_height_margin_lines; + phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) / + display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio); + 
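The new phantom_min_v_active_lines converts the subviewport line count through the plane 0 vertical scaling ratio and rounds up, so the phantom VACTIVE region still covers the whole MALL subviewport after scaling. A minimal sketch, assuming math_ceil behaves like an ordinary ceiling:

#include <math.h>

/* Sketch: minimum phantom VACTIVE lines for a given subviewport,
 * assuming math_ceil() in the source is an ordinary ceiling. */
static unsigned int phantom_min_v_active(unsigned int subviewport_lines,
					 double v_ratio)
{
	return (unsigned int)ceil((double)subviewport_lines / v_ratio);
}
/* e.g. 216 subviewport lines at v_ratio = 1.5 -> 144 lines */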
phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines; // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank) phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1; @@ -12396,8 +12436,8 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp // phantom_vtotal = vactive + vblank out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines; - out->phantom_min_v_active = dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index); - out->phantom_v_startup = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); + out->phantom_min_v_active = phantom_min_v_active_lines; + out->phantom_v_startup = phantom_v_startup_lines; out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; #if defined(__DML_VBA_DEBUG__) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h index b280ab573fbb..df2d1550a14b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_DCN4_CALCS_H__ #define __DML2_CORE_DCN4_CALCS_H__ @@ -30,6 +29,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, enum dml2_uclk_pstate_support_method pstate_method, int plane_index); +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config); void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int GPUVMMinPageSizeKBytes); void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index f56abe9ab919..640087e862f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
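Per the comment in the hunk above, phantom VBLANK is built from VSTARTUP as back porch plus one line each of front porch and vsync; phantom VTOTAL is then just VACTIVE plus that blank. A tiny sketch with the same arithmetic:

/* Sketch of the phantom timing arithmetic above:
 * VBLANK = VSTARTUP (vbp) + 1 (vfp) + 1 (vsync). */
static unsigned int phantom_v_total(unsigned int v_active_lines,
				    unsigned int v_startup_lines)
{
	return v_active_lines + v_startup_lines + 1 + 1;
}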
- #include "dml2_core_factory.h" #include "dml2_core_dcn4.h" #include "dml2_external_lib_deps.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h index 53636a8f52aa..411c514fe65c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_FACTORY_H__ #define __DML2_CORE_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index 679b20031903..6d7701a97d3f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -2242,11 +2242,15 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou } double min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -5050,7 +5054,7 @@ static void calculate_mcache_row_bytes( unsigned int meta_per_mvmpg_per_channel_ub = 0; if (p->gpuvm_enable) { - meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic if (p->surf_vert && vmpg_bytes > blk_bytes) { @@ -5059,7 +5063,7 @@ static void calculate_mcache_row_bytes( *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom } else { - meta_per_mvmpg_per_channel = (float)blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)blk_bytes / (float)256 / p->num_chans; if (!p->surf_vert) *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; @@ -7165,7 +7169,7 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width / 2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; - tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); @@ -7501,11 +7505,14 @@ static 
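In calculate_tdlut_setting above, the fixed 9.0 divisor in the drain rate is replaced by the ceiling of half the 3DLUT MPC width, tying the rate to the configured LUT size rather than a constant. A sketch, assuming math_ceil2(x, 1) is an ordinary ceiling:

#include <math.h>

/* Sketch of the revised drain rate: bytes per line times dispclk,
 * divided by ceil(mpc_width / 2) instead of a fixed 9.0. */
static double tdlut_drain_rate(double bytes_per_line, double dispclk_mhz,
			       unsigned int tdlut_mpc_width)
{
	return bytes_per_line * dispclk_mhz / ceil(tdlut_mpc_width / 2.0);
}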
void CalculateExtraLatency( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected); dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); @@ -7739,7 +7746,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; - s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); @@ -9304,6 +9310,10 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); } + if (dpte_groups_per_row_luma_ub <= 2) { + dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; + } + dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); @@ -9332,6 +9342,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); } + if (dpte_groups_per_row_chroma_ub <= 2) { + dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; + } dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); @@ -9451,6 +9464,14 @@ static void CalculateVMGroupAndRequestTimes( double line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; + if (num_group_per_lower_vm_stage_flip <= 2) { + num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1; + } + + if (num_group_per_lower_vm_stage_pref <= 2) { + num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1; + } + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * 
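The hunks above fold into the shared path, unconditionally, the same adjustment the dcn4 file previously kept under DML_VM_PTE_ADL_PATCH_EN: dpte group and VM stage counts of one or two get bumped by one. The diff does not state the rationale; a plausible reading is extra scheduling margin when very few groups exist per row. The bump reduces to:

/* Sketch of the bump applied above: counts of 1 or 2 become 2 or 3.
 * Rationale is not stated in the diff; presumably scheduling margin. */
static unsigned int bump_small_group_count(unsigned int groups)
{
	return groups <= 2 ? groups + 1 : groups;
}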
line_time / num_req_per_lower_vm_stage_pref; @@ -10388,11 +10409,16 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); } + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderBytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, mode_lib->ip.pixel_chunk_size_kbytes, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h index d76bda907ec8..f3356b072b59 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_SHARED_H__ #define __DML2_CORE_SHARED_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 1343b744eeb3..02498c0e3282 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
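Here, as in the mode-support path earlier, ReorderingBytes for dcn3-style QoS is the worst of the three per-channel out-of-order return figures scaled by the DRAM channel count. A standalone sketch (types simplified to unsigned int; the source uses math_max3 on the QoS parameters):

/* Sketch of the DCN3 QoS reordering-bytes estimate used above. */
static unsigned int reordering_bytes(unsigned int channel_count,
				     unsigned int pixel_only,
				     unsigned int pixel_and_vm,
				     unsigned int vm_only)
{
	unsigned int per_channel = pixel_only;

	if (pixel_and_vm > per_channel)
		per_channel = pixel_and_vm;
	if (vm_only > per_channel)
		per_channel = vm_only;

	return channel_count * per_channel;
}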
- #ifndef __DML2_CORE_SHARED_TYPES_H__ #define __DML2_CORE_SHARED_TYPES_H__ @@ -70,6 +69,7 @@ struct dml2_core_ip_params { unsigned int words_per_channel; bool imall_supported; unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; unsigned int subvp_swath_height_margin_lines; unsigned int subvp_fw_processing_delay_us; unsigned int subvp_pstate_allow_width_us; @@ -782,6 +782,7 @@ struct dml2_core_internal_mode_program { unsigned int VUpdateOffsetPix[DML2_MAX_PLANES]; unsigned int VUpdateWidthPix[DML2_MAX_PLANES]; unsigned int VReadyOffsetPix[DML2_MAX_PLANES]; + unsigned int pstate_keepout_dst_lines[DML2_MAX_PLANES]; // Latency and Support double MaxActiveFCLKChangeLatencySupported; @@ -975,7 +976,7 @@ struct dml2_core_calcs_mode_programming_locals { unsigned int DSCFormatFactor; struct dml2_core_internal_DmlPipe SurfaceParameters[DML2_MAX_PLANES]; - unsigned int ReorderBytes; + unsigned int ReorderingBytes; double HostVMInefficiencyFactor; double HostVMInefficiencyFactorPrefetch; unsigned int TotalDCCActiveDPP; @@ -2010,6 +2011,7 @@ struct dml2_core_internal_scratch { struct dml2_core_internal_display_mode_lib { struct dml2_core_ip_params ip; struct dml2_soc_bb soc; + struct dml2_ip_capabilities ip_caps; //@brief Mode Support and Mode programming struct // Used to hold input; intermediate and output of the calculations diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index c94c4f32c957..0d847bccd5d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_dpmm_dcn4.h" #include "dml2_internal_shared_types.h" #include "dml_top_types.h" @@ -181,7 +180,7 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma clock_khz *= 1.0 + margin; - divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); + divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); /* we want to floor here to get higher clock than required rather than lower */ if (divider < DFS_DIVIDER_RANGE_2_START) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h index 3afb69dfd040..b165c58dfd11 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DPMM_DCN4_H__ #define __DML2_DPMM_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index 2c983daf2dad..dfd01440737d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
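The dpmm change above drops an intermediate (int) cast when quantizing the DFS divider; per the in-code comment, the value is floored so the resulting clock lands at or above the request. A sketch with a stand-in scale factor:

/* Sketch of the divider quantization above. SCALE_FACTOR is only a
 * stand-in for DFS_DIVIDER_RANGE_SCALE_FACTOR; flooring the scaled
 * ratio picks a divider whose output clock is >= the requested clock. */
#define SCALE_FACTOR 4

static unsigned int dfs_divider(double vco_freq_khz, double clock_khz,
				double margin)
{
	clock_khz *= 1.0 + margin;	/* add margin before quantizing */

	return (unsigned int)(SCALE_FACTOR * (vco_freq_khz / clock_khz));
}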
- #include "dml2_dpmm_factory.h" #include "dml2_dpmm_dcn4.h" #include "dml2_external_lib_deps.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h index 80b44b4c2e68..20ba2e446f1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DPMM_FACTORY_H__ #define __DML2_DPMM_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c index 5d8887ac766d..f4b1a7d02d42 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_mcg_dcn4.h" #include "dml_top_soc_parameter_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h index 19d178651435..02da6f45cbf7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_MCG_DCN4_H__ #define __DML2_MCG_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c index 55085b85f8ed..c60b8fe90819 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_mcg_factory.h" #include "dml2_mcg_dcn4.h" #include "dml2_external_lib_deps.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h index 5dfdfed04e22..ad307deca3b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_MCG_FACTORY_H__ #define __DML2_MCG_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index 671f9ac2627c..717536d7bb30 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_pmo_factory.h" #include "dml2_pmo_dcn3.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h index cc350f88d4d2..f00bd9e72a86 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
- #ifndef __DML2_PMO_DCN3_H__ #define __DML2_PMO_DCN3_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c deleted file mode 100644 index 8952dd7e36cb..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c +++ /dev/null @@ -1,1250 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn4.h" - -static const int MIN_VACTIVE_MARGIN_US = 100; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding -static const int SUBVP_DRR_MARGIN_US = 100; - -static const enum dml2_pmo_pstate_strategy full_strategy_list_1_display[][4] = { - // VActive Preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VBlank - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, -}; - -static const int full_strategy_list_1_display_size = sizeof(full_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_2_display[][4] = { - // VActive only is preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VActive + VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VBlank only - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP + VBlank - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP + SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR + DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, -}; - -static const int full_strategy_list_2_display_size = sizeof(full_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_3_display[][4] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vactive, 
dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, - -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, - -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 3 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR -}; - -static const int full_strategy_list_3_display_size = sizeof(full_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_4_display[][4] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 1 VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, - -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 2 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, - -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, 
dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR -}; - -static const int full_strategy_list_4_display_size = sizeof(full_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) -{ - bool result = true; - - if (*odm_mode == dml2_odm_mode_auto) { - switch (odms_calculated) { - case 1: - *odm_mode = dml2_odm_mode_bypass; - break; - case 2: - *odm_mode = dml2_odm_mode_combine_2to1; - break; - case 3: - *odm_mode = dml2_odm_mode_combine_3to1; - break; - case 4: - *odm_mode = dml2_odm_mode_combine_4to1; - break; - default: - result = false; - break; - } - } - - if (result) { - if (*odm_mode == dml2_odm_mode_bypass) { - *odm_mode = dml2_odm_mode_combine_2to1; - } else if (*odm_mode == dml2_odm_mode_combine_2to1) { - *odm_mode = dml2_odm_mode_combine_3to1; - } else if (*odm_mode == dml2_odm_mode_combine_3to1) { - *odm_mode = dml2_odm_mode_combine_4to1; - } else { - result = false; - } - } - - return result; -} - -static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) -{ - if (*mpc_combine_factor < limit) { - (*mpc_combine_factor)++; - return true; - } - - return false; -} - -static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) -{ - unsigned int i; - int count; - - count = 0; - for (i = 0; i < display_cfg->num_planes; i++) { - if (display_cfg->plane_descriptors[i].stream_index == stream_index) - count++; - } - - return count; -} - -static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, - int free_pipes) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i; - bool result = true; - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes is already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 - if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { - in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = - in_out->cfg_support_info->plane_support_info[i].dpps_used; - // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. - if (free_pipes > 0) { - if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, - pmo->mpc_combine_limit)) { - // We've reached max pipes allocatable to a single plane, so we fail. - result = false; - break; - } else { - // Successfully added another pipe to this failing plane. - free_pipes--; - } - } else { - // No free pipes to add. - result = false; - break; - } - } else { - // If the stream of this plane needs ODM combine, no further optimization can be done. 
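The removed optimize_dcc_mcache_no_odm boils down to handing one extra DPP to each plane that failed the DCC mcache check, until either the free-pipe pool or the per-plane MPC combine limit (2, per pmo_dcn4_initialize below) runs out. A condensed sketch:

/* Condensed sketch of the pipe-allocation loop above: each failing
 * plane gets one extra pipe; the optimization fails as a whole once
 * the pool or the combine limit is exhausted. */
static bool add_pipes_for_failing_planes(const bool *mcache_ok,
					 unsigned int *combine_factor,
					 unsigned int num_planes,
					 unsigned int combine_limit,
					 int free_pipes)
{
	unsigned int i;

	for (i = 0; i < num_planes; i++) {
		if (mcache_ok[i])
			continue;
		if (free_pipes <= 0 || combine_factor[i] >= combine_limit)
			return false;
		combine_factor[i]++;
		free_pipes--;
	}
	return true;
}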
- result = false; - break; - } - } - } - - return result; -} - -bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i, used_pipes, free_pipes, planes_on_stream; - bool result; - - if (in_out->display_config != in_out->optimized_display_cfg) { - memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); - } - - //Count number of free pipes, and check if any odm combine is in use. - used_pipes = 0; - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; - } - free_pipes = pmo->ip_caps->pipe_count - used_pipes; - - // Optimization loop - // The goal here is to add more pipes to any planes - // which are failing mcache admissibility - result = true; - - // The optimization logic depends on whether ODM combine is enabled, and the stream count. - if (in_out->optimized_display_cfg->num_streams > 1) { - // If there are multiple streams, we are limited to only be able to optimize mcache failures on planes - // which are not ODM combined. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } else if (in_out->optimized_display_cfg->num_streams == 1) { - // In single stream cases, we still optimize mcache failures when there's ODM combine with some - // additional logic. - - if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { - // If ODM combine is enabled, then the logic is to increase ODM combine factor. - - // Optimization for streams with > 1 ODM combine factor is only supported for single display. - planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes is already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. - if (free_pipes >= planes_on_stream) { - if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, - in_out->cfg_support_info->plane_support_info[i].dpps_used)) { - result = false; - } else { - free_pipes -= planes_on_stream; - break; - } - } else { - result = false; - break; - } - } - } - } else { - // If ODM combine is not enabled, then we can actually use the same logic as before. 
- - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } - } else { - result = true; - } - - return result; -} - -bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - pmo->soc_bb = in_out->soc_bb; - pmo->ip_caps = in_out->ip_caps; - pmo->mpc_combine_limit = 2; - pmo->odm_combine_limit = 4; - pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; - - pmo->fams_params.v1.subvp.fw_processing_delay_us = 10; - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us = 50; - pmo->fams_params.v1.subvp.refresh_rate_limit_max = 175; - pmo->fams_params.v1.subvp.refresh_rate_limit_min = 0; - - pmo->options = in_out->options; - - return true; -} - -static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) -{ - /* - * Htotal, Hblank start/end, and Hsync start/end all must be divisible - * in order for the horizontal timing params to be considered divisible - * by 2. Hsync start is always 0. - */ - unsigned long h_blank_start = timing->h_total - timing->h_front_porch; - - return (timing->h_total % denominator == 0) && - (h_blank_start % denominator == 0) && - (timing->h_blank_end % denominator == 0) && - (timing->h_sync_width % denominator == 0); -} - -static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) -{ - switch (encoder_type) { - case dml2_dp: - case dml2_edp: - case dml2_dp2p0: - case dml2_none: - return true; - case dml2_hdmi: - case dml2_hdmifrl: - default: - return false; - } -} - -bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) -{ - unsigned int i; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - - if (in_out->instance->options->disable_dyn_odm || - (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) - return false; - - for (i = 0; i < display_config->num_planes; i++) - /* - * vmin optimization is required to be seamlessly switched off - * at any time when the new configuration is no longer - * supported. However switching from ODM combine to MPC combine - * is not always seamless. When there not enough free pipes, we - * will have to use the same secondary OPP heads as secondary - * DPP pipes in MPC combine in new state. This transition is - * expected to cause glitches. To avoid the transition, we only - * allow vmin optimization if the stream's base configuration - * doesn't require MPC combine. This condition checks if MPC - * combine is enabled. If so do not optimize the stream. 
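is_h_timing_divisible_by above requires all four horizontal parameters to divide evenly before ODM combine is treated as seamless. A worked check with a hypothetical timing (struct and values illustrative only):

#include <stdbool.h>

struct h_timing {
	unsigned long h_total, h_front_porch, h_blank_end, h_sync_width;
};

/* Same rule as the helper above: htotal, hblank start/end, and hsync
 * width must all be divisible (hsync start is always 0). */
static bool h_timing_divisible_by(const struct h_timing *t, unsigned char d)
{
	unsigned long h_blank_start = t->h_total - t->h_front_porch;

	return t->h_total % d == 0 && h_blank_start % d == 0 &&
	       t->h_blank_end % d == 0 && t->h_sync_width % d == 0;
}

/* hypothetical { 2200, 88, 150, 44 }: passes for d = 2 (all even) but
 * fails for d = 4 because 150 % 4 != 0 */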
- */ - if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && - mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; - - for (i = 0; i < display_config->num_streams; i++) { - if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && - in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * ODM Combine requires horizontal timing divisible by 2 so each - * ODM segment has the same size. - */ - else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * Our hardware support seamless ODM transitions for DP encoders - * only. - */ - else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - } - - return true; -} - -bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) -{ - bool is_vmin = true; - - if (in_out->vmin_limits->dispclk_khz > 0 && - in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) - is_vmin = false; - - return is_vmin; -} - -static int find_highest_odm_load_stream_index( - const struct dml2_display_cfg *display_config, - const struct dml2_core_mode_support_result *mode_support_result) -{ - unsigned int i; - int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; - - for (i = 0; i < display_config->num_streams; i++) { - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz - / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; - if (odm_load > highest_odm_load) { - highest_odm_load_index = i; - highest_odm_load = odm_load; - } - } - - return highest_odm_load_index; -} - -bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) -{ - int stream_index; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - unsigned int odms_used; - struct dml2_stream_parameters *stream_descriptor; - bool optimizable = false; - - /* - * highest odm load stream must be optimizable to continue as dispclk is - * bounded by it. 
- */ - stream_index = find_highest_odm_load_stream_index(display_config, - mode_support_result); - - if (stream_index < 0 || - in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) - return false; - - odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; - if ((int)odms_used >= in_out->instance->odm_combine_limit) - return false; - - memcpy(in_out->optimized_display_config, - in_out->base_display_config, - sizeof(struct display_configuation_with_meta)); - - stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; - while (!optimizable && increase_odm_combine_factor( - &stream_descriptor->overrides.odm_mode, - odms_used)) { - switch (stream_descriptor->overrides.odm_mode) { - case dml2_odm_mode_combine_2to1: - optimizable = true; - break; - case dml2_odm_mode_combine_3to1: - /* - * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 3. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_combine_4to1: - /* - * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 4. 
- */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_auto: - case dml2_odm_mode_bypass: - case dml2_odm_mode_split_1to2: - case dml2_odm_mode_mso_1to2: - case dml2_odm_mode_mso_1to4: - default: - break; - } - } - - return optimizable; -} - -static bool are_timings_trivially_synchronizable(const struct display_configuation_with_meta *display_config, int mask) -{ - unsigned char i; - bool identical = true; - bool contains_drr = false; - unsigned char remap_array[DML2_MAX_PLANES]; - unsigned char remap_array_size = 0; - - // Create a remap array to enable simple iteration through only masked stream indicies - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (mask & (0x1 << i)) { - remap_array[remap_array_size++] = i; - } - } - - // 0 or 1 display is always trivially synchronizable - if (remap_array_size <= 1) - return true; - - for (i = 1; i < remap_array_size; i++) { - if (memcmp(&display_config->display_config.stream_descriptors[remap_array[i - 1]].timing, - &display_config->display_config.stream_descriptors[remap_array[i]].timing, - sizeof(struct dml2_timing_cfg))) { - identical = false; - break; - } - } - - for (i = 0; i < remap_array_size; i++) { - if (display_config->display_config.stream_descriptors[remap_array[i]].timing.drr_config.enabled) { - contains_drr = true; - break; - } - } - - return !contains_drr && identical; -} - -static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset) -{ - *bit_field = *bit_field | (0x1 << bit_offset); -} - -static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset) -{ - if (bit_field & (0x1 << bit_offset)) - return true; - - return false; -} - -static bool are_all_timings_drr_enabled(const struct display_configuation_with_meta *display_config, int mask) -{ - unsigned char i; - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(mask, i)) { - if (!display_config->display_config.stream_descriptors[i].timing.drr_config.enabled) - return false; - } - } - - return true; -} - -static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) -{ - int stream_index; - - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true; - - for (stream_index = 0; stream_index < stream_count; stream_index++) { - scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index]; - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false; - } - - scratch->pmo_dcn4.num_pstate_candidates++; -} - -static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy) -{ - unsigned char i; - enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; - - if (strategy == dml2_pmo_pstate_strategy_vactive) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - else if (strategy == dml2_pmo_pstate_strategy_vblank) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (strategy == dml2_pmo_pstate_strategy_fw_svp) - 
matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (strategy == dml2_pmo_pstate_strategy_fw_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && - display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != matching_strategy) - return false; - } - } - - return true; -} - -static bool subvp_subvp_schedulable(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - unsigned char *svp_stream_indicies, char svp_stream_count) -{ - struct dml2_pmo_scratch *s = &pmo->scratch; - int i; - int microschedule_lines, time_us, refresh_hz; - int max_microschedule_us = 0; - int vactive1_us, vactive2_us, vblank1_us, vblank2_us; - - const struct dml2_timing_cfg *svp_timing1 = 0; - const struct dml2_implicit_svp_meta *svp_meta1 = 0; - - const struct dml2_timing_cfg *svp_timing2 = 0; - - if (svp_stream_count <= 1) - return true; - else if (svp_stream_count > 2) - return false; - - /* Loop to calculate the maximum microschedule time between the two SubVP pipes, - * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. - */ - for (i = 0; i < svp_stream_count; i++) { - svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[i]].timing; - svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[i]]; - - microschedule_lines = svp_meta1->v_active; - - // Round up when calculating microschedule time (+ 1 at the end) - time_us = (int)((microschedule_lines * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us + pmo->fams_params.v1.subvp.fw_processing_delay_us + 1); - - if (time_us > max_microschedule_us) - max_microschedule_us = time_us; - - refresh_hz = (int)((double)(svp_timing1->pixel_clock_khz * 1000) / (svp_timing1->v_total * svp_timing1->h_total)); - - if (refresh_hz < pmo->fams_params.v1.subvp.refresh_rate_limit_min || - refresh_hz > pmo->fams_params.v1.subvp.refresh_rate_limit_max) { - return false; - } - } - - svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[0]].timing; - svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[0]]; - - vactive1_us = (int)((svp_timing1->v_active * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); - - vblank1_us = (int)(((svp_timing1->v_total - svp_timing1->v_active) * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); - - svp_timing2 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[1]].timing; - - vactive2_us = (int)((svp_timing2->v_active * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); - - vblank2_us = (int)(((svp_timing2->v_total - svp_timing2->v_active) * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); - - if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && - (vactive2_us - vblank1_us) / 2 > max_microschedule_us) - return true; - - return false; -} - -static bool validate_svp_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask) -{ - bool result = false; - unsigned char stream_index; - - unsigned char svp_stream_indicies[2] = { 0 }; - 
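subvp_subvp_schedulable below converts the phantom microschedule into microseconds and adds the firmware delays set in pmo_dcn4_initialize (50 us prefetch-end-to-MALL-start, 10 us FW processing), rounding up. A worked sketch with hypothetical 1080p-class numbers:

#include <stdio.h>

/* Same rounding-up formula as the source: line time in us plus the
 * fixed delays, plus 1 to round up. */
static int microschedule_time_us(int lines, int h_total, int pixel_clock_khz,
				 int mall_start_us, int fw_delay_us)
{
	double line_part = (double)lines * h_total /
			   ((double)pixel_clock_khz * 1000.0) * 1000000.0;

	return (int)(line_part + mall_start_us + fw_delay_us + 1);
}

int main(void)
{
	/* 120 phantom lines, h_total 2200, 148500 kHz -> ~1838 us */
	printf("%d us\n", microschedule_time_us(120, 2200, 148500, 50, 10));
	return 0;
}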
unsigned char svp_stream_count = 0; - - // Find the SVP streams, store only the first 2, but count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - if (svp_stream_count < 2) - svp_stream_indicies[svp_stream_count] = stream_index; - - svp_stream_count++; - } - } - - if (svp_stream_count == 1) { - result = true; // 1 SVP is always co_functional - } else if (svp_stream_count == 2) { - result = subvp_subvp_schedulable(pmo, display_cfg, svp_stream_indicies, svp_stream_count); - } - - return result; -} - -static bool validate_drr_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int drr_stream_mask) -{ - unsigned char stream_index; - int drr_stream_count = 0; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { - drr_stream_count++; - } - } - - return drr_stream_count <= 4; -} - -static bool validate_svp_drr_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int drr_stream_mask) -{ - unsigned char stream_index; - int drr_stream_count = 0; - int svp_stream_count = 0; - - int prefetch_us = 0; - int mall_region_us = 0; - int drr_frame_us = 0; // nominal frame time - int subvp_active_us = 0; - int stretched_drr_us = 0; - int drr_stretched_vblank_us = 0; - int max_vblank_mallregion = 0; - - const struct dml2_timing_cfg *svp_timing = 0; - const struct dml2_timing_cfg *drr_timing = 0; - const struct dml2_implicit_svp_meta *svp_meta = 0; - - bool schedulable = false; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - svp_stream_count++; - } - if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { - drr_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - drr_stream_count++; - } - } - - if (svp_stream_count == 1 && drr_stream_count == 1 && svp_timing != drr_timing) { - prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) - * svp_timing->h_total / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); - - subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - drr_frame_us = (int)(drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pixel_clock_khz * 1000) * 1000000); - - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - - drr_stretched_vblank_us = (int)((drr_timing->v_total - drr_timing->v_active) * drr_timing->h_total / - (double)(drr_timing->pixel_clock_khz * 1000) * 1000000 + (stretched_drr_us - drr_frame_us)); - - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? 
drr_stretched_vblank_us : mall_region_us; - - /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the - * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis - * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - * and the max of (VBLANK blanking time, MALL region)). - */ - if (stretched_drr_us < (1 / (double)drr_timing->drr_config.min_refresh_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) - schedulable = true; - } - - return schedulable; -} - -static bool validate_svp_vblank_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int vblank_stream_mask) -{ - unsigned char stream_index; - int vblank_stream_count = 0; - int svp_stream_count = 0; - - const struct dml2_timing_cfg *svp_timing = 0; - const struct dml2_timing_cfg *vblank_timing = 0; - const struct dml2_implicit_svp_meta *svp_meta = 0; - - int prefetch_us = 0; - int mall_region_us = 0; - int vblank_frame_us = 0; - int subvp_active_us = 0; - int vblank_blank_us = 0; - int max_vblank_mallregion = 0; - - bool schedulable = false; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - svp_stream_count++; - } - if (is_bit_set_in_bitfield(vblank_stream_mask, stream_index)) { - vblank_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - vblank_stream_count++; - } - } - - if (svp_stream_count == 1 && vblank_stream_count > 0) { - // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe - // Also include the prefetch end to mallstart delay time - prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) * svp_timing->h_total - / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); - - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - vblank_frame_us = (int)(vblank_timing->v_total * vblank_timing->h_total / - (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); - - vblank_blank_us = (int)((vblank_timing->v_total - vblank_timing->v_active) * vblank_timing->h_total / - (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); - - subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; - - // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - // and the max of (VBLANK blanking time, MALL region) - // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] 
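The SVP + VBLANK test here reduces to a single inequality: the SubVP VACTIVE window must absorb the phantom prefetch, one full frame of the VBLANK-method display, and the larger of that display's blanking time and the MALL region. As a standalone sketch of the condition checked just below:

#include <stdbool.h>

/* Sketch of the static schedulability inequality used below (all
 * durations in microseconds). */
static bool svp_vblank_schedulable(int subvp_active_us, int prefetch_us,
				   int vblank_frame_us, int vblank_blank_us,
				   int mall_region_us)
{
	int max_vblank_mallregion = vblank_blank_us > mall_region_us ?
				    vblank_blank_us : mall_region_us;

	return subvp_active_us - prefetch_us - vblank_frame_us -
	       max_vblank_mallregion > 0;
}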
> 0) - if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) - schedulable = true; - } - return schedulable; -} - -static bool validate_drr_vblank_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int drr_stream_mask, int vblank_stream_mask) -{ - return false; -} - -static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[4]) -{ - struct dml2_pmo_scratch *s = &pmo->scratch; - - unsigned char stream_index = 0; - - unsigned int svp_count = 0; - unsigned int svp_stream_mask = 0; - unsigned int drr_count = 0; - unsigned int drr_stream_mask = 0; - unsigned int vactive_count = 0; - unsigned int vactive_stream_mask = 0; - unsigned int vblank_count = 0; - unsigned int vblank_stream_mask = 0; - - bool strategy_matches_forced_requirements = true; - - bool admissible = false; - - // Tabulate everything - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - - if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], - per_stream_pstate_strategy[stream_index])) { - strategy_matches_forced_requirements = false; - break; - } - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - svp_count++; - set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - drr_count++; - set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive) { - vactive_count++; - set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) { - vblank_count++; - set_bit_in_bitfield(&vblank_stream_mask, stream_index); - } - } - - if (!strategy_matches_forced_requirements) - return false; - - // Check for trivial synchronization for vblank - if (vblank_count > 0 && (pmo->options->disable_vblank || !are_timings_trivially_synchronizable(display_cfg, vblank_stream_mask))) - return false; - - if (svp_count > 0 && pmo->options->disable_svp) - return false; - - if (drr_count > 0 && (pmo->options->disable_drr_var || !are_all_timings_drr_enabled(display_cfg, drr_stream_mask))) - return false; - - // Validate for FAMS admissibiliy - if (svp_count == 0 && drr_count == 0) { - // No FAMS - admissible = true; - } else { - admissible = false; - if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count == 0) { - // All SVP - admissible = validate_svp_cofunctionality(pmo, display_cfg, svp_stream_mask); - } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { - // All DRR - admissible = validate_drr_cofunctionality(pmo, display_cfg, drr_stream_mask); - } else if (svp_count > 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { - // SVP + DRR - admissible = validate_svp_drr_cofunctionality(pmo, display_cfg, svp_stream_mask, drr_stream_mask); - } else if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count > 0) { - // SVP + VBlank - admissible = validate_svp_vblank_cofunctionality(pmo, display_cfg, svp_stream_mask, vblank_stream_mask); - } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count > 0) { - // DRR + VBlank - admissible = 
validate_drr_vblank_cofunctionality(pmo, display_cfg, drr_stream_mask, vblank_stream_mask); - } - } - - return admissible; -} - -static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) -{ - unsigned char i; - int min_vactive_margin_us = 0xFFFFFFF; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active < min_vactive_margin_us) - min_vactive_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active; - } - } - - return min_vactive_margin_us; -} - -bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; - struct dml2_pmo_scratch *s = &pmo->scratch; - - struct display_configuation_with_meta *display_config; - const struct dml2_plane_parameters *plane_descriptor; - const enum dml2_pmo_pstate_strategy (*strategy_list)[4] = 0; - unsigned int strategy_list_size = 0; - unsigned int plane_index, stream_index, i; - - state->performed = true; - - display_config = in_out->base_display_config; - display_config->display_config.overrides.enable_subvp_implicit_pmo = true; - - memset(s, 0, sizeof(struct dml2_pmo_scratch)); - - pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size - 1; - pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - - // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping) - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane_descriptor = &display_config->display_config.plane_descriptors[plane_index]; - - set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); - - state->pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; - } - - // Figure out which streams can do vactive, and also build up implicit SVP meta - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= - MIN_VACTIVE_MARGIN_US) - set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); - - s->pmo_dcn4.stream_svp_meta[stream_index].valid = true; - s->pmo_dcn4.stream_svp_meta[stream_index].v_active = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; - s->pmo_dcn4.stream_svp_meta[stream_index].v_total = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; - s->pmo_dcn4.stream_svp_meta[stream_index].v_front_porch = 1; - } - - switch (display_config->display_config.num_streams) { - case 1: - strategy_list = full_strategy_list_1_display; - strategy_list_size = full_strategy_list_1_display_size; - break; - case 2: - strategy_list = full_strategy_list_2_display; - strategy_list_size = full_strategy_list_2_display_size; - break; - case 3: - strategy_list = full_strategy_list_3_display; - strategy_list_size = full_strategy_list_3_display_size; - break; - case 4: - strategy_list = 
full_strategy_list_4_display; - strategy_list_size = full_strategy_list_4_display_size; - break; - default: - strategy_list_size = 0; - break; - } - - if (strategy_list_size == 0) - return false; - - s->pmo_dcn4.num_pstate_candidates = 0; - - for (i = 0; i < strategy_list_size && i < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { - if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { - insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); - } - } - - if (s->pmo_dcn4.num_pstate_candidates > 0) { - // There's this funny case... - // If the first entry in the candidate list is all vactive, then we can consider it "tested", so the current index is 0 - // Otherwise the current index should be -1 because we run the optimization at least once - s->pmo_dcn4.cur_pstate_candidate = 0; - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (s->pmo_dcn4.per_stream_pstate_strategy[0][i] != dml2_pmo_pstate_strategy_vactive) { - s->pmo_dcn4.cur_pstate_candidate = -1; - break; - } - } - return true; - } else { - return false; - } -} - -static void reset_display_configuration(struct display_configuation_with_meta *display_config) -{ - unsigned int plane_index; - unsigned int stream_index; - struct dml2_plane_parameters *plane; - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - display_config->stage3.stream_svp_meta[stream_index].valid = false; - } - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Unset SubVP - plane->overrides.legacy_svp_config = dml2_svp_mode_override_auto; - - // Remove reserve time - plane->overrides.reserved_vblank_time_ns = 0; - - // Reset strategy to auto - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_not_supported; - } -} - -static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Setup DRR - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_drr; - } - } -} - -static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - int stream_index = -1; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom; - } - } - - if (stream_index >= 0) { - display_config->stage3.stream_svp_meta[stream_index].valid = true; - display_config->stage3.stream_svp_meta[stream_index].v_active = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; - 
display_config->stage3.stream_svp_meta[stream_index].v_total = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; - display_config->stage3.stream_svp_meta[stream_index].v_front_porch = 1; - } -} - -static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Setup reserve time - plane->overrides.reserved_vblank_time_ns = 400 * 1000; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; - } - } -} - -static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; - } - } -} - -static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_scratch *scratch, int strategy_index) -{ - bool success = true; - unsigned char stream_index; - - reset_display_configuration(display_config); - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { - success = false; - break; - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { - setup_planes_for_vblank_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - setup_planes_for_svp_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - setup_planes_for_drr_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { - setup_planes_for_vactive_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } - } - - return success; -} - -static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) -{ - int min_time_us = 0xFFFFFF; - unsigned char plane_index = 0; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - if (min_time_us > (display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000)) - min_time_us = display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; - } - } - return min_time_us; -} - -bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) -{ - bool p_state_supported = true; - unsigned int stream_index; - struct dml2_pmo_scratch *s = 
&in_out->instance->scratch; - - if (s->pmo_dcn4.cur_pstate_candidate < 0) - return false; - - for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { - - if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive) { - if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_US) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank) { - if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < - in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr)) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { - p_state_supported = false; - break; - } - } - - return p_state_supported; -} - -bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) -{ - bool success = false; - struct dml2_pmo_scratch *s = &in_out->instance->scratch; - - memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); - - if (in_out->last_candidate_failed) { - if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && - s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { - s->pmo_dcn4.cur_latency_index++; - - success = true; - } - } - - if (!success) { - s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index; - s->pmo_dcn4.cur_pstate_candidate++; - - if (s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates) { - success = true; - } - } - - if (success) { - in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index; - setup_display_config(in_out->optimized_display_config, &in_out->instance->scratch, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate); - } - - return success; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h deleted file mode 100644 index 09cacc933d21..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
- - -#ifndef __DML2_PMO_DCN4_H__ -#define __DML2_PMO_DCN4_H__ - -#include "dml2_internal_shared_types.h" - -bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out); - -bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); - -bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); -bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); -bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); - -bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); -bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); -bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); - -bool pmo_dcn4_unit_test(void); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 60a9faf81d3d..fa445067782e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1,122 +1,181 @@ -/* -* Copyright 2022 Advanced Micro Devices, Inc. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE. -* -* Authors: AMD -* -*/ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
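The hunks below rework how FAMS2 p-state strategies are represented: the bare per-stream arrays of enum dml2_pmo_pstate_strategy become entries of struct dml2_pmo_pstate_strategy, which pairs the per-stream methods with an explicit allow_state_increase flag, so each candidate carries its own policy for raising the minimum clock state instead of having insert_into_candidate_list() infer it from the methods. A minimal sketch of the resulting shape follows; the struct itself presumably lives in dml2_internal_shared_types.h, which is not part of this diff, so the exact field types are an assumption:

struct dml2_pmo_pstate_strategy {
	enum dml2_pmo_pstate_method per_stream_pstate_method[PMO_DCN4_MAX_DISPLAYS];
	bool allow_state_increase;
};

/* One table entry: a single-stream VActive candidate that may raise
 * the minimum clock state; unused stream slots stay NA. */
static const struct dml2_pmo_pstate_strategy example_entry = {
	.per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive,
			dml2_pmo_pstate_strategy_na,
			dml2_pmo_pstate_strategy_na,
			dml2_pmo_pstate_strategy_na },
	.allow_state_increase = true,
};

The list sizes then fall out of sizeof(list) / sizeof(struct dml2_pmo_pstate_strategy), which is exactly what the reworked *_size constants below compute.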
#include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn4.h" #include "dml2_debug.h" #include "lib_float_math.h" #include "dml2_pmo_dcn4_fams2.h" static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding -static const enum dml2_pmo_pstate_strategy base_strategy_list_1_display[][PMO_DCN4_MAX_DISPLAYS] = { +static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { // VActive Preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then VBlank - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, - // Finally DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + // Then DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_2_display[][PMO_DCN4_MAX_DISPLAYS] = { +static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = { // VActive only is preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then VActive + VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then VBlank only - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, 
dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then SVP + VBlank - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then SVP + DRR - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then SVP + SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then DRR + VActive - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - // Then DRR + VBlank - //{ dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + // Then DRR + DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - // Finally DRR + DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_3_display[][PMO_DCN4_MAX_DISPLAYS] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive +static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + 
.allow_state_increase = false, + }, - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank + // All VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank + // All DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 1 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 2 DRR - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_4_display[][PMO_DCN4_MAX_DISPLAYS] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive +static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, + .allow_state_increase = true, + }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, // VActive + 1 VBlank + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = false, + }, - //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 2 VBlank + // All Vblank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = false, + }, - //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank + // All DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, + .allow_state_increase = true, + }, - { 
dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 1 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 2 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 3 DRR - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / sizeof(struct dml2_pmo_pstate_strategy); static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) @@ -296,9 +355,9 @@ bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_o return result; } -static enum dml2_pmo_pstate_strategy convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_strategy base_strategy) +static enum dml2_pmo_pstate_method convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_method base_strategy) { - enum dml2_pmo_pstate_strategy variant_strategy = 0; + enum dml2_pmo_pstate_method variant_strategy = 0; switch (base_strategy) { case dml2_pmo_pstate_strategy_vactive: @@ -327,11 +386,9 @@ static enum dml2_pmo_pstate_strategy convert_strategy_to_drr_variant(const enum return variant_strategy; } -static enum dml2_pmo_pstate_strategy(*get_expanded_strategy_list( - struct dml2_pmo_init_data *init_data, - int stream_count))[PMO_DCN4_MAX_DISPLAYS] +static struct dml2_pmo_pstate_strategy *get_expanded_strategy_list(struct dml2_pmo_init_data *init_data, int stream_count) { - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; switch (stream_count) { case 1: @@ -361,23 +418,23 @@ static unsigned int get_num_expanded_strategies( } static void insert_strategy_into_expanded_list( - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_init_data *init_data) { - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; expanded_strategy_list = get_expanded_strategy_list(init_data, stream_count); if (expanded_strategy_list) { - memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++], - per_stream_pstate_strategy, - sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + + 
init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++; } } static void expand_base_strategy(struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *base_strategy, unsigned int stream_count) { bool skip_to_next_stream; @@ -386,19 +443,21 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, unsigned int i, j; unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; unsigned int stream_iteration_indices[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + struct dml2_pmo_pstate_strategy cur_strategy_list = { 0 }; /* determine number of displays per method */ for (i = 0; i < stream_count; i++) { /* increment the count of the earliest index with the same method */ for (j = 0; j < stream_count; j++) { - if (base_strategy_list[i] == base_strategy_list[j]) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { num_streams_per_method[j] = num_streams_per_method[j] + 1; break; } } } + cur_strategy_list.allow_state_increase = base_strategy->allow_state_increase; + i = 0; /* uses a while loop instead of recursion to build permutations of base strategy */ while (stream_iteration_indices[0] < stream_count) { @@ -409,12 +468,12 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, /* determine what to do for this iteration */ if (stream_iteration_indices[i] < stream_count && num_streams_per_method[stream_iteration_indices[i]] != 0) { /* decrement count and assign method */ - cur_strategy_list[i] = base_strategy_list[stream_iteration_indices[i]]; + cur_strategy_list.per_stream_pstate_method[i] = base_strategy->per_stream_pstate_method[stream_iteration_indices[i]]; num_streams_per_method[stream_iteration_indices[i]] -= 1; if (i >= stream_count - 1) { /* insert into strategy list */ - insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, &pmo->init_data); expanded_strategy_added = true; } else { /* skip to next stream */ @@ -450,55 +509,122 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, } } -static void expand_variant_strategy(struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + +static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy, + const struct dml2_pmo_pstate_strategy *variant_strategy, + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], unsigned int stream_count) { + bool valid = true; unsigned int i; - bool variant_found = false; - enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - - /* setup variant list as base to start */ - memcpy(cur_strategy_list, base_strategy_list, sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); - + /* check all restrictions are met */ for (i = 0; i < stream_count; i++) { - cur_strategy_list[i] = convert_strategy_to_drr_variant(base_strategy_list[i]); + /* vblank + vblank_drr variants are invalid */ + if (base_strategy->per_stream_pstate_method[i] == dml2_pmo_pstate_strategy_vblank && + ((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) || + num_streams_per_variant_method[i] > 1)) { + valid = false; + break; + } + } - if (cur_strategy_list[i] != 
base_strategy_list[i]) { - variant_found = true; + return valid; +} + +static void expand_variant_strategy(struct dml2_pmo_instance *pmo, + const struct dml2_pmo_pstate_strategy *base_strategy, + unsigned int stream_count) +{ + bool variant_found; + unsigned int i, j; + unsigned int method_index; + unsigned int stream_index; + unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + enum dml2_pmo_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; + struct dml2_pmo_pstate_strategy variant_strategy = { 0 }; + + /* determine number of displays per method */ + for (i = 0; i < stream_count; i++) { + /* increment the count of the earliest index with the same method */ + for (j = 0; j < stream_count; j++) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { + num_streams_per_method[j] = num_streams_per_method[j] + 1; + break; + } } - if (i == stream_count - 1 && variant_found) { - insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + per_stream_variant_method[i] = convert_strategy_to_drr_variant(base_strategy->per_stream_pstate_method[i]); + } + memcpy(num_streams_per_base_method, num_streams_per_method, sizeof(unsigned int) * PMO_DCN4_MAX_DISPLAYS); + + memcpy(&variant_strategy, base_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + + method_index = 0; + /* uses a while loop instead of recursion to build permutations of base strategy */ + while (num_streams_per_base_method[0] > 0 || method_index != 0) { + if (method_index == stream_count) { + /* construct variant strategy */ + variant_found = false; + stream_index = 0; + + for (i = 0; i < stream_count; i++) { + for (j = 0; j < num_streams_per_base_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = base_strategy->per_stream_pstate_method[i]; + } + + for (j = 0; j < num_streams_per_variant_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = per_stream_variant_method[i]; + if (base_strategy->per_stream_pstate_method[i] != per_stream_variant_method[i]) { + variant_found = true; + } + } + } + + if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) { + expand_base_strategy(pmo, &variant_strategy, stream_count); + } + + /* rollback to earliest method with bases remaining */ + for (method_index = stream_count - 1; method_index > 0; method_index--) { + if (num_streams_per_base_method[method_index]) { + /* bases remaining */ + break; + } else { + /* reset counters */ + num_streams_per_base_method[method_index] = num_streams_per_method[method_index]; + num_streams_per_variant_method[method_index] = 0; + } + } + } + + if (num_streams_per_base_method[method_index]) { + num_streams_per_base_method[method_index]--; + num_streams_per_variant_method[method_index]++; + + method_index++; + } else if (method_index != 0) { + method_index++; } } } static void expand_base_strategies( struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy(*base_strategies_list)[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *base_strategies_list, const unsigned int num_base_strategies, unsigned int stream_count) { unsigned int i; - unsigned int num_pre_variant_strategies; - enum 
dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS];
 
 	/* expand every explicit base strategy (except all DRR) */
-	for (i = 0; i < num_base_strategies - 1; i++) {
-		expand_base_strategy(pmo, base_strategies_list[i], stream_count);
+	for (i = 0; i < num_base_strategies; i++) {
+		expand_base_strategy(pmo, &base_strategies_list[i], stream_count);
+		expand_variant_strategy(pmo, &base_strategies_list[i], stream_count);
 	}
-
-	/* expand base strategies to DRR variants */
-	num_pre_variant_strategies = get_num_expanded_strategies(&pmo->init_data, stream_count);
-	expanded_strategy_list = get_expanded_strategy_list(&pmo->init_data, stream_count);
-	for (i = 0; i < num_pre_variant_strategies; i++) {
-		expand_variant_strategy(pmo, expanded_strategy_list[i], stream_count);
-	}
-
-	/* add back all DRR */
-	insert_strategy_into_expanded_list(base_strategies_list[num_base_strategies - 1], stream_count, &pmo->init_data);
 }
 
 bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out)
@@ -591,8 +717,6 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out)
 		&in_out->base_display_config->display_config;
 	const struct dml2_core_mode_support_result *mode_support_result =
 		&in_out->base_display_config->mode_support_result;
-	struct dml2_optimization_stage4_state *state =
-		&in_out->base_display_config->stage4;
 
 	if (in_out->instance->options->disable_dyn_odm ||
 		(in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1))
@@ -613,30 +737,28 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out)
 		 */
 		if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 &&
 			mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1)
-			state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true;
+			in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true;
 
 	for (i = 0; i < display_config->num_streams; i++) {
 		if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm)
-			state->unoptimizable_streams[i] = true;
+			in_out->base_display_config->stage4.unoptimizable_streams[i] = true;
 		else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid &&
 			in_out->instance->options->disable_dyn_odm_for_stream_with_svp)
-			state->unoptimizable_streams[i] = true;
+			in_out->base_display_config->stage4.unoptimizable_streams[i] = true;
 		/*
 		 * ODM Combine requires horizontal timing divisible by 2 so each
 		 * ODM segment has the same size.
 		 */
 		else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2))
-			state->unoptimizable_streams[i] = true;
+			in_out->base_display_config->stage4.unoptimizable_streams[i] = true;
 		/*
 		 * Our hardware supports seamless ODM transitions for DP encoders
 		 * only.
 		 */
 		else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder))
-			state->unoptimizable_streams[i] = true;
+			in_out->base_display_config->stage4.unoptimizable_streams[i] = true;
 	}
 
-	state->performed = true;
-
 	return true;
 }
 
@@ -787,6 +909,7 @@ static void build_synchronized_timing_groups(
 	/* clear all group masks */
 	memset(s->pmo_dcn4.synchronized_timing_group_masks, 0, sizeof(s->pmo_dcn4.synchronized_timing_group_masks));
 	memset(s->pmo_dcn4.group_is_drr_enabled, 0, sizeof(s->pmo_dcn4.group_is_drr_enabled));
+	memset(s->pmo_dcn4.group_is_drr_active, 0, sizeof(s->pmo_dcn4.group_is_drr_active));
 	memset(s->pmo_dcn4.group_line_time_us, 0, sizeof(s->pmo_dcn4.group_line_time_us));
 	s->pmo_dcn4.num_timing_groups = 0;
 
@@ -808,6 +931,8 @@ static void build_synchronized_timing_groups(
 		/* if drr is in use, timing is not synchronizable */
 		if (master_timing->drr_config.enabled) {
 			s->pmo_dcn4.group_is_drr_enabled[timing_group_idx] = true;
+			s->pmo_dcn4.group_is_drr_active[timing_group_idx] = !master_timing->drr_config.disallowed &&
+				(master_timing->drr_config.drr_active_fixed || master_timing->drr_config.drr_active_variable);
 			continue;
 		}
 
@@ -933,8 +1058,7 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo,
 
 		/* check recout height covers entire otg vactive, and single plane */
 		if (num_planes_per_stream[plane_descriptor->stream_index] > 1 ||
-			!plane_descriptor->composition.rect_out_height_spans_vactive ||
-			plane_descriptor->composition.rotation_angle != dml2_rotation_0) {
+			!plane_descriptor->composition.rect_out_height_spans_vactive) {
 			return false;
 		}
 	}
@@ -970,35 +1094,24 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo,
 	return true;
 }
 
-static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch)
+static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch)
 {
-	int stream_index;
-
-	scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true;
-
-	for (stream_index = 0; stream_index < stream_count; stream_index++) {
-		scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index];
-
-		if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank ||
-			per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr)
-			scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false;
-	}
-
+	scratch->pmo_dcn4.pstate_strategy_candidates[scratch->pmo_dcn4.num_pstate_candidates] = *pstate_strategy;
 	scratch->pmo_dcn4.num_pstate_candidates++;
 }
 
-static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy)
+static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_method method)
 {
 	unsigned char i;
 	enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na;
 
-	if (strategy == dml2_pmo_pstate_strategy_vactive || strategy == dml2_pmo_pstate_strategy_fw_vactive_drr)
+	if (method == dml2_pmo_pstate_strategy_vactive || method == dml2_pmo_pstate_strategy_fw_vactive_drr)
 		matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive;
-	else if 
(strategy == dml2_pmo_pstate_strategy_vblank || strategy == dml2_pmo_pstate_strategy_fw_vblank_drr) + else if (method == dml2_pmo_pstate_strategy_vblank || method == dml2_pmo_pstate_strategy_fw_vblank_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (strategy == dml2_pmo_pstate_strategy_fw_svp) + else if (method == dml2_pmo_pstate_strategy_fw_svp) matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (strategy == dml2_pmo_pstate_strategy_fw_drr) + else if (method == dml2_pmo_pstate_strategy_fw_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; for (i = 0; i < DML2_MAX_PLANES; i++) { @@ -1030,12 +1143,12 @@ static void build_method_scheduling_params( static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( struct dml2_pmo_instance *pmo, - enum dml2_pmo_pstate_strategy stream_pstate_strategy, + enum dml2_pmo_pstate_method stream_pstate_method, int stream_idx) { struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta = NULL; - switch (stream_pstate_strategy) { + switch (stream_pstate_method) { case dml2_pmo_pstate_strategy_vactive: case dml2_pmo_pstate_strategy_fw_vactive_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common; @@ -1066,7 +1179,7 @@ static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( static bool is_timing_group_schedulable( struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *pstate_strategy, const unsigned int timing_group_idx, struct dml2_fams2_per_method_common_meta *group_fams2_meta) { @@ -1085,18 +1198,13 @@ static bool is_timing_group_schedulable( } /* init allow start and end lines for timing group */ - stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[base_stream_idx], base_stream_idx); - if (!stream_method_fams2_meta) - return false; - + stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline; group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline; group_fams2_meta->period_us = stream_method_fams2_meta->period_us; for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { - stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[i], i); - if (!stream_method_fams2_meta) - continue; + stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { /* set group allow start to larger otg vline */ @@ -1126,7 +1234,7 @@ static bool is_timing_group_schedulable( static bool is_config_schedulable( struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) + const struct dml2_pmo_pstate_strategy *pstate_strategy) { unsigned int i, j; bool schedulable; @@ -1149,7 +1257,7 @@ static bool is_config_schedulable( for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { 
s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i; s->pmo_dcn4.sorted_group_gtl_period_index[i] = i; - if (!is_timing_group_schedulable(pmo, display_cfg, per_stream_pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { + if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { /* synchronized timing group was not schedulable */ schedulable = false; break; @@ -1251,7 +1359,7 @@ static bool is_config_schedulable( unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1]; if (s->pmo_dcn4.group_common_fams2_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_fams2_meta[sorted_ip1].period_us || - s->pmo_dcn4.group_is_drr_enabled[sorted_ip1]) { + (s->pmo_dcn4.group_is_drr_enabled[sorted_ip1] && s->pmo_dcn4.group_is_drr_active[sorted_ip1])) { schedulable = false; break; } @@ -1263,8 +1371,8 @@ static bool is_config_schedulable( /* STAGE 4: When using HW exclusive modes, check disallow alignments are within allowed threshold */ if (s->pmo_dcn4.num_timing_groups == 2 && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[0]) && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[1])) { + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[0]) && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[1])) { double period_ratio; double max_shift_us; double shift_per_period; @@ -1293,44 +1401,45 @@ static bool is_config_schedulable( } static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy stream_pstate_strategy, - unsigned int stream_index) + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pmo_pstate_method stream_pstate_method, + unsigned int stream_index) { const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; bool strategy_matches_drr_requirements = true; /* check if strategy is compatible with stream drr capability and strategy */ - if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && + if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && display_cfg->display_config.num_streams > 1 && stream_descriptor->timing.drr_config.enabled && (stream_descriptor->timing.drr_config.drr_active_fixed || stream_descriptor->timing.drr_config.drr_active_variable)) { /* DRR is active, so config may become unschedulable */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && - is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && + is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && stream_descriptor->timing.drr_config.enabled && stream_descriptor->timing.drr_config.drr_active_variable) { /* DRR is variable, fw exclusive methods require DRR to be clamped */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && pmo->options->disable_drr_var_when_var_active && stream_descriptor->timing.drr_config.enabled && stream_descriptor->timing.drr_config.drr_active_variable) { /* DRR variable is active, but 
policy blocks DRR for p-state when this happens */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && (pmo->options->disable_drr_var || !stream_descriptor->timing.drr_config.enabled || stream_descriptor->timing.drr_config.disallowed)) { /* DRR variable strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && (pmo->options->disable_drr_clamped || - !stream_descriptor->timing.drr_config.enabled)) { + !stream_descriptor->timing.drr_config.enabled || + (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable))) { /* DRR fixed strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && pmo->options->disable_fams2) { /* FW modes require FAMS2 */ strategy_matches_drr_requirements = false; @@ -1341,7 +1450,7 @@ static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) + const struct dml2_pmo_pstate_strategy *pstate_strategy) { struct dml2_pmo_scratch *s = &pmo->scratch; @@ -1362,28 +1471,28 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins // Tabulate everything for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], - per_stream_pstate_strategy[stream_index])) { + if (!all_planes_match_method(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], + pstate_strategy->per_stream_pstate_method[stream_index])) { strategy_matches_forced_requirements = false; break; } strategy_matches_drr_requirements &= - stream_matches_drr_policy(pmo, display_cfg, per_stream_pstate_strategy[stream_index], stream_index); + stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index); - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { svp_count++; set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { drr_count++; set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if 
(pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { vactive_count++; set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { vblank_count++; set_bit_in_bitfield(&vblank_stream_mask, stream_index); } @@ -1392,7 +1501,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins if (!strategy_matches_forced_requirements || !strategy_matches_drr_requirements) return false; - if (vactive_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask))) + if (vactive_count > 0 && !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask)) return false; if (vblank_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vblank(pmo, display_cfg, vblank_stream_mask))) @@ -1404,7 +1513,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins if (svp_count > 0 && (pmo->options->disable_svp || !all_timings_support_svp(pmo, display_cfg, svp_stream_mask))) return false; - return is_config_schedulable(pmo, display_cfg, per_stream_pstate_strategy); + return is_config_schedulable(pmo, display_cfg, pstate_strategy); } static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) @@ -1460,6 +1569,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, (stream_fams2_meta->nom_vtotal * timing->h_total); stream_fams2_meta->nom_frame_time_us = (double)stream_fams2_meta->nom_vtotal * stream_fams2_meta->otg_vline_time_us; + stream_fams2_meta->vblank_start = timing->v_blank_end + timing->v_active; if (stream_descriptor->timing.drr_config.enabled == true) { if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) { @@ -1513,7 +1623,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_vactive.common.allow_start_otg_vline = timing->v_blank_end + stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; stream_fams2_meta->method_vactive.common.allow_end_otg_vline = - timing->v_blank_end + timing->v_active - + stream_fams2_meta->vblank_start - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } else { stream_fams2_meta->method_vactive.common.allow_start_otg_vline = 0; @@ -1523,8 +1633,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, build_method_scheduling_params(&stream_fams2_meta->method_vactive.common, stream_fams2_meta); /* vblank */ - stream_fams2_meta->method_vblank.common.allow_start_otg_vline = - timing->v_blank_end + timing->v_active; + stream_fams2_meta->method_vblank.common.allow_start_otg_vline = stream_fams2_meta->vblank_start; stream_fams2_meta->method_vblank.common.allow_end_otg_vline = stream_fams2_meta->method_vblank.common.allow_start_otg_vline + 1; stream_fams2_meta->method_vblank.common.period_us = stream_fams2_meta->nom_frame_time_us; @@ -1558,8 +1667,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, 
stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + stream_fams2_meta->allow_to_target_delay_otg_vlines; stream_fams2_meta->method_subvp.common.allow_end_otg_vline = - stream_fams2_meta->nom_vtotal - - timing->v_front_porch - + stream_fams2_meta->vblank_start - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; stream_fams2_meta->method_subvp.common.period_us = stream_fams2_meta->nom_frame_time_us; build_method_scheduling_params(&stream_fams2_meta->method_subvp.common, stream_fams2_meta); @@ -1568,20 +1676,21 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_drr.programming_delay_otg_vlines = (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_fams2_meta->otg_vline_time_us); stream_fams2_meta->method_drr.common.allow_start_otg_vline = - stream_fams2_meta->nom_vtotal + + stream_fams2_meta->vblank_start + stream_fams2_meta->allow_to_target_delay_otg_vlines; stream_fams2_meta->method_drr.common.period_us = stream_fams2_meta->nom_frame_time_us; if (display_config->display_config.num_streams <= 1) { /* only need to stretch vblank for blackout time */ stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->method_drr.common.allow_start_otg_vline + + stream_fams2_meta->nom_vtotal + + stream_fams2_meta->allow_to_target_delay_otg_vlines + stream_fams2_meta->min_allow_width_otg_vlines + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } else { /* multi display needs to always be schedulable */ stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->method_drr.common.allow_start_otg_vline + - stream_fams2_meta->nom_vtotal + + stream_fams2_meta->nom_vtotal * 2 + + stream_fams2_meta->allow_to_target_delay_otg_vlines + stream_fams2_meta->min_allow_width_otg_vlines + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } @@ -1614,7 +1723,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp struct display_configuation_with_meta *display_config; const struct dml2_plane_parameters *plane_descriptor; - const enum dml2_pmo_pstate_strategy(*strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + const struct dml2_pmo_pstate_strategy *strategy_list = NULL; unsigned int strategy_list_size = 0; unsigned char plane_index, stream_index, i; @@ -1626,6 +1735,10 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp memset(s, 0, sizeof(struct dml2_pmo_scratch)); + if (display_config->display_config.overrides.all_streams_blanked) { + return true; + } + pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size; pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; @@ -1663,8 +1776,8 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp s->pmo_dcn4.num_pstate_candidates = 0; for (i = 0; i < strategy_list_size && s->pmo_dcn4.num_pstate_candidates < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { - if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { - insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); + if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, &strategy_list[i])) { + insert_into_candidate_list(&strategy_list[i], display_config->display_config.num_streams, s); } } @@ -1860,26 +1973,26 @@ static bool setup_display_config(struct 
display_configuation_with_meta *display_ for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (pmo->scratch.pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { + if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { success = false; break; - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive) { setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank) { setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { fams2_required = true; setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { fams2_required = true; setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { fams2_required = true; setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { fams2_required = true; setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { fams2_required = true; setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); } @@ -1920,6 +2033,10 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp int MIN_VACTIVE_MARGIN_DRR = 0; int REQUIRED_RESERVED_TIME = 0; + if (in_out->base_display_config->display_config.overrides.all_streams_blanked) { + return true; + } + 
MIN_VACTIVE_MARGIN_VBLANK = INT_MIN; MIN_VACTIVE_MARGIN_DRR = INT_MIN; REQUIRED_RESERVED_TIME = (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; @@ -1930,34 +2047,34 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { struct dml2_fams2_meta *stream_fams2_meta = &s->pmo_dcn4.stream_fams2_meta[stream_index]; - if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < REQUIRED_RESERVED_TIME || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == 
dml2_pmo_pstate_strategy_fw_drr) { + if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { p_state_supported = false; break; } @@ -1974,8 +2091,8 @@ bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pst memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); if (in_out->last_candidate_failed) { - if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && - s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].allow_state_increase && + s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index - 1) { s->pmo_dcn4.cur_latency_index++; success = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h index 75175d93add4..0c25bd3e9ac0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_FAMS2_DCN4_H__ #define __DML2_PMO_FAMS2_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index e0b9ece7901d..95f716e2641f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -2,10 +2,8 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_pmo_factory.h" #include "dml2_pmo_dcn4_fams2.h" -#include "dml2_pmo_dcn4.h" #include "dml2_pmo_dcn3.h" #include "dml2_external_lib_deps.h" @@ -35,8 +33,8 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * switch (project_id) { case dml2_project_dcn4x_stage1: - out->initialize = pmo_dcn4_initialize; - out->optimize_dcc_mcache = pmo_dcn4_optimize_dcc_mcache; + out->initialize = pmo_dcn4_fams2_initialize; + out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; result = true; break; case dml2_project_dcn4x_stage2: diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h index 9d3dc5e94be1..7218de1824cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
- #ifndef __DML2_PMO_FACTORY_H__ #define __DML2_PMO_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c index defe13436a2c..4822dbcc86bb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "lib_float_math.h" #define ASSERT(condition) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h index 537cf6fd4c15..e13b0c5939b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __LIB_FLOAT_MATH_H__ #define __LIB_FLOAT_MATH_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c index 1b6dbfaa7ae8..dc8af4dd0410 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_top_optimization.h" #include "dml2_internal_shared_types.h" #include "dml_top_mcache.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h index 1536afcbf73a..9f22ab33eab1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_TOP_OPTIMIZATION_H__ #define __DML2_TOP_OPTIMIZATION_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index b25e9230adea..30d07cd1065f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_internal_shared_types.h" #include "dml_top.h" #include "dml2_mcg_factory.h" @@ -28,6 +27,7 @@ bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) bool result = false; memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); + memset(dml, 0, sizeof(struct dml2_instance)); memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); @@ -96,14 +96,12 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) { struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals; - /* Borrow the build_mode_programming_locals programming struct for DPMM call. 
*/ - struct dml2_display_cfg_programming *dpmm_programming = dml->scratch.build_mode_programming_locals.mode_programming_params.programming; + struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming; bool result = false; bool mcache_success = false; - if (dpmm_programming) - memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); + memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); @@ -130,7 +128,7 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) /* * Call DPMM to map all requirements to minimum clock state */ - if (result && dpmm_programming) { + if (result) { l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; l->dppm_map_mode_params.programming = dpmm_programming; @@ -140,9 +138,8 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) } in_out->is_supported = mcache_success; - result = result && in_out->is_supported; - return result; + return true; } bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) @@ -268,24 +265,15 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); - if (l->optimized_display_config_with_meta.stage4.performed) { - /* - * when performed is true, optimization has applied to - * optimized_display_config_with_meta and it has passed mode - * support. However it may or may not pass the test function to - * reach actual Vmin. As long as voltage is optimized even if it - * doesn't reach Vmin level, there is still power benefit so in - * this case we will still copy this optimization into base - * display config. - */ + if (vmin_success) { memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = vmin_success; + l->base_display_config_with_meta.stage4.success = true; } /* * Phase 5: Optimize for Stutter */ - memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params)); + memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); l->stutter_phase.dml = dml; l->stutter_phase.display_config = &l->base_display_config_with_meta; l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter; @@ -298,7 +286,7 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o if (stutter_success) { memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage5.success = true; + l->base_display_config_with_meta.stage4.success = true; } /* diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c index 7afd417071a5..e69f8ce97e24 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
- #include "dml2_debug.h" #include "dml_top_mcache.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h index bb12e4c30690..7b1f6f7143d0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_MCACHE_H__ #define __DML_TOP_MCACHE_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c index de7d8a6a2d3d..e9b8e10695ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_debug.h" int dml2_printf(const char *format, ...) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index 0403238df107..d51a1b6c62f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DEBUG_H__ #define __DML2_DEBUG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index 5632cdacb7f4..aeac9f159fa5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
- #ifndef __DML2_INTERNAL_SHARED_TYPES_H__ #define __DML2_INTERNAL_SHARED_TYPES_H__ @@ -107,10 +106,16 @@ struct dml2_dpmm_map_watermarks_params_in_out { struct dml2_display_cfg_programming *programming; }; +struct dml2_dpmm_scratch { + struct dml2_display_cfg_programming programming; +}; + struct dml2_dpmm_instance { bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out); bool (*unit_test)(void); + + struct dml2_dpmm_scratch dpmm_scratch; }; /* @@ -266,6 +271,7 @@ struct dml2_fams2_meta { unsigned int contention_delay_otg_vlines; unsigned int min_allow_width_otg_vlines; unsigned int nom_vtotal; + unsigned int vblank_start; double nom_refresh_rate_hz; double nom_frame_time_us; unsigned int max_vtotal; @@ -594,7 +600,7 @@ struct dml2_pmo_optimize_for_stutter_in_out { struct display_configuation_with_meta *optimized_display_config; }; -enum dml2_pmo_pstate_strategy { +enum dml2_pmo_pstate_method { dml2_pmo_pstate_strategy_na = 0, /* hw exclusive modes */ dml2_pmo_pstate_strategy_vactive = 1, @@ -612,6 +618,11 @@ enum dml2_pmo_pstate_strategy { dml2_pmo_pstate_strategy_reserved_fw_drr_var = 22, }; +struct dml2_pmo_pstate_strategy { + enum dml2_pmo_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; + bool allow_state_increase; +}; + #define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw - dml2_pmo_pstate_strategy_na + 1)) - 1) << dml2_pmo_pstate_strategy_na) #define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) #define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_clamped - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) @@ -634,8 +645,7 @@ struct dml2_pmo_scratch { int stream_mask; } pmo_dcn3; struct { - enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[DML2_MAX_PLANES][DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; - bool allow_state_increase_for_strategy[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; + struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; int num_pstate_candidates; int cur_pstate_candidate; @@ -661,6 +671,7 @@ struct dml2_pmo_scratch { unsigned int num_timing_groups; unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; bool group_is_drr_enabled[DML2_MAX_PLANES]; + bool group_is_drr_active[DML2_MAX_PLANES]; double group_line_time_us[DML2_MAX_PLANES]; /* scheduling check locals */ @@ -676,10 +687,10 @@ struct dml2_pmo_init_data { union { struct { /* populated once during initialization */ - enum dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2 * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 24 * 2][PMO_DCN4_MAX_DISPLAYS]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 4 * 4]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 6 * 
6]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 8 * 8 * 8 * 8]; unsigned int num_expanded_strategies_per_list[PMO_DCN4_MAX_DISPLAYS]; } pmo_dcn4; }; From 5ed9481db74740fd33ec4079b29db88e8c9f4a1c Mon Sep 17 00:00:00 2001 From: Mounika Adhuri Date: Wed, 26 Jun 2024 18:51:04 +0530 Subject: [PATCH 035/447] drm/amd/display: Refactoring MPC [Why] To refactor MPC files [How] Moved MPC files to respective folders and updated makefiles appropriately. Reviewed-by: Martin Leung Signed-off-by: Jerry Zuo Signed-off-by: Mounika Adhuri Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 1 - drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 3 +-- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/mpc/Makefile | 27 +++++++++++++++++++ .../display/dc/{ => mpc}/dcn10/dcn10_mpc.c | 0 .../display/dc/{ => mpc}/dcn10/dcn10_mpc.h | 0 .../display/dc/{ => mpc}/dcn20/dcn20_mpc.c | 0 .../display/dc/{ => mpc}/dcn20/dcn20_mpc.h | 0 .../display/dc/{ => mpc}/dcn30/dcn30_mpc.c | 2 +- .../display/dc/{ => mpc}/dcn30/dcn30_mpc.h | 0 10 files changed, 30 insertions(+), 5 deletions(-) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn10/dcn10_mpc.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn10/dcn10_mpc.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn20/dcn20_mpc.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn20/dcn20_mpc.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn30/dcn30_mpc.c (99%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn30/dcn30_mpc.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 75e088b479ea..e1f6623d4936 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -24,7 +24,6 @@ DCN10 = dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ - dcn10_mpc.o \ dcn10_cm_common.o \ AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index d92d2b4ee015..25ba0d310d46 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -1,8 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved. 
-DCN20 = dcn20_mpc.o \ - dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o +DCN20 = dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 4c43af867d86..804851247acc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -23,7 +23,7 @@ # # -DCN30 := dcn30_mpc.o dcn30_vpg.o \ +DCN30 := dcn30_vpg.o \ dcn30_afmt.o \ dcn30_dwb.o \ dcn30_dwb_cm.o \ diff --git a/drivers/gpu/drm/amd/display/dc/mpc/Makefile b/drivers/gpu/drm/amd/display/dc/mpc/Makefile index 7f7458c07e2a..5402c3529f5e 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/mpc/Makefile @@ -24,6 +24,33 @@ # ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN10 +############################################################################### +MPC_DCN10 = dcn10_mpc.o + +AMD_DAL_MPC_DCN10 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn10/,$(MPC_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN10) + +############################################################################### +# DCN20 +############################################################################### +MPC_DCN20 = dcn20_mpc.o + +AMD_DAL_MPC_DCN20 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn20/,$(MPC_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN20) + +############################################################################### +# DCN30 +############################################################################### +MPC_DCN30 = dcn30_mpc.o + +AMD_DAL_MPC_DCN30 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn30/,$(MPC_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN30) + ############################################################################### # DCN32 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c rename to drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h rename to drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c rename to drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h rename to drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c similarity index 99% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c rename to drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c index 3aeb85ec40b0..fe26fde12eeb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c @@ -25,7 +25,7 @@ #include "reg_helper.h" #include "dcn30_mpc.h" -#include "dcn30_cm_common.h" +#include 
"dcn30/dcn30_cm_common.h" #include "basics/conversion.h" #include "dcn10/dcn10_cm_common.h" #include "dc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h rename to drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h From f91a9af09dea850d83d4b217b8acbafd97b5c61f Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Wed, 3 Jul 2024 16:47:57 +0800 Subject: [PATCH 036/447] drm/amd/display: Fix VRR cannot enable [Why] Sometimes the VRR cannot enable after login to the desktop. User space may call the DRM_IOCTL_MODE_GETCONNECTOR right after the DRM_IOCTL_MODE_RMFB. After calling DRM_IOCTL_MODE_RMFB to remove all the frame buffer and it will cause the driver to disable the crtc and disable the link while calling the link_set_dpms_off(). It will cause the dpcd read failed in amdgpu_dm_update_freesync_caps() while try to get the DP_MSA_TIMING_PAR_IGNORED capability and think the sink side does not support VRR. [How] Use the dpcd_caps.allow_invalid_MSA_timing_param flag instead of reading from dpcd directly. dpcd_caps.allow_invalid_MSA_timing_param flag is updated during HPD. It is safe to replace the original method. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 92774a871e98..a3b0f2748af0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11721,25 +11721,6 @@ fail: return ret; } -static bool is_dp_capable_without_timing_msa(struct dc *dc, - struct amdgpu_dm_connector *amdgpu_dm_connector) -{ - u8 dpcd_data; - bool capable = false; - - if (amdgpu_dm_connector->dc_link && - dm_helpers_dp_read_dpcd( - NULL, - amdgpu_dm_connector->dc_link, - DP_DOWN_STREAM_PORT_COUNT, - &dpcd_data, - sizeof(dpcd_data))) { - capable = (dpcd_data & DP_MSA_TIMING_PAR_IGNORED) ? true:false; - } - - return capable; -} - static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, unsigned int offset, unsigned int total_length, @@ -12042,8 +12023,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, sink->sink_signal == SIGNAL_TYPE_EDP)) { bool edid_check_required = false; - if (is_dp_capable_without_timing_msa(adev->dm.dc, - amdgpu_dm_connector)) { + if (amdgpu_dm_connector->dc_link && + amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) { amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; From 39d496d4721ba509647a70041f38d82b03c74680 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 3 Jul 2024 10:41:26 -0400 Subject: [PATCH 037/447] drm/amd/display: Add blanked streams override to DML2.1 [WHY] DML2.1 currently has no concept of a "blanked" stream. For cases like DPMS off, things like UCLK p-state is always allowed, so PMO is not required to optimize for it. [HOW] Add flag to DML2.1 display configuration to indicate all streams are blanked, so certain operations and optimizations can be skipped for optimal programming when displays are blanked. 
Reviewed-by: Chaitanya Dhere Signed-off-by: Jerry Zuo Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/dml21_translation_helper.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 405544920f3b..e9647f068ee4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -957,6 +957,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s int stream_index, plane_index; int disp_cfg_stream_location, disp_cfg_plane_location; struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config; + unsigned int plane_count = 0; memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); @@ -1010,10 +1011,16 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].overrides.uclk_pstate_change_strategy = dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_values[stream_index]); } + + plane_count++; } } } + if (plane_count == 0) { + dml_dispcfg->overrides.all_streams_blanked = true; + } + return true; }
From 4e8eac98046446d99cbbed740f0767204b839f3f Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 3 Jul 2024 11:36:34 -0400 Subject: [PATCH 038/447] drm/amd/display: Calculate ODM width using odm slice rect, not recout [Description] There are scenarios where ODM4:1 is used but the surface is entirely outside of the first and last ODM slice. In this case the recout.width for the first and last slice is 0 because there is no overlap between the surface and that ODM slice, which causes the cursor x_pos to be calculated incorrectly. We should use the ODM slice width rather than the recout width.
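A worked example with hypothetical numbers: a 4096-pixel-wide timing driven ODM 4:1 gives four ODM slices whose source rects are each 1024 pixels wide. If the surface only overlaps slices 2 and 3, the pipe owning slice 2 must still shift the cursor left by the full 2048 pixels covered by slices 0 and 1, yet recout.width is 0 for both of those slices. Accumulating ODM slice source widths, as in the sketch below (which mirrors the hunk that follows, wrapped in an illustrative helper), yields the correct shift:

	/* Sketch: compute the cursor x shift for a pipe in an ODM combine
	 * chain. resource_get_odm_slice_src_rect() returns the slice source
	 * rect regardless of whether the surface overlaps the slice.
	 */
	static int odm_cursor_shift_sketch(struct pipe_ctx *pipe_ctx, int x_pos)
	{
		int prev_odm_width = 0;
		struct pipe_ctx *prev = pipe_ctx->prev_odm_pipe;

		while (prev != NULL) {
			struct rect slice_src = resource_get_odm_slice_src_rect(prev);

			prev_odm_width += slice_src.width; /* 1024 + 1024 = 2048 here */
			prev = prev->prev_odm_pipe;
		}

		return x_pos - prev_odm_width;
	}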
Reviewed-by: Nevenko Stupar Signed-off-by: Jerry Zuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index b9378f18c020..31e0e9210dd7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1115,10 +1115,10 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) .mirror = pipe_ctx->plane_state->horizontal_mirror, .stream = pipe_ctx->stream }; + struct rect odm_slice_src = { 0 }; bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || (pipe_ctx->prev_odm_pipe != NULL); int prev_odm_width = 0; - int prev_odm_offset = 0; struct pipe_ctx *prev_odm_pipe = NULL; bool mpc_combine_on = false; int bottom_pipe_x_pos = 0; @@ -1183,12 +1183,12 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) prev_odm_pipe = pipe_ctx->prev_odm_pipe; while (prev_odm_pipe != NULL) { - prev_odm_width += prev_odm_pipe->plane_res.scl_data.recout.width; - prev_odm_offset += prev_odm_pipe->plane_res.scl_data.recout.x; + odm_slice_src = resource_get_odm_slice_src_rect(prev_odm_pipe); + prev_odm_width += odm_slice_src.width; prev_odm_pipe = prev_odm_pipe->prev_odm_pipe; } - x_pos -= (prev_odm_width + prev_odm_offset); + x_pos -= (prev_odm_width); } /* If the position is negative then we need to add to the hotspot
From a41d58fb91248557438de4e8298d1d2ed5b39564 Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Wed, 3 Jul 2024 13:29:55 -0400 Subject: [PATCH 039/447] drm/amd/display: Issue with 3 or more mcaches per surface [Why & How] The current logic in the mcache admissibility check is flawed when the calculated number of mcaches is 3 or more per surface: the check may pass when it should fail, and fail when it should pass. Fix the issue, and also add a check to make sure that the required number of mcaches per surface cannot be higher than the number of pipes + 1 used on that surface.
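The core of the fix is the span-counting helper, whose inner-loop comparison was inverted so that boundaries outside the per-pipe viewport span were being counted. A self-contained sketch of the corrected behavior (assuming the span window is anchored at each boundary offset in turn, as the hunk below implies): the helper returns the largest number of mcache boundary offsets that fall inside any one window, and the new admissibility condition additionally requires the boundary count not to exceed the number of DPPs used on the surface:

	/* Sketch: largest number of boundary offsets inside any window of
	 * width 'span' anchored at a boundary (offsets assumed ascending).
	 * Admissibility then needs this count <= 1 per pipe, plus
	 * num_boundaries <= num_dpps.
	 */
	static unsigned int count_elements_in_span_sketch(const int *array,
			unsigned int array_size, int span)
	{
		unsigned int start, i, best = 0;

		for (start = 0; start < array_size; start++) {
			for (i = start; i < array_size; i++) {
				if (array[i] - array[start] <= span) {
					if (i - start + 1 > best)
						best = i - start + 1;
				} else {
					break;
				}
			}
		}
		return best;
	}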
Reviewed-by: Chaitanya Dhere Signed-off-by: Jerry Zuo Signed-off-by: Nevenko Stupar Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dml2/dml21/src/dml2_top/dml_top_mcache.c | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c index e69f8ce97e24..a342ebfbe4e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c @@ -142,12 +142,12 @@ static unsigned int count_elements_in_span(int *array, unsigned int array_size, while (span_start_index < array_size) { for (i = span_start_index; i < array_size; i++) { - if (array[i] - span_start_value > span) { + if (array[i] - span_start_value <= span) { if (i - span_start_index + 1 > greatest_element_count) { greatest_element_count = i - span_start_index + 1; } + } else break; - } } span_start_index++; @@ -207,9 +207,9 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi int temp, p0shift, p1shift; unsigned int plane_index = 0; unsigned int i; - char odm_combine_factor = 1; - char mpc_combine_factor = 1; - char num_dpps; + unsigned int odm_combine_factor; + unsigned int mpc_combine_factor; + unsigned int num_dpps; unsigned int num_boundaries; enum dml2_scaling_transform scaling_transform; const struct dml2_plane_parameters *plane; @@ -226,10 +226,10 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi plane = ¶ms->display_cfg->plane_descriptors[plane_index]; stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; - odm_combine_factor = (char)params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; + num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; if (odm_combine_factor == 1) - mpc_combine_factor = (char)params->cfg_support_info->plane_support_info[plane_index].dpps_used; + num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; else mpc_combine_factor = 1; @@ -259,13 +259,13 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi // The last element in the unshifted boundary array will always be the first pixel outside the // plane, which means theres no mcache associated with it, so -1 num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; - if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - num_boundaries, max_per_pipe_vp_p0) <= 1) { + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) { p0pass = true; } num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 
0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; - if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - num_boundaries, max_per_pipe_vp_p1) <= 1) { + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) { p1pass = true; } From f82200703434522f1b35d38bdef02486d22b2f25 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Wed, 3 Jul 2024 12:23:02 -0400 Subject: [PATCH 040/447] drm/amd/display: remove dc dependencies from SPL library [Why] Make SPL library dc-independent so it can be reused by other components [How] Create separate set of fixed31_32 calls in SPL Make all inputs and outputs to SPL use primitive types For ratios and inits, return as uint32 from SPL. So add conversion from uint32 back to fixed point in SPL-to-dc translate function Reviewed-by: Relja Vojvodic Signed-off-by: Jerry Zuo Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/basics/fixpt31_32.c | 27 + .../gpu/drm/amd/display/dc/core/dc_resource.c | 2 - .../gpu/drm/amd/display/dc/dc_spl_translate.c | 43 +- drivers/gpu/drm/amd/display/dc/spl/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 564 +++++++++--------- .../display/dc/spl/dc_spl_isharp_filters.c | 27 +- .../display/dc/spl/dc_spl_isharp_filters.h | 2 +- .../display/dc/spl/dc_spl_scl_easf_filters.c | 81 +-- .../display/dc/spl/dc_spl_scl_easf_filters.h | 32 +- .../amd/display/dc/spl/dc_spl_scl_filters.c | 69 +-- .../amd/display/dc/spl/dc_spl_scl_filters.h | 18 +- .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 39 +- .../gpu/drm/amd/display/dc/spl/spl_debug.h | 23 + .../drm/amd/display/dc/spl/spl_fixpt31_32.c | 518 ++++++++++++++++ .../drm/amd/display/dc/spl/spl_fixpt31_32.h | 546 +++++++++++++++++ .../gpu/drm/amd/display/dc/spl/spl_os_types.h | 77 +++ .../gpu/drm/amd/display/include/fixed31_32.h | 6 + 17 files changed, 1645 insertions(+), 431 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_debug.h create mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c create mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h create mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 506f82cd5cc6..88d3f9d7dd55 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -486,3 +486,30 @@ int dc_fixpt_s4d19(struct fixed31_32 arg) else return ux_dy(arg.value, 4, 19); } + +struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct fixed31_32 fixpt_value = dc_fixpt_zero; + struct fixed31_32 fixpt_int_value = dc_fixpt_zero; + long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; + + fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + frac_mask = frac_mask << fractional_bits; + fixpt_int_value.value = value & frac_mask; + fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + fixpt_value.value |= fixpt_int_value.value; + return fixpt_value; +} + +struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct fixed31_32 fixpt_value = 
dc_fixpt_from_int(int_value); + + fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + return fixpt_value; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 4f5b23520365..5c9091f2a8b2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1511,8 +1511,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha; - spl_out->scl_data.h_active = pipe_ctx->plane_res.scl_data.h_active; - spl_out->scl_data.v_active = pipe_ctx->plane_res.scl_data.v_active; // Convert pipe_ctx to respective input params for SPL translate_SPL_in_params_from_pipe_ctx(pipe_ctx, spl_in); diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 49ff59258c8d..339d092e711c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -42,26 +42,26 @@ static void populate_spltaps_from_taps(struct spl_taps *spl_scaling_quality, static void populate_taps_from_spltaps(struct scaling_taps *scaling_quality, const struct spl_taps *spl_scaling_quality) { - scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c; - scaling_quality->h_taps = spl_scaling_quality->h_taps; - scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c; - scaling_quality->v_taps = spl_scaling_quality->v_taps; + scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c + 1; + scaling_quality->h_taps = spl_scaling_quality->h_taps + 1; + scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c + 1; + scaling_quality->v_taps = spl_scaling_quality->v_taps + 1; } static void populate_ratios_from_splratios(struct scaling_ratios *ratios, - const struct spl_ratios *spl_ratios) + const struct ratio *spl_ratios) { - ratios->horz = spl_ratios->horz; - ratios->vert = spl_ratios->vert; - ratios->horz_c = spl_ratios->horz_c; - ratios->vert_c = spl_ratios->vert_c; + ratios->horz = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio >> 5, 3, 19); + ratios->vert = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio >> 5, 3, 19); + ratios->horz_c = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio_c >> 5, 3, 19); + ratios->vert_c = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio_c >> 5, 3, 19); } static void populate_inits_from_splinits(struct scl_inits *inits, - const struct spl_inits *spl_inits) + const struct init *spl_inits) { - inits->h = spl_inits->h; - inits->v = spl_inits->v; - inits->h_c = spl_inits->h_c; - inits->v_c = spl_inits->v_c; + inits->h = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int, spl_inits->h_filter_init_frac >> 5, 0, 19); + inits->v = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int, spl_inits->v_filter_init_frac >> 5, 0, 19); + inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19); + inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19); } /// @brief Translate SPL input parameters from pipe context /// @param pipe_ctx @@ -170,6 +170,9 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl /* Translate transfer function */ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type; spl_in->basic_in.tf_predefined_type = 
(enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf; + + spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active; + spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active; /* Check if it is stream is in fullscreen and if its HDR. * Use this to determine sharpness levels */ @@ -184,15 +187,15 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out) { // Make scaler data recout point to spl output field recout - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->scl_data.recout); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->dscl_prog_data->recout); // Make scaler data ratios point to spl output field ratios - populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->scl_data.ratios); + populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->dscl_prog_data->ratios); // Make scaler data viewport point to spl output field viewport - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->scl_data.viewport); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->dscl_prog_data->viewport); // Make scaler data viewport_c point to spl output field viewport_c - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->scl_data.viewport_c); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->dscl_prog_data->viewport_c); // Make scaler data taps point to spl output field scaling taps - populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->scl_data.taps); + populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->dscl_prog_data->taps); // Make scaler data init point to spl output field init - populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->scl_data.inits); + populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->dscl_prog_data->init); } diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile index af7eaf839970..05764d4d4604 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/Makefile +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'spl' sub-component of DAL. # It provides the scaling library interface. 
-SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o +SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o spl_fixpt31_32.o AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL)) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index dad38960d34d..b8858ea7c776 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -4,10 +4,11 @@ #include "dc_spl.h" #include "dc_spl_scl_filters.h" -#include "dc_spl_isharp_filters.h" #include "dc_spl_scl_easf_filters.h" +#include "dc_spl_isharp_filters.h" +#include "spl_debug.h" -#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) +#define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19)) #define MIN_VIEWPORT_SIZE 12 static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1) @@ -108,26 +109,26 @@ static struct spl_rect calculate_plane_rec_in_timing_active( const struct spl_rect *stream_src = &spl_in->basic_out.src_rect; const struct spl_rect *stream_dst = &spl_in->basic_out.dst_rect; struct spl_rect rec_out = {0}; - struct fixed31_32 temp; + struct spl_fixed31_32 temp; - temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, + temp = spl_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, stream_src->width); - rec_out.x = stream_dst->x + dc_fixpt_round(temp); + rec_out.x = stream_dst->x + spl_fixpt_round(temp); - temp = dc_fixpt_from_fraction( + temp = spl_fixpt_from_fraction( (rec_in->x + rec_in->width) * (long long)stream_dst->width, stream_src->width); - rec_out.width = stream_dst->x + dc_fixpt_round(temp) - rec_out.x; + rec_out.width = stream_dst->x + spl_fixpt_round(temp) - rec_out.x; - temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, + temp = spl_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, stream_src->height); - rec_out.y = stream_dst->y + dc_fixpt_round(temp); + rec_out.y = stream_dst->y + spl_fixpt_round(temp); - temp = dc_fixpt_from_fraction( + temp = spl_fixpt_from_fraction( (rec_in->y + rec_in->height) * (long long)stream_dst->height, stream_src->height); - rec_out.height = stream_dst->y + dc_fixpt_round(temp) - rec_out.y; + rec_out.height = stream_dst->y + spl_fixpt_round(temp) - rec_out.y; return rec_out; } @@ -145,7 +146,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; mpc_rec.height = plane_clip_rec->height; mpc_rec.y = plane_clip_rec->y; - ASSERT(mpc_slice_count == 1 || + SPL_ASSERT(mpc_slice_count == 1 || spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE || mpc_rec.width % 2 == 0); @@ -158,7 +159,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( } if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) { - ASSERT(mpc_rec.height % 2 == 0); + SPL_ASSERT(mpc_rec.height % 2 == 0); mpc_rec.height /= 2; } return mpc_rec; @@ -198,7 +199,7 @@ static struct spl_rect calculate_odm_slice_in_timing_active(struct spl_in *spl_i return spl_in->basic_out.odm_slice_rect; } -static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_recout(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out) { /* * A plane clip represents the desired plane size and position in Stream @@ -341,21 +342,23 @@ static void spl_calculate_recout(struct spl_in 
*spl_in, struct spl_out *spl_out) /* shift the overlapping area so it is with respect to current * ODM slice's position */ - spl_out->scl_data.recout = shift_rec( + spl_scratch->scl_data.recout = shift_rec( &overlapping_area, -odm_slice.x, -odm_slice.y); - spl_out->scl_data.recout.height -= + spl_scratch->scl_data.recout.height -= spl_in->debug.visual_confirm_base_offset; - spl_out->scl_data.recout.height -= + spl_scratch->scl_data.recout.height -= spl_in->debug.visual_confirm_dpp_offset; } else /* if there is no overlap, zero recout */ - memset(&spl_out->scl_data.recout, 0, + memset(&spl_scratch->scl_data.recout, 0, sizeof(struct spl_rect)); } /* Calculate scaling ratios */ -static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_scaling_ratios(struct spl_in *spl_in, + struct spl_scratch *spl_scratch, + struct spl_out *spl_out) { const int in_w = spl_in->basic_out.src_rect.width; const int in_h = spl_in->basic_out.src_rect.height; @@ -366,72 +369,72 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out * /*Swap surf_src height and width since scaling ratios are in recout rotation*/ if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) - swap(surf_src.height, surf_src.width); + spl_swap(surf_src.height, surf_src.width); - spl_out->scl_data.ratios.horz = dc_fixpt_from_fraction( + spl_scratch->scl_data.ratios.horz = spl_fixpt_from_fraction( surf_src.width, spl_in->basic_in.dst_rect.width); - spl_out->scl_data.ratios.vert = dc_fixpt_from_fraction( + spl_scratch->scl_data.ratios.vert = spl_fixpt_from_fraction( surf_src.height, spl_in->basic_in.dst_rect.height); if (spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE) - spl_out->scl_data.ratios.horz.value *= 2; + spl_scratch->scl_data.ratios.horz.value *= 2; else if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) - spl_out->scl_data.ratios.vert.value *= 2; + spl_scratch->scl_data.ratios.vert.value *= 2; - spl_out->scl_data.ratios.vert.value = div64_s64( - spl_out->scl_data.ratios.vert.value * in_h, out_h); - spl_out->scl_data.ratios.horz.value = div64_s64( - spl_out->scl_data.ratios.horz.value * in_w, out_w); + spl_scratch->scl_data.ratios.vert.value = spl_div64_s64( + spl_scratch->scl_data.ratios.vert.value * in_h, out_h); + spl_scratch->scl_data.ratios.horz.value = spl_div64_s64( + spl_scratch->scl_data.ratios.horz.value * in_w, out_w); - spl_out->scl_data.ratios.horz_c = spl_out->scl_data.ratios.horz; - spl_out->scl_data.ratios.vert_c = spl_out->scl_data.ratios.vert; + spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz; + spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert; if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) { - spl_out->scl_data.ratios.horz_c.value /= 2; - spl_out->scl_data.ratios.vert_c.value /= 2; + spl_scratch->scl_data.ratios.horz_c.value /= 2; + spl_scratch->scl_data.ratios.vert_c.value /= 2; } - spl_out->scl_data.ratios.horz = dc_fixpt_truncate( - spl_out->scl_data.ratios.horz, 19); - spl_out->scl_data.ratios.vert = dc_fixpt_truncate( - spl_out->scl_data.ratios.vert, 19); - spl_out->scl_data.ratios.horz_c = dc_fixpt_truncate( - spl_out->scl_data.ratios.horz_c, 19); - spl_out->scl_data.ratios.vert_c = dc_fixpt_truncate( - spl_out->scl_data.ratios.vert_c, 19); + spl_scratch->scl_data.ratios.horz = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.horz, 19); 
+ spl_scratch->scl_data.ratios.vert = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.vert, 19); + spl_scratch->scl_data.ratios.horz_c = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.horz_c, 19); + spl_scratch->scl_data.ratios.vert_c = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.vert_c, 19); /* * Coefficient table and some registers are different based on ratio * that is output/input. Currently we calculate input/output * Store 1/ratio in recip_ratio for those lookups */ - spl_out->scl_data.recip_ratios.horz = dc_fixpt_recip( - spl_out->scl_data.ratios.horz); - spl_out->scl_data.recip_ratios.vert = dc_fixpt_recip( - spl_out->scl_data.ratios.vert); - spl_out->scl_data.recip_ratios.horz_c = dc_fixpt_recip( - spl_out->scl_data.ratios.horz_c); - spl_out->scl_data.recip_ratios.vert_c = dc_fixpt_recip( - spl_out->scl_data.ratios.vert_c); + spl_scratch->scl_data.recip_ratios.horz = spl_fixpt_recip( + spl_scratch->scl_data.ratios.horz); + spl_scratch->scl_data.recip_ratios.vert = spl_fixpt_recip( + spl_scratch->scl_data.ratios.vert); + spl_scratch->scl_data.recip_ratios.horz_c = spl_fixpt_recip( + spl_scratch->scl_data.ratios.horz_c); + spl_scratch->scl_data.recip_ratios.vert_c = spl_fixpt_recip( + spl_scratch->scl_data.ratios.vert_c); } /* Calculate Viewport size */ -static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_scratch *spl_scratch) { - spl_out->scl_data.viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz, - spl_out->scl_data.recout.width)); - spl_out->scl_data.viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert, - spl_out->scl_data.recout.height)); - spl_out->scl_data.viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz_c, - spl_out->scl_data.recout.width)); - spl_out->scl_data.viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert_c, - spl_out->scl_data.recout.height)); + spl_scratch->scl_data.viewport.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz, + spl_scratch->scl_data.recout.width)); + spl_scratch->scl_data.viewport.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert, + spl_scratch->scl_data.recout.height)); + spl_scratch->scl_data.viewport_c.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz_c, + spl_scratch->scl_data.recout.width)); + spl_scratch->scl_data.viewport_c.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert_c, + spl_scratch->scl_data.recout.height)); if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) { - swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); - swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); + spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); } } @@ -468,13 +471,13 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, int recout_size, int src_size, int taps, - struct fixed31_32 ratio, - struct fixed31_32 init_adj, - struct fixed31_32 *init, + struct spl_fixed31_32 ratio, + struct spl_fixed31_32 init_adj, + struct spl_fixed31_32 *init, int *vp_offset, int *vp_size) { - struct fixed31_32 temp; + struct spl_fixed31_32 temp; int int_part; /* @@ -487,33 +490,33 @@ static void 
spl_calculate_init_and_vp(bool flip_scan_dir, * init_bot = init + scaling_ratio * to get pixel perfect combine add the fraction from calculating vp offset */ - temp = dc_fixpt_mul_int(ratio, recout_offset_within_recout_full); - *vp_offset = dc_fixpt_floor(temp); + temp = spl_fixpt_mul_int(ratio, recout_offset_within_recout_full); + *vp_offset = spl_fixpt_floor(temp); temp.value &= 0xffffffff; - *init = dc_fixpt_add(dc_fixpt_div_int(dc_fixpt_add_int(ratio, taps + 1), 2), temp); - *init = dc_fixpt_add(*init, init_adj); - *init = dc_fixpt_truncate(*init, 19); + *init = spl_fixpt_add(spl_fixpt_div_int(spl_fixpt_add_int(ratio, taps + 1), 2), temp); + *init = spl_fixpt_add(*init, init_adj); + *init = spl_fixpt_truncate(*init, 19); /* * If viewport has non 0 offset and there are more taps than covered by init then * we should decrease the offset and increase init so we are never sampling * outside of viewport. */ - int_part = dc_fixpt_floor(*init); + int_part = spl_fixpt_floor(*init); if (int_part < taps) { int_part = taps - int_part; if (int_part > *vp_offset) int_part = *vp_offset; *vp_offset -= int_part; - *init = dc_fixpt_add_int(*init, int_part); + *init = spl_fixpt_add_int(*init, int_part); } /* * If taps are sampling outside of viewport at end of recout and there are more pixels * available in the surface we should increase the viewport size, regardless set vp to * only what is used. */ - temp = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_size - 1)); - *vp_size = dc_fixpt_floor(temp); + temp = spl_fixpt_add(*init, spl_fixpt_mul_int(ratio, recout_size - 1)); + *vp_size = spl_fixpt_floor(temp); if (*vp_size + *vp_offset > src_size) *vp_size = src_size - *vp_offset; @@ -536,7 +539,8 @@ static bool spl_is_yuv420(enum spl_pixel_format format) } /*Calculate inits and viewport */ -static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, + struct spl_scratch *spl_scratch) { struct spl_rect src = spl_in->basic_in.src_rect; struct spl_rect recout_dst_in_active_timing; @@ -547,11 +551,11 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 
2 : 1; bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; - struct fixed31_32 init_adj_h = dc_fixpt_zero; - struct fixed31_32 init_adj_v = dc_fixpt_zero; + struct spl_fixed31_32 init_adj_h = spl_fixpt_zero; + struct spl_fixed31_32 init_adj_v = spl_fixpt_zero; recout_clip_in_active_timing = shift_rec( - &spl_out->scl_data.recout, odm_slice.x, odm_slice.y); + &spl_scratch->scl_data.recout, odm_slice.x, odm_slice.y); recout_dst_in_active_timing = calculate_plane_rec_in_timing_active( spl_in, &spl_in->basic_in.dst_rect); overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing, @@ -574,8 +578,8 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ &flip_horz_scan_dir); if (orthogonal_rotation) { - swap(src.width, src.height); - swap(flip_vert_scan_dir, flip_horz_scan_dir); + spl_swap(src.width, src.height); + spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); } if (spl_is_yuv420(spl_in->basic_in.format)) { @@ -587,17 +591,17 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ switch (spl_in->basic_in.cositing) { case CHROMA_COSITING_LEFT: - init_adj_h = dc_fixpt_zero; - init_adj_v = dc_fixpt_from_fraction(sign, 4); + init_adj_h = spl_fixpt_zero; + init_adj_v = spl_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_NONE: - init_adj_h = dc_fixpt_from_fraction(sign, 4); - init_adj_v = dc_fixpt_from_fraction(sign, 4); + init_adj_h = spl_fixpt_from_fraction(sign, 4); + init_adj_v = spl_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_TOPLEFT: default: - init_adj_h = dc_fixpt_zero; - init_adj_v = dc_fixpt_zero; + init_adj_h = spl_fixpt_zero; + init_adj_v = spl_fixpt_zero; break; } } @@ -605,58 +609,58 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_out->scl_data.recout.width, + spl_scratch->scl_data.recout.width, src.width, - spl_out->scl_data.taps.h_taps, - spl_out->scl_data.ratios.horz, - dc_fixpt_zero, - &spl_out->scl_data.inits.h, - &spl_out->scl_data.viewport.x, - &spl_out->scl_data.viewport.width); + spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.ratios.horz, + spl_fixpt_zero, + &spl_scratch->scl_data.inits.h, + &spl_scratch->scl_data.viewport.x, + &spl_scratch->scl_data.viewport.width); spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_out->scl_data.recout.width, + spl_scratch->scl_data.recout.width, src.width / vpc_div, - spl_out->scl_data.taps.h_taps_c, - spl_out->scl_data.ratios.horz_c, + spl_scratch->scl_data.taps.h_taps_c, + spl_scratch->scl_data.ratios.horz_c, init_adj_h, - &spl_out->scl_data.inits.h_c, - &spl_out->scl_data.viewport_c.x, - &spl_out->scl_data.viewport_c.width); + &spl_scratch->scl_data.inits.h_c, + &spl_scratch->scl_data.viewport_c.x, + &spl_scratch->scl_data.viewport_c.width); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - spl_out->scl_data.recout.height, + spl_scratch->scl_data.recout.height, src.height, - spl_out->scl_data.taps.v_taps, - spl_out->scl_data.ratios.vert, - dc_fixpt_zero, - &spl_out->scl_data.inits.v, - &spl_out->scl_data.viewport.y, - &spl_out->scl_data.viewport.height); + spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.ratios.vert, + spl_fixpt_zero, + &spl_scratch->scl_data.inits.v, + &spl_scratch->scl_data.viewport.y, + &spl_scratch->scl_data.viewport.height); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - 
spl_out->scl_data.recout.height, + spl_scratch->scl_data.recout.height, src.height / vpc_div, - spl_out->scl_data.taps.v_taps_c, - spl_out->scl_data.ratios.vert_c, + spl_scratch->scl_data.taps.v_taps_c, + spl_scratch->scl_data.ratios.vert_c, init_adj_v, - &spl_out->scl_data.inits.v_c, - &spl_out->scl_data.viewport_c.y, - &spl_out->scl_data.viewport_c.height); + &spl_scratch->scl_data.inits.v_c, + &spl_scratch->scl_data.viewport_c.y, + &spl_scratch->scl_data.viewport_c.height); if (orthogonal_rotation) { - swap(spl_out->scl_data.viewport.x, spl_out->scl_data.viewport.y); - swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); - swap(spl_out->scl_data.viewport_c.x, spl_out->scl_data.viewport_c.y); - swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + spl_swap(spl_scratch->scl_data.viewport.x, spl_scratch->scl_data.viewport.y); + spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); + spl_swap(spl_scratch->scl_data.viewport_c.x, spl_scratch->scl_data.viewport_c.y); + spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); } - spl_out->scl_data.viewport.x += src.x; - spl_out->scl_data.viewport.y += src.y; - ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); - spl_out->scl_data.viewport_c.x += src.x / vpc_div; - spl_out->scl_data.viewport_c.y += src.y / vpc_div; + spl_scratch->scl_data.viewport.x += src.x; + spl_scratch->scl_data.viewport.y += src.y; + SPL_ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); + spl_scratch->scl_data.viewport_c.x += src.x / vpc_div; + spl_scratch->scl_data.viewport_c.y += src.y / vpc_div; } static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) @@ -667,7 +671,7 @@ static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) * This may break with rotation, good thing we aren't mixing hw rotation and 3d */ if (spl_in->basic_in.mpc_combine_v) { - ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || + SPL_ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || (spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM && spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE)); if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) @@ -708,7 +712,7 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, const struct spl_scaler_data *data, bool enable_isharp, bool enable_easf) { - const long long one = dc_fixpt_one.value; + const long long one = spl_fixpt_one.value; enum spl_pixel_format pixel_format = spl_in->basic_in.format; /* Bypass if ratio is 1:1 with no ISHARP or force scale on */ @@ -762,7 +766,7 @@ static bool spl_choose_lls_policy(enum spl_pixel_format format, } /* Enable EASF ?*/ -static bool enable_easf(struct spl_in *spl_in, struct spl_out *spl_out) +static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch) { int vratio = 0; int hratio = 0; @@ -778,8 +782,8 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_out *spl_out) spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, &spl_in->lls_pref); - vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); if (!lls_enable_easf || spl_in->disable_easf) skip_easf = true; @@ -799,7 +803,7 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_out *spl_out) } /* Check if video is in 
fullscreen mode */ -static bool spl_is_video_fullscreen(struct spl_in *spl_in, struct spl_out *spl_out) +static bool spl_is_video_fullscreen(struct spl_in *spl_in) { if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen) return true; @@ -807,16 +811,16 @@ static bool spl_is_video_fullscreen(struct spl_in *spl_in, struct spl_out *spl_o } static bool spl_get_isharp_en(struct spl_in *spl_in, - struct spl_out *spl_out) + struct spl_scratch *spl_scratch) { bool enable_isharp = false; int vratio = 0; int hratio = 0; - struct spl_taps taps = spl_out->scl_data.taps; - bool fullscreen = spl_is_video_fullscreen(spl_in, spl_out); + struct spl_taps taps = spl_scratch->scl_data.taps; + bool fullscreen = spl_is_video_fullscreen(spl_in); - vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); /* Return if adaptive sharpness is disabled */ if (spl_in->adaptive_sharpness.enable == false) @@ -848,7 +852,7 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, /* Calculate optimal number of taps */ static bool spl_get_optimal_number_of_taps( - int max_downscale_src_width, struct spl_in *spl_in, struct spl_out *spl_out, + int max_downscale_src_width, struct spl_in *spl_in, struct spl_scratch *spl_scratch, const struct spl_taps *in_taps, bool *enable_easf_v, bool *enable_easf_h, bool *enable_isharp) { @@ -858,13 +862,13 @@ static bool spl_get_optimal_number_of_taps( enum lb_memory_config lb_config; bool skip_easf = false; - if (spl_out->scl_data.viewport.width > spl_out->scl_data.h_active && + if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && - spl_out->scl_data.viewport.width > max_downscale_src_width) + spl_scratch->scl_data.viewport.width > max_downscale_src_width) return false; /* Check if we are using EASF or not */ - skip_easf = enable_easf(spl_in, spl_out); + skip_easf = enable_easf(spl_in, spl_scratch); /* * Set default taps if none are provided @@ -873,57 +877,57 @@ static bool spl_get_optimal_number_of_taps( */ if (skip_easf) { if (in_taps->h_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) - spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil( - spl_out->scl_data.ratios.horz), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz) > 1) + spl_scratch->scl_data.taps.h_taps = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz), 8); else - spl_out->scl_data.taps.h_taps = 4; + spl_scratch->scl_data.taps.h_taps = 4; } else - spl_out->scl_data.taps.h_taps = in_taps->h_taps; + spl_scratch->scl_data.taps.h_taps = in_taps->h_taps; if (in_taps->v_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) - spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert, 2)), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1) + spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert, 2)), 8); else - spl_out->scl_data.taps.v_taps = 4; + spl_scratch->scl_data.taps.v_taps = 4; } else - spl_out->scl_data.taps.v_taps = in_taps->v_taps; + spl_scratch->scl_data.taps.v_taps = in_taps->v_taps; if (in_taps->v_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) - spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert_c, 2)), 8); + if 
(spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1) + spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert_c, 2)), 8); else - spl_out->scl_data.taps.v_taps_c = 4; + spl_scratch->scl_data.taps.v_taps_c = 4; } else - spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; + spl_scratch->scl_data.taps.v_taps_c = in_taps->v_taps_c; if (in_taps->h_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) - spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil( - spl_out->scl_data.ratios.horz_c), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz_c) > 1) + spl_scratch->scl_data.taps.h_taps_c = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz_c), 8); else - spl_out->scl_data.taps.h_taps_c = 4; + spl_scratch->scl_data.taps.h_taps_c = 4; } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) /* Only 1 and even h_taps_c are supported by hw */ - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; else - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c; } else { if (spl_is_yuv420(spl_in->basic_in.format)) { - spl_out->scl_data.taps.h_taps = 6; - spl_out->scl_data.taps.v_taps = 6; - spl_out->scl_data.taps.h_taps_c = 4; - spl_out->scl_data.taps.v_taps_c = 4; + spl_scratch->scl_data.taps.h_taps = 6; + spl_scratch->scl_data.taps.v_taps = 6; + spl_scratch->scl_data.taps.h_taps_c = 4; + spl_scratch->scl_data.taps.v_taps_c = 4; } else { /* RGB */ - spl_out->scl_data.taps.h_taps = 6; - spl_out->scl_data.taps.v_taps = 6; - spl_out->scl_data.taps.h_taps_c = 6; - spl_out->scl_data.taps.v_taps_c = 6; + spl_scratch->scl_data.taps.h_taps = 6; + spl_scratch->scl_data.taps.v_taps = 6; + spl_scratch->scl_data.taps.h_taps_c = 6; + spl_scratch->scl_data.taps.v_taps_c = 6; } } /*Ensure we can support the requested number of vtaps*/ - min_taps_y = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - min_taps_c = dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c); + min_taps_y = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c); /* Use LB_MEMORY_CONFIG_3 for 4:2:0 */ if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8) @@ -932,16 +936,16 @@ static bool spl_get_optimal_number_of_taps( else lb_config = LB_MEMORY_CONFIG_0; // Determine max vtap support by calculating how much line buffer can fit - spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_out->scl_data, + spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_scratch->scl_data, lb_config, &num_part_y, &num_part_c); /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */ - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 2) - max_taps_y = num_part_y - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) - 2); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 2) + max_taps_y = num_part_y - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) - 2); else max_taps_y = num_part_y; - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 2) - max_taps_c = num_part_c - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) - 2); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 2) + max_taps_c = num_part_c - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) - 2); else max_taps_c = num_part_c; @@ -950,11 +954,11 @@ static bool spl_get_optimal_number_of_taps( else if (max_taps_c < min_taps_c) return false; - 
if (spl_out->scl_data.taps.v_taps > max_taps_y) - spl_out->scl_data.taps.v_taps = max_taps_y; + if (spl_scratch->scl_data.taps.v_taps > max_taps_y) + spl_scratch->scl_data.taps.v_taps = max_taps_y; - if (spl_out->scl_data.taps.v_taps_c > max_taps_c) - spl_out->scl_data.taps.v_taps_c = max_taps_c; + if (spl_scratch->scl_data.taps.v_taps_c > max_taps_c) + spl_scratch->scl_data.taps.v_taps_c = max_taps_c; if (!skip_easf) { /* @@ -971,45 +975,45 @@ static bool spl_get_optimal_number_of_taps( * If optimal no of taps is 7 or 8, then fine since max tap is 6 * */ - if (spl_out->scl_data.taps.v_taps == 5) - spl_out->scl_data.taps.v_taps = 4; + if (spl_scratch->scl_data.taps.v_taps == 5) + spl_scratch->scl_data.taps.v_taps = 4; - if (spl_out->scl_data.taps.v_taps_c == 5) - spl_out->scl_data.taps.v_taps_c = 4; + if (spl_scratch->scl_data.taps.v_taps_c == 5) + spl_scratch->scl_data.taps.v_taps_c = 4; - if (spl_out->scl_data.taps.h_taps == 5) - spl_out->scl_data.taps.h_taps = 4; + if (spl_scratch->scl_data.taps.h_taps == 5) + spl_scratch->scl_data.taps.h_taps = 4; - if (spl_out->scl_data.taps.h_taps_c == 5) - spl_out->scl_data.taps.h_taps_c = 4; + if (spl_scratch->scl_data.taps.h_taps_c == 5) + spl_scratch->scl_data.taps.h_taps_c = 4; if (spl_is_yuv420(spl_in->basic_in.format)) { - if ((spl_out->scl_data.taps.h_taps <= 4) || - (spl_out->scl_data.taps.h_taps_c <= 3)) { + if ((spl_scratch->scl_data.taps.h_taps <= 4) || + (spl_scratch->scl_data.taps.h_taps_c <= 3)) { *enable_easf_v = false; *enable_easf_h = false; - } else if ((spl_out->scl_data.taps.v_taps <= 3) || - (spl_out->scl_data.taps.v_taps_c <= 3)) { + } else if ((spl_scratch->scl_data.taps.v_taps <= 3) || + (spl_scratch->scl_data.taps.v_taps_c <= 3)) { *enable_easf_v = false; *enable_easf_h = true; } else { *enable_easf_v = true; *enable_easf_h = true; } - ASSERT((spl_out->scl_data.taps.v_taps > 1) && - (spl_out->scl_data.taps.v_taps_c > 1)); + SPL_ASSERT((spl_scratch->scl_data.taps.v_taps > 1) && + (spl_scratch->scl_data.taps.v_taps_c > 1)); } else { /* RGB */ - if (spl_out->scl_data.taps.h_taps <= 3) { + if (spl_scratch->scl_data.taps.h_taps <= 3) { *enable_easf_v = false; *enable_easf_h = false; - } else if (spl_out->scl_data.taps.v_taps < 3) { + } else if (spl_scratch->scl_data.taps.v_taps < 3) { *enable_easf_v = false; *enable_easf_h = true; } else { *enable_easf_v = true; *enable_easf_h = true; } - ASSERT(spl_out->scl_data.taps.v_taps > 1); + SPL_ASSERT(spl_scratch->scl_data.taps.v_taps > 1); } } else { *enable_easf_v = false; @@ -1024,29 +1028,29 @@ static bool spl_get_optimal_number_of_taps( * EASF is not enabled */ - *enable_isharp = spl_get_isharp_en(spl_in, spl_out); + *enable_isharp = spl_get_isharp_en(spl_in, spl_scratch); if (!*enable_isharp && !spl_in->basic_out.always_scale) { - if ((IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) && - (IDENTITY_RATIO(spl_out->scl_data.ratios.vert))) { - spl_out->scl_data.taps.h_taps = 1; - spl_out->scl_data.taps.v_taps = 1; + if ((IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) { + spl_scratch->scl_data.taps.h_taps = 1; + spl_scratch->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) - spl_out->scl_data.taps.h_taps_c = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)) + spl_scratch->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) - spl_out->scl_data.taps.v_taps_c = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)) + 
spl_scratch->scl_data.taps.v_taps_c = 1; *enable_easf_v = false; *enable_easf_h = false; } else { if ((!*enable_easf_h) && - (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c))) - spl_out->scl_data.taps.h_taps_c = 1; + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))) + spl_scratch->scl_data.taps.h_taps_c = 1; if ((!*enable_easf_v) && - (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c))) - spl_out->scl_data.taps.v_taps_c = 1; + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) + spl_scratch->scl_data.taps.v_taps_c = 1; } } return true; @@ -1069,38 +1073,38 @@ static void spl_set_black_color_data(enum spl_pixel_format format, static void spl_set_manual_ratio_init_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *scl_data) { - struct fixed31_32 bot; + struct spl_fixed31_32 bot; - dscl_prog_data->ratios.h_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.horz) << 5; - dscl_prog_data->ratios.v_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.vert) << 5; - dscl_prog_data->ratios.h_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.horz_c) << 5; - dscl_prog_data->ratios.v_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.vert_c) << 5; + dscl_prog_data->ratios.h_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.horz) << 5; + dscl_prog_data->ratios.v_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.vert) << 5; + dscl_prog_data->ratios.h_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.horz_c) << 5; + dscl_prog_data->ratios.v_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.vert_c) << 5; /* * 0.24 format for fraction, first five bits zeroed */ dscl_prog_data->init.h_filter_init_frac = - dc_fixpt_u0d19(scl_data->inits.h) << 5; + spl_fixpt_u0d19(scl_data->inits.h) << 5; dscl_prog_data->init.h_filter_init_int = - dc_fixpt_floor(scl_data->inits.h); + spl_fixpt_floor(scl_data->inits.h); dscl_prog_data->init.h_filter_init_frac_c = - dc_fixpt_u0d19(scl_data->inits.h_c) << 5; + spl_fixpt_u0d19(scl_data->inits.h_c) << 5; dscl_prog_data->init.h_filter_init_int_c = - dc_fixpt_floor(scl_data->inits.h_c); + spl_fixpt_floor(scl_data->inits.h_c); dscl_prog_data->init.v_filter_init_frac = - dc_fixpt_u0d19(scl_data->inits.v) << 5; + spl_fixpt_u0d19(scl_data->inits.v) << 5; dscl_prog_data->init.v_filter_init_int = - dc_fixpt_floor(scl_data->inits.v); + spl_fixpt_floor(scl_data->inits.v); dscl_prog_data->init.v_filter_init_frac_c = - dc_fixpt_u0d19(scl_data->inits.v_c) << 5; + spl_fixpt_u0d19(scl_data->inits.v_c) << 5; dscl_prog_data->init.v_filter_init_int_c = - dc_fixpt_floor(scl_data->inits.v_c); + spl_fixpt_floor(scl_data->inits.v_c); - bot = dc_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); - dscl_prog_data->init.v_filter_init_bot_frac = dc_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int = dc_fixpt_floor(bot); - bot = dc_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); - dscl_prog_data->init.v_filter_init_bot_frac_c = dc_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int_c = dc_fixpt_floor(bot); + bot = spl_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); + dscl_prog_data->init.v_filter_init_bot_frac = spl_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int = spl_fixpt_floor(bot); + bot = spl_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); + dscl_prog_data->init.v_filter_init_bot_frac_c = spl_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int_c = spl_fixpt_floor(bot); } static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, @@ -1113,22 +1117,22 @@ static void spl_set_taps_data(struct 
dscl_prog_data *dscl_prog_data, } /* Populate dscl prog data structure from scaler data calculated by SPL */ -static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out, - bool enable_easf_v, bool enable_easf_h, bool enable_isharp) +static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *spl_scratch, + struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, bool enable_isharp) { struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; - const struct spl_scaler_data *data = &spl_out->scl_data; + const struct spl_scaler_data *data = &spl_scratch->scl_data; struct scl_black_color *scl_black_color = &dscl_prog_data->scl_black_color; bool enable_easf = enable_easf_v || enable_easf_h; // Set values for recout - dscl_prog_data->recout = spl_out->scl_data.recout; + dscl_prog_data->recout = spl_scratch->scl_data.recout; // Set values for MPC Size - dscl_prog_data->mpc_size.width = spl_out->scl_data.h_active; - dscl_prog_data->mpc_size.height = spl_out->scl_data.v_active; + dscl_prog_data->mpc_size.width = spl_scratch->scl_data.h_active; + dscl_prog_data->mpc_size.height = spl_scratch->scl_data.v_active; // SCL_MODE - Set SCL_MODE data dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data, enable_isharp, @@ -1143,15 +1147,15 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou // Set HTaps/VTaps spl_set_taps_data(dscl_prog_data, data); // Set viewport - dscl_prog_data->viewport = spl_out->scl_data.viewport; + dscl_prog_data->viewport = spl_scratch->scl_data.viewport; // Set viewport_c - dscl_prog_data->viewport_c = spl_out->scl_data.viewport_c; + dscl_prog_data->viewport_c = spl_scratch->scl_data.viewport_c; // Set filters data spl_set_filters_data(dscl_prog_data, data, enable_easf_v, enable_easf_h); } /* Set EASF data */ -static void spl_set_easf_data(struct spl_out *spl_out, bool enable_easf_v, +static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, enum spl_pixel_format format, enum system_setup setup) { @@ -1164,47 +1168,47 @@ static void spl_set_easf_data(struct spl_out *spl_out, bool enable_easf_v, dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode /* 2-bit, BF3 chroma mode correction calculation mode */ dscl_prog_data->easf_v_bf3_mode = spl_get_v_bf3_mode( - spl_out->scl_data.recip_ratios.vert); + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ minCoef ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt = - spl_get_3tap_dntilt_uptilt_offset(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_dntilt_uptilt_offset(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ upTiltMaxVal ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt_max = - spl_get_3tap_uptilt_maxval(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_uptilt_maxval(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ dnTiltSlope ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_slope = - spl_get_3tap_dntilt_slope(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_dntilt_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ upTilt1Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope = - spl_get_3tap_uptilt1_slope(spl_out->scl_data.taps.v_taps, - 
spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_uptilt1_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ upTilt2Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope = - spl_get_3tap_uptilt2_slope(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_uptilt2_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ upTilt2Offset ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset = - spl_get_3tap_uptilt2_offset(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_uptilt2_offset(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_v_ringest_eventap_reduceg1 = - spl_get_reducer_gain4(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_reducer_gain4(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_v_ringest_eventap_reduceg2 = - spl_get_reducer_gain6(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_reducer_gain6(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain1 = - spl_get_gainRing4(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_gainRing4(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain2 = - spl_get_gainRing6(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_gainRing6(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 1 dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0 @@ -1330,23 +1334,23 @@ static void spl_set_easf_data(struct spl_out *spl_out, bool enable_easf_v, 0xF; // 4-bit, BF2 calculation mode /* 2-bit, BF3 chroma mode correction calculation mode */ dscl_prog_data->easf_h_bf3_mode = spl_get_h_bf3_mode( - spl_out->scl_data.recip_ratios.horz); + spl_scratch->scl_data.recip_ratios.horz); /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_h_ringest_eventap_reduceg1 = - spl_get_reducer_gain4(spl_out->scl_data.taps.h_taps, - spl_out->scl_data.recip_ratios.horz); + spl_get_reducer_gain4(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_h_ringest_eventap_reduceg2 = - spl_get_reducer_gain6(spl_out->scl_data.taps.h_taps, - spl_out->scl_data.recip_ratios.horz); + spl_get_reducer_gain6(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain1 = - spl_get_gainRing4(spl_out->scl_data.taps.h_taps, - spl_out->scl_data.recip_ratios.horz); + spl_get_gainRing4(spl_scratch->scl_data.taps.h_taps, + 
spl_scratch->scl_data.recip_ratios.horz); /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain2 = - spl_get_gainRing6(spl_out->scl_data.taps.h_taps, - spl_out->scl_data.recip_ratios.horz); + spl_get_gainRing6(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1 dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==0 @@ -1524,7 +1528,7 @@ static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data, static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, - const struct spl_scaler_data *data, struct fixed31_32 ratio, + const struct spl_scaler_data *data, struct spl_fixed31_32 ratio, enum system_setup setup) { /* Turn off sharpener if not required */ @@ -1638,40 +1642,44 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool enable_easf_h = false; int vratio = 0; int hratio = 0; - const struct spl_scaler_data *data = &spl_out->scl_data; - struct fixed31_32 isharp_scale_ratio; + struct spl_scratch spl_scratch; + struct spl_fixed31_32 isharp_scale_ratio; enum system_setup setup; bool enable_isharp = false; + const struct spl_scaler_data *data = &spl_scratch.scl_data; + memset(&spl_scratch, 0, sizeof(struct spl_scratch)); + spl_scratch.scl_data.h_active = spl_in->h_active; + spl_scratch.scl_data.v_active = spl_in->v_active; // All SPL calls /* recout calculation */ /* depends on h_active */ - spl_calculate_recout(spl_in, spl_out); + spl_calculate_recout(spl_in, &spl_scratch, spl_out); /* depends on pixel format */ - spl_calculate_scaling_ratios(spl_in, spl_out); + spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out); /* depends on scaling ratios and recout, does not calculate offset yet */ - spl_calculate_viewport_size(spl_in, spl_out); + spl_calculate_viewport_size(spl_in, &spl_scratch); res = spl_get_optimal_number_of_taps( spl_in->basic_out.max_downscale_src_width, spl_in, - spl_out, &spl_in->scaling_quality, &enable_easf_v, + &spl_scratch, &spl_in->scaling_quality, &enable_easf_v, &enable_easf_h, &enable_isharp); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario */ if (res) - spl_calculate_inits_and_viewports(spl_in, spl_out); + spl_calculate_inits_and_viewports(spl_in, &spl_scratch); // Handle 3d recout - spl_handle_3d_recout(spl_in, &spl_out->scl_data.recout); + spl_handle_3d_recout(spl_in, &spl_scratch.scl_data.recout); // Clamp - spl_clamp_viewport(&spl_out->scl_data.viewport); + spl_clamp_viewport(&spl_scratch.scl_data.viewport); if (!res) return res; // Save all calculated parameters in dscl_prog_data structure to program hw registers - spl_set_dscl_prog_data(spl_in, spl_out, enable_easf_v, enable_easf_h, enable_isharp); + spl_set_dscl_prog_data(spl_in, &spl_scratch, spl_out, enable_easf_v, enable_easf_h, enable_isharp); if (spl_in->lls_pref == LLS_PREF_YES) { if (spl_in->is_hdr_on) @@ -1685,15 +1693,15 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) setup = SDR_NL; } // Set EASF - spl_set_easf_data(spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, + 
spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, spl_in->basic_in.format, setup); // Set iSHARP - vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.horz); if (vratio <= hratio) - isharp_scale_ratio = spl_out->scl_data.recip_ratios.vert; + isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.vert; else - isharp_scale_ratio = spl_out->scl_data.recip_ratios.horz; + isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz; spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index a5e544406e91..d483f259512e 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -3,6 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. #include "dc_spl_types.h" +#include "spl_debug.h" #include "dc_spl_filters.h" #include "dc_spl_isharp_filters.h" @@ -631,10 +632,10 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) return filter_isharp_bs_3tap_64p_s1_12; } -void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum system_setup setup) +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup) { uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; - struct fixed31_32 sharp_base, sharp_calc, sharp_level, ratio_level; + struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level, ratio_level; int i, j; struct scale_ratio_to_sharpness_level_lookup *setup_lookup_ptr; int num_sharp_ramp_levels; @@ -680,12 +681,12 @@ void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum s * base scale ratio to sharpness curve */ j = 0; - sharp_level = dc_fixpt_zero; + sharp_level = spl_fixpt_zero; while (j < num_sharp_ramp_levels) { - ratio_level = dc_fixpt_from_fraction(setup_lookup_ptr->ratio_numer, + ratio_level = spl_fixpt_from_fraction(setup_lookup_ptr->ratio_numer, setup_lookup_ptr->ratio_denom); if (ratio.value >= ratio_level.value) { - sharp_level = dc_fixpt_from_fraction(setup_lookup_ptr->sharpness_numer, + sharp_level = spl_fixpt_from_fraction(setup_lookup_ptr->sharpness_numer, setup_lookup_ptr->sharpness_denom); break; } @@ -707,12 +708,12 @@ void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum s size_1dlut = sizeof(filter_isharp_1D_lut_3p0x); memset(byte_ptr_1dlut_dst, 0, size_1dlut); for (j = 0; j < size_1dlut; j++) { - sharp_base = dc_fixpt_from_int((int)*byte_ptr_1dlut_src); - sharp_calc = dc_fixpt_mul(sharp_base, sharp_level); - sharp_calc = dc_fixpt_div(sharp_calc, dc_fixpt_from_int(3)); - sharp_calc = dc_fixpt_min(dc_fixpt_from_int(255), sharp_calc); - sharp_calc = dc_fixpt_add(sharp_calc, dc_fixpt_from_fraction(1, 2)); - sharp_calc_int = dc_fixpt_floor(sharp_calc); + sharp_base = spl_fixpt_from_int((int)*byte_ptr_1dlut_src); + sharp_calc = spl_fixpt_mul(sharp_base, sharp_level); + sharp_calc = spl_fixpt_div(sharp_calc, spl_fixpt_from_int(3)); + sharp_calc = spl_fixpt_min(spl_fixpt_from_int(255), sharp_calc); + sharp_calc = spl_fixpt_add(sharp_calc, spl_fixpt_from_fraction(1, 2)); + sharp_calc_int = spl_fixpt_floor(sharp_calc); if (sharp_calc_int > 255) 
sharp_calc_int = 255; *byte_ptr_1dlut_dst = (uint8_t)sharp_calc_int; @@ -742,7 +743,6 @@ void spl_init_blur_scale_coeffs(void) filter_isharp_bs_4tap_in_6_64p_s1_12, 6); } -#ifdef CONFIG_DRM_AMD_DC_FP uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) { if (taps == 3) @@ -753,7 +753,7 @@ uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) return spl_get_filter_isharp_bs_4tap_in_6_64p(); else { /* should never happen, bug */ - BREAK_TO_DEBUGGER(); + SPL_BREAK_TO_DEBUGGER(); return NULL; } } @@ -767,5 +767,4 @@ void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p(data->taps.v_taps); } -#endif diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index c8b7cd6404dd..6cb000bf9d53 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -43,6 +43,6 @@ void spl_init_blur_scale_coeffs(void); void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *data); -void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum system_setup setup); +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup); uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c index 83dd3435ebcc..09bf82f7d468 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c @@ -2,6 +2,7 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
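The per-entry math in spl_build_isharp_1dlut_from_reference_curve() above rescales a reference curve that was pre-generated for sharpness 3.0: each LUT byte is multiplied by sharp_level, divided by 3, clamped to 255, and rounded by adding 1/2 before flooring. A self-contained sketch of the same arithmetic in plain doubles (the driver does this in spl_fixpt_* fixed point; this is illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Rescale one reference-curve byte to the requested sharpness level. */
static uint8_t scale_lut_entry(uint8_t base, double sharp_level)
{
	double v = (double)base * sharp_level / 3.0;

	if (v > 255.0)		/* clamp, as spl_fixpt_min(255, ...) does */
		v = 255.0;
	v += 0.5;		/* +1/2 then floor == round to nearest */
	if (v > 255.0)
		v = 255.0;
	return (uint8_t)v;	/* the cast truncates, i.e. floors */
}

int main(void)
{
	/* base entry 96 at sharpness level 1.5: 96 * 1.5 / 3 = 48 */
	printf("%u\n", scale_lut_entry(96, 1.5));
	return 0;
}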
+#include "spl_debug.h" #include "dc_spl_filters.h" #include "dc_spl_scl_filters.h" #include "dc_spl_scl_easf_filters.h" @@ -1406,67 +1407,67 @@ void spl_init_easf_filter_coeffs(void) easf_filter_6tap_64p_ratio_1_00_s1_12, 6); } -uint16_t *spl_get_easf_filter_3tap_64p(struct fixed31_32 ratio) +uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) return easf_filter_3tap_64p_ratio_0_30_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) return easf_filter_3tap_64p_ratio_0_40_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) return easf_filter_3tap_64p_ratio_0_50_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) return easf_filter_3tap_64p_ratio_0_60_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) return easf_filter_3tap_64p_ratio_0_70_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) return easf_filter_3tap_64p_ratio_0_80_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) return easf_filter_3tap_64p_ratio_0_90_s1_12; else return easf_filter_3tap_64p_ratio_1_00_s1_12; } -uint16_t *spl_get_easf_filter_4tap_64p(struct fixed31_32 ratio) +uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) return easf_filter_4tap_64p_ratio_0_30_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) return easf_filter_4tap_64p_ratio_0_40_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) return easf_filter_4tap_64p_ratio_0_50_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) return easf_filter_4tap_64p_ratio_0_60_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) return easf_filter_4tap_64p_ratio_0_70_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) return easf_filter_4tap_64p_ratio_0_80_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) return easf_filter_4tap_64p_ratio_0_90_s1_12; else return easf_filter_4tap_64p_ratio_1_00_s1_12; } -uint16_t *spl_get_easf_filter_6tap_64p(struct fixed31_32 ratio) +uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) return easf_filter_6tap_64p_ratio_0_30_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) return easf_filter_6tap_64p_ratio_0_40_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + else if (ratio.value < 
spl_fixpt_from_fraction(5, 10).value) return easf_filter_6tap_64p_ratio_0_50_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) return easf_filter_6tap_64p_ratio_0_60_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) return easf_filter_6tap_64p_ratio_0_70_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) return easf_filter_6tap_64p_ratio_0_80_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) return easf_filter_6tap_64p_ratio_0_90_s1_12; else return easf_filter_6tap_64p_ratio_1_00_s1_12; } -uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) { if (taps == 6) return spl_get_easf_filter_6tap_64p(ratio); @@ -1476,7 +1477,7 @@ uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio) return spl_get_easf_filter_3tap_64p(ratio); else { /* should never happen, bug */ - BREAK_TO_DEBUGGER(); + SPL_BREAK_TO_DEBUGGER(); return NULL; } } @@ -1517,7 +1518,7 @@ void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, } } -static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct fixed31_32 ratio, +static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct spl_fixed31_32 ratio, struct scale_ratio_to_reg_value_lookup *lookup_table_base_ptr, unsigned int num_entries) { @@ -1534,7 +1535,7 @@ static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct fixed31_32 ratio, if (lookup_table_index_ptr->numer < 0) break; - if (ratio.value < dc_fixpt_from_fraction( + if (ratio.value < spl_fixpt_from_fraction( lookup_table_index_ptr->numer, lookup_table_index_ptr->denom).value) { value = lookup_table_index_ptr->reg_value; @@ -1545,7 +1546,7 @@ static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct fixed31_32 ratio, } return value; } -uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio) +uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries = sizeof(easf_v_bf3_mode_lookup) / @@ -1554,7 +1555,7 @@ uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio) easf_v_bf3_mode_lookup, num_entries); return value; } -uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio) +uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries = sizeof(easf_h_bf3_mode_lookup) / @@ -1563,7 +1564,7 @@ uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio) easf_h_bf3_mode_lookup, num_entries); return value; } -uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio) +uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1582,7 +1583,7 @@ uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio) +uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1601,7 +1602,7 @@ uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_gainRing6(int taps, struct fixed31_32 ratio) +uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1620,7 +1621,7 @@ uint32_t 
spl_get_gainRing6(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio) +uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1639,7 +1640,7 @@ uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1653,7 +1654,7 @@ uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1667,7 +1668,7 @@ uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1681,7 +1682,7 @@ uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1695,7 +1696,7 @@ uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1709,7 +1710,7 @@ uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_uptilt2_offset(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h index 542b5ce1a385..8bb2b8108e38 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h @@ -14,25 +14,25 @@ struct scale_ratio_to_reg_value_lookup { }; void spl_init_easf_filter_coeffs(void); -uint16_t *spl_get_easf_filter_3tap_64p(struct fixed31_32 ratio); -uint16_t *spl_get_easf_filter_4tap_64p(struct fixed31_32 ratio); -uint16_t *spl_get_easf_filter_6tap_64p(struct fixed31_32 ratio); -uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio); +uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *data, bool enable_easf_v, bool enable_easf_h); -uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio); -uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio); -uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio); -uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio); -uint32_t spl_get_gainRing6(int 
taps, struct fixed31_32 ratio); -uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt2_offset(int taps, struct fixed31_32 ratio); +uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio); +uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio); +uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio); #endif /* __DC_SPL_SCL_EASF_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index 156f8171e44f..b9a7b77a7167 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -3,6 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. 
#include "dc_spl_types.h" +#include "spl_debug.h" #include "dc_spl_scl_filters.h" //========================================= // = 2 @@ -1318,97 +1319,97 @@ static const uint16_t filter_8tap_64p_183[264] = { 0x3FD4, 0x3F84, 0x0214, 0x0694, 0x0694, 0x0214, 0x3F84, 0x3FD4 }; -const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_3tap_16p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_3tap_16p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_3tap_16p_149; else return filter_3tap_16p_183; } -const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_3tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_3tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_3tap_64p_149; else return filter_3tap_64p_183; } -const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_4tap_16p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_4tap_16p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_4tap_16p_149; else return filter_4tap_16p_183; } -const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_4tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_4tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_4tap_64p_149; else return filter_4tap_64p_183; } -const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_5tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_5tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_5tap_64p_149; else return filter_5tap_64p_183; } -const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_6tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < 
spl_fixpt_from_fraction(4, 3).value) return filter_6tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_6tap_64p_149; else return filter_6tap_64p_183; } -const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_7tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_7tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_7tap_64p_149; else return filter_7tap_64p_183; } -const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_8tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_8tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_8tap_64p_149; else return filter_8tap_64p_183; @@ -1424,7 +1425,7 @@ const uint16_t *spl_get_filter_2tap_64p(void) return filter_2tap_64p; } -const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) { if (taps == 8) return spl_get_filter_8tap_64p(ratio); @@ -1444,7 +1445,7 @@ const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio return NULL; else { /* should never happen, bug */ - BREAK_TO_DEBUGGER(); + SPL_BREAK_TO_DEBUGGER(); return NULL; } } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h index 27590846d92a..48202bc4f81e 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h @@ -7,16 +7,16 @@ #include "dc_spl_types.h" -const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio); const uint16_t *spl_get_filter_2tap_16p(void); const uint16_t *spl_get_filter_2tap_64p(void); -const uint16_t *spl_dscl_get_filter_coeffs_64p(int 
taps, struct fixed31_32 ratio); +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); #endif /* __DC_SPL_SCL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index e54da5ea4ae8..1438a86826a4 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -2,15 +2,15 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "os_types.h" -#include "dc_hw_types.h" -#ifndef ASSERT -#define ASSERT(_bool) (void *)0 -#endif -#include "include/fixed31_32.h" // fixed31_32 and related functions #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ +#include "spl_os_types.h" // swap +#ifndef SPL_ASSERT +#define SPL_ASSERT(_bool) ((void *)0) +#endif +#include "spl_fixpt31_32.h" // fixed31_32 and related functions + enum lb_memory_config { /* Enable all 3 pieces of memory */ LB_MEMORY_CONFIG_0 = 0, @@ -39,16 +39,16 @@ struct spl_rect { }; struct spl_ratios { - struct fixed31_32 horz; - struct fixed31_32 vert; - struct fixed31_32 horz_c; - struct fixed31_32 vert_c; + struct spl_fixed31_32 horz; + struct spl_fixed31_32 vert; + struct spl_fixed31_32 horz_c; + struct spl_fixed31_32 vert_c; }; struct spl_inits { - struct fixed31_32 h; - struct fixed31_32 h_c; - struct fixed31_32 v; - struct fixed31_32 v_c; + struct spl_fixed31_32 h; + struct spl_fixed31_32 h_c; + struct spl_fixed31_32 v; + struct spl_fixed31_32 v_c; }; struct spl_taps { @@ -409,10 +409,15 @@ struct dscl_prog_data { }; /* SPL input and output definitions */ -// SPL outputs struct -struct spl_out { +// SPL scratch struct +struct spl_scratch { // Pack all SPL outputs in scl_data struct spl_scaler_data scl_data; +}; + +/* SPL input and output definitions */ +// SPL outputs struct +struct spl_out { // Pack all output need to program hw registers struct dscl_prog_data *dscl_prog_data; }; @@ -497,6 +502,8 @@ struct spl_in { struct spl_debug debug; bool is_fullscreen; bool is_hdr_on; + int h_active; + int v_active; }; // end of SPL inputs diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h new file mode 100644 index 000000000000..a36239ab8d1c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h @@ -0,0 +1,23 @@ +/* Copyright © 1997-2004 Advanced Micro Devices, Inc. All rights reserved. */ + +#ifndef SPL_DEBUG_H +#define SPL_DEBUG_H + +#ifdef SPL_ASSERT +#undef SPL_ASSERT +#endif +#define SPL_ASSERT(b) + +#define SPL_ASSERT_CRITICAL(expr) do {if (expr)/* Do nothing */; } while (0) + +#ifdef SPL_DALMSG +#undef SPL_DALMSG +#endif +#define SPL_DALMSG(b) + +#ifdef SPL_DAL_ASSERT_MSG +#undef SPL_DAL_ASSERT_MSG +#endif +#define SPL_DAL_ASSERT_MSG(b, m) + +#endif // SPL_DEBUG_H diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c new file mode 100644 index 000000000000..2bb1de88aef7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c @@ -0,0 +1,518 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "spl_fixpt31_32.h" + +static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL }; +static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL }; +static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL }; + +static inline unsigned long long abs_i64( + long long arg) +{ + if (arg > 0) + return (unsigned long long)arg; + else + return (unsigned long long)(-arg); +} + +/* + * @brief + * result = dividend / divisor + * *remainder = dividend % divisor + */ +static inline unsigned long long complete_integer_division_u64( + unsigned long long dividend, + unsigned long long divisor, + unsigned long long *remainder) +{ + unsigned long long result; + + ASSERT(divisor); + + result = spl_div64_u64_rem(dividend, divisor, remainder); + + return result; +} + + +#define FRACTIONAL_PART_MASK \ + ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) + +#define GET_INTEGER_PART(x) \ + ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + +#define GET_FRACTIONAL_PART(x) \ + (FRACTIONAL_PART_MASK & (x)) + +struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator) +{ + struct spl_fixed31_32 res; + + bool arg1_negative = numerator < 0; + bool arg2_negative = denominator < 0; + + unsigned long long arg1_value = arg1_negative ? -numerator : numerator; + unsigned long long arg2_value = arg2_negative ? -denominator : denominator; + + unsigned long long remainder; + + /* determine integer part */ + + unsigned long long res_value = complete_integer_division_u64( + arg1_value, arg2_value, &remainder); + + ASSERT(res_value <= LONG_MAX); + + /* determine fractional part */ + { + unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; + + do { + remainder <<= 1; + + res_value <<= 1; + + if (remainder >= arg2_value) { + res_value |= 1; + remainder -= arg2_value; + } + } while (--i != 0); + } + + /* round up LSB */ + { + unsigned long long summand = (remainder << 1) >= arg2_value; + + ASSERT(res_value <= LLONG_MAX - summand); + + res_value += summand; + } + + res.value = (long long)res_value; + + if (arg1_negative ^ arg2_negative) + res.value = -res.value; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + bool arg1_negative = arg1.value < 0; + bool arg2_negative = arg2.value < 0; + + unsigned long long arg1_value = arg1_negative ? 
-arg1.value : arg1.value; + unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; + + unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); + unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); + + unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); + unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); + + unsigned long long tmp; + + res.value = arg1_int * arg2_int; + + ASSERT(res.value <= (long long)LONG_MAX); + + res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; + + tmp = arg1_int * arg2_fra; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg2_int * arg1_fra; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg1_fra * arg2_fra; + + tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + + (tmp >= (unsigned long long)spl_fixpt_half.value); + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + if (arg1_negative ^ arg2_negative) + res.value = -res.value; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res; + + unsigned long long arg_value = abs_i64(arg.value); + + unsigned long long arg_int = GET_INTEGER_PART(arg_value); + + unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); + + unsigned long long tmp; + + res.value = arg_int * arg_int; + + ASSERT(res.value <= (long long)LONG_MAX); + + res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; + + tmp = arg_int * arg_fra; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg_fra * arg_fra; + + tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + + (tmp >= (unsigned long long)spl_fixpt_half.value); + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) +{ + /* + * @note + * Good idea to use Newton's method + */ + + ASSERT(arg.value); + + return spl_fixpt_from_fraction( + spl_fixpt_one.value, + arg.value); +} + +struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 square; + + struct spl_fixed31_32 res = spl_fixpt_one; + + int n = 27; + + struct spl_fixed31_32 arg_norm = arg; + + if (spl_fixpt_le( + spl_fixpt_two_pi, + spl_fixpt_abs(arg))) { + arg_norm = spl_fixpt_sub( + arg_norm, + spl_fixpt_mul_int( + spl_fixpt_two_pi, + (int)spl_div64_s64( + arg_norm.value, + spl_fixpt_two_pi.value))); + } + + square = spl_fixpt_sqr(arg_norm); + + do { + res = spl_fixpt_sub( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + square, + res), + n * (n - 1))); + + n -= 2; + } while (n > 2); + + if (arg.value != arg_norm.value) + res = spl_fixpt_div( + spl_fixpt_mul(res, arg_norm), + arg); + + return res; +} + +struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg) +{ + return spl_fixpt_mul( + arg, + spl_fixpt_sinc(arg)); +} + +struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg) +{ + /* TODO implement argument normalization */ + + const struct spl_fixed31_32 square = spl_fixpt_sqr(arg); + + struct spl_fixed31_32 res = spl_fixpt_one; + + int n = 26; + + do { + res = spl_fixpt_sub( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + square, + res), + n * (n - 1))); + + n -= 2; + } while (n != 0); + + return res; +} + +/* + * @brief + * result = exp(arg), + * where abs(arg) < 1 + * + * Calculated as 
Taylor series. + */ +static struct spl_fixed31_32 fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg) +{ + unsigned int n = 9; + + struct spl_fixed31_32 res = spl_fixpt_from_fraction( + n + 2, + n + 1); + /* TODO find correct res */ + + ASSERT(spl_fixpt_lt(arg, spl_fixpt_one)); + + do + res = spl_fixpt_add( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + arg, + res), + n)); + while (--n != 1); + + return spl_fixpt_add( + spl_fixpt_one, + spl_fixpt_mul( + arg, + res)); +} + +struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg) +{ + /* + * @brief + * Main equation is: + * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), + * where m = round(x / ln(2)), r = x - m * ln(2) + */ + + if (spl_fixpt_le( + spl_fixpt_ln2_div_2, + spl_fixpt_abs(arg))) { + int m = spl_fixpt_round( + spl_fixpt_div( + arg, + spl_fixpt_ln2)); + + struct spl_fixed31_32 r = spl_fixpt_sub( + arg, + spl_fixpt_mul_int( + spl_fixpt_ln2, + m)); + + ASSERT(m != 0); + + ASSERT(spl_fixpt_lt( + spl_fixpt_abs(r), + spl_fixpt_one)); + + if (m > 0) + return spl_fixpt_shl( + fixed31_32_exp_from_taylor_series(r), + (unsigned char)m); + else + return spl_fixpt_div_int( + fixed31_32_exp_from_taylor_series(r), + 1LL << -m); + } else if (arg.value != 0) + return fixed31_32_exp_from_taylor_series(arg); + else + return spl_fixpt_one; +} + +struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one); + /* TODO improve 1st estimation */ + + struct spl_fixed31_32 error; + + ASSERT(arg.value > 0); + /* TODO if arg is negative, return NaN */ + /* TODO if arg is zero, return -INF */ + + do { + struct spl_fixed31_32 res1 = spl_fixpt_add( + spl_fixpt_sub( + res, + spl_fixpt_one), + spl_fixpt_div( + arg, + spl_fixpt_exp(res))); + + error = spl_fixpt_sub( + res, + res1); + + res = res1; + /* TODO determine max_allowed_error based on quality of exp() */ + } while (abs_i64(error.value) > 100ULL); + + return res; +} + + +/* this function is a generic helper to translate fixed point value to + * specified integer format that will consist of integer_bits integer part and + * fractional_bits fractional part. For example it is used in + * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional + * part in 32 bits. It is used in hw programming (scaler) + */ + +static inline unsigned int ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + /* 1. create mask of integer part */ + unsigned int result = (1 << integer_bits) - 1; + /* 2. mask out fractional part */ + unsigned int fractional_part = FRACTIONAL_PART_MASK & value; + /* 3. shrink fixed point integer part to be of integer_bits width*/ + result &= GET_INTEGER_PART(value); + /* 4. make space for fractional part to be filled in after integer */ + result <<= fractional_bits; + /* 5. shrink fixed point fractional part to of fractional_bits width*/ + fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; + /* 6. 
merge the result */ + return result | fractional_part; +} + +static inline unsigned int clamp_ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits, + unsigned int min_clamp) +{ + unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); + + if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) + return (1 << (integer_bits + fractional_bits)) - 1; + else if (truncated_val > min_clamp) + return truncated_val; + else + return min_clamp; +} + +unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 4, 19); +} + +unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 3, 19); +} + +unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 2, 19); +} + +unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 0, 19); +} + +unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 14, 1); +} + +unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 10, 1); +} + +int spl_fixpt_s4d19(struct spl_fixed31_32 arg) +{ + if (arg.value < 0) + return -(int)ux_dy(spl_fixpt_abs(arg).value, 4, 19); + else + return ux_dy(arg.value, 4, 19); +} + +struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct spl_fixed31_32 fixpt_value = spl_fixpt_zero; + struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero; + long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; + + fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + frac_mask = frac_mask << fractional_bits; + fixpt_int_value.value = value & frac_mask; + fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + fixpt_value.value |= fixpt_int_value.value; + return fixpt_value; +} + +struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value); + + fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + return fixpt_value; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h new file mode 100644 index 000000000000..27ec6d416b7c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h @@ -0,0 +1,546 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __SPL_FIXED31_32_H__ +#define __SPL_FIXED31_32_H__ + +#include "os_types.h" +#include "spl_os_types.h" // swap +#ifndef ASSERT +#define ASSERT(_bool) ((void *)0) +#endif + +#ifndef LLONG_MAX +#define LLONG_MAX 9223372036854775807ll +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1ll) +#endif + +#define FIXED31_32_BITS_PER_FRACTIONAL_PART 32 +#ifndef LLONG_MIN +#define LLONG_MIN (1LL<<63) +#endif +#ifndef LLONG_MAX +#define LLONG_MAX (-1LL>>1) +#endif + +/* + * @brief + * Arithmetic operations on real numbers + * represented as fixed-point numbers. + * There are: 1 bit for sign, + * 31 bit for integer part, + * 32 bits for fractional part. + * + * @note + * Currently, overflows and underflows are asserted; + * no special result returned. + */ + +struct spl_fixed31_32 { + long long value; +}; + + +/* + * @brief + * Useful constants + */ + +static const struct spl_fixed31_32 spl_fixpt_zero = { 0 }; +static const struct spl_fixed31_32 spl_fixpt_epsilon = { 1LL }; +static const struct spl_fixed31_32 spl_fixpt_half = { 0x80000000LL }; +static const struct spl_fixed31_32 spl_fixpt_one = { 0x100000000LL }; + +/* + * @brief + * Initialization routines + */ + +/* + * @brief + * result = numerator / denominator + */ +struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator); + +/* + * @brief + * result = arg + */ +static inline struct spl_fixed31_32 spl_fixpt_from_int(int arg) +{ + struct spl_fixed31_32 res; + + res.value = (long long) arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; + + return res; +} + +/* + * @brief + * Unary operators + */ + +/* + * @brief + * result = -arg + */ +static inline struct spl_fixed31_32 spl_fixpt_neg(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res; + + res.value = -arg.value; + + return res; +} + +/* + * @brief + * result = abs(arg) := (arg >= 0) ? arg : -arg + */ +static inline struct spl_fixed31_32 spl_fixpt_abs(struct spl_fixed31_32 arg) +{ + if (arg.value < 0) + return spl_fixpt_neg(arg); + else + return arg; +} + +/* + * @brief + * Binary relational operators + */ + +/* + * @brief + * result = arg1 < arg2 + */ +static inline bool spl_fixpt_lt(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value < arg2.value; +} + +/* + * @brief + * result = arg1 <= arg2 + */ +static inline bool spl_fixpt_le(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value <= arg2.value; +} + +/* + * @brief + * result = arg1 == arg2 + */ +static inline bool spl_fixpt_eq(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value == arg2.value; +} + +/* + * @brief + * result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_min(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value <= arg2.value) + return arg1; + else + return arg2; +} + +/* + * @brief + * result = max(arg1, arg2) := (arg1 <= arg2) ? 
arg2 : arg1 + */ +static inline struct spl_fixed31_32 spl_fixpt_max(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value <= arg2.value) + return arg2; + else + return arg1; +} + +/* + * @brief + * | min_value, when arg <= min_value + * result = | arg, when min_value < arg < max_value + * | max_value, when arg >= max_value + */ +static inline struct spl_fixed31_32 spl_fixpt_clamp( + struct spl_fixed31_32 arg, + struct spl_fixed31_32 min_value, + struct spl_fixed31_32 max_value) +{ + if (spl_fixpt_le(arg, min_value)) + return min_value; + else if (spl_fixpt_le(max_value, arg)) + return max_value; + else + return arg; +} + +/* + * @brief + * Binary shift operators + */ + +/* + * @brief + * result = arg << shift + */ +static inline struct spl_fixed31_32 spl_fixpt_shl(struct spl_fixed31_32 arg, unsigned char shift) +{ + ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || + ((arg.value < 0) && (arg.value >= ~(LLONG_MAX >> shift)))); + + arg.value = arg.value << shift; + + return arg; +} + +/* + * @brief + * result = arg >> shift + */ +static inline struct spl_fixed31_32 spl_fixpt_shr(struct spl_fixed31_32 arg, unsigned char shift) +{ + bool negative = arg.value < 0; + + if (negative) + arg.value = -arg.value; + arg.value = arg.value >> shift; + if (negative) + arg.value = -arg.value; + return arg; +} + +/* + * @brief + * Binary additive operators + */ + +/* + * @brief + * result = arg1 + arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_add(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || + ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); + + res.value = arg1.value + arg2.value; + + return res; +} + +/* + * @brief + * result = arg1 + arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_add_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_add(arg1, spl_fixpt_from_int(arg2)); +} + +/* + * @brief + * result = arg1 - arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_sub(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || + ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); + + res.value = arg1.value - arg2.value; + + return res; +} + +/* + * @brief + * result = arg1 - arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_sub_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_sub(arg1, spl_fixpt_from_int(arg2)); +} + + +/* + * @brief + * Binary multiplicative operators + */ + +/* + * @brief + * result = arg1 * arg2 + */ +struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2); + + +/* + * @brief + * result = arg1 * arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_mul_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_mul(arg1, spl_fixpt_from_int(arg2)); +} + +/* + * @brief + * result = square(arg) := arg * arg + */ +struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg); + +/* + * @brief + * result = arg1 / arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_div_int(struct spl_fixed31_32 arg1, long long arg2) +{ + return spl_fixpt_from_fraction(arg1.value, spl_fixpt_from_int((int)arg2).value); +} + +/* + * @brief + * result = arg1 / arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_div(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return spl_fixpt_from_fraction(arg1.value, 
arg2.value); +} + +/* + * @brief + * Reciprocal function + */ + +/* + * @brief + * result = reciprocal(arg) := 1 / arg + * + * @note + * No special actions taken in case argument is zero. + */ +struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg); + +/* + * @brief + * Trigonometric functions + */ + +/* + * @brief + * result = sinc(arg) := sin(arg) / arg + * + * @note + * Argument specified in radians, + * internally it's normalized to [-2pi...2pi] range. + */ +struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg); + +/* + * @brief + * result = sin(arg) + * + * @note + * Argument specified in radians, + * internally it's normalized to [-2pi...2pi] range. + */ +struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg); + +/* + * @brief + * result = cos(arg) + * + * @note + * Argument specified in radians + * and should be in [-2pi...2pi] range - + * passing arguments outside that range + * will cause incorrect result! + */ +struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg); + +/* + * @brief + * Transcendent functions + */ + +/* + * @brief + * result = exp(arg) + * + * @note + * Currently, function is verified for abs(arg) <= 1. + */ +struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg); + +/* + * @brief + * result = log(arg) + * + * @note + * Currently, abs(arg) should be less than 1. + * No normalization is done. + * Currently, no special actions taken + * in case of invalid argument(s). Take care! + */ +struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg); + +/* + * @brief + * Power function + */ + +/* + * @brief + * result = pow(arg1, arg2) + * + * @note + * Currently, abs(arg1) should be less than 1. Take care! + */ +static inline struct spl_fixed31_32 spl_fixpt_pow(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value == 0) + return arg2.value == 0 ? spl_fixpt_one : spl_fixpt_zero; + + return spl_fixpt_exp( + spl_fixpt_mul( + spl_fixpt_log(arg1), + arg2)); +} + +/* + * @brief + * Rounding functions + */ + +/* + * @brief + * result = floor(arg) := greatest integer lower than or equal to arg + */ +static inline int spl_fixpt_floor(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* + * @brief + * result = round(arg) := integer nearest to arg + */ +static inline int spl_fixpt_round(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + const long long summand = spl_fixpt_half.value; + + ASSERT(LLONG_MAX - (long long)arg_value >= summand); + + arg_value += summand; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* + * @brief + * result = ceil(arg) := lowest integer greater than or equal to arg + */ +static inline int spl_fixpt_ceil(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? 
arg.value : -arg.value; + + const long long summand = spl_fixpt_one.value - + spl_fixpt_epsilon.value; + + ASSERT(LLONG_MAX - (long long)arg_value >= summand); + + arg_value += summand; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* the following two function are used in scaler hw programming to convert fixed + * point value to format 2 bits from integer part and 19 bits from fractional + * part. The same applies for u0d19, 0 bits from integer part and 19 bits from + * fractional + */ + +unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg); + +int spl_fixpt_s4d19(struct spl_fixed31_32 arg); + +static inline struct spl_fixed31_32 spl_fixpt_truncate(struct spl_fixed31_32 arg, unsigned int frac_bits) +{ + bool negative = arg.value < 0; + + if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) { + ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART); + return arg; + } + + if (negative) + arg.value = -arg.value; + arg.value &= (~0ULL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits); + if (negative) + arg.value = -arg.value; + return arg; +} + +struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits); +struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h new file mode 100644 index 000000000000..7ebea91c84f6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h @@ -0,0 +1,77 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * Copyright 2019 Raptor Engineering, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _SPL_OS_TYPES_H_
+#define _SPL_OS_TYPES_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ *
+ * general debug capabilities
+ *
+ */
+// TODO: need backport
+#define SPL_BREAK_TO_DEBUGGER() ASSERT(0)
+
+static inline uint64_t spl_div_u64_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder)
+{
+	return div_u64_rem(dividend, divisor, remainder);
+}
+
+static inline uint64_t spl_div_u64(uint64_t dividend, uint32_t divisor)
+{
+	return div_u64(dividend, divisor);
+}
+
+static inline uint64_t spl_div64_u64(uint64_t dividend, uint64_t divisor)
+{
+	return div64_u64(dividend, divisor);
+}
+
+static inline uint64_t spl_div64_u64_rem(uint64_t dividend, uint64_t divisor, uint64_t *remainder)
+{
+	return div64_u64_rem(dividend, divisor, remainder);
+}
+
+static inline int64_t spl_div64_s64(int64_t dividend, int64_t divisor)
+{
+	return div64_s64(dividend, divisor);
+}
+
+#define spl_swap(a, b) \
+	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#ifndef spl_min
+#define spl_min(a, b) (((a) < (b)) ? (a):(b))
+#endif
+
+#endif /* _SPL_OS_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index d4cf7ead1d87..990fa1f19c22 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -531,4 +531,10 @@ static inline struct fixed31_32 dc_fixpt_truncate(struct fixed31_32 arg, unsigne
 	return arg;
 }
 
+struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits);
+struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value,
+		unsigned int frac_value,
+		unsigned int integer_bits,
+		unsigned int fractional_bits);
+
 #endif

From 9932ca4c03c8181601f611dd366fceb765329800 Mon Sep 17 00:00:00 2001
From: Dillon Varone
Date: Wed, 3 Jul 2024 12:55:26 -0400
Subject: [PATCH 041/447] drm/amd/display: Add P-State Keepout to dcn401 Global Sync

[WHY&HOW]
OTG has new functionality that allows the P-State keepout to be set
relative to VStartup. This keepout region should be configured based on
the DML outputs, the same as the other global sync params.
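A condensed sketch of the plumbing that follows, since the same mechanical
change repeats across many files: every program_timing()/program_global_sync()
implementation gains one trailing pstate_keepout argument. The fragment below
mirrors the dcn10/dcn20 hwseq call sites in this patch; it is an abbreviation
for orientation, not a literal quote of any one hunk:

	/* pstate_keepout is carried in pipe_dlg_param and populated from the
	 * DML global sync outputs (see dml21_populate_pipe_ctx_dlg_params below).
	 */
	pipe_ctx->stream_res.tg->funcs->program_global_sync(
			pipe_ctx->stream_res.tg,
			calculate_vready_offset_for_group(pipe_ctx),
			pipe_ctx->pipe_dlg_param.vstartup_start,
			pipe_ctx->pipe_dlg_param.vupdate_offset,
			pipe_ctx->pipe_dlg_param.vupdate_width,
			pipe_ctx->pipe_dlg_param.pstate_keepout);

Only dcn401's optc401_program_global_sync() writes the new value to hardware,
via REG_UPDATE(OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, pstate_keepout);
the older timing generators accept the extra argument and ignore it (dce60 and
dce80 simply pass 0).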
Reviewed-by: Alvin Lee Signed-off-by: Jerry Zuo Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dce110/dce110_timing_generator.c | 1 + .../dc/dce110/dce110_timing_generator.h | 1 + .../dc/dce110/dce110_timing_generator_v.c | 1 + .../dc/dce120/dce120_timing_generator.c | 1 + .../display/dc/dce60/dce60_timing_generator.c | 3 +- .../display/dc/dce80/dce80_timing_generator.c | 3 +- .../amd/display/dc/dml/display_mode_structs.h | 1 + .../dc/dml2/dml21/dml21_translation_helper.c | 1 + .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 1 + .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 4 ++- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 7 ++-- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 1 + drivers/gpu/drm/amd/display/dc/inc/hw/optc.h | 5 ++- .../amd/display/dc/inc/hw/timing_generator.h | 4 ++- .../amd/display/dc/optc/dcn10/dcn10_optc.c | 9 +++-- .../amd/display/dc/optc/dcn10/dcn10_optc.h | 7 +++- .../amd/display/dc/optc/dcn401/dcn401_optc.c | 36 ++++++++++++++++++- .../amd/display/dc/optc/dcn401/dcn401_optc.h | 6 +++- .../dc/resource/dce110/dce110_resource.c | 1 + .../dc/resource/dcn401/dcn401_resource.h | 3 +- 20 files changed, 83 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 49bcfe6ec999..fa422a8cbced 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1955,6 +1955,7 @@ void dce110_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 28c58f1dff2d..ee4de740aceb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -261,6 +261,7 @@ void dce110_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index bf35dc65ca29..9837dec837ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -438,6 +438,7 @@ static void dce110_timing_generator_v_program_timing(struct timing_generator *tg int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index eb3557965781..fcf59348eb62 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -697,6 +697,7 @@ static void dce120_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c index 
c1a85ee374d9..e5fb0e8333e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c @@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios); } static void dce60_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 2df4654858be..003a9330c286 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios); } static void dce80_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 410e4b671228..641a8cd019cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -523,6 +523,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int vupdate_offset; unsigned int vupdate_width; unsigned int vready_offset; + unsigned int pstate_keepout; unsigned char interlaced; double pixel_rate_mhz; unsigned char synchronized_vblank_all_planes; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index e9647f068ee4..1fce61323201 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1129,6 +1129,7 @@ void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_ pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4.vupdate_offset_pixels; pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4.vupdate_vupdate_width_pixels; pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4.vready_offset_pixels; + pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4.pstate_keepout_start_lines; pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 982b2d5bfb5f..849b41f886d3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1549,6 +1549,7 @@ static enum dc_status dce110_enable_stream_timing( 0, 0, 0, + 0, pipe_ctx->stream->signal, true); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e06fc370267b..4846601c612d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1005,6 +1005,7 @@ enum dc_status dcn10_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -2995,7 +2996,8 @@ void dcn10_program_pipe( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 9a00479f0417..dd652436a539 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -909,6 +909,7 @@ enum dc_status dcn20_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -1885,7 +1886,8 @@ static void dcn20_program_pipe( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); @@ -2458,7 +2460,8 @@ bool dcn20_update_bandwidth( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 31e0e9210dd7..d0b4308dca96 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -871,6 +871,7 @@ enum dc_status dcn401_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h index 287bf8a90ff6..03cbcbb36f1c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -65,6 +65,7 @@ struct optc { int vupdate_offset; int vupdate_width; int vready_offset; + int pstate_keepout; struct dc_crtc_timing orginal_patched_timing; enum signal_type signal; }; @@ -110,6 +111,7 @@ void optc1_program_timing(struct timing_generator *optc, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios); @@ -127,7 +129,8 @@ void optc1_program_global_sync(struct timing_generator *optc, int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width); + int vupdate_width, 
+ int pstate_keepout); bool optc1_disable_crtc(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 0f453452234c..3d4c8bd42b49 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -172,6 +172,7 @@ struct timing_generator_funcs { int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios ); @@ -256,7 +257,8 @@ struct timing_generator_funcs { int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width); + int vupdate_width, + int pstate_keepout); void (*enable_optc_clock)(struct timing_generator *tg, bool enable); void (*program_stereo)(struct timing_generator *tg, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index 94427875bcdd..f00d27b7c6fe 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -65,7 +65,8 @@ void optc1_program_global_sync( int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width) + int vupdate_width, + int pstate_keepout) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -73,6 +74,7 @@ void optc1_program_global_sync( optc1->vstartup_start = vstartup_start; optc1->vupdate_offset = vupdate_offset; optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; if (optc1->vstartup_start == 0) { BREAK_TO_DEBUGGER(); @@ -157,6 +159,7 @@ void optc1_program_timing( int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { @@ -177,6 +180,7 @@ void optc1_program_timing( optc1->vstartup_start = vstartup_start; optc1->vupdate_offset = vupdate_offset; optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; patched_crtc_timing = *dc_crtc_timing; apply_front_porch_workaround(&patched_crtc_timing); optc1->orginal_patched_timing = patched_crtc_timing; @@ -282,7 +286,8 @@ void optc1_program_timing( vready_offset, vstartup_start, vupdate_offset, - vupdate_width); + vupdate_width, + pstate_keepout); optc->funcs->set_vtg_params(optc, dc_crtc_timing, true); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h index 369a13244e5e..b7a57f98553d 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h @@ -201,6 +201,7 @@ struct dcn_optc_registers { uint32_t OTG_CRC1_WINDOWB_Y_CONTROL_READBACK; uint32_t OPTC_CLOCK_CONTROL; uint32_t OPTC_WIDTH_CONTROL2; + uint32_t OTG_PSTATE_REGISTER; }; #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\ @@ -590,7 +591,11 @@ struct dcn_optc_registers { type OTG_V_COUNT_STOP_TIMER; #define TG_REG_FIELD_LIST_DCN401(type) \ - type OPTC_SEGMENT_WIDTH_LAST; + type OPTC_SEGMENT_WIDTH_LAST;\ + type OTG_PSTATE_KEEPOUT_START;\ + type OTG_PSTATE_EXTEND;\ + type OTG_UNBLANK;\ + type OTG_PSTATE_ALLOW_WIDTH_MIN; struct dcn_optc_shift { diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index 9f5c2efa7560..a5d6a7dca554 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ 
b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -396,13 +396,47 @@ void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, i } } +static void optc401_program_global_sync( + struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + int pstate_keepout) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; + + if (optc1->vstartup_start == 0) { + BREAK_TO_DEBUGGER(); + return; + } + + REG_SET(OTG_VSTARTUP_PARAM, 0, + VSTARTUP_START, optc1->vstartup_start); + + REG_SET_2(OTG_VUPDATE_PARAM, 0, + VUPDATE_OFFSET, optc1->vupdate_offset, + VUPDATE_WIDTH, optc1->vupdate_width); + + REG_SET(OTG_VREADY_PARAM, 0, + VREADY_OFFSET, optc1->vready_offset); + + REG_UPDATE(OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, pstate_keepout); +} + static struct timing_generator_funcs dcn401_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, + .program_global_sync = optc401_program_global_sync, .enable_crtc = optc401_enable_crtc, .disable_crtc = optc401_disable_crtc, .phantom_crtc_post_enable = optc401_phantom_crtc_post_enable, diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h index 3114ecef332a..bb13a645802d 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h @@ -155,7 +155,11 @@ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_EXTEND, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_UNBLANK, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_ALLOW_WIDTH_MIN, mask_sh) void dcn401_timing_generator_init(struct optc *optc1); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index fe518fd27b08..91da5cf85b69 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -1163,6 +1163,7 @@ static struct pipe_ctx *dce110_acquire_underlay( 0, 0, 0, + 0, pipe_ctx->stream->signal, false); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 26efeada4f41..106008593464 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -534,7 +534,8 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ SRI_ARR(OPTC_WIDTH_CONTROL2, ODM, inst), \ SRI_ARR(OPTC_MEMORY_CONFIG, 
ODM, inst), \ - SRI_ARR(OTG_DRR_CONTROL, OTG, inst) + SRI_ARR(OTG_DRR_CONTROL, OTG, inst), \ + SRI_ARR(OTG_PSTATE_REGISTER, OTG, inst) /* HUBBUB */ #define HUBBUB_REG_LIST_DCN4_01_RI(id) \ From 0beca868cde8742240cd0038141c30482d2b7eb8 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 16:45:39 -0600 Subject: [PATCH 042/447] drm/amd/display: Check link_res->hpo_dp_link_enc before using it [WHAT & HOW] Functions dp_enable_link_phy and dp_disable_link_phy can pass link_res without initializing hpo_dp_link_enc, so it is necessary to check for null before dereferencing. This fixes 2 FORWARD_NULL issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c index e1257404357b..d0148f10dfc0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c @@ -28,6 +28,8 @@ #include "dccg.h" #include "clk_mgr.h" +#define DC_LOGGER link->ctx->logger + void set_hpo_dp_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size) { @@ -124,6 +126,11 @@ void disable_hpo_dp_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal) { + if (!link_res->hpo_dp_link_enc) { + DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__); + return; + } + link_res->hpo_dp_link_enc->funcs->link_disable(link_res->hpo_dp_link_enc); link_res->hpo_dp_link_enc->funcs->disable_link_phy( link_res->hpo_dp_link_enc, signal); From be1fb44389ca3038ad2430dac4234669bc177ee3 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 25 Jun 2024 10:35:52 -0600 Subject: [PATCH 043/447] drm/amd/display: Check null pointers before use [WHAT & HOW] Pointers, such as dc->clk_mgr, are null checked previously in the same function, so Coverity warns "implies that "dc->clk_mgr" might be null". As a result, these pointers need to be checked when used again. This fixes 10 FORWARD_NULL issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c | 2 +- drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c | 3 ++- drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c | 3 ++- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 5 +++-- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 4 ++-- drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 4 ++-- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 8 ++++---- .../amd/display/dc/link/protocols/link_dp_capability.c | 2 +- 8 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index 78df96882d6e..f8409453434c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -195,7 +195,7 @@ void dce11_pplib_apply_display_requirements( * , then change minimum memory clock based on real-time bandwidth * limitation.
*/ - if ((dc->ctx->asic_id.chip_family == FAMILY_AI) && + if (dc->bw_vbios && (dc->ctx->asic_id.chip_family == FAMILY_AI) && ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) { pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz, (uint32_t) div64_s64( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c index bf399819ca80..22ac2b7e49ae 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c @@ -749,7 +749,8 @@ bool hubp1_is_flip_pending(struct hubp *hubp) if (flip_pending) return true; - if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) + if (hubp && + earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) return true; return false; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c index 6bba020ad6fb..0637e4c552d8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c @@ -927,7 +927,8 @@ bool hubp2_is_flip_pending(struct hubp *hubp) if (flip_pending) return true; - if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) + if (hubp && + earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) return true; return false; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 849b41f886d3..4593fb2a0536 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -949,7 +949,7 @@ void dce110_edp_backlight_control( { struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; - uint8_t pwrseq_instance; + uint8_t pwrseq_instance = 0; unsigned int pre_T11_delay = OLED_PRE_T11_DELAY; unsigned int post_T7_delay = OLED_POST_T7_DELAY; @@ -1002,7 +1002,8 @@ void dce110_edp_backlight_control( */ /* dc_service_sleep_in_milliseconds(50); */ /*edp 1.2*/ - pwrseq_instance = link->panel_cntl->pwrseq_inst; + if (link->panel_cntl) + pwrseq_instance = link->panel_cntl->pwrseq_inst; if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) { if (!link->dc->config.edp_no_power_sequencing) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 4846601c612d..212576dbc336 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1553,7 +1553,7 @@ void dcn10_init_hw(struct dc *dc) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); /* Align bw context with hw config when system resume. 
*/ - if (dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) { + if (dc->clk_mgr && dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) { dc->current_state->bw_ctx.bw.dcn.clk.dispclk_khz = dc->clk_mgr->clks.dispclk_khz; dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz = dc->clk_mgr->clks.dppclk_khz; } @@ -1673,7 +1673,7 @@ void dcn10_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 746c522adf84..3d4b31bd9946 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -256,10 +256,10 @@ void dcn31_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e4f7078c1026..ddf0807db627 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -235,7 +235,7 @@ void dcn35_init_hw(struct dc *dc) if (hws->funcs.enable_power_gating_plane) hws->funcs.enable_power_gating_plane(dc->hwseq, true); */ - if (res_pool->hubbub->funcs->dchubbub_init) + if (res_pool->hubbub && res_pool->hubbub->funcs->dchubbub_init) res_pool->hubbub->funcs->dchubbub_init(dc->res_pool->hubbub); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -328,10 +328,10 @@ void dcn35_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); @@ -1039,7 +1039,7 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (pipe_ctx->plane_res.hubp) update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->plane_res.hubp->inst] = false; - if (pipe_ctx->plane_res.dpp) + if (pipe_ctx->plane_res.dpp && pipe_ctx->plane_res.hubp) update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->plane_res.hubp->inst] = false; if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 
c257e733044a..f3b6d8936f91 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -2258,7 +2258,7 @@ bool dp_verify_link_cap_with_retries( memset(&link->verified_link_cap, 0, sizeof(struct dc_link_settings)); - if (!link_detect_connection_type(link, &type) || type == dc_connection_none) { + if (link->link_enc && (!link_detect_connection_type(link, &type) || type == dc_connection_none)) { link->verified_link_cap = fail_safe_link_settings; break; } else if (dp_verify_link_cap(link, known_limit_link_setting, &fail_count)) { From fdd5ecbbff751c3b9061d8ebb08e5c96119915b4 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 25 Jun 2024 10:37:35 -0600 Subject: [PATCH 044/447] drm/amd/display: Check null pointers before multiple uses [WHAT & HOW] Pointers, such as stream_enc and dc->bw_vbios, are null checked previously in the same function, so Coverity warns "implies that stream_enc and dc->bw_vbios might be null". They are used multiple times in the subsequent code and need to be checked. This fixes 10 FORWARD_NULL issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/core/dc_hw_sequencer.c | 96 ++++++++++--------- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 8 +- .../display/dc/link/accessories/link_dp_cts.c | 5 +- .../amd/display/dc/link/hwss/link_hwss_dio.c | 5 +- .../dc/resource/dce112/dce112_resource.c | 5 +- .../dc/resource/dcn32/dcn32_resource.c | 3 + .../resource/dcn32/dcn32_resource_helpers.c | 10 +- 7 files changed, 76 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 9e42a0128baa..5f9b6e8ef428 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -636,57 +636,59 @@ void hwss_build_fast_sequence(struct dc *dc, while (current_pipe) { current_mpc_pipe = current_pipe; while (current_mpc_pipe) { - if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state && current_mpc_pipe->plane_state->update_flags.raw) { - block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate; - block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; - (*num_steps)++; - } - if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) { - block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc; - block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips; - block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER; - (*num_steps)++; - } - if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { - if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && - stream_status->mall_stream_config.type == SUBVP_MAIN) { - block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; - block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address; -
block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; - block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR; + if (current_mpc_pipe->plane_state) { + if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state->update_flags.raw) { + block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate; + block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; + (*num_steps)++; + } + if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) { + block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc; + block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips; + block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER; + (*num_steps)++; + } + if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { + if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && + stream_status->mall_stream_config.type == SUBVP_MAIN) { + block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; + block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address; + block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; + block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR; + (*num_steps)++; + } + + block_sequence[*num_steps].params.update_plane_addr_params.dc = dc; + block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR; (*num_steps)++; } - block_sequence[*num_steps].params.update_plane_addr_params.dc = dc; - block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR; - (*num_steps)++; - } + if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) { + block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc; + block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state; + block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC; + (*num_steps)++; + } - if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) { - block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc; - block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state; - block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC; - (*num_steps)++; - } - - if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) { - block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP; - (*num_steps)++; - } - if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) { -
block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_SETUP_DPP; - (*num_steps)++; - } - if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) { - block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE; - (*num_steps)++; + if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) { + block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP; + (*num_steps)++; + } + if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) { + block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_SETUP_DPP; + (*num_steps)++; + } + if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) { + block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE; + (*num_steps)++; + } } if (hws->funcs.set_output_transfer_func && current_mpc_pipe->stream->update_flags.bits.out_tf) { block_sequence[*num_steps].params.set_output_transfer_func_params.dc = dc; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index dd652436a539..bd7b186fb2e4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2284,6 +2284,9 @@ void dcn20_post_unlock_program_front_end( } } + if (!hwseq) + return; + /* P-State support transitions: * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) @@ -2291,7 +2294,7 @@ void dcn20_post_unlock_program_front_end( * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes */ - if (hwseq && hwseq->funcs.update_force_pstate) + if (hwseq->funcs.update_force_pstate) dc->hwseq->funcs.update_force_pstate(dc, context); /* Only program the MALL registers after all the main and phantom pipes @@ -2531,6 +2534,9 @@ bool dcn20_wait_for_blank_complete( { int counter; + if (!opp) + return false; + for (counter = 0; counter < 1000; counter++) { if (!opp->funcs->dpg_is_pending(opp)) break; diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 555c1c484cfd..df3781081da7 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -804,8 +804,11 @@ bool dp_set_test_pattern( break; } + if (!pipe_ctx->stream) + return false; + if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_enable) { - if (pipe_ctx->stream && should_use_dmub_lock(pipe_ctx->stream->link)) { + if (should_use_dmub_lock(pipe_ctx->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index b76737b7b9e4..3e47a6735912 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ 
b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -74,7 +74,10 @@ void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; - if (stream_enc && stream_enc->funcs->disable_fifo) + if (!stream_enc) + return; + + if (stream_enc->funcs->disable_fifo) stream_enc->funcs->disable_fifo(stream_enc); if (stream_enc->funcs->set_input_mode) stream_enc->funcs->set_input_mode(stream_enc, 0); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 88afb2a30eef..162856c523e4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -1067,7 +1067,10 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) struct dm_pp_clock_levels clks = {0}; int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; - if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) + if (!dc->bw_vbios) + return; + + if (dc->bw_vbios->memory_type == bw_def_hbm) memory_type_multiplier = MEMORY_TYPE_HBM; /*do system clock TODO PPLIB: after PPLIB implement, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index ee009716d39b..6eaf3cfebcb7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1651,6 +1651,9 @@ static void dcn32_enable_phantom_plane(struct dc *dc, else phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state); + if (!phantom_plane) + continue; + memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, sizeof(phantom_plane->scaling_quality)); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c index d184105ce2b3..47c8a9fbe754 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c @@ -218,12 +218,12 @@ bool dcn32_is_center_timing(struct pipe_ctx *pipe) pipe->stream->timing.v_addressable != pipe->stream->src.height) { is_center_timing = true; } - } - if (pipe->plane_state) { - if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height && - pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) { - is_center_timing = true; + if (pipe->plane_state) { + if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height && + pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) { + is_center_timing = true; + } } } From 6d64d39486197083497a01b39e23f2f8474b35d3 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 3 Jul 2024 10:50:35 -0600 Subject: [PATCH 045/447] drm/amd/display: Increase array size of dummy_boolean [WHY] dml2_core_shared_mode_support and dml_core_mode_support access the third element of dummy_boolean, i.e. hw_debug5 = &s->dummy_boolean[2], when dummy_boolean has size of 2. Any assignment to hw_debug5 causes an OVERRUN. [HOW] Increase dummy_boolean's array size to 3. This fixes 2 OVERRUN issues reported by Coverity. 
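For illustration, here is a minimal standalone sketch of the overrun pattern described above (struct and variable names are reduced from the DML code; this is a hypothetical sketch, not the actual kernel source):

#include <stdbool.h>
#include <stdio.h>

struct mode_support_locals {
	bool dummy_boolean[3];	/* was [2]; element [2] is aliased below */
};

static void mode_support(struct mode_support_locals *s)
{
	/*
	 * With a two-element array this pointer refers one past the end,
	 * so the store below writes out of bounds (Coverity's OVERRUN).
	 */
	bool *hw_debug5 = &s->dummy_boolean[2];

	*hw_debug5 = true;	/* in bounds only once the array has 3 elements */
}

int main(void)
{
	struct mode_support_locals s = { .dummy_boolean = { false } };

	mode_support(&s);
	printf("hw_debug5 = %d\n", s.dummy_boolean[2]);
	return 0;
}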
Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 02498c0e3282..317008eff61b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -866,7 +866,7 @@ struct dml2_core_calcs_mode_support_locals { unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; - bool dummy_boolean[2]; + bool dummy_boolean[3]; unsigned int dummy_integer[3]; unsigned int dummy_integer_array[36][DML2_MAX_PLANES]; enum dml2_odm_mode dummy_odm_mode[DML2_MAX_PLANES]; From 323b19f424486e61784e8797a8bfb2e24585e162 Mon Sep 17 00:00:00 2001 From: Fudongwang Date: Mon, 1 Jul 2024 16:47:28 +0800 Subject: [PATCH 046/447] drm/amd/display: add dmcub support check [Why & How] For the DCN harvest case, if there is no dmcub support, we should return false to avoid a bugcheck later. Reviewed-by: Aric Cyr Signed-off-by: Jerry Zuo Signed-off-by: Fudongwang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 9897e322e2d5..f07b13ad4ead 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5574,6 +5574,9 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc) */ bool dc_is_dmub_outbox_supported(struct dc *dc) { + if (!dc->caps.dmcub_support) + return false; + switch (dc->ctx->asic_id.chip_family) { case FAMILY_YELLOW_CARP: From b8d3782da396215615c3d125f0829d1d06ad2c97 Mon Sep 17 00:00:00 2001 From: Mudimela Date: Tue, 2 Jul 2024 11:55:56 +0530 Subject: [PATCH 047/447] drm/amd/display: Refactor DWB related files out of dcn30 [Why] DWB related files belong in the dedicated DWB folder, not under dcn30. [How] Moved the DWB related files from dcn30 to the DWB folder and updated the Makefiles to fix compilation.
Reviewed-by: Martin Leung Signed-off-by: Jerry Zuo Signed-off-by: Mudimela Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 2 -- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c | 2 +- drivers/gpu/drm/amd/display/dc/dwb/Makefile | 9 +++++++++ .../drm/amd/display/dc/{ => dwb}/dcn30/dcn30_cm_common.h | 0 .../gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb.c | 0 .../gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb.h | 0 .../drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb_cm.c | 0 drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c | 1 - 8 files changed, 10 insertions(+), 4 deletions(-) rename drivers/gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_cm_common.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb_cm.c (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 804851247acc..ccb4b21338b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -25,8 +25,6 @@ DCN30 := dcn30_vpg.o \ dcn30_afmt.o \ - dcn30_dwb.o \ - dcn30_dwb_cm.o \ dcn30_cm_common.o \ dcn30_mmhubbub.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index b8327237ed44..685702321d32 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -28,7 +28,7 @@ #include "reg_helper.h" #include "dcn30/dcn30_dpp.h" #include "basics/conversion.h" -#include "dcn30_cm_common.h" +#include "dcn30/dcn30_cm_common.h" #include "custom_float.h" #define REG(reg) reg diff --git a/drivers/gpu/drm/amd/display/dc/dwb/Makefile b/drivers/gpu/drm/amd/display/dc/dwb/Makefile index 16f7a454fed9..3952ba4cd508 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dwb/Makefile @@ -24,6 +24,15 @@ # ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN30 +############################################################################### +DWB_DCN30 = dcn30_dwb.o dcn30_dwb_cm.o + +AMD_DAL_DWB_DCN30 = $(addprefix $(AMDDALPATH)/dc/dwb/dcn30/,$(DWB_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DWB_DCN30) + ############################################################################### # DCN35 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c 
b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c index b23a809999ed..d5e8294f5a16 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c @@ -21,7 +21,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ - #include "reg_helper.h" #include "dcn35_dwb.h" From 08cbe68d4aafcf651a86730a485df53e6ee9f594 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 3 Jul 2024 14:00:14 -0400 Subject: [PATCH 048/447] drm/amd/display: Export additional FAMS2 global configuration options from DML [WHY&HOW] Some global configuration options were previously hardcoded in DC, now they are exported by DML and sent to FW. Reviewed-by: Martin Leung Signed-off-by: Jerry Zuo Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_state.c | 4 +- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 36 ++--- .../amd/display/dc/dml2/dml21/dml21_utils.c | 145 ++++++++++-------- .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 1 + .../src/dml2_core/dml2_core_dcn4_calcs.c | 17 +- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 4 +- .../gpu/drm/amd/display/dc/inc/core_types.h | 2 +- 7 files changed, 112 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 665157f8d4cb..2597e3fd562b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -967,10 +967,10 @@ bool dc_state_is_fams2_in_use( bool is_fams2_in_use = false; if (state) - is_fams2_in_use |= state->bw_ctx.bw.dcn.fams2_stream_count > 0; + is_fams2_in_use |= state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; if (dc->current_state) - is_fams2_in_use |= dc->current_state->bw_ctx.bw.dcn.fams2_stream_count > 0; + is_fams2_in_use |= dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; return is_fams2_in_use; } diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index ded13026c8ff..fb3391854eed 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1672,22 +1672,17 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); - /* send global configuration parameters */ - global_cmd->config.global.max_allow_delay_us = 100 * 1000; //100ms - global_cmd->config.global.lock_wait_time_us = 5000; //5ms - global_cmd->config.global.recovery_timeout_us = 5000; //5ms - global_cmd->config.global.hwfq_flip_programming_delay_us = 100; //100us - - /* copy static feature configuration */ - global_cmd->config.global.features.all = dc->debug.fams2_config.all; - - /* apply feature configuration based on current driver state */ - global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2; - global_cmd->config.global.features.bits.enable = enable; - - /* construct per-stream configs */ if (enable) { - for (i = 0; i < context->bw_ctx.bw.dcn.fams2_stream_count; i++) { + /* send global configuration parameters */ 
+ memcpy(&global_cmd->config.global, &context->bw_ctx.bw.dcn.fams2_global_config, sizeof(struct dmub_cmd_fams2_global_config)); + + /* copy static feature configuration overrides */ + global_cmd->config.global.features.bits.enable_stall_recovery = dc->debug.fams2_config.bits.enable_stall_recovery; + global_cmd->config.global.features.bits.enable_debug = dc->debug.fams2_config.bits.enable_debug; + global_cmd->config.global.features.bits.enable_offload_flip = dc->debug.fams2_config.bits.enable_offload_flip; + + /* construct per-stream configs */ + for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) { struct dmub_rb_cmd_fams2 *stream_cmd = &cmd[i+1].fams2_config; /* configure command header */ @@ -1702,12 +1697,15 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, } } - if (enable && context->bw_ctx.bw.dcn.fams2_stream_count) { + /* apply feature configuration based on current driver state */ + global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2; + global_cmd->config.global.features.bits.enable = enable; + + if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) { /* set multi pending for global, and unset for last stream cmd */ - global_cmd->config.global.num_streams = context->bw_ctx.bw.dcn.fams2_stream_count; global_cmd->header.multi_cmd_pending = 1; - cmd[context->bw_ctx.bw.dcn.fams2_stream_count].fams2_config.header.multi_cmd_pending = 0; - num_cmds += context->bw_ctx.bw.dcn.fams2_stream_count; + cmd[context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0; + num_cmds += context->bw_ctx.bw.dcn.fams2_global_config.num_streams; } dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index 622c98f4b7fb..e11246e525ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -461,94 +461,103 @@ void dml21_build_fams2_programming(const struct dc *dc, struct dml2_context *dml_ctx) { int i, j, k; + unsigned int num_fams2_streams = 0; /* reset fams2 data */ - context->bw_ctx.bw.dcn.fams2_stream_count = 0; memset(&context->bw_ctx.bw.dcn.fams2_stream_params, 0, sizeof(struct dmub_fams2_stream_static_state) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); - if (!dml_ctx->v21.mode_programming.programming->fams2_required) - return; + if (dml_ctx->v21.mode_programming.programming->fams2_required) { + for (i = 0; i < context->stream_count; i++) { + int dml_stream_idx; + struct dc_stream_state *phantom_stream; + struct dc_stream_status *phantom_status; - for (i = 0; i < context->stream_count; i++) { - int dml_stream_idx; - struct dc_stream_state *phantom_stream; - struct dc_stream_status *phantom_status; + struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[num_fams2_streams]; - struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[context->bw_ctx.bw.dcn.fams2_stream_count]; + struct dc_stream_state *stream = context->streams[i]; - struct dc_stream_state *stream = context->streams[i]; - - if (context->stream_status[i].plane_count == 0 || - dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { - /* can ignore blanked or phantom 
streams */ - continue; - } - - dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); - if (dml_stream_idx < 0) { - ASSERT(dml_stream_idx >= 0); - continue; - } - - /* copy static state from PMO */ - memcpy(static_state, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, - sizeof(struct dmub_fams2_stream_static_state)); - - /* get information from context */ - static_state->num_planes = context->stream_status[i].plane_count; - static_state->otg_inst = context->stream_status[i].primary_otg_inst; - - /* populate pipe masks for planes */ - for (j = 0; j < context->stream_status[i].plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { - static_state->pipe_mask |= (1 << k); - static_state->plane_pipe_masks[j] |= (1 << k); - } + if (context->stream_status[i].plane_count == 0 || + dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { + /* can ignore blanked or phantom streams */ + continue; } - } - /* get per method programming */ - switch (static_state->type) { - case FAMS2_STREAM_TYPE_VBLANK: - case FAMS2_STREAM_TYPE_VACTIVE: - case FAMS2_STREAM_TYPE_DRR: - break; - case FAMS2_STREAM_TYPE_SUBVP: - phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); - if (!phantom_stream) - break; + dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); + if (dml_stream_idx < 0) { + ASSERT(dml_stream_idx >= 0); + continue; + } - phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + /* copy static state from PMO */ + memcpy(static_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, + sizeof(struct dmub_fams2_stream_static_state)); - /* phantom status should always be present */ - ASSERT(phantom_status); - static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + /* get information from context */ + static_state->num_planes = context->stream_status[i].plane_count; + static_state->otg_inst = context->stream_status[i].primary_otg_inst; - /* populate pipe masks for phantom planes */ - for (j = 0; j < phantom_status->plane_count; j++) { + /* populate pipe masks for planes */ + for (j = 0; j < context->stream_status[i].plane_count; j++) { for (k = 0; k < dc->res_pool->pipe_count; k++) { if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { - static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); - static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { + static_state->pipe_mask |= (1 << k); + static_state->plane_pipe_masks[j] |= (1 << k); } } } - break; - default: - ASSERT(false); - break; - } - context->bw_ctx.bw.dcn.fams2_stream_count++; + /* get per method programming */ + switch (static_state->type) { + case FAMS2_STREAM_TYPE_VBLANK: + case FAMS2_STREAM_TYPE_VACTIVE: + case FAMS2_STREAM_TYPE_DRR: + break; + case FAMS2_STREAM_TYPE_SUBVP: + phantom_stream = 
dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); + if (!phantom_stream) + break; + + phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + + /* phantom status should always be present */ + ASSERT(phantom_status); + static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + + /* populate pipe masks for phantom planes */ + for (j = 0; j < phantom_status->plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { + static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); + static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + } + } + } + break; + default: + ASSERT(false); + break; + } + + num_fams2_streams++; + } } - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_stream_count > 0; + if (num_fams2_streams > 0) { + /* copy FAMS2 configuration */ + memcpy(&context->bw_ctx.bw.dcn.fams2_global_config, + &dml_ctx->v21.mode_programming.programming->fams2_global_config, + sizeof(struct dmub_cmd_fams2_global_config)); + + context->bw_ctx.bw.dcn.fams2_global_config.num_streams = num_fams2_streams; + } + + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; } bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index b7a6f7f4c342..8c803b12404b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -351,6 +351,7 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in programming->fams2_required = display_cfg->stage3.fams2_required; dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); + programming->fams2_global_config.features.bits.enable = display_cfg->stage3.fams2_required; } // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 45e43a915fd6..3b1e5c548435 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -12221,12 +12221,19 @@ void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_interna const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config) { - fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; - fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; - fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; - fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required; - fams2_global_config->num_streams = 
display_cfg->display_config.num_streams; + fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required; + if (fams2_global_config->features.bits.enable) { + fams2_global_config->features.bits.enable_stall_recovery = true; + fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START; + + fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; + fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; + fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; + fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + + fams2_global_config->num_streams = display_cfg->display_config.num_streams; + } } void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index d0b4308dca96..f4c1547a368f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1531,7 +1531,7 @@ void dcn401_fams2_update_config(struct dc *dc, struct dc_state *context, bool en if (!dc->ctx || !dc->ctx->dmub_srv || !dc->debug.fams2_config.bits.enable) return; - fams2_required = context->bw_ctx.bw.dcn.fams2_stream_count > 0; + fams2_required = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; dc_dmub_srv_fams2_update_config(dc, context, enable && fams2_required); } @@ -1656,7 +1656,7 @@ void dcn401_hardware_release(struct dc *dc) */ if (dc->current_state) { if ((!dc->clk_mgr->clks.p_state_change_support || - dc->current_state->bw_ctx.bw.dcn.fams2_stream_count > 0) && + dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) && dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( dc->res_pool->hubbub, true, true); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 4c8e6436c7e1..bfb8b8502d20 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -534,8 +534,8 @@ struct dcn_bw_output { unsigned int legacy_svp_drr_stream_index; bool legacy_svp_drr_stream_index_valid; struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; + struct dmub_cmd_fams2_global_config fams2_global_config; struct dmub_fams2_stream_static_state fams2_stream_params[DML2_MAX_PLANES]; - unsigned fams2_stream_count; struct dml2_display_arb_regs arb_regs; }; From be7a6a5171649c39f79a6259518218351082bd99 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Wed, 3 Jul 2024 16:15:59 -0400 Subject: [PATCH 049/447] drm/amd/display: Check stream pointer is initialized before accessing it [Why & How] We calculate static screen wait frames based on the current timing info in the active stream. If the stream is not initialized, we should skip the calculation and go with the default values.
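As a standalone sketch of that guard (the struct types are cut down to the fields used here, and the wait-frame policy is invented purely for the example; the real change is the dcn35_set_drr hunk below):

#include <stdio.h>

struct dc_crtc_timing {
	unsigned int pix_clk_100hz;	/* pixel clock in 100 Hz units */
	unsigned int h_total;
	unsigned int v_total;
};

struct dc_stream_state {
	struct dc_crtc_timing timing;
};

/*
 * Derive the refresh rate only when the stream is initialized;
 * otherwise keep the caller's default wait-frame count.
 */
static unsigned int pick_wait_frames(const struct dc_stream_state *stream,
				     unsigned int default_frames)
{
	unsigned int refresh_hz;

	if (!stream)	/* uninitialized stream: keep the defaults */
		return default_frames;

	refresh_hz = stream->timing.pix_clk_100hz * 100 /
		     (stream->timing.h_total * stream->timing.v_total);

	/* illustrative policy only, not the driver's actual heuristic */
	return refresh_hz >= 120 ? default_frames * 2 : default_frames;
}

int main(void)
{
	struct dc_stream_state s = { { 1485000, 2200, 1125 } };	/* ~60 Hz */

	printf("%u %u\n", pick_wait_frames(&s, 2), pick_wait_frames(NULL, 2));
	return 0;
}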
Reviewed-by: Gabe Teeger Signed-off-by: Jerry Zuo Signed-off-by: Sung Joon Kim Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index ddf0807db627..ac1e3331a77c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1461,10 +1461,9 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, for (i = 0; i < num_pipes; i++) { if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { - struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; - struct dc *dc = pipe_ctx[i]->stream->ctx->dc; - - if (dc->debug.static_screen_wait_frames) { + if (pipe_ctx[i]->stream && pipe_ctx[i]->stream->ctx->dc->debug.static_screen_wait_frames) { + struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; + struct dc *dc = pipe_ctx[i]->stream->ctx->dc; unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total); if (frame_rate >= 120 && dc->caps.ips_support && From acce1eb8eb5de360fd82fd900454ff180912d8b1 Mon Sep 17 00:00:00 2001 From: Daniel Sa Date: Thu, 4 Jul 2024 13:41:15 -0400 Subject: [PATCH 050/447] drm/amd/display: Set Cursor Matrix to bypass instead of Input Plane [Why] When the cursor disappears/reappears on fullscreen video, there is a short transitional period where the cursor's color matrix is using the same format as the video plane. This momentarily sets the cursor to the wrong color until the UI plane appears and corrects the color. [How] Instead of defaulting to using the color space from the input plane, default to bypass mode. Reviewed-by: Nevenko Stupar Signed-off-by: Jerry Zuo Signed-off-by: Daniel Sa Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index d0f8c9ff5232..92b34fe47f74 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -246,16 +246,6 @@ void dpp401_set_cursor_matrix( enum dc_color_space color_space, struct dc_csc_transform cursor_csc_color_matrix) { - struct dpp_input_csc_matrix cursor_tbl_entry; - unsigned int i; - - if (cursor_csc_color_matrix.enable_adjustment == true) { - for (i = 0; i < 12; i++) - cursor_tbl_entry.regval[i] = cursor_csc_color_matrix.matrix[i]; - - cursor_tbl_entry.color_space = color_space; - dpp401_program_cursor_csc(dpp_base, color_space, &cursor_tbl_entry); - } else { - dpp401_program_cursor_csc(dpp_base, color_space, NULL); - } + //Since we don't have cursor matrix information, force bypass mode by passing in unknown color space + dpp401_program_cursor_csc(dpp_base, COLOR_SPACE_UNKNOWN, NULL); } From 748b3c4ca0bf43cec186ace2ecb33457d7a4653d Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Thu, 27 Jun 2024 12:11:58 -0400 Subject: [PATCH 051/447] drm/amd/display: Add visual confirm for Idle State [Why] Visual confirm would tell us if the idle state was ever entered.
[How] Add debug option for IPS visual confirm Reviewed-by: Ovidiu Bunea Signed-off-by: Jerry Zuo Signed-off-by: Duncan Ma Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 1 + drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c | 2 ++ drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 ++++ 4 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 83fe13f5a367..9d4b821ab219 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1056,6 +1056,7 @@ struct dc_debug_options { unsigned int force_sharpness; unsigned int force_lls; bool notify_dpia_hr_bw; + bool enable_ips_visual_confirm; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index ccf153b7a467..0f3d15126a1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -363,6 +363,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR; copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; copy_settings_data->debug.bitfields.force_full_frame_update = 0; + copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; if (psr_context->su_granularity_required == 0) copy_settings_data->su_y_granularity = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 2a21bcf5224f..44df9e2351c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -167,6 +167,8 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable; copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported; + copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; + copy_settings_data->flags.u32All = 0; copy_settings_data->flags.bitfields.fec_enable_status = (link->fec_state == dc_link_fec_enabled); copy_settings_data->flags.bitfields.dsc_enable_status = (pipe_ctx->stream->timing.flags.DSC == 1); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 5ff0a865705f..7c3838362c49 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -336,6 +336,10 @@ union dmub_psr_debug_flags { */ uint32_t back_to_back_flip : 1; + /** + * Enable visual confirm for IPS + */ + uint32_t enable_ips_visual_confirm : 1; } bitfields; /** From f59549c7e705be0087d08bc116ccc767b86d8362 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 3 Jul 2024 16:41:52 -0400 Subject: [PATCH 052/447] drm/amd/display: free bo used for dmub bounding box Fix a memory leak introduced by not freeing the buffer object used for early DMUB bounding box value storage. Fixes: 234e94555800 ("drm/amd/display: Enable copying of bounding box data from VBIOS DMUB") Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Jerry Zuo Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++++++++++-- 1 file changed, 11
insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a3b0f2748af0..fa8d455de7f5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1740,7 +1740,7 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * /* Send the chunk */ ret = dm_dmub_send_vbios_gpint_command(adev, send_addrs[i], chunk, 30000); if (ret != DMUB_STATUS_OK) - /* No need to free bb here since it shall be done unconditionally */ + /* No need to free bb here since it shall be done in dm_sw_fini() */ return NULL; } @@ -2465,8 +2465,17 @@ static int dm_sw_init(void *handle) static int dm_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct dal_allocation *da; + + list_for_each_entry(da, &adev->dm.da_list, list) { + if (adev->dm.bb_from_dmub == (void *) da->cpu_ptr) { + amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr); + list_del(&da->list); + kfree(da); + break; + } + } - kfree(adev->dm.bb_from_dmub); adev->dm.bb_from_dmub = NULL; kfree(adev->dm.dmub_fb_info); From 48eba83018c14ba6c102f5801d31c398807535d9 Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Thu, 4 Jul 2024 15:22:13 -0400 Subject: [PATCH 053/447] drm/amd/display: Remove unnecessary DSC power gating for DCN401 [Why] In some cases during topology changes, a pipe that was used to drive a stream being removed can be re-assigned to drive a different stream. In these cases, DSC power gating is not handled properly, leading to situations where DSC is being setup while power gated. [How] - remove enable_stream_gating and disable_stream_gating for DCN401 Reviewed-by: Wenjing Liu Signed-off-by: Jerry Zuo Signed-off-by: Joshua Aberback Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 6a768702c7bd..1439f07f0b64 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -115,8 +115,6 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, .enable_stream_timing = dcn401_enable_stream_timing, .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, .did_underflow_occur = dcn10_did_underflow_occur, .init_blank = dcn32_init_blank, From 6b0a9bf72c04cd8d2864fccce42817c24b060aec Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 9 Jul 2024 14:34:25 -0600 Subject: [PATCH 054/447] drm/amd/display: Remove unused dml2_core_ip_params struct Acked-by: Jerry Zuo Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 78 ------------------- 1 file changed, 78 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 8c803b12404b..f5c6cd5cf5e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ 
-77,84 +77,6 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = { .subvp_swath_height_margin_lines = 16, }; -struct dml2_core_ip_params core_dcn4sw_ip_caps_base = { - .vblank_nom_default_us = 668, - .remote_iommu_outstanding_translations = 256, - .rob_buffer_size_kbytes = 192, - .config_return_buffer_size_in_kbytes = 1280, - .config_return_buffer_segment_size_in_kbytes = 64, - .compressed_buffer_segment_size_in_kbytes = 64, - .dpte_buffer_size_in_pte_reqs_luma = 68, - .dpte_buffer_size_in_pte_reqs_chroma = 36, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, - .min_pixel_chunk_size_bytes = 1024, - .writeback_chunk_size_kbytes = 8, - .line_buffer_size_bits = 1171920, - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - - //Number of pipes after DCN Pipe harvesting - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dispclk_ramp_margin_percent = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .cursor_buffer_size = 24, - .cursor_chunk_size = 2, - .dispclk_delay_subtotal = 125, - .max_inter_dcn_tile_repeaters = 8, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .writeback_line_buffer_buffer_size = 0, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 5760, - .dsc422_native_support = true, - .dcc_supported = true, - .ptoi_supported = false, - - .cursor_64bpp_support = true, - .dynamic_metadata_vm_enabled = false, - - .max_num_hdmi_frl_outputs = 1, - .max_num_dp2p0_outputs = 4, - .max_num_dp2p0_streams = 4, - .imall_supported = 1, - .max_flip_time_us = 80, - .words_per_channel = 16, - - .subvp_fw_processing_delay_us = 15, - .subvp_pstate_allow_width_us = 20, - .subvp_swath_height_margin_lines = 16, - - .dcn_mrq_present = 1, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_zs = 64, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - - .dchub_arb_to_ret_delay = 102, - .hostvm_mode = 1, -}; - static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *ip_caps, const struct dml2_core_ip_params *ip_params) { ip_caps->pipe_count = ip_params->max_num_dpp; From 2d67c4b54909982d462bfe227279d1499b329545 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 8 Jul 2024 10:03:28 -0400 Subject: [PATCH 055/447] drm/amd/display: 3.2.292 * FW Release 0.0.225.0 * DML2 fixes * Allow display DCC for DCN401 * Refactor DWB, OPP, MPC, MMHUBBUB * Fix dscclk Programming issue on DCN401 Acked-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9d4b821ab219..036b23a6e324 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.291" 
+#define DC_VER "3.2.292" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 60a9472cf0a8568f32be6cbbd32ee7759bffe215 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:17 -0400 Subject: [PATCH 056/447] drm/radeon: change variable name "dev" to "ddev" for consistency The amdgpu probe function uses "ddev" as the variable name for its "struct drm_device *". Rename the radeon variable to match for consistency. Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 7bf08164140e..739bb1da9dcc 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -259,7 +259,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned long flags = 0; - struct drm_device *dev; + struct drm_device *ddev; int ret; if (!ent) @@ -300,28 +300,28 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - dev = drm_dev_alloc(&kms_driver, &pdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); + ddev = drm_dev_alloc(&kms_driver, &pdev->dev); + if (IS_ERR(ddev)) + return PTR_ERR(ddev); ret = pci_enable_device(pdev); if (ret) goto err_free; - pci_set_drvdata(pdev, dev); + pci_set_drvdata(pdev, ddev); - ret = drm_dev_register(dev, ent->driver_data); + ret = drm_dev_register(ddev, ent->driver_data); if (ret) goto err_agp; - radeon_fbdev_setup(dev->dev_private); + radeon_fbdev_setup(ddev->dev_private); return 0; err_agp: pci_disable_device(pdev); err_free: - drm_dev_put(dev); + drm_dev_put(ddev); return ret; } From 90985660ba488cd3428706e7d53d6c9cdbbf3101 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:18 -0400 Subject: [PATCH 057/447] drm/radeon: remove load callback from kms_driver The ".load" callback in "struct drm_driver" is deprecated. To remove it, call "radeon_driver_load_kms" directly from the probe function before registering the device. Acked-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 739bb1da9dcc..88d3de2a79f8 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -310,6 +310,10 @@ static int radeon_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, ddev); + ret = radeon_driver_load_kms(ddev, flags); + if (ret) + goto err_agp; + ret = drm_dev_register(ddev, ent->driver_data); if (ret) goto err_agp; @@ -569,7 +573,6 @@ static const struct drm_ioctl_desc radeon_ioctls_kms[] = { static const struct drm_driver kms_driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER | DRIVER_MODESET, - .load = radeon_driver_load_kms, .open = radeon_driver_open_kms, .postclose = radeon_driver_postclose_kms, .unload = radeon_driver_unload_kms, From 78dd6a8d33a3363fb4196e173be0eb4002962798 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:19 -0400 Subject: [PATCH 058/447] drm/radeon: use variable flags as parameter To be consistent with the amdgpu driver, pass "flags" as the parameter, since it is already assigned from "ent->driver_data".
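Taken together, patches 056-058 leave radeon_pci_probe() looking roughly as below. This is a condensed sketch reconstructed from the hunks above, not verbatim kernel code; the BIOS/PX and VGA-switcheroo checks between the declarations and the allocation, where "flags" picks up "ent->driver_data", are elided:

static int radeon_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *ent)
{
	unsigned long flags = 0;	/* populated from ent->driver_data (PX handling not shown) */
	struct drm_device *ddev;
	int ret;

	/* ... BIOS, PX and VGA-switcheroo checks elided ... */

	ddev = drm_dev_alloc(&kms_driver, &pdev->dev);
	if (IS_ERR(ddev))
		return PTR_ERR(ddev);

	ret = pci_enable_device(pdev);
	if (ret)
		goto err_free;

	pci_set_drvdata(pdev, ddev);

	/* the deprecated .load callback is gone: initialize the device explicitly... */
	ret = radeon_driver_load_kms(ddev, flags);
	if (ret)
		goto err_agp;

	/* ...then register it with DRM, passing the same flags */
	ret = drm_dev_register(ddev, flags);
	if (ret)
		goto err_agp;

	radeon_fbdev_setup(ddev->dev_private);
	return 0;

err_agp:
	pci_disable_device(pdev);
err_free:
	drm_dev_put(ddev);
	return ret;
}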
Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 88d3de2a79f8..7b8aa8406751 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -314,7 +314,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) goto err_agp; - ret = drm_dev_register(ddev, ent->driver_data); + ret = drm_dev_register(ddev, flags); if (ret) goto err_agp; From a6e23bec8ed184ed2a11080b28cdbd7a3024f0c0 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:20 -0400 Subject: [PATCH 059/447] drm/radeon: add helper rdev_to_drm(rdev) Add the helper rdev_to_drm(rdev), mirroring amdgpu's adev_to_drm(adev). Most functions should access the "drm_device" through "rdev_to_drm(rdev)" instead of the struct member. This also eases the later change of "radeon_device" from holding a "struct drm_device *" to embedding a "struct drm_device". Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 0999c8eaae94..ae35c102a487 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2476,6 +2476,11 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index); void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); +static inline struct drm_device *rdev_to_drm(struct radeon_device *rdev) +{ + return rdev->ddev; +} + /* * Cast helper */ From fb1b5e1dd53fc834e12f69749cbc8484382599c4 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:21 -0400 Subject: [PATCH 060/447] drm/radeon: change rdev->ddev to rdev_to_drm(rdev) Change how the "drm_device" is accessed: use "rdev_to_drm(rdev)" everywhere instead of dereferencing the "ddev" struct member directly.
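The value of funneling every access through the helper shows up later in the series (patch 061): when "radeon_device" switches from holding a "struct drm_device *" to embedding the structure, only the helper body has to change. A minimal sketch of the two stages, condensed from the radeon.h hunks in patches 059 and 061:

/* patch 059: radeon_device still holds a pointer */
static inline struct drm_device *rdev_to_drm(struct radeon_device *rdev)
{
	return rdev->ddev;	/* struct drm_device *ddev; */
}

/* patch 061: radeon_device embeds the structure */
static inline struct drm_device *rdev_to_drm(struct radeon_device *rdev)
{
	return &rdev->ddev;	/* struct drm_device ddev; */
}

Call sites such as radeon_get_clock_info(rdev_to_drm(rdev)) compile unchanged either way.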
Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/atombios_encoders.c | 2 +- drivers/gpu/drm/radeon/cik.c | 14 ++-- drivers/gpu/drm/radeon/dce6_afmt.c | 2 +- drivers/gpu/drm/radeon/evergreen.c | 12 ++-- drivers/gpu/drm/radeon/ni.c | 2 +- drivers/gpu/drm/radeon/r100.c | 24 +++---- drivers/gpu/drm/radeon/r300.c | 6 +- drivers/gpu/drm/radeon/r420.c | 6 +- drivers/gpu/drm/radeon/r520.c | 2 +- drivers/gpu/drm/radeon/r600.c | 12 ++-- drivers/gpu/drm/radeon/r600_cs.c | 2 +- drivers/gpu/drm/radeon/r600_dpm.c | 4 +- drivers/gpu/drm/radeon/r600_hdmi.c | 2 +- drivers/gpu/drm/radeon/radeon_acpi.c | 10 +-- drivers/gpu/drm/radeon/radeon_agp.c | 2 +- drivers/gpu/drm/radeon/radeon_atombios.c | 2 +- drivers/gpu/drm/radeon/radeon_audio.c | 4 +- drivers/gpu/drm/radeon/radeon_combios.c | 12 ++-- drivers/gpu/drm/radeon/radeon_device.c | 10 +-- drivers/gpu/drm/radeon/radeon_display.c | 74 +++++++++++----------- drivers/gpu/drm/radeon/radeon_fbdev.c | 26 ++++---- drivers/gpu/drm/radeon/radeon_fence.c | 8 +-- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_i2c.c | 2 +- drivers/gpu/drm/radeon/radeon_ib.c | 2 +- drivers/gpu/drm/radeon/radeon_irq_kms.c | 12 ++-- drivers/gpu/drm/radeon/radeon_object.c | 2 +- drivers/gpu/drm/radeon/radeon_pm.c | 20 +++--- drivers/gpu/drm/radeon/radeon_ring.c | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 6 +- drivers/gpu/drm/radeon/rs400.c | 6 +- drivers/gpu/drm/radeon/rs600.c | 14 ++-- drivers/gpu/drm/radeon/rs690.c | 2 +- drivers/gpu/drm/radeon/rv515.c | 4 +- drivers/gpu/drm/radeon/rv770.c | 2 +- drivers/gpu/drm/radeon/si.c | 4 +- 36 files changed, 159 insertions(+), 159 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 03e6871b3065..c82e0fbc49b4 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -2179,7 +2179,7 @@ assigned: void radeon_atom_encoder_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_encoder *encoder; list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b5e96a8fc2c1..11a492f21157 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7585,7 +7585,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7615,7 +7615,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7645,7 +7645,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); + drm_handle_vblank(rdev_to_drm(rdev), 2); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7675,7 +7675,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); + drm_handle_vblank(rdev_to_drm(rdev), 3); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7705,7 +7705,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o 
asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); + drm_handle_vblank(rdev_to_drm(rdev), 4); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7735,7 +7735,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); + drm_handle_vblank(rdev_to_drm(rdev), 5); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -8581,7 +8581,7 @@ int cik_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 4c06f47453fd..d6ab93ed9ec4 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -91,7 +91,7 @@ struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev) pin = &rdev->audio.pin[i]; pin_count = 0; - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (radeon_encoder_is_digital(encoder)) { radeon_encoder = to_radeon_encoder(encoder); dig = radeon_encoder->enc_priv; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index c634dc28e6c3..bc4ab71613a5 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1673,7 +1673,7 @@ void evergreen_pm_misc(struct radeon_device *rdev) */ void evergreen_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -1698,7 +1698,7 @@ void evergreen_pm_prepare(struct radeon_device *rdev) */ void evergreen_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -1763,7 +1763,7 @@ void evergreen_hpd_set_polarity(struct radeon_device *rdev, */ void evergreen_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enabled = 0; u32 tmp = DC_HPDx_CONNECTION_TIMER(0x9c4) | @@ -1804,7 +1804,7 @@ void evergreen_hpd_init(struct radeon_device *rdev) */ void evergreen_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disabled = 0; @@ -4753,7 +4753,7 @@ restart_ih: event_name = "vblank"; if (rdev->irq.crtc_vblank_int[crtc_idx]) { - drm_handle_vblank(rdev->ddev, crtc_idx); + drm_handle_vblank(rdev_to_drm(rdev), crtc_idx); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -5211,7 +5211,7 @@ int evergreen_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize AGP */ diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 77aee99e473a..3890911fe693 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2360,7 +2360,7 @@ int cayman_init(struct radeon_device *rdev) /* 
Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize memory controller */ diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 0b1e19345f43..d7d7d23bf9a1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -459,7 +459,7 @@ void r100_pm_misc(struct radeon_device *rdev) */ void r100_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -490,7 +490,7 @@ void r100_pm_prepare(struct radeon_device *rdev) */ void r100_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -603,7 +603,7 @@ void r100_hpd_set_polarity(struct radeon_device *rdev, */ void r100_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -626,7 +626,7 @@ void r100_hpd_init(struct radeon_device *rdev) */ void r100_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -798,7 +798,7 @@ int r100_irq_process(struct radeon_device *rdev) /* Vertical blank interrupts */ if (status & RADEON_CRTC_VBLANK_STAT) { if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -807,7 +807,7 @@ int r100_irq_process(struct radeon_device *rdev) } if (status & RADEON_CRTC2_VBLANK_STAT) { if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -1471,7 +1471,7 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) header = radeon_get_ib_value(p, h_idx); crtc_id = radeon_get_ib_value(p, h_idx + 5); reg = R100_CP_PACKET0_GET_REG(header); - crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id); + crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { DRM_ERROR("cannot find crtc %d\n", crtc_id); return -ENOENT; @@ -3059,7 +3059,7 @@ DEFINE_SHOW_ATTRIBUTE(r100_debugfs_mc_info); void r100_debugfs_rbbm_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_rbbm_info", 0444, root, rdev, &r100_debugfs_rbbm_info_fops); @@ -3069,7 +3069,7 @@ void r100_debugfs_rbbm_init(struct radeon_device *rdev) void r100_debugfs_cp_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_cp_ring_info", 0444, root, rdev, &r100_debugfs_cp_ring_info_fops); @@ -3081,7 +3081,7 @@ void r100_debugfs_cp_init(struct radeon_device *rdev) void r100_debugfs_mc_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = 
rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_mc_info", 0444, root, rdev, &r100_debugfs_mc_info_fops); @@ -3947,7 +3947,7 @@ int r100_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r100_clock_startup(rdev); /* Initialize surface registers */ @@ -4056,7 +4056,7 @@ int r100_init(struct radeon_device *rdev) /* Set asic errata */ r100_errata(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 1620f534f55f..05c13102a8cb 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -616,7 +616,7 @@ DEFINE_SHOW_ATTRIBUTE(rv370_debugfs_pcie_gart_info); static void rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rv370_pcie_gart_info", 0444, root, rdev, &rv370_debugfs_pcie_gart_info_fops); @@ -1452,7 +1452,7 @@ int r300_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r300_clock_startup(rdev); /* Initialize surface registers */ @@ -1538,7 +1538,7 @@ int r300_init(struct radeon_device *rdev) /* Set asic errata */ r300_errata(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index a979662eaa73..9a31cdec6415 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -322,7 +322,7 @@ int r420_resume(struct radeon_device *rdev) if (rdev->is_atom_bios) { atom_asic_init(rdev->mode_info.atom_context); } else { - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); } /* Resume clock after posting */ r420_clock_resume(rdev); @@ -414,7 +414,7 @@ int r420_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); @@ -493,7 +493,7 @@ DEFINE_SHOW_ATTRIBUTE(r420_debugfs_pipes_info); void r420_debugfs_pipes_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r420_pipes_info", 0444, root, rdev, &r420_debugfs_pipes_info_fops); diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 6cbcaa845192..08e127b3249a 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -287,7 +287,7 @@ int r520_init(struct radeon_device *rdev) atom_asic_init(rdev->mode_info.atom_context); } /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c 
index 087d41e370fd..8b62f7faa5b9 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -950,7 +950,7 @@ void r600_hpd_set_polarity(struct radeon_device *rdev, void r600_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -1017,7 +1017,7 @@ void r600_hpd_init(struct radeon_device *rdev) void r600_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -3280,7 +3280,7 @@ int r600_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); if (rdev->flags & RADEON_IS_AGP) { @@ -4136,7 +4136,7 @@ restart_ih: DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -4166,7 +4166,7 @@ restart_ih: DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -4358,7 +4358,7 @@ DEFINE_SHOW_ATTRIBUTE(r600_debugfs_mc_info); static void r600_debugfs_mc_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r600_mc_info", 0444, root, rdev, &r600_debugfs_mc_info_fops); diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 6cf54a747749..1b2d31c4d77c 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -884,7 +884,7 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1); reg = R600_CP_PACKET0_GET_REG(header); - crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id); + crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { DRM_ERROR("cannot find crtc %d\n", crtc_id); return -ENOENT; diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index 64980a61d38a..81d58ef667dd 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -153,7 +153,7 @@ void r600_dpm_print_ps_status(struct radeon_device *rdev, u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 vblank_in_pixels; @@ -180,7 +180,7 @@ u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) u32 r600_dpm_get_vrefresh(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 vrefresh = 0; diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index f3551ebaa2f0..661f374f5f27 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -116,7 +116,7 @@ void r600_audio_update_hdmi(struct work_struct *work) { struct radeon_device *rdev = 
container_of(work, struct radeon_device, audio_work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct r600_audio_pin audio_status = r600_audio_status(rdev); struct drm_encoder *encoder; bool changed = false; diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c index 603a78e41ba5..22ce61bdfc06 100644 --- a/drivers/gpu/drm/radeon/radeon_acpi.c +++ b/drivers/gpu/drm/radeon/radeon_acpi.c @@ -405,11 +405,11 @@ static int radeon_atif_handler(struct radeon_device *rdev, if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { if ((rdev->flags & RADEON_IS_PX) && radeon_atpx_dgpu_req_power_for_displays()) { - pm_runtime_get_sync(rdev->ddev->dev); + pm_runtime_get_sync(rdev_to_drm(rdev)->dev); /* Just fire off a uevent and let userspace tell us what to do */ - drm_helper_hpd_irq_event(rdev->ddev); - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + drm_helper_hpd_irq_event(rdev_to_drm(rdev)); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); } } /* TODO: check other events */ @@ -736,7 +736,7 @@ int radeon_acpi_init(struct radeon_device *rdev) struct radeon_encoder *target = NULL; /* Find the encoder controlling the brightness */ - list_for_each_entry(tmp, &rdev->ddev->mode_config.encoder_list, + list_for_each_entry(tmp, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { struct radeon_encoder *enc = to_radeon_encoder(tmp); diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c index a3d749e350f9..89d7b0e9e79f 100644 --- a/drivers/gpu/drm/radeon/radeon_agp.c +++ b/drivers/gpu/drm/radeon/radeon_agp.c @@ -161,7 +161,7 @@ struct radeon_agp_head *radeon_agp_head_init(struct drm_device *dev) static int radeon_agp_head_acquire(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct pci_dev *pdev = to_pci_dev(dev->dev); if (!rdev->agp) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 10793a433bf5..97c4e10d0550 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -187,7 +187,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) if (i2c.valid) { sprintf(stmp, "0x%x", i2c.i2c_id); - rdev->i2c_bus[i] = radeon_i2c_create(rdev->ddev, &i2c, stmp); + rdev->i2c_bus[i] = radeon_i2c_create(rdev_to_drm(rdev), &i2c, stmp); } gpio = (ATOM_GPIO_I2C_ASSIGMENT *) ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 0bcd767b9f47..47aa06a9a942 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -196,7 +196,7 @@ static void radeon_audio_enable(struct radeon_device *rdev, return; if (rdev->mode_info.mode_config_initialized) { - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (radeon_encoder_is_digital(encoder)) { radeon_encoder = to_radeon_encoder(encoder); dig = radeon_encoder->enc_priv; @@ -760,7 +760,7 @@ static int radeon_audio_component_get_eld(struct device *kdev, int port, if (!rdev->audio.enabled || !rdev->mode_info.mode_config_initialized) return 0; - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, 
&rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (!radeon_encoder_is_digital(encoder)) continue; radeon_encoder = to_radeon_encoder(encoder); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 6952b1273b0f..41ddc576f8f8 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -372,7 +372,7 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) int edid_info, size; struct edid *edid; unsigned char *raw; - edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE); + edid_info = combios_get_table_offset(rdev_to_drm(rdev), COMBIOS_HARDCODED_EDID_TABLE); if (!edid_info) return false; @@ -642,7 +642,7 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_i2c_bus_rec i2c; u16 offset; u8 id, blocks, clk, data; @@ -670,7 +670,7 @@ static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct r void radeon_combios_i2c_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_i2c_bus_rec i2c; /* actual hw pads @@ -812,7 +812,7 @@ bool radeon_combios_get_clock_info(struct drm_device *dev) bool radeon_combios_sideport_present(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); u16 igp_info; /* sideport is AMD only */ @@ -915,7 +915,7 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct enum radeon_tv_std radeon_combios_get_tv_info(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); uint16_t tv_info; enum radeon_tv_std tv_std = TV_STD_NTSC; @@ -2637,7 +2637,7 @@ static const char *thermal_controller_names[] = { void radeon_combios_get_power_modes(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); u16 offset, misc, misc2 = 0; u8 rev, tmp; int state_index = 0; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index afbb3a80c0c6..32851632643d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -760,7 +760,7 @@ bool radeon_boot_test_post_card(struct radeon_device *rdev) if (rdev->is_atom_bios) atom_asic_init(rdev->mode_info.atom_context); else - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); return true; } else { dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); @@ -980,7 +980,7 @@ int radeon_atombios_init(struct radeon_device *rdev) return -ENOMEM; rdev->mode_info.atom_card_info = atom_card_info; - atom_card_info->dev = rdev->ddev; + atom_card_info->dev = rdev_to_drm(rdev); atom_card_info->reg_read = cail_reg_read; atom_card_info->reg_write = cail_reg_write; /* needed for iio ops */ @@ -1005,7 +1005,7 @@ int radeon_atombios_init(struct radeon_device *rdev) mutex_init(&rdev->mode_info.atom_context->mutex); mutex_init(&rdev->mode_info.atom_context->scratch_mutex); - radeon_atom_initialize_bios_scratch_regs(rdev->ddev); + radeon_atom_initialize_bios_scratch_regs(rdev_to_drm(rdev)); atom_allocate_fb_scratch(rdev->mode_info.atom_context); return 0; } @@ -1049,7 +1049,7 @@ void 
radeon_atombios_fini(struct radeon_device *rdev) */ int radeon_combios_init(struct radeon_device *rdev) { - radeon_combios_initialize_bios_scratch_regs(rdev->ddev); + radeon_combios_initialize_bios_scratch_regs(rdev_to_drm(rdev)); return 0; } @@ -1847,7 +1847,7 @@ int radeon_gpu_reset(struct radeon_device *rdev) downgrade_write(&rdev->exclusive_lock); - drm_helper_resume_force_mode(rdev->ddev); + drm_helper_resume_force_mode(rdev_to_drm(rdev)); /* set the power state here in case we are a PX system or headless */ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 843383f7237f..10fd58f400bc 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -302,13 +302,13 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) if ((radeon_use_pflipirq == 2) && ASIC_IS_DCE4(rdev)) return; - spin_lock_irqsave(&rdev->ddev->event_lock, flags); + spin_lock_irqsave(&rdev_to_drm(rdev)->event_lock, flags); if (radeon_crtc->flip_status != RADEON_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("radeon_crtc->flip_status = %d != " "RADEON_FLIP_SUBMITTED(%d)\n", radeon_crtc->flip_status, RADEON_FLIP_SUBMITTED); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); return; } @@ -334,7 +334,7 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) */ if (update_pending && (DRM_SCANOUTPOS_VALID & - radeon_get_crtc_scanoutpos(rdev->ddev, crtc_id, + radeon_get_crtc_scanoutpos(rdev_to_drm(rdev), crtc_id, GET_DISTANCE_TO_VBLANKSTART, &vpos, &hpos, NULL, NULL, &rdev->mode_info.crtcs[crtc_id]->base.hwmode)) && @@ -347,7 +347,7 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) */ update_pending = 0; } - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); if (!update_pending) radeon_crtc_handle_flip(rdev, crtc_id); } @@ -370,14 +370,14 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) if (radeon_crtc == NULL) return; - spin_lock_irqsave(&rdev->ddev->event_lock, flags); + spin_lock_irqsave(&rdev_to_drm(rdev)->event_lock, flags); work = radeon_crtc->flip_work; if (radeon_crtc->flip_status != RADEON_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("radeon_crtc->flip_status = %d != " "RADEON_FLIP_SUBMITTED(%d)\n", radeon_crtc->flip_status, RADEON_FLIP_SUBMITTED); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); return; } @@ -389,7 +389,7 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) if (work->event) drm_crtc_send_vblank_event(&radeon_crtc->base, work->event); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); drm_crtc_vblank_put(&radeon_crtc->base); radeon_irq_kms_pflip_irq_put(rdev, work->crtc_id); @@ -408,7 +408,7 @@ static void radeon_flip_work_func(struct work_struct *__work) struct radeon_flip_work *work = container_of(__work, struct radeon_flip_work, flip_work); struct radeon_device *rdev = work->rdev; - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id]; struct drm_crtc *crtc = &radeon_crtc->base; @@ -1401,7 +1401,7 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if 
(rdev->is_atom_bios) { rdev->mode_info.coherent_mode_property = - drm_property_create_range(rdev->ddev, 0 , "coherent", 0, 1); + drm_property_create_range(rdev_to_drm(rdev), 0, "coherent", 0, 1); if (!rdev->mode_info.coherent_mode_property) return -ENOMEM; } @@ -1409,57 +1409,57 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if (!ASIC_IS_AVIVO(rdev)) { sz = ARRAY_SIZE(radeon_tmds_pll_enum_list); rdev->mode_info.tmds_pll_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "tmds_pll", radeon_tmds_pll_enum_list, sz); } rdev->mode_info.load_detect_property = - drm_property_create_range(rdev->ddev, 0, "load detection", 0, 1); + drm_property_create_range(rdev_to_drm(rdev), 0, "load detection", 0, 1); if (!rdev->mode_info.load_detect_property) return -ENOMEM; - drm_mode_create_scaling_mode_property(rdev->ddev); + drm_mode_create_scaling_mode_property(rdev_to_drm(rdev)); sz = ARRAY_SIZE(radeon_tv_std_enum_list); rdev->mode_info.tv_std_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "tv standard", radeon_tv_std_enum_list, sz); sz = ARRAY_SIZE(radeon_underscan_enum_list); rdev->mode_info.underscan_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "underscan", radeon_underscan_enum_list, sz); rdev->mode_info.underscan_hborder_property = - drm_property_create_range(rdev->ddev, 0, + drm_property_create_range(rdev_to_drm(rdev), 0, "underscan hborder", 0, 128); if (!rdev->mode_info.underscan_hborder_property) return -ENOMEM; rdev->mode_info.underscan_vborder_property = - drm_property_create_range(rdev->ddev, 0, + drm_property_create_range(rdev_to_drm(rdev), 0, "underscan vborder", 0, 128); if (!rdev->mode_info.underscan_vborder_property) return -ENOMEM; sz = ARRAY_SIZE(radeon_audio_enum_list); rdev->mode_info.audio_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "audio", radeon_audio_enum_list, sz); sz = ARRAY_SIZE(radeon_dither_enum_list); rdev->mode_info.dither_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "dither", radeon_dither_enum_list, sz); sz = ARRAY_SIZE(radeon_output_csc_enum_list); rdev->mode_info.output_csc_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "output_csc", radeon_output_csc_enum_list, sz); @@ -1578,29 +1578,29 @@ int radeon_modeset_init(struct radeon_device *rdev) int i; int ret; - drm_mode_config_init(rdev->ddev); + drm_mode_config_init(rdev_to_drm(rdev)); rdev->mode_info.mode_config_initialized = true; - rdev->ddev->mode_config.funcs = &radeon_mode_funcs; + rdev_to_drm(rdev)->mode_config.funcs = &radeon_mode_funcs; if (radeon_use_pflipirq == 2 && rdev->family >= CHIP_R600) - rdev->ddev->mode_config.async_page_flip = true; + rdev_to_drm(rdev)->mode_config.async_page_flip = true; if (ASIC_IS_DCE5(rdev)) { - rdev->ddev->mode_config.max_width = 16384; - rdev->ddev->mode_config.max_height = 16384; + rdev_to_drm(rdev)->mode_config.max_width = 16384; + rdev_to_drm(rdev)->mode_config.max_height = 16384; } else if (ASIC_IS_AVIVO(rdev)) { - rdev->ddev->mode_config.max_width = 8192; - rdev->ddev->mode_config.max_height = 8192; + rdev_to_drm(rdev)->mode_config.max_width = 8192; + rdev_to_drm(rdev)->mode_config.max_height = 8192; } else { - rdev->ddev->mode_config.max_width = 4096; - rdev->ddev->mode_config.max_height = 4096; + 
rdev_to_drm(rdev)->mode_config.max_width = 4096; + rdev_to_drm(rdev)->mode_config.max_height = 4096; } - rdev->ddev->mode_config.preferred_depth = 24; - rdev->ddev->mode_config.prefer_shadow = 1; + rdev_to_drm(rdev)->mode_config.preferred_depth = 24; + rdev_to_drm(rdev)->mode_config.prefer_shadow = 1; - rdev->ddev->mode_config.fb_modifiers_not_supported = true; + rdev_to_drm(rdev)->mode_config.fb_modifiers_not_supported = true; ret = radeon_modeset_create_props(rdev); if (ret) { @@ -1618,11 +1618,11 @@ int radeon_modeset_init(struct radeon_device *rdev) /* allocate crtcs */ for (i = 0; i < rdev->num_crtc; i++) { - radeon_crtc_init(rdev->ddev, i); + radeon_crtc_init(rdev_to_drm(rdev), i); } /* okay we should have all the bios connectors */ - ret = radeon_setup_enc_conn(rdev->ddev); + ret = radeon_setup_enc_conn(rdev_to_drm(rdev)); if (!ret) { return ret; } @@ -1639,7 +1639,7 @@ int radeon_modeset_init(struct radeon_device *rdev) /* setup afmt */ radeon_afmt_init(rdev); - drm_kms_helper_poll_init(rdev->ddev); + drm_kms_helper_poll_init(rdev_to_drm(rdev)); /* do pm late init */ ret = radeon_pm_late_init(rdev); @@ -1650,11 +1650,11 @@ int radeon_modeset_init(struct radeon_device *rdev) void radeon_modeset_fini(struct radeon_device *rdev) { if (rdev->mode_info.mode_config_initialized) { - drm_kms_helper_poll_fini(rdev->ddev); + drm_kms_helper_poll_fini(rdev_to_drm(rdev)); radeon_hpd_fini(rdev); - drm_helper_force_disable_all(rdev->ddev); + drm_helper_force_disable_all(rdev_to_drm(rdev)); radeon_afmt_fini(rdev); - drm_mode_config_cleanup(rdev->ddev); + drm_mode_config_cleanup(rdev_to_drm(rdev)); rdev->mode_info.mode_config_initialized = false; } diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index 02bf25759059..fb70de29545c 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -67,7 +67,7 @@ static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, int height = mode_cmd->height; u32 cpp; - info = drm_get_format_info(rdev->ddev, mode_cmd); + info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd); cpp = info->cpp[0]; /* need to align pitch with crtc limits */ @@ -148,15 +148,15 @@ static int radeon_fbdev_fb_open(struct fb_info *info, int user) struct radeon_device *rdev = fb_helper->dev->dev_private; int ret; - ret = pm_runtime_get_sync(rdev->ddev->dev); + ret = pm_runtime_get_sync(rdev_to_drm(rdev)->dev); if (ret < 0 && ret != -EACCES) goto err_pm_runtime_mark_last_busy; return 0; err_pm_runtime_mark_last_busy: - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); return ret; } @@ -165,8 +165,8 @@ static int radeon_fbdev_fb_release(struct fb_info *info, int user) struct drm_fb_helper *fb_helper = info->par; struct radeon_device *rdev = fb_helper->dev->dev_private; - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); return 0; } @@ -236,7 +236,7 @@ static int radeon_fbdev_fb_helper_fb_probe(struct drm_fb_helper *fb_helper, ret = -ENOMEM; goto err_radeon_fbdev_destroy_pinned_object; } - ret = radeon_framebuffer_init(rdev->ddev, fb, &mode_cmd, gobj); + ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto err_kfree; 
@@ -374,12 +374,12 @@ void radeon_fbdev_setup(struct radeon_device *rdev) fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); if (!fb_helper) return; - drm_fb_helper_prepare(rdev->ddev, fb_helper, bpp_sel, &radeon_fbdev_fb_helper_funcs); + drm_fb_helper_prepare(rdev_to_drm(rdev), fb_helper, bpp_sel, &radeon_fbdev_fb_helper_funcs); - ret = drm_client_init(rdev->ddev, &fb_helper->client, "radeon-fbdev", + ret = drm_client_init(rdev_to_drm(rdev), &fb_helper->client, "radeon-fbdev", &radeon_fbdev_client_funcs); if (ret) { - drm_err(rdev->ddev, "Failed to register client: %d\n", ret); + drm_err(rdev_to_drm(rdev), "Failed to register client: %d\n", ret); goto err_drm_client_init; } @@ -394,13 +394,13 @@ err_drm_client_init: void radeon_fbdev_set_suspend(struct radeon_device *rdev, int state) { - if (rdev->ddev->fb_helper) - drm_fb_helper_set_suspend(rdev->ddev->fb_helper, state); + if (rdev_to_drm(rdev)->fb_helper) + drm_fb_helper_set_suspend(rdev_to_drm(rdev)->fb_helper, state); } bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj) { - struct drm_fb_helper *fb_helper = rdev->ddev->fb_helper; + struct drm_fb_helper *fb_helper = rdev_to_drm(rdev)->fb_helper; struct drm_gem_object *gobj; if (!fb_helper) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 4fb780d96f32..daff61586be5 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -150,7 +150,7 @@ int radeon_fence_emit(struct radeon_device *rdev, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); - trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); + trace_radeon_fence_emit(rdev_to_drm(rdev), ring, (*fence)->seq); radeon_fence_schedule_check(rdev, ring); return 0; } @@ -489,7 +489,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, if (!target_seq[i]) continue; - trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); + trace_radeon_fence_wait_begin(rdev_to_drm(rdev), i, target_seq[i]); radeon_irq_kms_sw_irq_get(rdev, i); } @@ -511,7 +511,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, continue; radeon_irq_kms_sw_irq_put(rdev, i); - trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); + trace_radeon_fence_wait_end(rdev_to_drm(rdev), i, target_seq[i]); } return r; @@ -995,7 +995,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(radeon_debugfs_gpu_reset_fops, void radeon_debugfs_fence_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_gpu_reset", 0444, root, rdev, &radeon_debugfs_gpu_reset_fops); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index e66a230331ee..210e8d43bb23 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -899,7 +899,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_gem_info); void radeon_gem_debugfs_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_gem_info", 0444, root, rdev, &radeon_debugfs_gem_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index 3d174390a8af..1f16619ed06e 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -1011,7 +1011,7 
@@ void radeon_i2c_add(struct radeon_device *rdev, struct radeon_i2c_bus_rec *rec, const char *name) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); int i; for (i = 0; i < RADEON_MAX_I2C_BUS; i++) { diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 63d914f3414d..1aa41cc3f991 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -309,7 +309,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_sa_info); static void radeon_debugfs_sa_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_sa_info", 0444, root, rdev, &radeon_debugfs_sa_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index c4dda908666c..9961251b44ba 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -80,7 +80,7 @@ static void radeon_hotplug_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, hotplug_work.work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; @@ -101,7 +101,7 @@ static void radeon_dp_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, dp_work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; @@ -197,7 +197,7 @@ static void radeon_driver_irq_uninstall_kms(struct drm_device *dev) static int radeon_irq_install(struct radeon_device *rdev, int irq) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); int ret; if (irq == IRQ_NOTCONNECTED) @@ -218,7 +218,7 @@ static int radeon_irq_install(struct radeon_device *rdev, int irq) static void radeon_irq_uninstall(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct pci_dev *pdev = to_pci_dev(dev->dev); radeon_driver_irq_uninstall_kms(dev); @@ -322,9 +322,9 @@ int radeon_irq_kms_init(struct radeon_device *rdev) spin_lock_init(&rdev->irq.lock); /* Disable vblank irqs aggressively for power-saving */ - rdev->ddev->vblank_disable_immediate = true; + rdev_to_drm(rdev)->vblank_disable_immediate = true; - r = drm_vblank_init(rdev->ddev, rdev->num_crtc); + r = drm_vblank_init(rdev_to_drm(rdev), rdev->num_crtc); if (r) { return r; } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index a955f8a2f7fe..450ff7daa46c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -150,7 +150,7 @@ int radeon_bo_create(struct radeon_device *rdev, bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - drm_gem_private_object_init(rdev->ddev, &bo->tbo.base, size); + drm_gem_private_object_init(rdev_to_drm(rdev), &bo->tbo.base, size); bo->rdev = rdev; bo->surface_reg = -1; INIT_LIST_HEAD(&bo->list); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 2d9d9f46f243..b4fb7e70320b 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -282,7 +282,7 @@ static void 
radeon_pm_set_clocks(struct radeon_device *rdev) if (rdev->irq.installed) { i = 0; - drm_for_each_crtc(crtc, rdev->ddev) { + drm_for_each_crtc(crtc, rdev_to_drm(rdev)) { if (rdev->pm.active_crtcs & (1 << i)) { /* This can fail if a modeset is in progress */ if (drm_crtc_vblank_get(crtc) == 0) @@ -299,7 +299,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) if (rdev->irq.installed) { i = 0; - drm_for_each_crtc(crtc, rdev->ddev) { + drm_for_each_crtc(crtc, rdev_to_drm(rdev)) { if (rdev->pm.req_vblank & (1 << i)) { rdev->pm.req_vblank &= ~(1 << i); drm_crtc_vblank_put(crtc); @@ -671,7 +671,7 @@ static ssize_t radeon_hwmon_show_temp(struct device *dev, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); int temp; /* Can't get temperature when the card is off */ @@ -715,7 +715,7 @@ static ssize_t radeon_hwmon_show_sclk(struct device *dev, struct device_attribute *attr, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); u32 sclk = 0; /* Can't get clock frequency when the card is off */ @@ -740,7 +740,7 @@ static ssize_t radeon_hwmon_show_vddc(struct device *dev, struct device_attribute *attr, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); u16 vddc = 0; /* Can't get vddc when the card is off */ @@ -1692,7 +1692,7 @@ void radeon_pm_fini(struct radeon_device *rdev) static void radeon_pm_compute_clocks_old(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; @@ -1765,7 +1765,7 @@ static void radeon_pm_compute_clocks_old(struct radeon_device *rdev) static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; struct radeon_connector *radeon_connector; @@ -1826,7 +1826,7 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev) */ for (crtc = 0; (crtc < rdev->num_crtc) && in_vbl; crtc++) { if (rdev->pm.active_crtcs & (1 << crtc)) { - vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev, + vbl_status = radeon_get_crtc_scanoutpos(rdev_to_drm(rdev), crtc, USE_REAL_VBLANKSTART, &vpos, &hpos, NULL, NULL, @@ -1918,7 +1918,7 @@ static void radeon_dynpm_idle_work_handler(struct work_struct *work) static int radeon_debugfs_pm_info_show(struct seq_file *m, void *unused) { struct radeon_device *rdev = m->private; - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); if ((rdev->flags & RADEON_IS_PX) && (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { @@ -1955,7 +1955,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_pm_info); static void radeon_debugfs_pm_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_pm_info", 0444, root, rdev, &radeon_debugfs_pm_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 8d1d458286a8..581ae20c46e4 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -550,7 +550,7 @@ static void radeon_debugfs_ring_init(struct 
radeon_device *rdev, struct radeon_r { #if defined(CONFIG_DEBUG_FS) const char *ring_name = radeon_debugfs_ring_idx_to_name(ring->idx); - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; if (ring_name) debugfs_create_file(ring_name, 0444, root, ring, diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 5c65b6dfb99a..69d0c12fa419 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -682,8 +682,8 @@ int radeon_ttm_init(struct radeon_device *rdev) /* No others user of address space so set it to 0 */ r = ttm_device_init(&rdev->mman.bdev, &radeon_bo_driver, rdev->dev, - rdev->ddev->anon_inode->i_mapping, - rdev->ddev->vma_offset_manager, + rdev_to_drm(rdev)->anon_inode->i_mapping, + rdev_to_drm(rdev)->vma_offset_manager, rdev->need_swiotlb, dma_addressing_limited(&rdev->pdev->dev)); if (r) { @@ -890,7 +890,7 @@ static const struct file_operations radeon_ttm_gtt_fops = { static void radeon_ttm_debugfs_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct drm_minor *minor = rdev->ddev->primary; + struct drm_minor *minor = rdev_to_drm(rdev)->primary; struct dentry *root = minor->debugfs_root; debugfs_create_file("radeon_vram", 0444, root, rdev, diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index d4d1501e6576..d6c18fd740ec 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -379,7 +379,7 @@ DEFINE_SHOW_ATTRIBUTE(rs400_debugfs_gart_info); static void rs400_debugfs_pcie_gart_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rs400_gart_info", 0444, root, rdev, &rs400_debugfs_gart_info_fops); @@ -474,7 +474,7 @@ int rs400_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r300_clock_startup(rdev); /* Initialize surface registers */ @@ -552,7 +552,7 @@ int rs400_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs400_mc_init(rdev); /* Fence driver */ diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 5c162778899b..88c8e91ea651 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -321,7 +321,7 @@ void rs600_pm_misc(struct radeon_device *rdev) void rs600_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -339,7 +339,7 @@ void rs600_pm_prepare(struct radeon_device *rdev) void rs600_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -408,7 +408,7 @@ void rs600_hpd_set_polarity(struct radeon_device *rdev, void rs600_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -435,7 +435,7 @@ void rs600_hpd_init(struct radeon_device *rdev) void rs600_hpd_fini(struct 
radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -797,7 +797,7 @@ int rs600_irq_process(struct radeon_device *rdev) /* Vertical blank interrupts */ if (G_007EDC_LB_D1_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -806,7 +806,7 @@ int rs600_irq_process(struct radeon_device *rdev) } if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -1133,7 +1133,7 @@ int rs600_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs600_mc_init(rdev); r100_debugfs_rbbm_init(rdev); diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 14fb0819b8c1..016eb4992803 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -845,7 +845,7 @@ int rs690_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs690_mc_init(rdev); rv515_debugfs(rdev); diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index bbc6ccabf788..1b4dfb645585 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -255,7 +255,7 @@ DEFINE_SHOW_ATTRIBUTE(rv515_debugfs_ga_info); void rv515_debugfs(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rv515_pipes_info", 0444, root, rdev, &rv515_debugfs_pipes_info_fops); @@ -636,7 +636,7 @@ int rv515_init(struct radeon_device *rdev) if (radeon_boot_test_post_card(rdev) == false) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 9ce12fa3c356..7d4b0bf59109 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1935,7 +1935,7 @@ int rv770_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize AGP */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 15759c8ca5b7..6c95575ce109 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6277,7 +6277,7 @@ restart_ih: event_name = "vblank"; if (rdev->irq.crtc_vblank_int[crtc_idx]) { - drm_handle_vblank(rdev->ddev, crtc_idx); + drm_handle_vblank(rdev_to_drm(rdev), crtc_idx); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -6839,7 +6839,7 @@ int si_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + 
radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); From a9ed2f052c5c14e4be58c5ec8794dffc87588123 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:22 -0400 Subject: [PATCH 061/447] drm/radeon: change drm_dev_alloc to devm_drm_dev_alloc "drm_dev_alloc" is deprecated. In order to use the newer "devm_drm_dev_alloc", the "drm_device" is now embedded inside "radeon_device"; through "rdev_to_drm(rdev)", other functions still gain access to the "drm_device" member. Also, since "devm_drm_dev_alloc" now allocates "radeon_device", the allocation inside "radeon_driver_load_kms" has to be removed. "radeon_device_init" originally assigned "rdev->dev" etc., but those assignments are already done right after "devm_drm_dev_alloc", as can be seen down below, so it is better to remove them. Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 4 ++-- drivers/gpu/drm/radeon/radeon_device.c | 3 --- drivers/gpu/drm/radeon/radeon_drv.c | 12 +++++++++--- drivers/gpu/drm/radeon/radeon_kms.c | 8 +------- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ae35c102a487..fd8a4513025f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2297,7 +2297,7 @@ typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t); struct radeon_device { struct device *dev; - struct drm_device *ddev; + struct drm_device ddev; struct pci_dev *pdev; #ifdef __alpha__ struct pci_controller *hose; @@ -2478,7 +2478,7 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); static inline struct drm_device *rdev_to_drm(struct radeon_device *rdev) { - return rdev->ddev; + return &rdev->ddev; } /* diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 32851632643d..554b236c2328 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1285,9 +1285,6 @@ int radeon_device_init(struct radeon_device *rdev, bool runtime = false; rdev->shutdown = false; - rdev->dev = &pdev->dev; - rdev->ddev = ddev; - rdev->pdev = pdev; rdev->flags = flags; rdev->family = flags & RADEON_FAMILY_MASK; rdev->is_atom_bios = false; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 7b8aa8406751..f36aa71c57c7 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -260,6 +260,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, { unsigned long flags = 0; struct drm_device *ddev; + struct radeon_device *rdev; int ret; if (!ent) @@ -300,9 +301,14 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - ddev = drm_dev_alloc(&kms_driver, &pdev->dev); - if (IS_ERR(ddev)) - return PTR_ERR(ddev); + rdev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*rdev), ddev); + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + + rdev->dev = &pdev->dev; + rdev->pdev = pdev; + ddev = rdev_to_drm(rdev); + ddev->dev_private = rdev; ret = pci_enable_device(pdev); if (ret) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index a16590c6247f..645e33bf7947 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -104,15 +104,9 @@ done_free: int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) { struct pci_dev *pdev = to_pci_dev(dev->dev); -
struct radeon_device *rdev; + struct radeon_device *rdev = dev->dev_private; int r, acpi_status; - rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL); - if (rdev == NULL) { - return -ENOMEM; - } - dev->dev_private = (void *)rdev; - #ifdef __alpha__ rdev->hose = pdev->sysdata; #endif From 27cdf8c3cae2ad1dbfe1a32ff999c98349366862 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 11 Jul 2024 16:06:25 +0800 Subject: [PATCH 062/447] drm/amdgpu: optimize umc v12 address conversion function Split into 3 parts: 1. Convert soc physical address via ras ta. 2. Expand bad pages from soc physical address. 3. Dump bad address info. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 120 ++++++++++++++++--------- 1 file changed, 79 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 9dbb13adb661..54d9f0a44f18 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -225,26 +225,16 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, } } -static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, - struct ta_ras_query_address_input *addr_in, - uint64_t *pfns, int len) +static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev, + struct ta_ras_query_address_output *addr_out, + uint64_t err_addr) { uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column, err_addr; - struct ta_ras_query_address_output addr_out; - uint32_t pos = 0; + uint64_t soc_pa, retired_page, column; - err_addr = addr_in->ma.err_addr; - addr_in->addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); - return 0; - } - - soc_pa = addr_out.pa.pa; - bank = addr_out.pa.bank; - channel_index = addr_out.pa.channel_idx; + soc_pa = addr_out->pa.pa; + bank = addr_out->pa.bank; + channel_index = addr_out->pa.channel_idx; col = (err_addr >> 1) & 0x1fULL; row = (err_addr >> 10) & 0x3fffULL; @@ -254,6 +244,37 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, /* clear [C4] in soc physical address */ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); + retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + /* include column bit 0 and 1 */ + col &= 0x3; + col |= (column << 2); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row, col, bank, channel_index); + + /* shift R13 bit */ + retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row_xor, col, bank, channel_index); + } +} + +static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len) +{ + uint64_t soc_pa, retired_page, column; + uint32_t pos = 0; + + soc_pa = pa_addr; + /* clear [C3 C2] in soc physical address */ + soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); + /* clear [C4] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + /* loop for all possibilities of [C4 C3 C2] */ for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { retired_page = 
soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); @@ -263,13 +284,6 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, return 0; pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - /* shift R13 bit */ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); @@ -277,14 +291,40 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, return 0; pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); } return pos; } +static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t socket, + uint64_t *addr, bool dump_addr) +{ + struct ta_ras_query_address_input addr_in; + struct ta_ras_query_address_output addr_out; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch; + addr_in.ma.umc_inst = umc; + addr_in.ma.node_inst = node; + addr_in.ma.socket_id = socket; + addr_in.addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); + return -EINVAL; + } + + if (dump_addr) + umc_v12_0_dump_addr_info(adev, &addr_out, err_addr); + + *addr = addr_out.pa.pa; + + return 0; +} + static int umc_v12_0_query_error_address(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data) @@ -483,12 +523,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); uint16_t hwid, mcatype; - struct ta_ras_query_address_input addr_in; uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; - uint64_t err_addr, hash_val = 0; + uint64_t err_addr, hash_val = 0, pa_addr = 0; struct ras_ecc_err *ecc_err; - int count; - int ret; + int count, ret; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -514,17 +552,17 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, MCA_IPID_2_UMC_CH(ipid), err_addr); + ret = umc_v12_0_convert_mca_to_addr(adev, + err_addr, MCA_IPID_2_UMC_CH(ipid), + MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid), + MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true); + if (ret) + return ret; + memset(page_pfn, 0, sizeof(page_pfn)); - - memset(&addr_in, 0, sizeof(addr_in)); - addr_in.ma.err_addr = err_addr; - addr_in.ma.ch_inst = MCA_IPID_2_UMC_CH(ipid); - addr_in.ma.umc_inst = MCA_IPID_2_UMC_INST(ipid); - addr_in.ma.node_inst = MCA_IPID_2_DIE_ID(ipid); - addr_in.ma.socket_id = MCA_IPID_2_SOCKET_ID(ipid); - - count = umc_v12_0_convert_err_addr(adev, - &addr_in, page_pfn, ARRAY_SIZE(page_pfn)); + count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + pa_addr, + page_pfn, ARRAY_SIZE(page_pfn)); if (count <= 0) { dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count); return 0; From 56631dee2932dbc203f0abd1011aa9d3d621e206 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 11 Jul 2024 16:14:22 +0800 Subject: [PATCH 063/447] drm/amdgpu: optimize logging deferred error info 1. Use pa_pfn as the radix-tree key index to log deferred error info. 2. Use local array to store a row of bad pages. 
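For illustration, here is a minimal sketch (editorial, not part of the patch) of the keying scheme this change moves to; struct demo_err and demo_log_pfn() are hypothetical stand-ins for the driver's ras_ecc_err handling:

#include <linux/radix-tree.h>
#include <linux/slab.h>

struct demo_err {
	u64 pa_pfn;	/* key: pfn of the converted soc physical address */
};

/* Statically initialized radix-tree root; the driver initializes its
 * de_page_tree dynamically with INIT_RADIX_TREE() instead. */
static RADIX_TREE(demo_tree, GFP_KERNEL);

/* Keying by pa_pfn means a repeated report of the same bad page simply
 * fails with -EEXIST on insert, so duplicates are detected without any
 * hashing step. */
static int demo_log_pfn(u64 pa_pfn)
{
	struct demo_err *e = kzalloc(sizeof(*e), GFP_KERNEL);

	if (!e)
		return -ENOMEM;
	e->pa_pfn = pa_pfn;
	return radix_tree_insert(&demo_tree, e->pa_pfn, e);
}

This is the property the later "Remove unused code" patch in this series relies on when it drops the siphash-based page hashing.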
Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 14 +--- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 87 ++++++++++++------------- drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 5 ++ 4 files changed, 51 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index dcf1f3dbb5c4..f607ff620015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -476,10 +476,10 @@ struct ras_err_pages { }; struct ras_ecc_err { - u64 hash_index; uint64_t status; uint64_t ipid; uint64_t addr; + uint64_t pa_pfn; struct ras_err_pages err_pages; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 2f84bdb8c594..096e867a6a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -519,18 +519,10 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, ecc_log = &con->umc_ecc_log; mutex_lock(&ecc_log->lock); - ret = radix_tree_insert(ecc_tree, ecc_err->hash_index, ecc_err); - if (!ret) { - struct ras_err_pages *err_pages = &ecc_err->err_pages; - int i; - - /* Reserve memory */ - for (i = 0; i < err_pages->count; i++) - amdgpu_ras_reserve_page(adev, err_pages->pfn[i]); - + ret = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err); + if (!ret) radix_tree_tag_set(ecc_tree, - ecc_err->hash_index, UMC_ECC_NEW_DETECTED_TAG); - } + ecc_err->pa_pfn, UMC_ECC_NEW_DETECTED_TAG); mutex_unlock(&ecc_log->lock); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 54d9f0a44f18..0e6c3ce3ea8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -524,9 +524,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); uint16_t hwid, mcatype; uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; - uint64_t err_addr, hash_val = 0, pa_addr = 0; + uint64_t err_addr, pa_addr = 0; struct ras_ecc_err *ecc_err; - int count, ret; + int count, ret, i; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -559,6 +559,32 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, if (ret) return ret; + ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); + if (!ecc_err) + return -ENOMEM; + + ecc_err->status = status; + ecc_err->ipid = ipid; + ecc_err->addr = addr; + ecc_err->pa_pfn = UMC_V12_ADDR_MASK_BAD_COLS(pa_addr) >> AMDGPU_GPU_PAGE_SHIFT; + + /* If converted pa_pfn is 0, use pa C4 pfn. */ + if (!ecc_err->pa_pfn) + ecc_err->pa_pfn = BIT_ULL(UMC_V12_0_PA_C4_BIT) >> AMDGPU_GPU_PAGE_SHIFT; + + ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); + if (ret) { + if (ret == -EEXIST) + con->umc_ecc_log.de_queried_count++; + else + dev_err(adev->dev, "Fail to log ecc error! 
ret:%d\n", ret); + + kfree(ecc_err); + return ret; + } + + con->umc_ecc_log.de_queried_count++; + memset(page_pfn, 0, sizeof(page_pfn)); count = umc_v12_0_lookup_bad_pages_in_a_row(adev, pa_addr, @@ -568,44 +594,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, return 0; } - ret = amdgpu_umc_build_pages_hash(adev, - page_pfn, count, &hash_val); - if (ret) { - dev_err(adev->dev, "Fail to build error pages hash\n"); - return ret; - } - - ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); - if (!ecc_err) - return -ENOMEM; - - ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL); - if (!ecc_err->err_pages.pfn) { - kfree(ecc_err); - return -ENOMEM; - } - - memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn)); - ecc_err->err_pages.count = count; - - ecc_err->hash_index = hash_val; - ecc_err->status = status; - ecc_err->ipid = ipid; - ecc_err->addr = addr; - - ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); - if (ret) { - if (ret == -EEXIST) - con->umc_ecc_log.de_queried_count++; - else - dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret); - - kfree(ecc_err->err_pages.pfn); - kfree(ecc_err); - return ret; - } - - con->umc_ecc_log.de_queried_count++; + /* Reserve memory */ + for (i = 0; i < count; i++) + amdgpu_ras_reserve_page(adev, page_pfn[i]); /* The problem case is as follows: * 1. GPU A triggers a gpu ras reset, and GPU A drives @@ -631,16 +622,21 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, struct ras_ecc_err *ecc_err, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - uint32_t i = 0; - int ret = 0; + uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; + int ret, i, count; if (!err_data || !ecc_err) return -EINVAL; - for (i = 0; i < ecc_err->err_pages.count; i++) { + memset(page_pfn, 0, sizeof(page_pfn)); + count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT, + page_pfn, ARRAY_SIZE(page_pfn)); + + for (i = 0; i < count; i++) { ret = amdgpu_umc_fill_error_record(err_data, ecc_err->addr, - ecc_err->err_pages.pfn[i] << AMDGPU_GPU_PAGE_SHIFT, + page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT, MCA_IPID_2_UMC_CH(ecc_err->ipid), MCA_IPID_2_UMC_INST(ecc_err->ipid)); if (ret) @@ -674,7 +670,8 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret); break; } - radix_tree_tag_clear(ecc_tree, entries[i]->hash_index, UMC_ECC_NEW_DETECTED_TAG); + radix_tree_tag_clear(ecc_tree, + entries[i]->pa_pfn, UMC_ECC_NEW_DETECTED_TAG); } mutex_unlock(&con->umc_ecc_log.lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index b4974793850b..be5598d76c1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -81,6 +81,11 @@ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) +#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \ + ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \ + (0x1ULL << UMC_V12_0_PA_C4_BIT) | \ + (0x1ULL << UMC_V12_0_PA_R13_BIT))) + bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); From 
a7e8467fbeee654e390aad1736291d273b407a2c Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 11 Jul 2024 16:27:08 +0800 Subject: [PATCH 064/447] drm/amdgpu: Remove unused code Remove unused code. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 23 ------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 86 ------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 5 -- 4 files changed, 124 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d0307c55da50..0fb2d9285834 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2881,9 +2881,6 @@ static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) { mutex_init(&ecc_log->lock); - /* Set any value as siphash key */ - memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key)); - INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL); ecc_log->de_queried_count = 0; ecc_log->prev_de_queried_count = 0; @@ -4611,8 +4608,6 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d if (!err_node) return NULL; - INIT_LIST_HEAD(&err_node->err_info.err_addr_list); - memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); err_data->err_list_count++; @@ -4622,18 +4617,6 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d return &err_node->err_info; } -void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr) -{ - /* This function will be retired. */ - return; -} - -void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr) -{ - list_del(&mca_err_addr->node); - kfree(mca_err_addr); -} - int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info, struct ras_err_addr *err_addr, u64 count) @@ -4650,9 +4633,6 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!err_info) return -EINVAL; - if (err_addr && err_addr->err_status) - amdgpu_ras_add_mca_err_addr(err_info, err_addr); - err_info->ue_count += count; err_data->ue_count += count; @@ -4697,9 +4677,6 @@ int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, if (!err_info) return -EINVAL; - if (err_addr && err_addr->err_status) - amdgpu_ras_add_mca_err_addr(err_info, err_addr); - err_info->de_count += count; err_data->de_count += count; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index f607ff620015..7ddd13d5c06b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -28,7 +28,6 @@ #include #include #include -#include #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" @@ -485,7 +484,6 @@ struct ras_ecc_err { struct ras_ecc_log_info { struct mutex lock; - siphash_key_t ecc_key; struct radix_tree_root de_page_tree; uint64_t de_queried_count; uint64_t prev_de_queried_count; @@ -573,7 +571,6 @@ struct ras_fs_data { }; struct ras_err_addr { - struct list_head node; uint64_t err_status; uint64_t err_ipid; uint64_t err_addr; @@ -584,7 +581,6 @@ struct ras_err_info { u64 ce_count; u64 ue_count; u64 de_count; - struct list_head err_addr_list; }; struct ras_err_node { @@ -957,12 +953,6 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) ssize_t amdgpu_ras_aca_sysfs_read(struct 
device *dev, struct device_attribute *attr, struct aca_handle *handle, char *buf, void *data); -void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, - struct ras_err_addr *err_addr); - -void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, - struct ras_err_addr *mca_err_addr); - void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 096e867a6a6d..2ed55f3c5fa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -204,55 +204,6 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - uint32_t reset, uint32_t timeout_ms) -{ - struct ras_err_data err_data; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - uint32_t timeout = timeout_ms; - - memset(&err_data, 0, sizeof(err_data)); - amdgpu_ras_error_data_init(&err_data); - - do { - - amdgpu_umc_handle_bad_pages(adev, &err_data); - - if (timeout && !err_data.de_count) { - msleep(1); - timeout--; - } - - } while (timeout && !err_data.de_count); - - if (!timeout) - dev_warn(adev->dev, "Can't find bad pages\n"); - - if (err_data.de_count) - dev_info(adev->dev, "%ld new deferred hardware errors detected\n", err_data.de_count); - - if (obj) { - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; - obj->err_data.de_count += err_data.de_count; - } - - amdgpu_ras_error_data_fini(&err_data); - - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - - if (reset || (err_data.err_addr_cnt && con && con->is_rma)) { - con->gpu_reset_flags |= reset; - amdgpu_ras_reset_gpu(adev); - } - - return 0; -} - int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset) @@ -472,43 +423,6 @@ int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, return 0; } -static int amdgpu_umc_uint64_cmp(const void *a, const void *b) -{ - uint64_t *addr_a = (uint64_t *)a; - uint64_t *addr_b = (uint64_t *)b; - - if (*addr_a > *addr_b) - return 1; - else if (*addr_a < *addr_b) - return -1; - else - return 0; -} - -/* Use string hash to avoid logging the same bad pages repeatedly */ -int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, - uint64_t *pfns, int len, uint64_t *val) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - char buf[MAX_UMC_HASH_STRING_SIZE] = {0}; - int offset = 0, i = 0; - uint64_t hash_val; - - if (!pfns || !len) - return -EINVAL; - - sort(pfns, len, sizeof(uint64_t), amdgpu_umc_uint64_cmp, NULL); - - for (i = 0; i < len; i++) - offset += snprintf(&buf[offset], sizeof(buf) - offset, "%llx", pfns[i]); - - hash_val = siphash(buf, offset, &con->umc_ecc_log.ecc_key); - - *val = hash_val; - - return 0; -} - int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 5f50c69c3cec..ce4179db2a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -127,13 +127,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, 
int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data); -int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - uint32_t reset, uint32_t timeout_ms); - int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr); -int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, - uint64_t *pfns, int len, uint64_t *val); int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err); From 7a38efeee6b59d0984ff0470d234a06fe6a7cf3c Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 18 Jul 2024 21:13:29 +0800 Subject: [PATCH 065/447] drm/radeon: fix null pointer dereference in radeon_add_common_modes In radeon_add_common_modes(), the return value of drm_cvt_mode() is assigned to mode, which will lead to a possible NULL pointer dereference on failure of drm_cvt_mode(). Add a check to avoid the NULL pointer dereference. Cc: stable@vger.kernel.org Fixes: d50ba256b5f1 ("drm/kms: start adding command line interface using fb.") Signed-off-by: Ma Ke Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_connectors.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 69693ba5949e..880edabfc9e3 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -505,6 +505,9 @@ static void radeon_add_common_modes(struct drm_encoder *encoder, struct drm_conn continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + continue; + drm_mode_probed_add(connector, mode); } } From ca82ee4e9fc1443f9ceec615918b299766432eec Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 18 Jul 2024 09:30:01 +0530 Subject: [PATCH 066/447] drm/amd/display: Add 'pstate_keepout' kdoc entry in 'optc1_program_timing' Fixes the below warning with gcc W=1: Function parameter or struct member 'pstate_keepout' not described in 'optc1_program_timing' Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index f00d27b7c6fe..097d06023e64 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -148,6 +148,7 @@ void optc1_setup_vertical_interrupt2( * @vstartup_start: Vstartup period. * @vupdate_offset: Vupdate starting position. * @vupdate_width: Vupdate duration. + * @pstate_keepout: determines low power mode timing during refresh * @signal: DC signal types. * @use_vbios: to program timings from BIOS command table. * From f2ac52634963fc38e4935e11077b6f7854e5d700 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Jul 2024 17:54:11 -0400 Subject: [PATCH 067/447] drm/amdgpu/sdma5.2: Update wptr registers as well as doorbell We seem to have a case where SDMA will sometimes miss a doorbell if GFX is entering the powergating state when the doorbell comes in. To work around this, we can update the wptr via MMIO; however, this is only safe because we disallow gfxoff in begin_use() for SDMA 5.2 and then allow it again in end_use().
Enable this workaround while we are root causing the issue with the HW team. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/3440 Tested-by: Friedrich Vock Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 66bb85955fa4..93890f83e270 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -225,6 +225,14 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " @@ -1707,6 +1715,10 @@ static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) * but it shouldn't hurt for other parts since * this GFXOFF will be disallowed anyway when SDMA is * active, this just makes it explicit. + * sdma_v5_2_ring_set_wptr() takes advantage of this + * to update the wptr because sometimes SDMA seems to miss + * doorbells when entering PG. If you remove this, update + * sdma_v5_2_ring_set_wptr() as well! */ amdgpu_gfx_off_ctrl(adev, false); } From 4df9e2200fb8ae7199e46eaffeb9430bc0ba3ea7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 12:01:36 +0530 Subject: [PATCH 068/447] drm/amdgpu: Add sdma_v7_0 ip dump for devcoredump Add ip dump for sdma_v7_0 for devcoredump for all instances of sdma. 
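The dump buffer used here is a single flat allocation covering every SDMA instance, laid out instance-major. A minimal sketch of the indexing contract shared by the dump and print paths (the names are illustrative, not from the patch):

/* One u32 per register per instance:
 *
 *   index = instance * reg_count + reg
 *
 * so instance i owns the contiguous slice
 * [i * reg_count, (i + 1) * reg_count) of the buffer.
 */
static inline u32 demo_dump_read(const u32 *ip_dump, u32 reg_count,
				 u32 instance, u32 reg)
{
	return ip_dump[instance * reg_count + reg];
}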
Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 91 ++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 41b5e45697dc..327b5387949a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -51,6 +51,64 @@ MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_REV), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, 
regSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS), +}; + static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1217,6 +1275,8 @@ static int sdma_v7_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, @@ -1247,6 +1307,13 @@ static int sdma_v7_0_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1263,6 +1330,8 @@ static int sdma_v7_0_sw_fini(void *handle) if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) sdma_v12_0_free_ucode_buffer(adev); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1466,6 +1535,27 @@ static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v7_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v7_0_get_reg_offset(adev, i, + sdma_reg_list_7_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .name = "sdma_v7_0", .early_init = sdma_v7_0_early_init, @@ -1483,6 +1573,7 @@ const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .set_clockgating_state = sdma_v7_0_set_clockgating_state, .set_powergating_state = sdma_v7_0_set_powergating_state, .get_clockgating_state = sdma_v7_0_get_clockgating_state, + .dump_ip_state = sdma_v7_0_dump_ip_state, }; static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { From 666f14cab21b17ccc1bdfe1e82458aa429b3b7e0 Mon Sep 17 00:00:00 2001 From: David Belanger Date: Mon, 10 Jun 2024 16:38:55 -0400 Subject: [PATCH 069/447] drm/amdgpu: Fix atomics on GFX12 If PCIe supports atomics, configure register to prevent DF from breaking atomics in separate load/store operations. 
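As a worked check of the bit arithmetic in the new df_v4_15_hw_init() below (the constants come from the patch itself; this comment block is editorial, not driver code):

/* DisIntAtomicsLclProcessing occupies register bits 3..17
 * (__SHIFT = 0x3, _MASK = 0x0003FFF8). The patch sets field
 * bits 1, 2 and 13:
 *
 *   dis_lcl_proc = (1 << 1) | (1 << 2) | (1 << 13)  = 0x2006
 *   dis_lcl_proc << 3                               = 0x10030
 *
 * i.e. register bits 4, 5 and 16, all inside the field mask,
 * OR-ed into the register via read-modify-write so the other
 * bits of NCSConfigurationRegister1 are preserved.
 */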
Signed-off-by: David Belanger Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_df.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +++ drivers/gpu/drm/amd/amdgpu/df_v4_15.c | 45 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/df_v4_15.h | 30 +++++++++++++ drivers/gpu/drm/amd/amdgpu/soc24.c | 4 ++ .../amd/include/asic_reg/df/df_4_15_offset.h | 28 ++++++++++++ .../amd/include/asic_reg/df/df_4_15_sh_mask.h | 28 ++++++++++++ 8 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/df_v4_15.c create mode 100644 drivers/gpu/drm/amd/amdgpu/df_v4_15.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 9dd8294032ef..38408e4e158e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -106,7 +106,8 @@ amdgpu-y += \ df_v1_7.o \ df_v3_6.o \ df_v4_3.o \ - df_v4_6_2.o + df_v4_6_2.o \ + df_v4_15.o # add GMC block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 1538b2dbfff1..eb605e79ae0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -33,6 +33,7 @@ struct amdgpu_df_hash_status { struct amdgpu_df_funcs { void (*sw_init)(struct amdgpu_device *adev); void (*sw_fini)(struct amdgpu_device *adev); + void (*hw_init)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b241f61fe9c9..ac108fca64fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -37,6 +37,7 @@ #include "df_v3_6.h" #include "df_v4_3.h" #include "df_v4_6_2.h" +#include "df_v4_15.h" #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" @@ -2803,6 +2804,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 6, 2): adev->df.funcs = &df_v4_6_2_funcs; break; + case IP_VERSION(4, 15, 0): + case IP_VERSION(4, 15, 1): + adev->df.funcs = &df_v4_15_funcs; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c new file mode 100644 index 000000000000..2a573e33908b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c @@ -0,0 +1,45 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v4_15.h" + +#include "df/df_4_15_offset.h" +#include "df/df_4_15_sh_mask.h" + +static void df_v4_15_hw_init(struct amdgpu_device *adev) +{ + if (adev->have_atomics_support) { + uint32_t tmp; + uint32_t dis_lcl_proc = (1 << 1 | + 1 << 2 | + 1 << 13); + + tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1); + tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT); + WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp); + } +} + +const struct amdgpu_df_funcs df_v4_15_funcs = { + .hw_init = df_v4_15_hw_init +}; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.h b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h new file mode 100644 index 000000000000..dddf2422112a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h @@ -0,0 +1,30 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DF_V4_15_H__ +#define __DF_V4_15_H__ + +extern const struct amdgpu_df_funcs df_v4_15_funcs; + +#endif /* __DF_V4_15_H__ */ + diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index d27fb4ea6612..7d641d0dadba 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -484,6 +484,10 @@ static int soc24_common_hw_init(void *handle) */ if (adev->nbio.funcs->remap_hdp_registers) adev->nbio.funcs->remap_hdp_registers(adev); + + if (adev->df.funcs->hw_init) + adev->df.funcs->hw_init(adev); + /* enable the doorbell aperture */ soc24_enable_doorbell_aperture(adev, true); diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h new file mode 100644 index 000000000000..c2b009752f60 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _df_4_15_OFFSET_HEADER +#define _df_4_15_OFFSET_HEADER + +#define regNCSConfigurationRegister1 0x0901 +#define regNCSConfigurationRegister1_BASE_IDX 4 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h new file mode 100644 index 000000000000..9868a9c32795 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _df_4_15_SH_MASK_HEADER +#define _df_4_15_SH_MASK_HEADER + +#define NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT 0x3 +#define NCSConfigurationRegister1__DisIntAtomicsLclProcessing_MASK 0x0003FFF8L + +#endif From 93381e6b61804b777f60357d96d6254eb10b9b56 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 18 Jul 2024 22:11:41 +0800 Subject: [PATCH 070/447] drm/amdgpu: fix a possible null pointer dereference In amdgpu_connector_add_common_modes(), the return value of drm_cvt_mode() is assigned to mode, which will lead to a NULL pointer dereference on failure of drm_cvt_mode(). Add a check to avoid the NULL pointer dereference.
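The fix follows the standard defensive pattern for building a mode list; a minimal sketch, where demo_add_mode() is a hypothetical helper rather than the driver function:

#include <drm/drm_modes.h>
#include <drm/drm_probe_helper.h>

static void demo_add_mode(struct drm_device *dev,
			  struct drm_connector *connector, int w, int h)
{
	struct drm_display_mode *mode;

	/* drm_cvt_mode() allocates a mode; under memory pressure it
	 * returns NULL, which must not reach drm_mode_probed_add(). */
	mode = drm_cvt_mode(dev, w, h, 60, false, false, false);
	if (!mode)
		return;
	drm_mode_probed_add(connector, mode);
}

Note that the two sibling fixes in this series differ slightly: the radeon one (patch 065) skips just the failed mode with continue, while this amdgpu one returns early and drops the remaining common modes as well.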
Cc: stable@vger.kernel.org Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Signed-off-by: Ma Ke Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index cae7479c3ecf..bd0fbdc5f55d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -442,6 +442,9 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + return; + drm_mode_probed_add(connector, mode); } } From 6472de66c0aa18d50a4b5ca85f8272e88a737676 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 18 Jul 2024 22:17:35 +0800 Subject: [PATCH 071/447] drm/amd/amdgpu: Fix uninitialized variable warnings Return 0 to avoid returning an uninitialized variable r. Cc: stable@vger.kernel.org Fixes: 230dd6bb6117 ("drm/amd/amdgpu: implement mode2 reset on smu_v13_0_10") Signed-off-by: Ma Ke Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c index 04c797d54511..0af648931df5 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c @@ -91,7 +91,7 @@ static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int From abf839f5ebd98134f51764f2a2841faa6cbf268a Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 12:03:33 +0530 Subject: [PATCH 072/447] drm/amdgpu: add print support for sdma_v_7_0 ip_dump Add print support for ip dump for sdma_v_7_0 in devcoredump. 
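The print callback writes through a struct drm_printer, so the same formatting code can target whatever sink the printer wraps (a devcoredump buffer, a debugfs seq_file, or dmesg). A minimal sketch of that pattern; demo_print_instance() and its parameters are illustrative, not from the patch:

#include <drm/drm_print.h>

static void demo_print_instance(struct drm_printer *p,
				const char * const *names,
				const u32 *vals, u32 reg_count)
{
	u32 j;

	/* Same "%-50s \t 0x%08x" name/value layout the patch uses. */
	for (j = 0; j < reg_count; j++)
		drm_printf(p, "%-50s \t 0x%08x\n", names[j], vals[j]);
}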
Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 327b5387949a..62ef4a737a56 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1535,6 +1535,27 @@ static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v7_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + static void sdma_v7_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1574,6 +1595,7 @@ const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .set_powergating_state = sdma_v7_0_set_powergating_state, .get_clockgating_state = sdma_v7_0_get_clockgating_state, .dump_ip_state = sdma_v7_0_dump_ip_state, + .print_ip_state = sdma_v7_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { From 80237bfc031cd74cb8abf0d21094207284d56a48 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 17:10:33 +0530 Subject: [PATCH 073/447] drm/amdgpu: Add sdma_v4_0 ip dump for devcoredump Add ip dump for sdma_v4_0 for devcoredump for all instances of sdma. 
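As in the 5.2 and 7.0 variants, the register reads in the dump callback below are bracketed by amdgpu_gfx_off_ctrl() so GFXOFF cannot power the block down mid-dump. A minimal sketch of the bracket; demo_read_all_instances() is hypothetical, and driver headers ("amdgpu.h") are assumed in scope:

static void demo_dump_awake(struct amdgpu_device *adev)
{
	/* amdgpu_gfx_off_ctrl() is reference counted in the driver, so
	 * the disallow and re-allow must stay balanced on every path. */
	amdgpu_gfx_off_ctrl(adev, false);	/* keep the block awake  */
	demo_read_all_instances(adev);		/* RREG32 reads are safe */
	amdgpu_gfx_off_ctrl(adev, true);	/* allow GFXOFF again    */
}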
Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 80 ++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 772604feb6ac..f39d3d94ba9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -72,6 +72,53 @@ MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin"); +static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL) +}; + #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -1750,6 +1797,8 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1870,6 +1919,13 @@ static int sdma_v4_0_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump 
buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1890,6 +1946,8 @@ static int sdma_v4_0_sw_fini(void *handle) else amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -2292,6 +2350,27 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v4_0_get_reg_offset(adev, i, + sdma_reg_list_4_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .name = "sdma_v4_0", .early_init = sdma_v4_0_early_init, @@ -2308,6 +2387,7 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .set_clockgating_state = sdma_v4_0_set_clockgating_state, .set_powergating_state = sdma_v4_0_set_powergating_state, .get_clockgating_state = sdma_v4_0_get_clockgating_state, + .dump_ip_state = sdma_v4_0_dump_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { From fec5f8e8c6bcf83ed7a392801d7b44c5ecfc1e82 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 2 Jul 2024 11:54:30 +0200 Subject: [PATCH 074/447] drm/amdgpu: disallow multiple BO_HANDLES chunks in one submit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this commit, only submits with both a BO_HANDLES chunk and a 'bo_list_handle' would be rejected (by amdgpu_cs_parser_bos). But if UMD sent multiple BO_HANDLES, what would happen is: * only the last one would be really used * all the others would leak memory as amdgpu_cs_p1_bo_handles would overwrite the previous p->bo_list value This commit rejects submissions with multiple BO_HANDLES chunks to match the implementation of the parser. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 916b6b8cf7d9..cde2f4548a62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -263,6 +263,10 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (size < sizeof(struct drm_amdgpu_bo_list_in)) goto free_partial_kdata; + /* Only a single BO list is allowed to simplify handling. */ + if (p->bo_list) + ret = -EINVAL; + ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); if (ret) goto free_partial_kdata; From 585e3fdb36f59c5cfed0ae06c852dc1df22b1d60 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 11 Dec 2023 10:45:38 +0530 Subject: [PATCH 075/447] drm/amdgpu: Add empty HDP flush function to JPEG v4.0.3 JPEG v4.0.3 doesn't support HDP flush when RRMT is enabled. Instead, mmsch fw will do the flush. 
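For context on the mechanism: the generic ring layer dispatches HDP flushes through the per-ring function table, so installing an empty callback makes the flush a silent no-op for this one engine while every other IP keeps its normal path. A minimal sketch, assuming the usual amdgpu_ring.h dispatch macro (paraphrased for illustration, not quoted from this patch; the stub name here is hypothetical, the real one is in the hunk below):

/* Ring-level dispatch: whatever hook the IP block installs gets called. */
#define amdgpu_ring_emit_hdp_flush(r) ((r)->funcs->emit_hdp_flush((r)))

static void example_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	/* Intentionally empty: mmsch firmware performs the HDP flush
	 * when RRMT is enabled, so the ring must not emit one itself.
	 */
}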
This change is needed only for JPEG v4.0.3; no backward compatibility is required. Signed-off-by: Lijo Lazar Signed-off-by: Jane Jian Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 04d8966423de..30a143ab592d 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -621,6 +621,13 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); } +static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* JPEG engine access for HDP flush doesn't work when RRMT is enabled. * This is a workaround to avoid any HDP flush through JPEG ring. */ +} + /** * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer * @@ -1072,6 +1079,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_jpeg_dec_ring_test_ring, .test_ib = amdgpu_jpeg_dec_ring_test_ib, .insert_nop = jpeg_v4_0_3_dec_ring_nop, From 49cfaebe48e97500a68d5322a8194736b0a2c3cf Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 11 Dec 2023 11:18:42 +0530 Subject: [PATCH 076/447] drm/amdgpu: Add empty HDP flush function to VCN v4.0.3 VCN v4.0.3 does not support HDP flush with RRMT enabled. Instead, mmsch will do the HDP flush. This change is needed only for VCN v4.0.3; no backward compatibility is required. Signed-off-by: Lijo Lazar Signed-off-by: Jane Jian Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index f53054e39ebb..101b120f6fbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1375,6 +1375,13 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) regUVD_RB_WPTR); } +static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* VCN engine access for HDP flush doesn't work when RRMT is enabled. * This is a workaround to avoid any HDP flush through VCN ring. */ +} + /** * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer * @@ -1415,6 +1422,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_ib = vcn_v2_0_enc_ring_emit_ib, .emit_fence = vcn_v2_0_enc_ring_emit_fence, .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_unified_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, From caaf576292f8ccef5cdc0ac16e77b87dbf6e17ab Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Mon, 15 Jul 2024 18:48:31 +0800 Subject: [PATCH 077/447] drm/amdgpu/vcn: Use offsets local to VCN/JPEG in VF For VCN/JPEG 4.0.3, use only the local addressing scheme. 
- Mask off the bits above the AID0 range. v2: the mmhub case remains as before, using the master XCC. Signed-off-by: Jane Jian Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 19 ++++++++-- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 46 ++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 30a143ab592d..ad524ddc9760 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -32,6 +32,9 @@ #include "vcn/vcn_4_0_3_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#define NORMALIZE_JPEG_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + enum jpeg_engin_status { UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, @@ -824,7 +827,13 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -865,7 +874,13 @@ void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 101b120f6fbd..9bae95538b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define NORMALIZE_VCN_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); @@ -1375,6 +1378,43 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) regUVD_RB_WPTR); } +static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for reg 
writes */ + vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, + lower_32_bits(pd_addr), 0xffffffff); +} + static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) { /* VCN engine access for HDP flush doesn't work when RRMT is enabled. @@ -1421,7 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ .emit_ib = vcn_v2_0_enc_ring_emit_ib, .emit_fence = vcn_v2_0_enc_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_unified_ring_test_ib, @@ -1430,8 +1470,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; From f9e292cbba21e79abea7315b41a52c36ea2b6980 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 11:17:59 -0400 Subject: [PATCH 078/447] drm/amdkfd: kfd_bo_mapped_dev support partition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change amdgpu_amdkfd_bo_mapped_to_dev to use drm_priv as parameter instead of adev, to support spatial partition. This is only used by CRIU checkpoint restore now. No functional change. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index e7bb1ca35801..66b1c72c81e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -345,7 +345,7 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad pasid_notify pasid_fn, void *data, uint32_t reset); bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); -bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); +bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 11672bfe4fad..199e387d35f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -3200,12 +3200,13 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem) +bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem) { + struct amdgpu_vm *vm = drm_priv_to_vm(drm_priv); struct kfd_mem_attachment *entry; list_for_each_entry(entry, &mem->attachments, list) { - if (entry->is_mapped && entry->adev == adev) + if 
(entry->is_mapped && entry->bo_va->base.vm == vm) return true; } return false; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 32e5db509560..1d9b21628be7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1963,7 +1963,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo); for (i = 0; i < p->n_pdds; i++) { - if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem)) + if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem)) bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id; } From c86ad39140bbcb9dc75a10046c2221f657e8083b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Sun, 14 Jul 2024 11:11:05 -0400 Subject: [PATCH 079/447] drm/amdkfd: amdkfd_free_gtt_mem clear the correct pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass a pointer reference to amdgpu_bo_unref so that it clears the correct pointer; otherwise amdgpu_bo_unref only clears the local variable and the original pointer is never set to NULL, which could lead to a use-after-free bug. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 03205e3c3746..c272461d70a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -364,15 +364,15 @@ allocate_mem_reserve_bo_failed: return r; } -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj) +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj) { - struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; + struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj; - amdgpu_bo_reserve(bo, true); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unpin(bo); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&(bo)); + amdgpu_bo_reserve(*bo, true); + amdgpu_bo_kunmap(*bo); + amdgpu_bo_unpin(*bo); + amdgpu_bo_unreserve(*bo); + amdgpu_bo_unref(bo); } int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 66b1c72c81e5..6e591280774b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -235,7 +235,7 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj); +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj); int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, void **mem_obj); void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 
1d9b21628be7..823f245dc7d0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -423,7 +423,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, err_create_queue: if (wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&wptr_bo); err_wptr_map_gart: err_bind_process: err_pdd: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index f4d20adaa068..6619028dd58b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -907,7 +907,7 @@ node_alloc_error: kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); alloc_gtt_mem_failure: dev_err(kfd_device, "device %x:%x NOT added due to errors\n", @@ -925,7 +925,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); } kfree(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 4f48507418d2..420444eb8e98 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2621,7 +2621,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, { WARN(!mqd, "No hiq sdma mqd trunk to free"); - amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem); } void device_queue_manager_uninit(struct device_queue_manager *dqm) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 50a81da43ce1..d9ae854b6908 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -225,7 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { if (mqd_mem_obj->gtt_mem) { - amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, &mqd_mem_obj->gtt_mem); kfree(mqd_mem_obj); } else { kfd_gtt_sa_free(mm->dev, mqd_mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 17e42161b015..9e29b92eb523 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1048,7 +1048,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, - pdd->proc_ctx_bo); + &pdd->proc_ctx_bo); /* * before destroying pdd, make sure to report availability * for auto suspend diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 21f5a1fb3bf8..36f0460cbffe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -204,9 +204,9 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, } if (dev->kfd->shared_resources.enable_mes) { - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo); if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); + 
amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo); } } From a11b36ba9c1ac494c6a5cf7f1a5e68c1ce4dbe18 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 17:12:56 +0530 Subject: [PATCH 080/447] drm/amdgpu: add print support for sdma_v_4_0 ip_dump Add print support for ip dump for sdma_v_4_0 in devcoredump. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index f39d3d94ba9b..23ef4eb36b40 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2350,6 +2350,27 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + static void sdma_v4_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2388,6 +2409,7 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .set_powergating_state = sdma_v4_0_set_powergating_state, .get_clockgating_state = sdma_v4_0_get_clockgating_state, .dump_ip_state = sdma_v4_0_dump_ip_state, + .print_ip_state = sdma_v4_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { From db54a725d57985c869f6fe4153a36cd229ab0b73 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 18:40:47 +0530 Subject: [PATCH 081/447] drm/amdgpu: Add sdma_v4_4_2 ip dump for devcoredump Add ip dump for sdma_v4_4_2 for devcoredump for all instances of sdma. 
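As in the sdma_v4_0 and sdma_v5_2 patches earlier in this series, the dump lives in one flat, instance-major array. A minimal sketch of the indexing convention the series shares (the helper function is hypothetical; the field names and layout are taken from the patches themselves):

/* instance i occupies ip_dump[i * reg_count] .. ip_dump[(i + 1) * reg_count - 1] */
static uint32_t *sdma_ip_dump_entry(struct amdgpu_device *adev,
				    uint32_t reg_count, int inst, int reg)
{
	return &adev->sdma.ip_dump[inst * reg_count + reg];
}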
Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 80 ++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 2c55bfd935bb..67e0e894579a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -46,6 +46,53 @@ MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); +static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_VM_CNTL) +}; + #define mmSMNAID_AID0_MCA_SMU 0x03b30400 #define WREG32_SDMA(instance, offset, value) \ @@ -1291,6 +1338,8 @@ static int sdma_v4_4_2_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 aid_id; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { @@ -1386,6 +1435,13 @@ static int sdma_v4_4_2_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if 
(ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1406,6 +1462,8 @@ static int sdma_v4_4_2_sw_fini(void *handle) else amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1799,6 +1857,27 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_4_2_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v4_4_2_get_reg_offset(adev, i, + sdma_reg_list_4_4_2[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .name = "sdma_v4_4_2", .early_init = sdma_v4_4_2_early_init, @@ -1815,6 +1894,7 @@ const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .set_clockgating_state = sdma_v4_4_2_set_clockgating_state, .set_powergating_state = sdma_v4_4_2_set_powergating_state, .get_clockgating_state = sdma_v4_4_2_get_clockgating_state, + .dump_ip_state = sdma_v4_4_2_dump_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { From fb91065851cd5f2735348c5f3eddeeca3d7c2973 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 11:53:50 -0400 Subject: [PATCH 082/447] drm/amdkfd: Refactor queue wptr_bo GART mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper function kfd_queue_acquire_buffers to take a reference on the queue wptr_bo from the queue write_ptr, if it is mapped to the KFD node with the expected size. Add wptr_bo to structure queue_properties, because structure queue is allocated only after the queue buffers are validated; this also lets us remove the wptr_bo parameter from pqm_create_queue. Rename the structure queue member to wptr_bo_gart, which holds the wptr_bo reference for GART mapping and unmapping. Move the MES wptr_bo_gart mapping to init_user_queue, the same location as the queue ctx_bo GART mapping. 
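In outline, the new ownership model pairs an acquire at queue creation with a release on the destroy and uninit paths. A simplified sketch of the create path, using the call names from this patch (error handling abbreviated; see kfd_ioctl_create_queue in the diff for the real flow):

err = kfd_queue_acquire_buffers(pdd, &q_properties);	/* takes a wptr_bo reference */
if (!err) {
	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
			       NULL, NULL, NULL, &doorbell_offset_in_process);
	if (err)
		kfd_queue_release_buffers(pdd, &q_properties);	/* drop it on failure */
}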
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 56 +++--------------- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 13 +++-- .../amd/amdkfd/kfd_process_queue_manager.c | 45 +++++++++++---- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 57 +++++++++++++++++++ 7 files changed, 116 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 6e591280774b..4ed49265c764 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -322,7 +322,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, void **kptr, uint64_t *size); void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo); +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence __rcu **ef); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 199e387d35f4..0ab37e7aec26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2226,11 +2226,12 @@ int amdgpu_amdkfd_gpuvm_sync_memory( /** * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count * @bo: Buffer object to be mapped + * @bo_gart: Return bo reference * * Before return, bo reference count is incremented. To release the reference and unpin/ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. 
*/ -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart) { int ret; @@ -2257,7 +2258,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); + *bo_gart = amdgpu_bo_ref(bo); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 823f245dc7d0..202f24ee4bd7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -247,8 +247,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->priority = args->queue_priority; q_properties->queue_address = args->ring_base_address; q_properties->queue_size = args->ring_size; - q_properties->read_ptr = (uint32_t *) args->read_pointer_address; - q_properties->write_ptr = (uint32_t *) args->write_pointer_address; + q_properties->read_ptr = (void __user *)args->read_pointer_address; + q_properties->write_ptr = (void __user *)args->write_pointer_address; q_properties->eop_ring_buffer_address = args->eop_buffer_address; q_properties->eop_ring_buffer_size = args->eop_buffer_size; q_properties->ctx_save_restore_area_address = @@ -306,7 +306,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, struct kfd_process_device *pdd; struct queue_properties q_properties; uint32_t doorbell_offset_in_process = 0; - struct amdgpu_bo *wptr_bo = NULL; memset(&q_properties, 0, sizeof(struct queue_properties)); @@ -342,53 +341,17 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, } } - /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work - * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) - */ - if (dev->kfd->shared_resources.enable_mes && - ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) - >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { - struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; - - wptr_vm = drm_priv_to_vm(pdd->drm_priv); - err = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (err) - goto err_wptr_map_gart; - - wptr_mapping = amdgpu_vm_bo_lookup_mapping( - wptr_vm, args->write_pointer_address >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - pr_err("Failed to lookup wptr bo\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - - wptr_bo = wptr_mapping->bo_va->base.bo; - if (wptr_bo->tbo.base.size > PAGE_SIZE) { - pr_err("Requested GART mapping for wptr bo larger than one page\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - if (dev->adev != amdgpu_ttm_adev(wptr_bo->tbo.bdev)) { - pr_err("Queue memory allocated to wrong device\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - - err = amdgpu_amdkfd_map_gtt_bo_to_gart(wptr_bo); - if (err) { - pr_err("Failed to map wptr bo to GART\n"); - goto err_wptr_map_gart; - } + err = kfd_queue_acquire_buffers(pdd, &q_properties); + if (err) { + pr_debug("failed to acquire user queue buffers\n"); + goto err_acquire_queue_buf; } pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo, + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL, &doorbell_offset_in_process); if (err != 0) goto err_create_queue; @@ -422,9 +385,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, return 0; 
err_create_queue: - if (wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&wptr_bo); -err_wptr_map_gart: + kfd_queue_release_buffers(pdd, &q_properties); +err_acquire_queue_buf: err_bind_process: err_pdd: mutex_unlock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 420444eb8e98..fdc76c24b2e7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -208,10 +208,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.mqd_addr = q->gart_mqd_addr; queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; - if (q->wptr_bo) { - wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); - queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off; - } + wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); + queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off; queue_input.is_kfd_process = 1; queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2b3ec92981e8..aba9bcd91f65 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -494,8 +494,8 @@ struct queue_properties { uint64_t queue_size; uint32_t priority; uint32_t queue_percent; - uint32_t *read_ptr; - uint32_t *write_ptr; + void __user *read_ptr; + void __user *write_ptr; void __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; @@ -522,6 +522,8 @@ struct queue_properties { uint64_t tba_addr; uint64_t tma_addr; uint64_t exception_status; + + struct amdgpu_bo *wptr_bo; }; #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \ @@ -604,7 +606,7 @@ struct queue { uint64_t gang_ctx_gpu_addr; void *gang_ctx_cpu_ptr; - struct amdgpu_bo *wptr_bo; + struct amdgpu_bo *wptr_bo_gart; }; enum KFD_MQD_TYPE { @@ -1284,6 +1286,10 @@ int init_queue(struct queue **q, const struct queue_properties *properties); void uninit_queue(struct queue *q); void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); +int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, + u64 expected_size); +int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_node *dev); @@ -1320,7 +1326,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, - struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 36f0460cbffe..4947f28b3afb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -205,18 +205,21 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, if (dev->kfd->shared_resources.enable_mes) { amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo); - if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, 
(void **)&pqn->q->wptr_bo_gart); } } void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; + struct kfd_process_device *pdd; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - if (pqn->q) + if (pqn->q) { + pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); + kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); + } kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); @@ -231,8 +234,7 @@ void pqm_uninit(struct process_queue_manager *pqm) static int init_user_queue(struct process_queue_manager *pqm, struct kfd_node *dev, struct queue **q, struct queue_properties *q_properties, - struct file *f, struct amdgpu_bo *wptr_bo, - unsigned int qid) + struct file *f, unsigned int qid) { int retval; @@ -263,12 +265,32 @@ static int init_user_queue(struct process_queue_manager *pqm, goto cleanup; } memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - (*q)->wptr_bo = wptr_bo; + + /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work + * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) + */ + if (((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) + >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { + if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) { + pr_err("Queue memory allocated to wrong device\n"); + retval = -EINVAL; + goto free_gang_ctx_bo; + } + + retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo, + &(*q)->wptr_bo_gart); + if (retval) { + pr_err("Failed to map wptr bo to GART\n"); + goto free_gang_ctx_bo; + } + } } pr_debug("PQM After init queue"); return 0; +free_gang_ctx_bo: + amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo); cleanup: uninit_queue(*q); *q = NULL; @@ -280,7 +302,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, - struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, @@ -351,7 +372,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, * allocate_sdma_queue() in create_queue() has the * corresponding check logic. 
*/ - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -372,7 +393,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -490,6 +511,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q) { + retval = kfd_queue_release_buffers(pdd, &pqn->q->properties); + if (retval) + goto err_destroy_queue; + kfd_procfs_del_queue(pqn->q); dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); @@ -971,7 +996,7 @@ int kfd_criu_restore_queue(struct kfd_process *p, print_queue_properties(&qp); - ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack, + ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 0f6992b1895c..b4529ec298a9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -82,3 +82,60 @@ void uninit_queue(struct queue *q) { kfree(q); } + +int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, + u64 expected_size) +{ + struct amdgpu_bo_va_mapping *mapping; + u64 user_addr; + u64 size; + + user_addr = (u64)addr >> AMDGPU_GPU_PAGE_SHIFT; + size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; + + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr); + if (!mapping) + goto out_err; + + if (user_addr != mapping->start || user_addr + size - 1 != mapping->last) { + pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n", + expected_size, mapping->start << AMDGPU_GPU_PAGE_SHIFT, + (mapping->last - mapping->start + 1) << AMDGPU_GPU_PAGE_SHIFT); + goto out_err; + } + + *pbo = amdgpu_bo_ref(mapping->bo_va->base.bo); + return 0; + +out_err: + *pbo = NULL; + return -EINVAL; +} + +int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) +{ + struct amdgpu_vm *vm; + int err; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE); + if (err) + goto out_unreserve; + + amdgpu_bo_unreserve(vm->root.bo); + return 0; + +out_unreserve: + amdgpu_bo_unreserve(vm->root.bo); + return err; +} + +int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) +{ + amdgpu_bo_unref(&properties->wptr_bo); + return 0; +} From 72dc6bf159467f43667bead6016965821186490b Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 3 Jul 2024 14:44:15 -0400 Subject: [PATCH 083/447] drm/amd/display: Remove hardmax usage for dcn401 [WHY&HOW] Hardmax message will be retired for dcn4, so this removes it. 
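In place of the dedicated hard-max message, the clock manager now snapshots the current clocks, overrides the DRAM clock fields, and drives the standard block sequence. A rough sketch of the replacement path, using the function names from the diff below (simplified; the real dcn401_set_hard_min_memclk also handles the current_mode case):

struct dc_clocks new_clocks;
int num_steps;

memcpy(&new_clocks, &clk_mgr_base->clks, sizeof(struct dc_clocks));
new_clocks.dramclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz * 1000;
new_clocks.idle_dramclk_khz = new_clocks.dramclk_khz;
new_clocks.p_state_change_support = true;
num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base,
		context, &new_clocks, true);
dcn401_execute_block_sequence(clk_mgr_base, num_steps);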
Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 44 ++++++++++--------- drivers/gpu/drm/amd/display/dc/core/dc.c | 11 ++--- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 3 -- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index c453c5f15ce7..cce425dd62d2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -931,12 +931,12 @@ static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned static unsigned int dcn401_build_update_bandwidth_clocks_sequence( struct clk_mgr *clk_mgr_base, struct dc_state *context, + struct dc_clocks *new_clocks, bool safe_to_lower) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal); struct dc *dc = clk_mgr_base->ctx->dc; - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence; bool enter_display_off = false; bool update_active_fclk = false; @@ -1218,13 +1218,13 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( static unsigned int dcn401_build_update_display_clocks_sequence( struct clk_mgr *clk_mgr_base, struct dc_state *context, + struct dc_clocks *new_clocks, bool safe_to_lower) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal); struct dc *dc = clk_mgr_base->ctx->dc; struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence; bool force_reset = false; bool update_dispclk = false; @@ -1375,6 +1375,7 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, /* build bandwidth related clocks update sequence */ num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, context, + &context->bw_ctx.bw.dcn.clk, safe_to_lower); /* execute sequence */ @@ -1383,6 +1384,7 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, /* build display related clocks update sequence */ num_steps = dcn401_build_update_display_clocks_sequence(clk_mgr_base, context, + &context->bw_ctx.bw.dcn.clk, safe_to_lower); /* execute sequence */ @@ -1474,33 +1476,34 @@ static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base) static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool current_mode) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + const struct dc *dc = clk_mgr->base.ctx->dc; + struct dc_state *context = dc->current_state; + struct dc_clocks new_clocks; + int num_steps; if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) return; + /* build clock update */ + memcpy(&new_clocks, &clk_mgr_base->clks, sizeof(struct dc_clocks)); + if (current_mode) { - if (clk_mgr_base->clks.p_state_change_support) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); - else - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->max_memclk_mhz); + new_clocks.dramclk_khz = context->bw_ctx.bw.dcn.clk.dramclk_khz; 
+ new_clocks.idle_dramclk_khz = context->bw_ctx.bw.dcn.clk.idle_dramclk_khz; + new_clocks.p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; } else { - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz); + new_clocks.dramclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz * 1000; + new_clocks.idle_dramclk_khz = new_clocks.dramclk_khz; + new_clocks.p_state_change_support = true; } -} -/* Set max memclk to highest DPM value */ -static void dcn401_set_hard_max_memclk(struct clk_mgr *clk_mgr_base) -{ - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, + context, + &new_clocks, + true); - if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - return; - - dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->max_memclk_mhz); + /* execute sequence */ + dcn401_execute_block_sequence(clk_mgr_base, num_steps); } /* Get current memclk states, update bounding box */ @@ -1631,7 +1634,6 @@ static struct clk_mgr_funcs dcn401_funcs = { .init_clocks = dcn401_init_clocks, .notify_wm_ranges = dcn401_notify_wm_ranges, .set_hard_min_memclk = dcn401_set_hard_min_memclk, - .set_hard_max_memclk = dcn401_set_hard_max_memclk, .get_memclk_states_from_smu = dcn401_get_memclk_states_from_smu, .are_clock_states_equal = dcn401_are_clock_states_equal, .enable_pme_wa = dcn401_enable_pme_wa, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f07b13ad4ead..b71c4d8e73dd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5462,9 +5462,10 @@ static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int memcl hubp->funcs->set_blank_regs(hubp, true); } } - - dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); - dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); + if (dc->clk_mgr->funcs->set_max_memclk) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); + if (dc->clk_mgr->funcs->set_min_memclk) + dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; @@ -5513,7 +5514,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) if (enable && !dc->clk_mgr->dc_mode_softmax_enabled) { if (p_state_change_support) { - if (funcMin <= softMax) + if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, softMax); // else: No-Op } else { @@ -5523,7 +5524,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) } } else if (!enable && dc->clk_mgr->dc_mode_softmax_enabled) { if (p_state_change_support) { - if (funcMin <= softMax) + if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, maxDPM); // else: No-Op } else { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index f4c1547a368f..779960278a5c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -416,9 +416,6 @@ void dcn401_init_hw(struct dc *dc) if (dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) - 
dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); - if (dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( dc->res_pool->hubbub, false, false); From 295d91cbc700651782a60572f83c24861607b648 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Mon, 8 Jul 2024 19:29:49 -0400 Subject: [PATCH 084/447] drm/amd/display: Check for NULL pointer [why & how] Need to make sure plane_state is initialized before accessing its members. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Xi (Alex) Liu Signed-off-by: Sung Joon Kim Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_surface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index 067f6555cfdf..ccbb15f1638c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -143,7 +143,8 @@ const struct dc_plane_status *dc_plane_get_status( if (pipe_ctx->plane_state != plane_state) continue; - pipe_ctx->plane_state->status.is_flip_pending = false; + if (pipe_ctx->plane_state) + pipe_ctx->plane_state->status.is_flip_pending = false; break; } From 3f7477bfbb906ec1cd9ad681475a04a142345eae Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Tue, 9 Jul 2024 15:56:36 -0400 Subject: [PATCH 085/447] drm/amd/display: Add private data type for RCG [why & how] Add private data types for better RCG control Reviewed-by: Chris Park Reviewed-by: Yihan Zhu Signed-off-by: Hansen Dsouza Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 68cd3258f4a9..64b25e5d9d7a 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -41,6 +41,87 @@ #define DC_LOGGER \ dccg->ctx->logger +enum physymclk_fe_source { + PHYSYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A + PHYSYMCLK_FE_SYMCLK_B, + PHYSYMCLK_FE_SYMCLK_C, + PHYSYMCLK_FE_SYMCLK_D, + PHYSYMCLK_FE_SYMCLK_E, + PHYSYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum physymclk_source { + PHYSYMCLK_PHYCLK = 0, // Select symclk as source of clock which is output to PHY through DCIO. + PHYSYMCLK_PHYD18CLK, // Select phyd18clk as the source of clock which is output to PHY through DCIO. + PHYSYMCLK_PHYD32CLK, // Select phyd32clk as the source of clock which is output to PHY through DCIO. 
+ PHYSYMCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dtbclk_source { + DTBCLK_DPREFCLK = 0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same) + DTBCLK_DPREFCLK_0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same) + DTBCLK_DTBCLK0, // Selects source for DTBCLK_P# as DTBCLK0 + DTBCLK_DTBCLK1, // Selects source for DTBCLK_P# as DTBCLK0 + DTBCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dppclk_clock_source { + DPP_REFCLK = 0, // refclk is selected + DPP_DCCG_DTO, // Functional clock selected is DTO tuned DPPCLK +}; + +enum dp_stream_clk_source { + DP_STREAM_DTBCLK_P0 = 0, // Selects functional for DP_STREAM_CLK as DTBCLK_P# + DP_STREAM_DTBCLK_P1, + DP_STREAM_DTBCLK_P2, + DP_STREAM_DTBCLK_P3, + DP_STREAM_DTBCLK_P4, + DP_STREAM_DTBCLK_P5, + DP_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum hdmi_char_clk { + HDMI_CHAR_PHYAD18CLK = 0, // Selects functional for hdmi_char_clk as UNIPHYA PHYD18CLK + HDMI_CHAR_PHYBD18CLK, + HDMI_CHAR_PHYCD18CLK, + HDMI_CHAR_PHYDD18CLK, + HDMI_CHAR_PHYED18CLK, + HDMI_CHAR_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum hdmi_stream_clk_source { + HDMI_STREAM_DTBCLK_P0 = 0, // Selects functional for HDMI_STREAM_CLK as DTBCLK_P# + HDMI_STREAM_DTBCLK_P1, + HDMI_STREAM_DTBCLK_P2, + HDMI_STREAM_DTBCLK_P3, + HDMI_STREAM_DTBCLK_P4, + HDMI_STREAM_DTBCLK_P5, + HDMI_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk32_se_clk_source { + SYMCLK32_SE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK + SYMCLK32_SE_PHYBD32CLK, + SYMCLK32_SE_PHYCD32CLK, + SYMCLK32_SE_PHYDD32CLK, + SYMCLK32_SE_PHYED32CLK, + SYMCLK32_SE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk32_le_clk_source { + SYMCLK32_LE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK + SYMCLK32_LE_PHYBD32CLK, + SYMCLK32_LE_PHYCD32CLK, + SYMCLK32_LE_PHYDD32CLK, + SYMCLK32_LE_PHYED32CLK, + SYMCLK32_LE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dsc_clk_source { + DSC_CLK_REF_CLK = 0, // Ref clock selected for DSC_CLK + DSC_DTO_TUNED_CK_GPU_DISCLK_3, // DTO divided clock selected as functional clock +}; + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); From 6fa4bf3dce0668a96faca0024e382f4489a9cc9b Mon Sep 17 00:00:00 2001 From: Revalla Hari Krishna Date: Mon, 8 Jul 2024 15:35:08 +0530 Subject: [PATCH 086/447] drm/amd/display: Refactoring HPO [Why] To refactor the HPO files. [How] Move HPO-related files into the dedicated hpo folder and update the Makefiles. 
From 6fa4bf3dce0668a96faca0024e382f4489a9cc9b Mon Sep 17 00:00:00 2001
From: Revalla Hari Krishna
Date: Mon, 8 Jul 2024 15:35:08 +0530
Subject: [PATCH 086/447] drm/amd/display: Refactoring HPO

[Why]
To refactor HPO files.

[How]
Move HPO-related files into the dedicated hpo folder and update the
Makefiles.

Reviewed-by: Martin Leung
Signed-off-by: Revalla Hari Krishna
Signed-off-by: Aurabindo Pillai
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dcn30/Makefile      |  2 --
 drivers/gpu/drm/amd/display/dc/dcn31/Makefile      |  2 +-
 drivers/gpu/drm/amd/display/dc/hpo/Makefile        | 15 +++++++++++++++
 .../{ => hpo}/dcn31/dcn31_hpo_dp_link_encoder.c    |  0
 .../{ => hpo}/dcn31/dcn31_hpo_dp_link_encoder.h    |  0
 .../{ => hpo}/dcn31/dcn31_hpo_dp_stream_encoder.c  |  0
 .../{ => hpo}/dcn31/dcn31_hpo_dp_stream_encoder.h  |  0
 7 files changed, 16 insertions(+), 3 deletions(-)
 rename drivers/gpu/drm/amd/display/dc/{ => hpo}/dcn31/dcn31_hpo_dp_link_encoder.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hpo}/dcn31/dcn31_hpo_dp_link_encoder.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hpo}/dcn31/dcn31_hpo_dp_stream_encoder.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hpo}/dcn31/dcn31_hpo_dp_stream_encoder.h (100%)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index ccb4b21338b9..b17277de0340 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -28,8 +28,6 @@ DCN30 := dcn30_vpg.o \
 	dcn30_cm_common.o \
 	dcn30_mmhubbub.o \
-
-
 AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN30)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index e2601d0aba41..d510e4652c18 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -5,7 +5,7 @@
 # Makefile for dcn31.
 
 DCN31 = dcn31_panel_cntl.o \
-	dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
+	dcn31_apg.o \
 	dcn31_afmt.o dcn31_vpg.o
 
 AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31))

diff --git a/drivers/gpu/drm/amd/display/dc/hpo/Makefile b/drivers/gpu/drm/amd/display/dc/hpo/Makefile
index c248bd86b477..7f2c9ee0dff1 100644
--- a/drivers/gpu/drm/amd/display/dc/hpo/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/hpo/Makefile
@@ -25,6 +25,21 @@ ifdef CONFIG_DRM_AMD_DC_FP
 ###############################################################################
+# DCN30
+###############################################################################
+
+AMD_DAL_HPO_DCN30 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn30/,$(HPO_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN30)
+###############################################################################
+# DCN31
+###############################################################################
+HPO_DCN31 = dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o
+
+AMD_DAL_HPO_DCN31 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn31/,$(HPO_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN31)
+###############################################################################
 # DCN32
 ###############################################################################
 HPO_DCN32 = dcn32_hpo_dp_link_encoder.o

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c
rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h
rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c
rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h

From 332315885d3ccc6d8fe99700f3c2e4c24aa65ab7 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Thu, 4 Jul 2024 11:54:34 -0600
Subject: [PATCH 087/447] drm/amd/display: Remove ASSERT if significance is zero in math_ceil2

In the DML math_ceil2 function, there is one ASSERT if the significance
is equal to zero. However, significance can legitimately be zero; that
is not an issue for a ceil function, but the current ASSERT triggers
warnings in those cases. This commit removes the ASSERT on zero
significance to avoid the unnecessary noise.

Cc: Mario Limonciello
Cc: Alex Deucher
Cc: stable@vger.kernel.org
Reviewed-by: Chaitanya Dhere
Signed-off-by: Rodrigo Siqueira
Signed-off-by: Aurabindo Pillai
Signed-off-by: Alex Deucher
---
 .../dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c
index 4822dbcc86bb..e17b5ceba447 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c
@@ -63,8 +63,6 @@ double math_ceil(const double arg)
 
 double math_ceil2(const double arg, const double significance)
 {
-	ASSERT(significance != 0);
-
 	return ((int)(arg / significance + 0.99999)) * significance;
 }
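For context, math_ceil2() rounds arg up to the next multiple of significance. The expression that survives the ASSERT removal can be exercised standalone; in this sketch, math_ceil2_sketch is a hypothetical copy of the body, not the DML symbol itself:

  #include <stdio.h>

  /* Same expression as DML's math_ceil2() body. */
  static double math_ceil2_sketch(double arg, double significance)
  {
  	return ((int)(arg / significance + 0.99999)) * significance;
  }

  int main(void)
  {
  	/* 2.1 / 0.25 = 8.4; 8.4 + 0.99999 = 9.39999; (int) -> 9; 9 * 0.25 = 2.25 */
  	printf("%f\n", math_ceil2_sketch(2.1, 0.25));	/* prints 2.250000 */
  	return 0;
  }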
From 14d6ca0740e6237f4bca2dabee4e240b6f4be508 Mon Sep 17 00:00:00 2001
From: Hansen Dsouza
Date: Tue, 9 Jul 2024 16:50:05 -0400
Subject: [PATCH 088/447] drm/amd/display: Add RCG helper functions

[why & how]
Add standard RCG helpers based on DCCG spec

Reviewed-by: Daniel Miess
Reviewed-by: Muhammad Ahmed
Signed-off-by: Hansen Dsouza
Signed-off-by: Aurabindo Pillai
Signed-off-by: Alex Deucher
---
 .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 307 ++++++++++++++++++
 1 file changed, 307 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
index 64b25e5d9d7a..76f069f703ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -122,6 +122,302 @@ enum dsc_clk_source {
 	DSC_DTO_TUNED_CK_GPU_DISCLK_3,	// DTO divided clock selected as functional clock
 };
 
+
+static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dccg35_set_symclk32_se_rcg(
+	struct dccg *dccg,
+	int inst,
+	bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+		return;
+
+	/* SYMCLK32_ROOT_SE#_GATE_DISABLE will clock gate in DCCG */
+	/* SYMCLK32_SE#_GATE_DISABLE will clock gate in HPO only */
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE0_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_SE0_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE1_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_SE1_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE2_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_SE2_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE3_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_SE3_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dccg35_set_symclk32_le_rcg(
+	struct dccg *dccg,
+	int inst,
+	bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_LE0_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_LE0_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_LE1_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_LE1_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dccg35_set_physymclk_rcg(
+	struct dccg *dccg,
+	int inst,
+	bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+				PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dccg35_set_physymclk_fe_rcg(
+	struct dccg *dccg,
+	int inst,
+	bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKA_FE_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKB_FE_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKC_FE_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKD_FE_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5,
+				SYMCLKE_FE_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable)
+{
+
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+static void dccg35_set_dppclk_rcg(struct dccg *dccg,
+	int inst, bool enable)
+{
+
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+static void dccg35_set_dpstreamclk_rcg(
+	struct dccg *dccg,
+	int inst,
+	bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK0_GATE_DISABLE, enable ? 0 : 1,
+				DPSTREAMCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK1_GATE_DISABLE, enable ? 0 : 1,
+				DPSTREAMCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK2_GATE_DISABLE, enable ? 0 : 1,
+				DPSTREAMCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK3_GATE_DISABLE, enable ? 0 : 1,
+				DPSTREAMCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dccg35_set_smclk32_se_rcg(
+	struct dccg *dccg,
+	int inst,
+	bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE0_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_SE0_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE1_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_SE1_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE2_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_SE2_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+				SYMCLK32_SE3_GATE_DISABLE, enable ? 0 : 1,
+				SYMCLK32_ROOT_SE3_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
 static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg)
 {
 	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
@@ -1123,6 +1419,17 @@ struct dccg *dccg35_create(
 		return NULL;
 	}
 
+	/* Temporary declaration to handle unused static functions */
+	(void)&dccg35_set_dsc_clk_rcg;
+	(void)&dccg35_set_symclk32_se_rcg;
+	(void)&dccg35_set_symclk32_le_rcg;
+	(void)&dccg35_set_physymclk_rcg;
+	(void)&dccg35_set_physymclk_fe_rcg;
+	(void)&dccg35_set_dtbclk_p_rcg;
+	(void)&dccg35_set_dppclk_rcg;
+	(void)&dccg35_set_dpstreamclk_rcg;
+	(void)&dccg35_set_smclk32_se_rcg;
+
 	base = &dccg_dcn->base;
 	base->ctx = ctx;
 	base->funcs = &dccg35_funcs;
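One detail worth calling out in the helpers above: the hardware fields are *_GATE_DISABLE, so enabling root clock gating (RCG) means writing 0 and disabling it means writing 1, which is why every case reads `enable ? 0 : 1`. A compressed sketch of that inversion, with hypothetical names standing in for the DCCG registers:

  #include <stdbool.h>
  #include <stdio.h>

  /* Stand-in for REG_UPDATE on a *_ROOT_GATE_DISABLE field. */
  static void write_root_gate_disable_sketch(int inst, int value)
  {
  	printf("CLK%d_ROOT_GATE_DISABLE = %d\n", inst, value);
  }

  static void set_clk_rcg_sketch(int inst, bool enable)
  {
  	/* enable == true -> allow the root gate -> GATE_DISABLE = 0 */
  	write_root_gate_disable_sketch(inst, enable ? 0 : 1);
  }

  int main(void)
  {
  	set_clk_rcg_sketch(0, true);	/* prints CLK0_ROOT_GATE_DISABLE = 0 */
  	return 0;
  }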
From 0cf80506918da0d2cd9d31e3b07831f94a2c5cc6 Mon Sep 17 00:00:00 2001
From: Ryan Seto
Date: Wed, 10 Jul 2024 17:03:32 -0400
Subject: [PATCH 089/447] drm/amd/display: Fix visual confirm bug for SubVP

[Why]
Visual confirm was incorrect on dual-monitor SubVP setups.

[How]
Adjusted the p_state type assignment for dual-monitor SubVP setups.

Signed-off-by: Ryan Seto
Reviewed-by: Chaitanya Dhere
Signed-off-by: Aurabindo Pillai
Signed-off-by: Alex Deucher
---
 .../dc/dml2/dml21/dml21_translation_helper.c | 13 ++++++++---
 .../dc/dml2/dml21/dml21_translation_helper.h |  2 +-
 .../amd/display/dc/dml2/dml21/dml21_utils.c  | 22 +++++++++++++++++--
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index 1fce61323201..9fdb209bcab2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -1183,7 +1183,8 @@ void dml21_get_pipe_mcache_config(
 
 void dml21_set_dc_p_state_type(
 		struct pipe_ctx *pipe_ctx,
-		struct dml2_per_stream_programming *stream_programming)
+		struct dml2_per_stream_programming *stream_programming,
+		bool sub_vp_enabled)
 {
 	switch (stream_programming->uclk_pstate_method) {
 	case dml2_uclk_pstate_support_method_vactive:
@@ -1192,14 +1193,20 @@ void dml21_set_dc_p_state_type(
 		break;
 	case dml2_uclk_pstate_support_method_vblank:
 	case dml2_uclk_pstate_support_method_fw_vblank_drr:
-		pipe_ctx->p_state_type = P_STATE_V_BLANK;
+		if (sub_vp_enabled)
+			pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP;
+		else
+			pipe_ctx->p_state_type = P_STATE_V_BLANK;
 		break;
 	case dml2_uclk_pstate_support_method_fw_subvp_phantom:
 	case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr:
 		pipe_ctx->p_state_type = P_STATE_SUB_VP;
 		break;
 	case dml2_uclk_pstate_support_method_fw_drr:
-		pipe_ctx->p_state_type = P_STATE_FPO;
+		if (sub_vp_enabled)
+			pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP;
+		else
+			pipe_ctx->p_state_type = P_STATE_FPO;
 		break;
 	default:
 		pipe_ctx->p_state_type = P_STATE_UNKNOWN;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
index 97a8f51b7780..476a7f6e4875 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
@@ -26,5 +26,5 @@ void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_water
 void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx);
 void dml21_map_hw_resources(struct dml2_context *dml_ctx);
 void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config);
-void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming);
+void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled);
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
index e11246e525ac..ec4195336444 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
@@ -11,7 +11,6 @@
 
 #include "dml2_core_dcn4_calcs.h"
 
-
 int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id)
 {
 	int i;
@@ -280,6 +279,23 @@ bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
 			dc_is_dp_signal(pipe_ctx->stream->signal));
 }
 
+
+static bool is_sub_vp_enabled(struct dc *dc, struct dc_state *context)
+{
+	int i;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe_ctx->stream && dc_state_get_paired_subvp_stream(context, pipe_ctx->stream) &&
+				dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
+			return true;
+		}
+	}
+	return false;
+}
+
+
 void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog,
 		struct dml2_per_stream_programming *stream_prog)
 {
@@ -317,7 +333,9 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex
 	dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx);
 	memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[pipe_ctx->pipe_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation));
 
-	dml21_set_dc_p_state_type(pipe_ctx, stream_prog);
+	bool sub_vp_enabled = is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context);
+
+	dml21_set_dc_p_state_type(pipe_ctx, stream_prog, sub_vp_enabled);
 }
 
 static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx,

From 94beb4ac1b3bc5fbeef977960a90ee4f594b4465 Mon Sep 17 00:00:00 2001
From: Samson Tam
Date: Wed, 10 Jul 2024 17:09:04 -0400
Subject: [PATCH 090/447] drm/amd/display: ensure EASF and ISHARP coefficients are programmed together

[Why]
EASF coefficients are programmed to RAM and then the RAM selector is
toggled. ISHARP coefficients are programmed after, so they will not be
in the same RAM block.

[How]
Move ISHARP programming before EASF programming.
Add a flag that is set when ISHARP coefficients are updated; if so,
force EASF coefficient programming.

Reviewed-by: Alvin Lee
Signed-off-by: Samson Tam
Signed-off-by: Aurabindo Pillai
Signed-off-by: Alex Deucher
---
 .../display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 28 +++++++++++++------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c
index 27cbda1cf8cf..703d7b51c6c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c
@@ -280,7 +280,8 @@ static void dpp401_dscl_set_scaler_filter(
 static void dpp401_dscl_set_scl_filter(
 	struct dcn401_dpp *dpp,
 	const struct scaler_data *scl_data,
-	bool chroma_coef_mode)
+	bool chroma_coef_mode,
+	bool force_coeffs_update)
 {
 	bool h_2tap_hardcode_coef_en = false;
 	bool v_2tap_hardcode_coef_en = false;
@@ -343,7 +344,7 @@ static void dpp401_dscl_set_scl_filter(
 				|| (filter_v_c && (filter_v_c != dpp->filter_v_c));
 	}
 
-	if (filter_updated) {
+	if ((filter_updated) || (force_coeffs_update)) {
 		uint32_t scl_mode = REG_READ(SCL_MODE);
 
 		if (!h_2tap_hardcode_coef_en && filter_h) {
@@ -955,9 +956,11 @@ static void dpp401_dscl_set_isharp_filter(
 *
 */
 static void dpp401_dscl_program_isharp(struct dpp *dpp_base,
-		const struct scaler_data *scl_data)
+		const struct scaler_data *scl_data,
+		bool *bs_coeffs_updated)
 {
 	struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+	*bs_coeffs_updated = false;
 
 	PERF_TRACE();
 	/* ISHARP_MODE */
@@ -1030,12 +1033,14 @@ static void dpp401_dscl_program_isharp(struct dpp *dpp_base,
 				dpp, scl_data->taps.v_taps,
 				SCL_COEF_VERTICAL_BLUR_SCALE,
 				scl_data->dscl_prog_data.filter_blur_scale_v);
+			*bs_coeffs_updated = true;
 		}
 		if (scl_data->dscl_prog_data.filter_blur_scale_h) {
 			dpp401_dscl_set_scaler_filter(
 				dpp, scl_data->taps.h_taps,
 				SCL_COEF_HORIZONTAL_BLUR_SCALE,
 				scl_data->dscl_prog_data.filter_blur_scale_h);
+			*bs_coeffs_updated = true;
 		}
 	}
 	PERF_TRACE();
@@ -1066,6 +1071,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base,
 			dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale);
 	bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN
 				&& scl_data->format <= PIXEL_FORMAT_VIDEO_END;
+	bool bs_coeffs_updated = false;
 
 	if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0)
 		return;
@@ -1125,7 +1131,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base,
 	if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) {
 		if (dpp->base.ctx->dc->config.prefer_easf)
 			dpp401_dscl_disable_easf(dpp_base, scl_data);
-		dpp401_dscl_program_isharp(dpp_base, scl_data);
+		dpp401_dscl_program_isharp(dpp_base, scl_data, &bs_coeffs_updated);
 		return;
 	}
 
@@ -1152,12 +1158,18 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base,
 		SCL_V_NUM_TAPS_C, v_num_taps_c,
 		SCL_H_NUM_TAPS_C, h_num_taps_c);
 
-	dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr);
+	/* ISharp configuration
+	 * - B&S coeffs are written to same coeff RAM as WB scaler coeffs
+	 * - coeff RAM toggle is in EASF programming
+	 * - if we are only programming B&S coeffs, then need to reprogram
+	 *   WB scaler coeffs and toggle coeff RAM together
+	 */
+	//if (dpp->base.ctx->dc->config.prefer_easf)
+	dpp401_dscl_program_isharp(dpp_base, scl_data, &bs_coeffs_updated);
+
+	dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr, bs_coeffs_updated);
 
 	/* Edge adaptive scaler function configuration */
 	if (dpp->base.ctx->dc->config.prefer_easf)
 		dpp401_dscl_program_easf(dpp_base, scl_data);
-	/* isharp configuration */
-	//if (dpp->base.ctx->dc->config.prefer_easf)
-	dpp401_dscl_program_isharp(dpp_base, scl_data);
 
 	PERF_TRACE();
 }
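The constraint this patch works around is a common program-then-commit pattern: every coefficient set that must take effect atomically has to land in the inactive coefficient RAM bank before the single selector toggle that commits them. A self-contained sketch of the pattern (the bank bookkeeping and names are hypothetical; in the real hardware the selector toggle lives in the EASF programming path):

  #include <stdbool.h>
  #include <stdio.h>

  static int active_bank_sketch;	/* which coefficient RAM bank hardware reads */

  static void write_coeffs_sketch(const char *what)
  {
  	printf("write %s coeffs into inactive bank %d\n", what, !active_bank_sketch);
  }

  static void program_and_commit_sketch(bool bs_coeffs_updated)
  {
  	write_coeffs_sketch("ISHARP blur-and-scale");	/* ISHARP first */
  	if (bs_coeffs_updated)
  		write_coeffs_sketch("WB scaler");	/* forced EASF/WB reprogram */

  	active_bank_sketch = !active_bank_sketch;	/* one toggle commits both */
  }

  int main(void)
  {
  	program_and_commit_sketch(true);
  	return 0;
  }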
From aaa21e6a33bae017fc190bd75f76baa29d259346 Mon Sep 17 00:00:00 2001
From: Austin Zheng
Date: Wed, 10 Jul 2024 14:15:57 -0400
Subject: [PATCH 091/447] drm/amd/display: Check if Mode is Supported Before Returning Result

[Why]
Even if the mode is not supported, dml2_check_mode_supported() would
still return true. This causes an unsupported mode to be programmed.

[How]
Check if the mode is supported or not and return the proper result.

Reviewed-by: Chaitanya Dhere
Signed-off-by: Austin Zheng
Signed-off-by: Aurabindo Pillai
Signed-off-by: Alex Deucher
---
 .../gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c
index 30d07cd1065f..e9b40a45ffdd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c
@@ -138,8 +138,9 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out)
 	}
 
 	in_out->is_supported = mcache_success;
+	result = result && in_out->is_supported;
 
-	return true;
+	return result;
 }
 
 bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out)
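The one-line fix above is simply accumulating the support verdict instead of discarding it; in miniature (hypothetical names):

  #include <stdbool.h>

  /* Accumulate the mode-support result instead of returning true blindly. */
  bool check_mode_supported_sketch(bool dml_result, bool mcache_success)
  {
  	bool is_supported = mcache_success;

  	return dml_result && is_supported;	/* previously: return true */
  }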
From f52ea01925f4eeb967c09cb5dae61608a2330541 Mon Sep 17 00:00:00 2001
From: Hansen Dsouza
Date: Thu, 11 Jul 2024 10:58:51 -0400
Subject: [PATCH 092/447] drm/amd/display: Add source select helper functions

[why & how]
Add source select helpers based on DCCG spec

Reviewed-by: Daniel Miess
Signed-off-by: Hansen Dsouza
Signed-off-by: Aurabindo Pillai
Signed-off-by: Alex Deucher
---
 .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 324 ++++++++++++++++++
 1 file changed, 324 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
index 76f069f703ef..b698b773338a 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -418,6 +418,320 @@ static void dccg35_set_smclk32_se_rcg(
 	}
 }
 
+static void dccg35_set_dsc_clk_src_new(struct dccg *dccg, int inst, enum dsc_clk_source src)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	/* DSCCLK#_EN=0 switches to refclock from functional clock */
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, src);
+		break;
+	case 1:
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, src);
+		break;
+	case 2:
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, src);
+		break;
+	case 3:
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, src);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dccg35_set_symclk32_se_src_new(
+	struct dccg *dccg,
+	int inst,
+	enum symclk32_se_clk_source src
+	)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE0_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src,
+				SYMCLK32_SE0_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE1_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src,
+				SYMCLK32_SE1_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE2_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src,
+				SYMCLK32_SE2_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE3_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src,
+				SYMCLK32_SE3_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static int
+dccg35_is_symclk32_se_src_functional_le_new(struct dccg *dccg, int symclk_32_se_inst, int symclk_32_le_inst)
+{
+	uint32_t en;
+	uint32_t src_sel;
+
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, &src_sel, SYMCLK32_SE3_EN, &en);
+
+	if (en == 1 && src_sel == symclk_32_le_inst)
+		return 1;
+
+	return 0;
+}
+
+
+static void dccg35_set_symclk32_le_src_new(
+	struct dccg *dccg,
+	int inst,
+	enum symclk32_le_clk_source src)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLK32_LE_CNTL,
+				SYMCLK32_LE0_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src,
+				SYMCLK32_LE0_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLK32_LE_CNTL,
+				SYMCLK32_LE1_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src,
+				SYMCLK32_LE1_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dcn35_set_dppclk_src_new(struct dccg *dccg,
+	int inst, enum dppclk_clock_source src)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, src);
+		break;
+	case 1:
+		REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, src);
+		break;
+	case 2:
+		REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, src);
+		break;
+	case 3:
+		REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, src);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+static void dccg35_set_dtbclk_p_src_new(
+	struct dccg *dccg,
+	enum dtbclk_source src,
+	int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	/* If DTBCLK_P#_EN is 0 refclock is selected as functional clock
+	 * If DTBCLK_P#_EN is 1 functional clock is selected as DTBCLK_P#_SRC_SEL
+	 */
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P0_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src,
+				DTBCLK_P0_EN, (src == DTBCLK_REFCLK) ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P1_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src,
+				DTBCLK_P1_EN, (src == DTBCLK_REFCLK) ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P2_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src,
+				DTBCLK_P2_EN, (src == DTBCLK_REFCLK) ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE_2(DTBCLK_P_CNTL,
+				DTBCLK_P3_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src,
+				DTBCLK_P3_EN, (src == DTBCLK_REFCLK) ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dccg35_set_dpstreamclk_src_new(
+	struct dccg *dccg,
+	enum dp_stream_clk_source src,
+	int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN,
+				(src == DP_STREAM_REFCLK) ? 0 : 1,
+				DPSTREAMCLK0_SRC_SEL,
+				(src == DP_STREAM_REFCLK) ? 0 : src);
+		break;
+	case 1:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN,
+				(src == DP_STREAM_REFCLK) ? 0 : 1,
+				DPSTREAMCLK1_SRC_SEL,
+				(src == DP_STREAM_REFCLK) ? 0 : src);
+
+		break;
+	case 2:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN,
+				(src == DP_STREAM_REFCLK) ? 0 : 1,
+				DPSTREAMCLK2_SRC_SEL,
+				(src == DP_STREAM_REFCLK) ? 0 : src);
+
+		break;
+	case 3:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN,
+				(src == DP_STREAM_REFCLK) ? 0 : 1,
+				DPSTREAMCLK3_SRC_SEL,
+				(src == DP_STREAM_REFCLK) ? 0 : src);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static void dccg35_set_physymclk_src_new(
+	struct dccg *dccg,
+	enum physymclk_source src,
+	int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN,
+				(src == PHYSYMCLK_REFCLK) ? 0 : 1,
+				PHYASYMCLK_SRC_SEL,
+				(src == PHYSYMCLK_REFCLK) ? 0 : src);
+		break;
+	case 1:
+		REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN,
+				(src == PHYSYMCLK_REFCLK) ? 0 : 1,
+				PHYBSYMCLK_SRC_SEL,
+				(src == PHYSYMCLK_REFCLK) ? 0 : src);
+		break;
+	case 2:
+		REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN,
+				(src == PHYSYMCLK_REFCLK) ? 0 : 1,
+				PHYCSYMCLK_SRC_SEL,
+				(src == PHYSYMCLK_REFCLK) ? 0 : src);
+		break;
+	case 3:
+		REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN,
+				(src == PHYSYMCLK_REFCLK) ? 0 : 1,
+				PHYDSYMCLK_SRC_SEL,
+				(src == PHYSYMCLK_REFCLK) ? 0 : src);
+		break;
+	case 4:
+		REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN,
+				(src == PHYSYMCLK_REFCLK) ? 0 : 1,
+				PHYESYMCLK_SRC_SEL,
+				(src == PHYSYMCLK_REFCLK) ? 0 : src);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg,
+		int symclk_fe_inst,
+		int symclk_be_inst)
+{
+
+	uint32_t en = 0;
+	uint32_t src_sel = 0;
+
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (symclk_fe_inst) {
+	case 0:
+		REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, &src_sel, SYMCLKA_FE_EN, &en);
+		break;
+	case 1:
+		REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, &src_sel, SYMCLKB_FE_EN, &en);
+		break;
+	case 2:
+		REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, &src_sel, SYMCLKC_FE_EN, &en);
+		break;
+	case 3:
+		REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, &src_sel, SYMCLKD_FE_EN, &en);
+		break;
+	case 4:
+		REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, &src_sel, SYMCLKE_FE_EN, &en);
+		break;
+	}
+
+	if (en == 1 && src_sel == symclk_be_inst)
+		return 1;
+
+	return 0;
+}
+
+static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum physymclk_fe_source src, int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+				SYMCLKA_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKA_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+				SYMCLKB_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKB_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+				SYMCLKC_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKC_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+				SYMCLKD_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKD_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	case 4:
+		REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE,
+				SYMCLKE_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1,
+				SYMCLKE_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src);
+		break;
+	}
+}
+
 static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg)
 {
 	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
@@ -1429,6 +1743,16 @@ struct dccg *dccg35_create(
 	(void)&dccg35_set_dppclk_rcg;
 	(void)&dccg35_set_dpstreamclk_rcg;
 	(void)&dccg35_set_smclk32_se_rcg;
+	(void)&dccg35_set_dsc_clk_src_new;
+	(void)&dccg35_set_symclk32_se_src_new;
+	(void)&dccg35_is_symclk32_se_src_functional_le_new;
+	(void)&dccg35_set_symclk32_le_src_new;
+	(void)&dcn35_set_dppclk_src_new;
+	(void)&dccg35_set_dtbclk_p_src_new;
+	(void)&dccg35_set_dpstreamclk_src_new;
+	(void)&dccg35_set_physymclk_src_new;
+	(void)&dccg35_is_symclk_fe_src_functional_be;
+	(void)&dccg35_set_symclk_fe_src_new;
 
 	base = &dccg_dcn->base;
 	base->ctx = ctx;
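As in the RCG patch, these helpers land before any caller is wired up, so both patches park a list of `(void)&function;` statements in dccg35_create() purely to suppress -Wunused-function until the hookup happens. The idiom in isolation (hypothetical names):

  /* Would otherwise warn with -Wunused-function once compiled in. */
  static int helper_not_yet_wired_sketch(void)
  {
  	return 0;
  }

  void create_sketch(void)
  {
  	/* Taking the address marks the symbol as used; no code is generated. */
  	(void)&helper_not_yet_wired_sketch;
  }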
From f7543209ce5dc09e3f5a27a7d4ee53e226283719 Mon Sep 17 00:00:00 2001
From: Aurabindo Pillai
Date: Thu, 4 Jul 2024 18:33:02 +0000
Subject: [PATCH 093/447] drm/amd/display: rename dcn3/dcn4 to more sound terms

Use more accurate names to refer to the ASIC architecture. dcn3 in DML
actually refers to DCN32 and DCN321, so rename it to dcn32x. dcn4
refers to any DCN4x SoC; hence rename dcn4 to dcn4x.

Reviewed-by: Rodrigo Siqueira
Signed-off-by: Aurabindo Pillai
Signed-off-by: Alex Deucher
---
 .../dc/dml2/dml21/dml21_translation_helper.c  |  36 +-
 .../amd/display/dc/dml2/dml21/dml21_utils.c   |   2 +-
 .../dml21/inc/bounding_boxes/dcn3_soc_bb.h    |   8 +-
 .../dml21/inc/bounding_boxes/dcn4_soc_bb.h    |   8 +-
 .../dml21/inc/dml_top_soc_parameter_types.h   |  10 +-
 .../display/dc/dml2/dml21/inc/dml_top_types.h |  10 +-
 .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c |   2 +-
 .../src/dml2_core/dml2_core_dcn4_calcs.c      | 186 ++++-----
 .../dml21/src/dml2_core/dml2_core_shared.c    | 190 ++++-----
 .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 130 +++----
 .../display/dc/hubbub/dcn401/dcn401_hubbub.c  | 364 +++++++++---------
 .../gpu/drm/amd/display/dc/inc/hw/mem_input.h |   2 +-
 12 files changed, 474 insertions(+), 474 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index 9fdb209bcab2..ec663c7442e5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -1027,17 +1027,17 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s
 void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context)
 {
 	/* TODO these should be the max of active, svp prefetch and idle should be tracked seperately */
-	context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dispclk_khz;
-	context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.dcfclk_khz;
-	context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.uclk_khz;
-	context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.fclk_khz;
-	context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.idle.uclk_khz;
-	context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.idle.fclk_khz;
-	context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.deepsleep_dcfclk_khz;
+	context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dispclk_khz;
+	context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.dcfclk_khz;
+	context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.uclk_khz;
+	context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.fclk_khz;
+	context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.uclk_khz;
+	context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.fclk_khz;
+	context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.deepsleep_dcfclk_khz;
 	context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = in_ctx->v21.mode_programming.programming->fclk_pstate_supported;
 	context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported;
-	context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz > 0;
-	context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz;
+	context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0;
+	context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz;
 }
 
 void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx)
@@ -1068,16 +1068,16 @@ static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_wat
 	switch (wm_index) {
 	case DML2_DCHUB_WATERMARK_SET_A:
-		wm_regs = &watermarks->dcn4.a;
+		wm_regs = &watermarks->dcn4x.a;
 		break;
 	case DML2_DCHUB_WATERMARK_SET_B:
-		wm_regs = &watermarks->dcn4.b;
+		wm_regs = &watermarks->dcn4x.b;
 		break;
 	case DML2_DCHUB_WATERMARK_SET_C:
-		wm_regs = &watermarks->dcn4.c;
+		wm_regs = &watermarks->dcn4x.c;
 		break;
 	case DML2_DCHUB_WATERMARK_SET_D:
-		wm_regs = &watermarks->dcn4.d;
+		wm_regs = &watermarks->dcn4x.d;
 		break;
 	case DML2_DCHUB_WATERMARK_SET_NUM:
 	default:
@@ -1125,11 +1125,11 @@ void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_
 		global_sync = &stream_programming->phantom_stream.global_sync;
 	}
 
-	pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4.vstartup_lines;
-	pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4.vupdate_offset_pixels;
-	pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4.vupdate_vupdate_width_pixels;
-	pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4.vready_offset_pixels;
-	pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4.pstate_keepout_start_lines;
+	pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4x.vstartup_lines;
+	pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4x.vupdate_offset_pixels;
+	pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4x.vupdate_vupdate_width_pixels;
+	pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4x.vready_offset_pixels;
+	pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4x.pstate_keepout_start_lines;
 
 	pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
index ec4195336444..51d491bffa32 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
@@ -326,7 +326,7 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex
 		pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64;
 	}
 
-	pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4.dppclk_khz;
+	pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4x.dppclk_khz;
 	if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz)
 		context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h
index 521f77b8ac44..d82c681a5402 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h
@@ -72,7 +72,7 @@ static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = {
 			.scaling_factor_mhz = 0,
 		},
 	.qos_params = {
-		.dcn4 = {
+		.dcn4x = {
 			.df_qos_response_time_fclk_cycles = 300,
 			.max_round_trip_to_furthest_cs_fclk_cycles = 350,
 			.mall_overhead_fclk_cycles = 50,
@@ -128,7 +128,7 @@ static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = {
 			},
 		},
 	},
-	.qos_type = dml2_qos_param_type_dcn4,
+	.qos_type = dml2_qos_param_type_dcn4x,
 };
 
 static const struct dml2_soc_bb dml2_socbb_dcn31 = {
@@ -228,7 +228,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn31 = {
 				.scaling_factor_mhz = 0,
 			},
 		.qos_params = {
-			.dcn4 = {
+			.dcn4x = {
 				.df_qos_response_time_fclk_cycles = 300,
 				.max_round_trip_to_furthest_cs_fclk_cycles = 350,
 				.mall_overhead_fclk_cycles = 50,
@@ -332,7 +332,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn31 = {
 				},
 			},
 		},
-		.qos_type = dml2_qos_param_type_dcn4,
+		.qos_type = dml2_qos_param_type_dcn4x,
 	},
 
 	.power_management_parameters = {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
index 5af94f06c667..0fe70bd0dbac 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
@@ -52,7 +52,7 @@ static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = {
 			.scaling_factor_mhz = 0,
 		},
 	.qos_params = {
-		.dcn4 = {
+		.dcn4x = {
 			.df_qos_response_time_fclk_cycles = 300,
 			.max_round_trip_to_furthest_cs_fclk_cycles = 350,
 			.mall_overhead_fclk_cycles = 50,
@@ -78,7 +78,7 @@ static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = {
 			},
 		},
 	},
-	.qos_type = dml2_qos_param_type_dcn4,
+	.qos_type = dml2_qos_param_type_dcn4x,
 };
 
 static const struct dml2_soc_bb dml2_socbb_dcn401 = {
@@ -178,7 +178,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn401 = {
 				.scaling_factor_mhz = 0,
 			},
 		.qos_params = {
-			.dcn4 = {
+			.dcn4x = {
 				.df_qos_response_time_fclk_cycles = 300,
 				.max_round_trip_to_furthest_cs_fclk_cycles = 350,
 				.mall_overhead_fclk_cycles = 50,
@@ -282,7 +282,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn401 = {
 				},
 			},
 		},
-		.qos_type = dml2_qos_param_type_dcn4,
+		.qos_type = dml2_qos_param_type_dcn4x,
 	},
 
 	.power_management_parameters = {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h
index 99d775adc3e0..4a46b21c3e55 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h
@@ -26,7 +26,7 @@ struct dml2_soc_derates {
 	struct dml2_soc_derate_values system_idle_average;
 };
 
-struct dml2_dcn3_soc_qos_params {
+struct dml2_dcn32x_soc_qos_params {
 	struct {
 		unsigned int base_latency_us;
 		unsigned int base_latency_pixel_vm_us;
@@ -52,7 +52,7 @@ struct dml2_dcn4_uclk_dpm_dependent_qos_params {
 	unsigned int average_latency_when_non_urgent_uclk_cycles;
 };
 
-struct dml2_dcn4_soc_qos_params {
+struct dml2_dcn4x_soc_qos_params {
 	unsigned int df_qos_response_time_fclk_cycles;
 	unsigned int max_round_trip_to_furthest_cs_fclk_cycles;
 	unsigned int mall_overhead_fclk_cycles;
@@ -68,7 +68,7 @@
 
 enum dml2_qos_param_type {
 	dml2_qos_param_type_dcn3,
-	dml2_qos_param_type_dcn4
+	dml2_qos_param_type_dcn4x
 };
 
 struct dml2_soc_qos_parameters {
@@ -80,8 +80,8 @@ struct dml2_soc_qos_parameters {
 	} writeback;
 
 	union {
-		struct dml2_dcn3_soc_qos_params dcn3;
-		struct dml2_dcn4_soc_qos_params dcn4;
+		struct dml2_dcn32x_soc_qos_params dcn32x;
+		struct dml2_dcn4x_soc_qos_params dcn4x;
 	} qos_params;
 
 	enum dml2_qos_param_type qos_type;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
index c47a07f473e5..a824ce56c54e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
@@ -228,7 +228,7 @@ struct dml2_per_plane_programming {
 	union {
 		struct {
 			unsigned long dppclk_khz;
-		} dcn4;
+		} dcn4x;
 	} min_clocks;
 
 	struct dml2_mcache_surface_allocation mcache_allocation;
@@ -263,7 +263,7 @@ union dml2_global_sync_programming {
 		unsigned int vupdate_vupdate_width_pixels;
 		unsigned int vready_offset_pixels;
 		unsigned int pstate_keepout_start_lines;
-	} dcn4;
+	} dcn4x;
 };
 
 struct dml2_per_stream_programming {
@@ -274,7 +274,7 @@ struct dml2_per_stream_programming {
 			unsigned long dscclk_khz;
 			unsigned long dtbclk_khz;
 			unsigned long phyclk_khz;
-		} dcn4;
+		} dcn4x;
 	} min_clocks;
 
 	union dml2_global_sync_programming global_sync;
@@ -375,7 +375,7 @@ struct dml2_display_cfg_programming {
 			unsigned long dispclk_khz;
 			unsigned long dcfclk_deepsleep_khz;
 			unsigned long dpp_ref_khz;
-		} dcn3;
+		} dcn32x;
 		struct {
 			struct {
 				unsigned long uclk_khz;
@@ -404,7 +404,7 @@ struct dml2_display_cfg_programming {
 				uint32_t dpprefclk_did;
 				uint32_t dtbrefclk_did;
 			} divider_ids;
-		} dcn4;
+		} dcn4x;
 	} min_clocks;
 
 	bool uclk_pstate_supported;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
index f5c6cd5cf5e9..9375c6ae1147 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
@@ -551,7 +551,7 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out
 	l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table;
 	l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info;
 	l->mode_programming_ex_params.programming = in_out->programming;
-	l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4.active.uclk_khz,
+	l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4x.active.uclk_khz,
 		&core->clean_me_up.mode_lib.soc);
 
 	result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
index 3b1e5c548435..0b671c665373 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@@ -3183,7 +3183,7 @@ static double CalculateUrgentLatency(
 	double fabric_max_transport_latency_margin)
 {
 	double urgent_latency = 0;
-	if (qos_type == dml2_qos_param_type_dcn4) {
+	if (qos_type == dml2_qos_param_type_dcn4x) {
 		urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock
 			+ max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0)
 			+ urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0);
@@ -3194,7 +3194,7 @@ static double CalculateUrgentLatency(
 		}
 	}
 #ifdef __DML_VBA_DEBUG__
-	if (qos_type == dml2_qos_param_type_dcn4) {
+	if (qos_type == dml2_qos_param_type_dcn4x) {
 		dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
 		dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles);
 		dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
@@ -3224,7 +3224,7 @@ static double CalculateTripToMemory(
 	double fabric_max_transport_latency_margin)
 {
 	double trip_to_memory_us;
-	if (qos_type == dml2_qos_param_type_dcn4) {
+	if (qos_type == dml2_qos_param_type_dcn4x) {
 		trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock
 			+ max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
 			+ trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
@@ -3233,7 +3233,7 @@ static double CalculateTripToMemory(
 	}
 
 #ifdef __DML_VBA_DEBUG__
-	if (qos_type == dml2_qos_param_type_dcn4) {
+	if (qos_type == dml2_qos_param_type_dcn4x) {
 		dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
 		dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles);
 		dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles);
@@ -3263,7 +3263,7 @@ static double CalculateMetaTripToMemory(
 	double fabric_max_transport_latency_margin)
 {
 	double meta_trip_to_memory_us;
-	if (qos_type == dml2_qos_param_type_dcn4) {
+	if (qos_type == dml2_qos_param_type_dcn4x) {
 		meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
 			+ meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
 	} else {
@@ -3271,7 +3271,7 @@ static double CalculateMetaTripToMemory(
 	}
 
 #ifdef __DML_VBA_DEBUG__
-	if (qos_type == dml2_qos_param_type_dcn4) {
+	if (qos_type == dml2_qos_param_type_dcn4x) {
 		dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
 		dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles);
 		dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles);
@@ -4961,7 +4961,7 @@ static void CalculateExtraLatency(
 			max_request_size_bytes = request_size_bytes_chroma[k];
 	}
 
-	if (qos_type == dml2_qos_param_type_dcn4) {
+	if (qos_type == dml2_qos_param_type_dcn4x) {
 		*ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK;
 		*ExtraLatency = *ExtraLatency_sr;
 		if (max_oustanding_when_urgent_expected)
@@ -6979,7 +6979,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 	mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
 	mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
 	mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
-	mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params);
+	mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
 	mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
 
 #if defined(__DML_VBA_DEBUG__)
@@ -8079,32 +8079,32 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 		mode_lib->ms.excess_vactive_fill_bw_c);
 
 	mode_lib->ms.UrgLatency = CalculateUrgentLatency(
-		mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us,
-		mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us,
-		mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
 		mode_lib->soc.do_urgent_latency_adjustment,
-		mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us,
-		mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
 		mode_lib->ms.FabricClock,
 		mode_lib->ms.uclk_freq_mhz,
 		mode_lib->soc.qos_parameters.qos_type,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin);
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
 
 	mode_lib->ms.TripToMemory = CalculateTripToMemory(
 		mode_lib->ms.UrgLatency,
 		mode_lib->ms.FabricClock,
 		mode_lib->ms.uclk_freq_mhz,
 		mode_lib->soc.qos_parameters.qos_type,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin,
-		mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin);
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
 
 	mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
@@ -8274,20 +8274,20 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 	mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
 
 	mode_lib->ms.support.avg_urgent_latency_us
-		= (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
-		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0)
-		+ mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
-		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0);
+		= (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
+		+ mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
 
 	mode_lib->ms.support.avg_non_urgent_latency_us
-		= (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
-		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0)
-		+ mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
-		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0);
+		= (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
+		+ mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
 
 	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
 
-		if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) {
+		if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
 			outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
 				/ (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
 
@@ -8307,7 +8307,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 #endif
 		}
 
-		if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) {
+		if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
 			outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
 				/ (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
 
@@ -8522,14 +8522,14 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 		min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
 
 	if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
-		s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes,
-			mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
-			mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes));
+		s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+			mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+			mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
 
 	CalculateExtraLatency(
 		display_cfg,
 		mode_lib->ip.rob_buffer_size_kbytes,
-		mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
 		s->ReorderingBytes,
 		mode_lib->ms.DCFCLK,
 		mode_lib->ms.FabricClock,
@@ -9015,13 +9015,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 	//Re-ordering Buffer Support Check
 	mode_lib->ms.support.max_urgent_latency_us
-		= mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
-		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0)
-		+ mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
-		+ mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
-		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0);
+		= mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
+		+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
+		+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
+		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
 
-	if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) {
+	if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
 		if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
 			/ mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) {
 			mode_lib->ms.support.ROBSupport = true;
@@ -9029,7 +9029,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 			mode_lib->ms.support.ROBSupport = false;
 		}
 	} else {
-		if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
+		if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
 			mode_lib->ms.support.ROBSupport = true;
 		} else {
 			mode_lib->ms.support.ROBSupport = false;
@@ -9939,14 +9939,14 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
 	mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info);
 	dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane);
 
-	mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0;
-	mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0;
-	mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config);
-	mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0;
-	mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0;
-	s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000;
-	mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params);
-	mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table);
+	mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0;
+	mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0;
+	mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config);
+	mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0;
+	mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0;
+	s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000;
+	mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
+	mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table);
 
 	for (k = 0; k < s->num_active_planes; ++k) {
 		unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
@@ -9981,18 +9981,18 @@
 
 	for (k = 0; k < s->num_active_planes; ++k) {
 		mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
-		mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0;
+		mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
 		dml2_assert(mode_lib->mp.Dppclk[k] > 0);
 	}
for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } - mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; - mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dcfclk > 0); dml2_assert(mode_lib->mp.FabricClock > 0); @@ -10474,14 +10474,14 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex } if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, @@ -10567,32 +10567,32 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.excess_vactive_fill_bw_c); mode_lib->mp.UrgentLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, - 
mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = CalculateTripToMemory( mode_lib->mp.UrgentLatency, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); @@ -10601,10 +10601,10 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { bool cursor_not_enough_urgent_latency_hiding = 0; @@ -12205,11 +12205,11 @@ void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg, void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index) { - out->dcn4.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); - out->dcn4.vstartup_lines = dml_get_vstartup_calculated(mode_lib, 
pipe_index); - out->dcn4.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); - out->dcn4.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); - out->dcn4.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); + out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); + out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); + out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); + out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); + out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); } void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) @@ -12658,7 +12658,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib); - out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); + out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); @@ -12771,13 +12771,13 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod } } - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index 6d7701a97d3f..c54c29711a65 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -779,7 +779,7 @@ 
bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000; mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); - mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) @@ -1776,32 +1776,32 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou #endif mode_lib->ms.UrgLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = CalculateTripToMemory( mode_lib->ms.UrgLatency, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, 
- mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); @@ -1995,21 +1995,21 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; mode_lib->ms.support.avg_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); mode_lib->ms.support.avg_non_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); double outstanding_latency_us = 0; for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -2029,7 +2029,7 @@ bool dml2_core_shared_mode_support(struct 
dml2_core_calcs_mode_support_ex *in_ou #endif } - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -2243,14 +2243,14 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou double min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -2717,13 +2717,13 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou //Re-ordering Buffer Support Check mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / 
mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; @@ -2731,7 +2731,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.support.ROBSupport = false; } } else { - if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { + if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -5885,7 +5885,7 @@ static double CalculateUrgentLatency( double fabric_max_transport_latency_margin) { double urgent_latency = 0; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); @@ -5896,7 +5896,7 @@ static double CalculateUrgentLatency( } } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); @@ -5926,7 +5926,7 @@ static double CalculateTripToMemory( double fabric_max_transport_latency_margin) { double trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); @@ -5935,7 +5935,7 @@ static double CalculateTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); @@ -5965,7 +5965,7 @@ static double CalculateMetaTripToMemory( double fabric_max_transport_latency_margin) { double meta_trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); } else { @@ -5973,7 +5973,7 @@ static double CalculateMetaTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); dml2_printf("DML::%s: 
meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); @@ -7489,7 +7489,7 @@ static void CalculateExtraLatency( max_request_size_bytes = request_size_bytes_chroma[k]; } - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; *ExtraLatency = *ExtraLatency_sr; if (max_oustanding_when_urgent_expected) @@ -9835,14 +9835,14 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); - mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0; - mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0; - mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); - mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0; - mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0; - s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000; - mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); - mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table); + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; @@ -9877,18 +9877,18 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; - mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } - mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; - 
mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dcfclk > 0); dml2_assert(mode_lib->mp.FabricClock > 0); @@ -10410,14 +10410,14 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e } if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, @@ -10491,32 +10491,32 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.WritebackDelay[k] = mode_lib->mp.WritebackDelay[j]; mode_lib->mp.UrgentLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, 
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = CalculateTripToMemory( mode_lib->mp.UrgentLatency, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); @@ -10525,10 +10525,10 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { calculate_cursor_req_attributes( @@ -11971,14 +11971,14 @@ void dml2_core_shared_get_pipe_regs(const struct dml2_display_cfg *display_cfg, void dml2_core_shared_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) { - // out->min_clocks.dcn4.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 - // out->min_clocks.dcn4.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); - // out->min_clocks.dcn4.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + // out->min_clocks.dcn4x.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 + // out->min_clocks.dcn4x.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + // out->min_clocks.dcn4x.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); - 
out->global_sync.dcn4.vready_offset_pixels = mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vready_offset_pixels = mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]]; } void dml2_core_shared_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) @@ -12281,7 +12281,7 @@ void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mo out->informative.misc.cstate_max_cap_mode = mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; - out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0); + out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0); out->informative.qos.max_active_fclk_change_latency_supported = mode_lib->mp.MaxActiveFCLKChangeLatencySupported; @@ -12394,13 +12394,13 @@ void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mo } } - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index 
0d847bccd5d2..0021bbaa4b91 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -82,9 +82,9 @@ static void calculate_system_active_minimums(struct dml2_dpmm_map_mode_to_soc_dp get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); } static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -122,9 +122,9 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); } static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -146,9 +146,9 @@ static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_ get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); - in_out->programming->min_clocks.dcn4.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); - in_out->programming->min_clocks.dcn4.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? min_dcfclk_avg : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? 
min_dcfclk_avg : min_dcfclk_latency); } static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double margin, unsigned long vco_freq_khz, unsigned long *rounded_khz, uint32_t *divider_id) @@ -232,25 +232,25 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr { bool result; - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.uclk_khz, &state_table->uclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz, &state_table->uclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk); return result; } @@ -262,12 +262,12 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro result = false; for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.active.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.active.uclk_khz = state_table->uclk.clk_values_khz[index]; + if (display_cfg->min_clocks.dcn4x.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.fclk_khz = state_table->fclk.clk_values_khz[index]; + 
display_cfg->min_clocks.dcn4x.active.uclk_khz = state_table->uclk.clk_values_khz[index]; result = true; break; } @@ -276,12 +276,12 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro if (result) { result = false; for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; + if (display_cfg->min_clocks.dcn4x.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; result = true; break; } @@ -289,9 +289,9 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro } // SVP is not supported on any coarse grained SoCs - display_cfg->min_clocks.dcn4.svp_prefetch.dcfclk_khz = 0; - display_cfg->min_clocks.dcn4.svp_prefetch.fclk_khz = 0; - display_cfg->min_clocks.dcn4.svp_prefetch.uclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz = 0; return result; } @@ -324,30 +324,30 @@ static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mo result = map_soc_min_clocks_to_dpm_coarse_grained(display_cfg, state_table); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dispclk_khz, &state_table->dispclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dispclk_khz, &state_table->dispclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.deepsleep_dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.deepsleep_dcfclk_khz, &state_table->dcfclk); for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (result) - result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4.dppclk_khz, &state_table->dppclk); + result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4x.dppclk_khz, &state_table->dppclk); } for (i = 0; i < display_cfg->display_config.num_streams; i++) { if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dscclk_khz, &state_table->dscclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dscclk_khz, &state_table->dscclk); if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dtbclk_khz, &state_table->dtbclk); + result = 
round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dtbclk_khz, &state_table->dtbclk); if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.phyclk_khz, &state_table->phyclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.phyclk_khz, &state_table->phyclk); } if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dpprefclk_khz, &state_table->dppclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dpprefclk_khz, &state_table->dppclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dtbrefclk_khz, &state_table->dtbclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dtbrefclk_khz, &state_table->dtbclk); return result; } @@ -515,15 +515,15 @@ static bool determine_power_management_features_with_fams(struct dml2_dpmm_map_m static void clamp_uclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) { - in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; } static void clamp_fclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) { - in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; } static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -539,14 +539,14 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o // In NV4, there's no support for FCLK or DCFCLK DPM change before SVP prefetch starts, therefore // active minimums must be boosted to prefetch minimums - if (in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4.active.uclk_khz) - in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4x.active.uclk_khz) + 
in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz; - if (in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4.active.fclk_khz) - in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4x.active.fclk_khz) + in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz; - if (in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4.active.dcfclk_khz) - in_out->programming->min_clocks.dcn4.active.dcfclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4x.active.dcfclk_khz) + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz; // need some massaging for the dispclk ramping cases: dispclk_khz = mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0) * (1.0 + in_out->ip->dispclk_ramp_margin_percent / 100.0); @@ -556,33 +556,33 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dispclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dispclk_did); + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); // DPP Ref is always set to max of all DPP clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) - in_out->programming->min_clocks.dcn4.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; + if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; } - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dpprefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dpprefclk_did); + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - in_out->programming->plane_programming[i].min_clocks.dcn4.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4.dpprefclk_khz / 255.0 - * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / 
in_out->programming->min_clocks.dcn4.dpprefclk_khz, 1.0)); + in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); } // DTB Ref is always set to max of all DTB clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) - in_out->programming->min_clocks.dcn4.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; + if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; } - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dtbrefclk_did); + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); - in_out->programming->min_clocks.dcn4.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; - in_out->programming->min_clocks.dcn4.socclk_khz = mode_support_result->global.socclk_khz; + in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; + in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; result = map_min_clocks_to_dpm(mode_support_result, in_out->programming, &in_out->soc_bb->clk_table); diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 181041d6d177..d36f758971a8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -75,108 +75,108 @@ bool hubbub401_program_urgent_watermarks( /* Repeat for water mark set A and B */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.urgent > hubbub2->watermarks.dcn4.a.urgent) { - hubbub2->watermarks.dcn4.a.urgent = watermarks->dcn4.a.urgent; + if (safe_to_lower || watermarks->dcn4x.a.urgent > hubbub2->watermarks.dcn4x.a.urgent) { + hubbub2->watermarks.dcn4x.a.urgent = watermarks->dcn4x.a.urgent; REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0, - DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4.a.urgent); + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4x.a.urgent); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.urgent, watermarks->dcn4.a.urgent); - } else if (watermarks->dcn4.a.urgent < hubbub2->watermarks.dcn4.a.urgent) + watermarks->dcn4x.a.urgent, watermarks->dcn4x.a.urgent); + } else if (watermarks->dcn4x.a.urgent < hubbub2->watermarks.dcn4x.a.urgent) wm_pending = true; /* determine the transfer time for a quantity of data for a particular requestor.*/ - if (safe_to_lower || 
watermarks->dcn4.a.frac_urg_bw_flip - > hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_flip = watermarks->dcn4.a.frac_urg_bw_flip; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_flip + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip = watermarks->dcn4x.a.frac_urg_bw_flip; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4.a.frac_urg_bw_flip); - } else if (watermarks->dcn4.a.frac_urg_bw_flip - < hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4x.a.frac_urg_bw_flip); + } else if (watermarks->dcn4x.a.frac_urg_bw_flip + < hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_nom - > hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_nom = watermarks->dcn4.a.frac_urg_bw_nom; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_nom + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom = watermarks->dcn4x.a.frac_urg_bw_nom; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4.a.frac_urg_bw_nom); - } else if (watermarks->dcn4.a.frac_urg_bw_nom - < hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) + DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4x.a.frac_urg_bw_nom); + } else if (watermarks->dcn4x.a.frac_urg_bw_nom + < hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_mall - > hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_mall = watermarks->dcn4.a.frac_urg_bw_mall; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_mall + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall = watermarks->dcn4x.a.frac_urg_bw_mall; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4.a.frac_urg_bw_mall); - } else if (watermarks->dcn4.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) + DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4x.a.frac_urg_bw_mall); + } else if (watermarks->dcn4x.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) { - hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem = watermarks->dcn4.a.refcyc_per_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem = watermarks->dcn4x.a.refcyc_per_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, watermarks->dcn4.a.refcyc_per_trip_to_mem); - } else if (watermarks->dcn4.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, watermarks->dcn4x.a.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4x.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) { - hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4.a.refcyc_per_meta_trip_to_mem; + if (safe_to_lower || 
watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, 0, - DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4.a.refcyc_per_meta_trip_to_mem); - } else if (watermarks->dcn4.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.urgent > hubbub2->watermarks.dcn4.b.urgent) { - hubbub2->watermarks.dcn4.b.urgent = watermarks->dcn4.b.urgent; + if (safe_to_lower || watermarks->dcn4x.b.urgent > hubbub2->watermarks.dcn4x.b.urgent) { + hubbub2->watermarks.dcn4x.b.urgent = watermarks->dcn4x.b.urgent; REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0, - DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4.b.urgent); + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4x.b.urgent); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.urgent, watermarks->dcn4.b.urgent); - } else if (watermarks->dcn4.b.urgent < hubbub2->watermarks.dcn4.b.urgent) + watermarks->dcn4x.b.urgent, watermarks->dcn4x.b.urgent); + } else if (watermarks->dcn4x.b.urgent < hubbub2->watermarks.dcn4x.b.urgent) wm_pending = true; /* determine the transfer time for a quantity of data for a particular requestor.*/ - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_flip - > hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_flip = watermarks->dcn4.b.frac_urg_bw_flip; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_flip + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip = watermarks->dcn4x.b.frac_urg_bw_flip; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4.b.frac_urg_bw_flip); - } else if (watermarks->dcn4.b.frac_urg_bw_flip - < hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4x.b.frac_urg_bw_flip); + } else if (watermarks->dcn4x.b.frac_urg_bw_flip + < hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_nom - > hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_nom = watermarks->dcn4.b.frac_urg_bw_nom; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_nom + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom = watermarks->dcn4x.b.frac_urg_bw_nom; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4.b.frac_urg_bw_nom); - } else if (watermarks->dcn4.b.frac_urg_bw_nom - < hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) + DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4x.b.frac_urg_bw_nom); + } else if (watermarks->dcn4x.b.frac_urg_bw_nom + < hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_mall - > hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_mall = watermarks->dcn4.b.frac_urg_bw_mall; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_mall + > 
hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall = watermarks->dcn4x.b.frac_urg_bw_mall; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4.b.frac_urg_bw_mall); - } else if (watermarks->dcn4.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) + DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4x.b.frac_urg_bw_mall); + } else if (watermarks->dcn4x.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) { - hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem = watermarks->dcn4.b.refcyc_per_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem = watermarks->dcn4x.b.refcyc_per_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4.b.refcyc_per_trip_to_mem); - } else if (watermarks->dcn4.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4x.b.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4x.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) { - hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4.b.refcyc_per_meta_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, 0, - DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4.b.refcyc_per_meta_trip_to_mem); - } else if (watermarks->dcn4.b.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem) wm_pending = true; return wm_pending; @@ -192,89 +192,89 @@ bool hubbub401_program_stutter_watermarks( bool wm_pending = false; /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.sr_enter - > hubbub2->watermarks.dcn4.a.sr_enter) { - hubbub2->watermarks.dcn4.a.sr_enter = - watermarks->dcn4.a.sr_enter; + if (safe_to_lower || watermarks->dcn4x.a.sr_enter + > hubbub2->watermarks.dcn4x.a.sr_enter) { + hubbub2->watermarks.dcn4x.a.sr_enter = + watermarks->dcn4x.a.sr_enter; REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4x.a.sr_enter); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.sr_enter, watermarks->dcn4.a.sr_enter); + watermarks->dcn4x.a.sr_enter, watermarks->dcn4x.a.sr_enter); // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, 
watermarks->dcn4x.a.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4x.a.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4x.a.sr_enter); - } else if (watermarks->dcn4.a.sr_enter - < hubbub2->watermarks.dcn4.a.sr_enter) + } else if (watermarks->dcn4x.a.sr_enter + < hubbub2->watermarks.dcn4x.a.sr_enter) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.sr_exit - > hubbub2->watermarks.dcn4.a.sr_exit) { - hubbub2->watermarks.dcn4.a.sr_exit = - watermarks->dcn4.a.sr_exit; + if (safe_to_lower || watermarks->dcn4x.a.sr_exit + > hubbub2->watermarks.dcn4x.a.sr_exit) { + hubbub2->watermarks.dcn4x.a.sr_exit = + watermarks->dcn4x.a.sr_exit; REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4x.a.sr_exit); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.sr_exit, watermarks->dcn4.a.sr_exit); + watermarks->dcn4x.a.sr_exit, watermarks->dcn4x.a.sr_exit); // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4x.a.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4x.a.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4x.a.sr_exit); - } else if (watermarks->dcn4.a.sr_exit - < hubbub2->watermarks.dcn4.a.sr_exit) + } else if (watermarks->dcn4x.a.sr_exit + < hubbub2->watermarks.dcn4x.a.sr_exit) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.sr_enter - > hubbub2->watermarks.dcn4.b.sr_enter) { - hubbub2->watermarks.dcn4.b.sr_enter = - watermarks->dcn4.b.sr_enter; + if (safe_to_lower || watermarks->dcn4x.b.sr_enter + > hubbub2->watermarks.dcn4x.b.sr_enter) { + hubbub2->watermarks.dcn4x.b.sr_enter = + watermarks->dcn4x.b.sr_enter; REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4x.b.sr_enter); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.sr_enter, watermarks->dcn4.b.sr_enter); + watermarks->dcn4x.b.sr_enter, watermarks->dcn4x.b.sr_enter); // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4x.b.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4x.b.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, 0, - 
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4x.b.sr_enter); - } else if (watermarks->dcn4.b.sr_enter - < hubbub2->watermarks.dcn4.b.sr_enter) + } else if (watermarks->dcn4x.b.sr_enter + < hubbub2->watermarks.dcn4x.b.sr_enter) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.sr_exit - > hubbub2->watermarks.dcn4.b.sr_exit) { - hubbub2->watermarks.dcn4.b.sr_exit = - watermarks->dcn4.b.sr_exit; + if (safe_to_lower || watermarks->dcn4x.b.sr_exit + > hubbub2->watermarks.dcn4x.b.sr_exit) { + hubbub2->watermarks.dcn4x.b.sr_exit = + watermarks->dcn4x.b.sr_exit; REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4x.b.sr_exit); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.sr_exit, watermarks->dcn4.b.sr_exit); + watermarks->dcn4x.b.sr_exit, watermarks->dcn4x.b.sr_exit); // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4x.b.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4x.b.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4x.b.sr_exit); - } else if (watermarks->dcn4.b.sr_exit - < hubbub2->watermarks.dcn4.b.sr_exit) + } else if (watermarks->dcn4x.b.sr_exit + < hubbub2->watermarks.dcn4x.b.sr_exit) wm_pending = true; return wm_pending; @@ -292,116 +292,116 @@ bool hubbub401_program_pstate_watermarks( /* Section for UCLK_PSTATE_CHANGE_WATERMARKS */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.uclk_pstate - > hubbub2->watermarks.dcn4.a.uclk_pstate) { - hubbub2->watermarks.dcn4.a.uclk_pstate = - watermarks->dcn4.a.uclk_pstate; + if (safe_to_lower || watermarks->dcn4x.a.uclk_pstate + > hubbub2->watermarks.dcn4x.a.uclk_pstate) { + hubbub2->watermarks.dcn4x.a.uclk_pstate = + watermarks->dcn4x.a.uclk_pstate; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.uclk_pstate); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.uclk_pstate); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.uclk_pstate, watermarks->dcn4.a.uclk_pstate); - } else if (watermarks->dcn4.a.uclk_pstate - < hubbub2->watermarks.dcn4.a.uclk_pstate) + watermarks->dcn4x.a.uclk_pstate, watermarks->dcn4x.a.uclk_pstate); + } else if (watermarks->dcn4x.a.uclk_pstate + < hubbub2->watermarks.dcn4x.a.uclk_pstate) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.uclk_pstate - > hubbub2->watermarks.dcn4.b.uclk_pstate) { - hubbub2->watermarks.dcn4.b.uclk_pstate = - watermarks->dcn4.b.uclk_pstate; + if (safe_to_lower || watermarks->dcn4x.b.uclk_pstate + > hubbub2->watermarks.dcn4x.b.uclk_pstate) { + hubbub2->watermarks.dcn4x.b.uclk_pstate = + watermarks->dcn4x.b.uclk_pstate; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, 0, - 
DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.uclk_pstate); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.uclk_pstate); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.uclk_pstate, watermarks->dcn4.b.uclk_pstate); - } else if (watermarks->dcn4.b.uclk_pstate - < hubbub2->watermarks.dcn4.b.uclk_pstate) + watermarks->dcn4x.b.uclk_pstate, watermarks->dcn4x.b.uclk_pstate); + } else if (watermarks->dcn4x.b.uclk_pstate + < hubbub2->watermarks.dcn4x.b.uclk_pstate) wm_pending = true; /* Section for UCLK_PSTATE_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ - if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt - > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.a.temp_read_or_ppt = - watermarks->dcn4.a.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.a.temp_read_or_ppt = + watermarks->dcn4x.a.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); - } else if (watermarks->dcn4.a.temp_read_or_ppt - < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt); + } else if (watermarks->dcn4x.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt - > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.b.temp_read_or_ppt = - watermarks->dcn4.b.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.b.temp_read_or_ppt = + watermarks->dcn4x.b.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); - } else if (watermarks->dcn4.b.temp_read_or_ppt - < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt); + } else if (watermarks->dcn4x.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) wm_pending = true; /* Section for FCLK_PSTATE_CHANGE_WATERMARKS */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.fclk_pstate - > hubbub2->watermarks.dcn4.a.fclk_pstate) { - hubbub2->watermarks.dcn4.a.fclk_pstate = - watermarks->dcn4.a.fclk_pstate; + if (safe_to_lower || watermarks->dcn4x.a.fclk_pstate + > hubbub2->watermarks.dcn4x.a.fclk_pstate) { + hubbub2->watermarks.dcn4x.a.fclk_pstate = + watermarks->dcn4x.a.fclk_pstate; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.fclk_pstate); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.fclk_pstate); 
DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.fclk_pstate, watermarks->dcn4.a.fclk_pstate); - } else if (watermarks->dcn4.a.fclk_pstate - < hubbub2->watermarks.dcn4.a.fclk_pstate) + watermarks->dcn4x.a.fclk_pstate, watermarks->dcn4x.a.fclk_pstate); + } else if (watermarks->dcn4x.a.fclk_pstate + < hubbub2->watermarks.dcn4x.a.fclk_pstate) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.fclk_pstate - > hubbub2->watermarks.dcn4.b.fclk_pstate) { - hubbub2->watermarks.dcn4.b.fclk_pstate = - watermarks->dcn4.b.fclk_pstate; + if (safe_to_lower || watermarks->dcn4x.b.fclk_pstate + > hubbub2->watermarks.dcn4x.b.fclk_pstate) { + hubbub2->watermarks.dcn4x.b.fclk_pstate = + watermarks->dcn4x.b.fclk_pstate; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.fclk_pstate); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.fclk_pstate); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.fclk_pstate, watermarks->dcn4.b.fclk_pstate); - } else if (watermarks->dcn4.b.fclk_pstate - < hubbub2->watermarks.dcn4.b.fclk_pstate) + watermarks->dcn4x.b.fclk_pstate, watermarks->dcn4x.b.fclk_pstate); + } else if (watermarks->dcn4x.b.fclk_pstate + < hubbub2->watermarks.dcn4x.b.fclk_pstate) wm_pending = true; /* Section for FCLK_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ - if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt - > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.a.temp_read_or_ppt = - watermarks->dcn4.a.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.a.temp_read_or_ppt = + watermarks->dcn4x.a.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); - } else if (watermarks->dcn4.a.temp_read_or_ppt - < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt); + } else if (watermarks->dcn4x.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt - > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.b.temp_read_or_ppt = - watermarks->dcn4.b.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.b.temp_read_or_ppt = + watermarks->dcn4x.b.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); - } else if (watermarks->dcn4.b.temp_read_or_ppt - < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + 
watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt); + } else if (watermarks->dcn4x.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) wm_pending = true; return wm_pending; @@ -418,29 +418,29 @@ bool hubbub401_program_usr_watermarks( bool wm_pending = false; /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.usr - > hubbub2->watermarks.dcn4.a.usr) { - hubbub2->watermarks.dcn4.a.usr = watermarks->dcn4.a.usr; + if (safe_to_lower || watermarks->dcn4x.a.usr + > hubbub2->watermarks.dcn4x.a.usr) { + hubbub2->watermarks.dcn4x.a.usr = watermarks->dcn4x.a.usr; REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, 0, - DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4.a.usr); + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4x.a.usr); DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.usr, watermarks->dcn4.a.usr); - } else if (watermarks->dcn4.a.usr - < hubbub2->watermarks.dcn4.a.usr) + watermarks->dcn4x.a.usr, watermarks->dcn4x.a.usr); + } else if (watermarks->dcn4x.a.usr + < hubbub2->watermarks.dcn4x.a.usr) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.usr - > hubbub2->watermarks.dcn4.b.usr) { - hubbub2->watermarks.dcn4.b.usr = watermarks->dcn4.b.usr; + if (safe_to_lower || watermarks->dcn4x.b.usr + > hubbub2->watermarks.dcn4x.b.usr) { + hubbub2->watermarks.dcn4x.b.usr = watermarks->dcn4x.b.usr; REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, 0, - DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4.b.usr); + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4x.b.usr); DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.usr, watermarks->dcn4.b.usr); - } else if (watermarks->dcn4.b.usr - < hubbub2->watermarks.dcn4.b.usr) + watermarks->dcn4x.b.usr, watermarks->dcn4x.b.usr); + } else if (watermarks->dcn4x.b.usr + < hubbub2->watermarks.dcn4x.b.usr) wm_pending = true; return wm_pending; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 5f6c7daa14d9..a8b44f398ce6 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -63,7 +63,7 @@ union dcn_watermark_set { struct dml2_dchub_watermark_regs b; struct dml2_dchub_watermark_regs c; struct dml2_dchub_watermark_regs d; - } dcn4; //dcn4+ + } dcn4x; //dcn4+ }; struct dce_watermarks { From fa53b23ff7c98930c7e529b8f5228d9f182ae2bc Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 4 Jul 2024 18:41:58 +0000 Subject: [PATCH 094/447] drm/amd/display: rename dcn401_soc to dcn4_variant_a_soc To distinguish between different SoCs that share the same DCN IP, name the variants with letter suffixes (variant a, b, and so on). Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 2 +- .../amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index ec663c7442e5..4164cda90b2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -31,7 +31,7 @@ static void dml21_init_socbb_params(struct 
dml2_initialize_instance_in_out *dml_ else soc_bb = &dml2_socbb_dcn401; - qos_params = &dml_dcn401_soc_qos_params; + qos_params = &dml_dcn4_variant_a_soc_qos_params; } /* patch soc bb */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index 0fe70bd0dbac..898b1dd69edd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -8,7 +8,7 @@ #include "dml_top_soc_parameter_types.h" -static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { +static const struct dml2_soc_qos_parameters dml_dcn4_variant_a_soc_qos_params = { .derate_table = { .system_active_urgent = { .dram_derate_percent_pixel = 22, From a90e1dc25c064bf21353cb342aac938662a148e4 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 9 Jul 2024 13:11:55 -0400 Subject: [PATCH 095/447] drm/amd/display: Add helper function to check for non-address fast updates [Why/How] Need to identify which fast updates will update more than just the address. Reviewed-by: Alvin Lee Signed-off-by: Ilya Bakoulin Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 25 +++++++++++++++++++++++- drivers/gpu/drm/amd/display/dc/dc.h | 6 ++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b71c4d8e73dd..b8a6c062426d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4697,7 +4697,7 @@ static bool commit_minimal_transition_state(struct dc *dc, return true; } -static void populate_fast_updates(struct dc_fast_update *fast_update, +void populate_fast_updates(struct dc_fast_update *fast_update, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_update *stream_update) @@ -4707,6 +4707,9 @@ static void populate_fast_updates(struct dc_fast_update *fast_update, if (stream_update) { fast_update[0].out_transfer_func = stream_update->out_transfer_func; fast_update[0].output_csc_transform = stream_update->output_csc_transform; + } else { + fast_update[0].out_transfer_func = NULL; + fast_update[0].output_csc_transform = NULL; } for (i = 0; i < surface_count; i++) { @@ -4740,6 +4743,26 @@ static bool fast_updates_exist(struct dc_fast_update *fast_update, int surface_c return false; } +bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count) +{ + int i; + + if (fast_update[0].out_transfer_func || + fast_update[0].output_csc_transform) + return true; + + for (i = 0; i < surface_count; i++) { + if (fast_update[i].input_csc_color_matrix || + fast_update[i].gamma || + fast_update[i].gamut_remap_matrix || + fast_update[i].coeff_reduction_factor || + fast_update[i].cursor_csc_color_matrix) + return true; + } + + return false; +} + static bool full_update_required(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 036b23a6e324..272ae1bdc57f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1587,6 +1587,12 @@ bool dc_acquire_release_mpc_3dlut( bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); void get_audio_check(struct audio_info *aud_modes, struct audio_check *aud_chk); + +bool 
fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count); +void populate_fast_updates(struct dc_fast_update *fast_update, + struct dc_surface_update *srf_updates, + int surface_count, + struct dc_stream_update *stream_update); /* * Set up streams and links associated to drive sinks * The streams parameter is an absolute set of all active streams. From ec0d7abbb0d464619d6d1646f03603c6616d966e Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Thu, 11 Jul 2024 14:56:29 -0400 Subject: [PATCH 096/447] drm/amd/display: Fix Potential Null Dereference [what & why] A system hang after S4 was traced back to this code change. Remove the possible NULL dereference by using the already-checked new_pipe->stream->link_enc instead of reaching through new_pipe->stream->link->link_enc. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Signed-off-by: Gabe Teeger Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 248d22b23a6d..2d5bd5c7ab94 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -139,9 +139,9 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * old_pipe->stream != new_pipe->stream && old_pipe->stream_res.tg == new_pipe->stream_res.tg && new_pipe->stream->link_enc && !new_pipe->stream->dpms_off && - new_pipe->stream->link->link_enc->funcs->is_dig_enabled && - new_pipe->stream->link->link_enc->funcs->is_dig_enabled( - new_pipe->stream->link->link_enc) && + new_pipe->stream->link_enc->funcs->is_dig_enabled && + new_pipe->stream->link_enc->funcs->is_dig_enabled( + new_pipe->stream->link_enc) && new_pipe->stream_res.stream_enc && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); From fae1879dc229249c17e5fd4ef81bf33684ee58f0 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Thu, 11 Jul 2024 11:24:07 -0400 Subject: [PATCH 097/447] drm/amd/display: Check top sink only when multiple streams for DP2 [why] When switching from extended mode to second-display-only mode, the top remote sink is not removed even though the top stream itself is released. The leftover top remote sink then no longer matches the second stream, so DML concludes there is no DP2 output encoder, disables DTBCLK, and the system hangs. [how] For DP2.0 MST hubs, treat the 1st remote sink as an encoder only when there are multiple displays connected, as sketched below.
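A minimal sketch of the gating described above (illustrative only and simplified; the names are taken from the actual change below):

	/* Use the 1st remote sink to classify the encoder only when more
	 * than one DP2 MST stream is active, so a leftover remote sink
	 * from the previous topology cannot misclassify a single
	 * remaining stream.
	 */
	if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0] &&
	    dp2_mst_stream_count > 1)
		return pipe_ctx->stream_res.hpo_dp_stream_enc &&
		       pipe_ctx->link_res.hpo_dp_link_enc &&
		       dc_is_dp_signal(pipe_ctx->stream->signal);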
Reviewed-by: Michael Strauss Signed-off-by: Sung Joon Kim Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml2_internal_types.h | 1 + .../display/dc/dml2/dml2_translation_helper.c | 41 +++++++++++++++++-- .../display/dc/dml2/dml2_translation_helper.h | 2 +- .../gpu/drm/amd/display/dc/dml2/dml2_utils.c | 6 +-- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index b566f53608c6..3ba184be25d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -101,6 +101,7 @@ struct dml2_wrapper_scratch { struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; bool enable_flexible_pipe_mapping; bool plane_duplicate_exists; + unsigned int dp2_mst_stream_count; }; struct dml2_helper_det_policy_scratch { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 8b9dcee77266..25d4ef040173 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -733,7 +733,8 @@ static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st * } static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, - const struct dc_stream_state *in, const struct pipe_ctx *pipe) + const struct dc_stream_state *in, const struct pipe_ctx *pipe, + unsigned int dp2_mst_stream_count) { unsigned int output_bpc; @@ -746,7 +747,7 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (is_dp2p0_output_encoder(pipe)) + if (is_dp2p0_output_encoder(pipe, dp2_mst_stream_count)) out->OutputEncoder[location] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: @@ -1193,6 +1194,37 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, plane_index = 0; } } + +static unsigned int calculate_dp2_mst_stream_count(struct dc_state *context) +{ + int i, j; + unsigned int dp2_mst_stream_count = 0; + + for (i = 0; i < context->stream_count; i++) { + struct dc_stream_state *stream = context->streams[i]; + + if (!stream || stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST) + continue; + + for (j = 0; j < MAX_PIPES; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + + if (!pipe_ctx || !pipe_ctx->stream) + continue; + + if (stream != pipe_ctx->stream) + continue; + + if (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc) { + dp2_mst_stream_count++; + break; + } + } + } + + return dp2_mst_stream_count; +} + static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out, unsigned int location, const struct dc_stream_state *in) { @@ -1255,6 +1287,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (dml2->v20.dml_core_ctx.ip.hostvm_enable) dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; + dml2->v20.scratch.dp2_mst_stream_count = calculate_dp2_mst_stream_count(context); dml2_populate_pipe_to_plane_index_mapping(dml2, context); for (i = 0; i < context->stream_count; i++) { @@ -1276,7 +1309,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context 
*dml2, struct dc_stat ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); /*Call site for populate_dml_writeback_cfg_from_stream_state*/ populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback, disp_cfg_stream_location, context->streams[i]); @@ -1337,7 +1370,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (j >= 1) { populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h index d764773938f4..55659b22d87f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h @@ -36,6 +36,6 @@ void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe, unsigned int dp2_mst_stream_count); #endif //__DML2_TRANSLATION_HELPER_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 92238ff333a4..7655501e75d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -153,7 +153,7 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e } } -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx, unsigned int dp2_mst_stream_count) { if (pipe_ctx == NULL || pipe_ctx->stream == NULL) return false; @@ -162,7 +162,7 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? 
pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ - if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { + if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0] && dp2_mst_stream_count > 1) { return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal) && @@ -181,7 +181,7 @@ bool is_dtbclk_required(const struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; - if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i])) + if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i], context->bw_ctx.dml2->v20.scratch.dp2_mst_stream_count)) return true; } return false; From 1a03b0e6c5fdb476203be4f4597205ff8799369b Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 12 Jul 2024 09:39:13 -0600 Subject: [PATCH 098/447] drm/amd/display: Add MST debug message when link detection fails [WHY & HOW] dc_link_detect returns a boolean value, which can be used to print a debug message when it fails. This fixes 1 CHECKED_RETURN issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 62cb59f00929..db56b0aa5454 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3804,9 +3804,12 @@ static int trigger_hpd_mst_set(void *data, u64 val) if (aconnector->dc_link->type == dc_connection_mst_branch && aconnector->mst_mgr.aux) { mutex_lock(&adev->dm.dc_lock); - dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); + ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); mutex_unlock(&adev->dm.dc_lock); + if (!ret) + DRM_ERROR("DM_MST: Failed to detect dc link!"); + ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true); if (ret < 0) DRM_ERROR("DM_MST: Failed to set the device into MST mode!"); From d925c04d974c657d10471c0c2dba3bc9c7d994ee Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 16:45:39 -0600 Subject: [PATCH 099/447] drm/amd/display: Check link_res->hpo_dp_link_enc before using it [WHAT & HOW] The functions dp_enable_link_phy and dp_disable_link_phy can pass a link_res whose hpo_dp_link_enc was never initialized, so it must be checked for NULL before it is dereferenced. This fixes 1 FORWARD_NULL issue reported by Coverity. The added guard is sketched below.
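A minimal sketch of the guard (this mirrors the hunk below, which places the check at the top of enable_hpo_dp_link_output):

	if (!link_res->hpo_dp_link_enc) {
		DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__);
		return;
	}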
Fixes: 0beca868cde8 ("drm/amd/display: Check link_res->hpo_dp_link_enc before using it") Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c index d0148f10dfc0..cec68c5dba13 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c @@ -110,6 +110,11 @@ void enable_hpo_dp_link_output(struct dc_link *link, enum clock_source_id clock_source, const struct dc_link_settings *link_settings) { + if (!link_res->hpo_dp_link_enc) { + DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__); + return; + } + if (link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating) link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating( link->dc->res_pool->dccg, From 3237403b98e173c296ec83cbba5e9def331c5e13 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 11 Jul 2024 10:53:41 -0600 Subject: [PATCH 100/447] drm/amd/display: Remove old comments Remove some old comments from DCN32/321. Signed-off-by: Rodrigo Siqueira Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 4cb0227bdd27..6f490d8d7038 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -160,8 +160,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .pct_ideal_sdp_bw_after_urgent = 90.0, .pct_ideal_fabric_bw_after_urgent = 67.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, .pct_ideal_dram_bw_after_urgent_strobe = 67.0, .max_avg_sdp_bw_use_normal_percent = 80.0, .max_avg_fabric_bw_use_normal_percent = 60.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 4297402bdab3..8839faf42207 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -139,8 +139,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .pct_ideal_sdp_bw_after_urgent = 90.0, .pct_ideal_fabric_bw_after_urgent = 67.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, .pct_ideal_dram_bw_after_urgent_strobe = 67.0, .max_avg_sdp_bw_use_normal_percent = 80.0, .max_avg_fabric_bw_use_normal_percent = 60.0, From 58ed441367b9b0fb48aa8cd471a73073d1143e16 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: 
Mon, 20 May 2024 11:12:07 -0400 Subject: [PATCH 101/447] drm/amd/display: Various DML2 fixes for FAMS2 The FAMS2 disable operation was reworked, but some of the old code remained. This commit removes the now-unused disable_fams2_drr override from the dml2_stream_parameters. Reviewed-by: Rodrigo Siqueira Signed-off-by: Dillon Varone Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h | 1 - .../display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index fe153f4edaf5..b132f676a68d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -410,7 +410,6 @@ struct dml2_stream_parameters { enum dml2_odm_mode odm_mode; bool disable_dynamic_odm; bool disable_subvp; - bool disable_fams2_drr; int minimum_vblank_idle_requirement_us; bool minimize_active_latency_hiding; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index fa445067782e..dddb21818f8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1012,7 +1012,7 @@ static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, stream_descriptor = &display_config->display_config.stream_descriptors[i]; stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[i]; - if (!stream_descriptor->timing.drr_config.enabled || stream_descriptor->overrides.disable_fams2_drr) + if (!stream_descriptor->timing.drr_config.enabled) return false; /* cannot support required vtotal */ From 8732594017d32132ca741f4ec4eec91211f5d10c Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Thu, 6 Jun 2024 15:51:16 -0400 Subject: [PATCH 102/447] drm/amd/display: Remove duplicate HWSS interfaces [Why] Some interface functions are defined in both the public and private HWSS interfaces, which can lead to confusion and runtime issues; the duplicates should therefore be eliminated. [How] - power_down should only be private, because it's only used within HWSS. - update_plane_addr should only be public, as it's used outside HWSS. The resulting calling convention is sketched below.
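A rough sketch of the resulting call sites (illustrative only, simplified from the hunks below):

	/* power_down stays private: reached only through the hwseq private funcs */
	if (dc->hwseq->funcs.power_down)
		dc->hwseq->funcs.power_down(dc);

	/* update_plane_addr stays public: reached through the top-level hwss */
	dc->hwss.update_plane_addr(dc, pipe_ctx);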
Reviewed-by: Rodrigo Siqueira Signed-off-by: Joshua Aberback Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 1 - .../gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 10 +++++----- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 2 +- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c | 1 - .../gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c | 1 - drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c | 1 - .../gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c | 1 - drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c | 1 - .../gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 8 ++++---- .../gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c | 2 -- drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h | 1 - .../gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h | 2 -- 21 files changed, 22 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 4593fb2a0536..e2f5c4d34a55 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -3319,7 +3319,6 @@ static const struct hw_sequencer_funcs dce110_funcs = { static const struct hwseq_private_funcs dce110_private_funcs = { .init_pipes = init_pipes, - .update_plane_addr = update_plane_addr, .set_input_transfer_func = dce110_set_input_transfer_func, .set_output_transfer_func = dce110_set_output_transfer_func, .power_down = dce110_power_down, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 212576dbc336..e31249d1dd22 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1696,10 +1696,10 @@ void dcn10_power_down_on_boot(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && + dc->hwseq->funcs.power_down && dc->hwss.edp_power_control) { dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -1707,8 +1707,8 @@ void dcn10_power_down_on_boot(struct dc *dc) if (link->link_enc && link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down) { + dc->hwseq->funcs.power_down(dc); break; } @@ -2913,7 +2913,7 @@ static void dcn10_update_dchubp_dpp( hubp->power_gated = false; - hws->funcs.update_plane_addr(dc, pipe_ctx); + dc->hwss.update_plane_addr(dc, pipe_ctx); if (is_pipe_tree_visible(pipe_ctx)) hubp->funcs->set_blank(hubp, false); diff --git 
a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c index a5bdac79a744..5e51e1761707 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c @@ -78,7 +78,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, @@ -92,7 +91,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = { static const struct hwseq_private_funcs dcn10_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn10_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .program_pipe = dcn10_program_pipe, .update_mpcc = dcn10_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index bd7b186fb2e4..270e337ae27b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1825,7 +1825,7 @@ static void dcn20_update_dchubp_dpp( params.subvp_save_surf_addr.subvp_index = pipe_ctx->subvp_index; hwss_subvp_save_surf_addr(¶ms); } - hws->funcs.update_plane_addr(dc, pipe_ctx); + dc->hwss.update_plane_addr(dc, pipe_ctx); } if (pipe_ctx->update_flags.bits.enable) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c index ef6488165b8f..32707b344f0b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c @@ -105,7 +105,6 @@ static const struct hw_sequencer_funcs dcn20_funcs = { static const struct hwseq_private_funcs dcn20_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn20_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c index a13bf6c9386e..78351408e864 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c @@ -96,7 +96,6 @@ static const struct hw_sequencer_funcs dcn201_funcs = { static const struct hwseq_private_funcs dcn201_private_funcs = { .init_pipes = NULL, - .update_plane_addr = dcn201_update_plane_addr, .plane_atomic_disconnect = dcn201_plane_atomic_disconnect, .program_pipe = dcn10_program_pipe, .update_mpcc = dcn201_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c index 3dfac372d165..e044e9e0a3a1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c @@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = 
dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -109,7 +108,6 @@ static const struct hw_sequencer_funcs dcn21_funcs = { static const struct hwseq_private_funcs dcn21_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn20_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index eaeeade31ed7..fc5936460ac2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -731,10 +731,10 @@ void dcn30_init_hw(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -742,8 +742,8 @@ void dcn30_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 4b32497c09d0..2a8dc40d2847 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -113,7 +113,6 @@ static const struct hw_sequencer_funcs dcn30_funcs = { static const struct hwseq_private_funcs dcn30_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c index 97e33eb7ac5a..93e49d87a67c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c @@ -111,7 +111,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = { static const struct hwseq_private_funcs dcn301_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 9cb7afe0e731..b57dd45611f2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -98,7 +98,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -116,7 +115,6 @@ static 
const struct hw_sequencer_funcs dcn31_funcs = { static const struct hwseq_private_funcs dcn31_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index 7a8db4b81471..fe5495a8e7a2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -100,7 +100,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -119,7 +118,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = { static const struct hwseq_private_funcs dcn314_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 7f41eccefe02..a7cb003f1dfb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -901,10 +901,10 @@ void dcn32_init_hw(struct dc *dc) if (edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } } @@ -914,8 +914,8 @@ void dcn32_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 5c50458b12cb..968b010971ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -124,7 +124,6 @@ static const struct hw_sequencer_funcs dcn32_funcs = { static const struct hwseq_private_funcs dcn32_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index ac1e3331a77c..a9dc7cf12dac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -629,10 +629,10 @@ void dcn35_power_down_on_boot(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && 
edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && + dc->hwseq->funcs.power_down && dc->hwss.edp_power_control) { dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -640,8 +640,8 @@ void dcn35_power_down_on_boot(struct dc *dc) if (link->link_enc && link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down) { + dc->hwseq->funcs.power_down(dc); break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 428912f37129..55dc5799e725 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -101,7 +101,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -128,7 +127,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { static const struct hwseq_private_funcs dcn35_private_funcs = { .init_pipes = dcn35_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 55e791552bca..b1b2a58684e7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -100,7 +100,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -127,7 +126,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { static const struct hwseq_private_funcs dcn351_private_funcs = { .init_pipes = dcn35_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 779960278a5c..87c5ef579ecb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -354,10 +354,10 @@ void dcn401_init_hw(struct dc *dc) if (edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - 
dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } } @@ -367,8 +367,8 @@ void dcn401_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 1439f07f0b64..2533f16510ba 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -99,12 +99,10 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .fams2_global_control_lock = dcn401_fams2_global_control_lock, .fams2_update_config = dcn401_fams2_update_config, .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, - .power_down = dce110_power_down, }; static const struct hwseq_private_funcs dcn401_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index d05be65a2256..f50b2955ce8c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -240,7 +240,6 @@ struct hw_sequencer_funcs { void (*program_triplebuffer)(const struct dc *dc, struct pipe_ctx *pipe_ctx, bool enableTripleBuffer); void (*update_pending_status)(struct pipe_ctx *pipe_ctx); - void (*power_down)(struct dc *dc); void (*update_dsc_pg)(struct dc *dc, struct dc_state *context, bool safe_to_disable); /* Pipe Lock Related */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 7a75ff320511..0ac675456979 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -76,8 +76,6 @@ struct hwseq_private_funcs { void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx); void (*init_pipes)(struct dc *dc, struct dc_state *context); void (*reset_hw_ctx_wrap)(struct dc *dc, struct dc_state *context); - void (*update_plane_addr)(const struct dc *dc, - struct pipe_ctx *pipe_ctx); void (*plane_atomic_disconnect)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); From 779ea9d32612f8e78a2f362d52cf31c23ba878f0 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Mon, 15 Jul 2024 16:03:30 -0400 Subject: [PATCH 103/447] drm/amd/display: remove unused folder dc/{dcn401,dcn303} are unused since the files in them were moved to their respective new component locations.
Hence they are no longer necessary Fixes: 2d62bb450ed1 ("drm/amd/display: Refactor DCN3X into component folder") Reviewed-by: Leo Li Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn303/Makefile | 13 ------------- drivers/gpu/drm/amd/display/dc/dcn401/Makefile | 10 ---------- 2 files changed, 23 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn303/Makefile delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn401/Makefile diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile deleted file mode 100644 index a954e316aca2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: MIT -# -# Copyright (C) 2021 Advanced Micro Devices, Inc. All the rights reserved -# -# Authors: AMD -# -# Makefile for dcn303. - -DCN3_03 = dcn303_init.o - -AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_03) diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/Makefile b/drivers/gpu/drm/amd/display/dc/dcn401/Makefile deleted file mode 100644 index ded1f3140beb..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn401/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved. - -DCN401 += dcn401_dio_link_encoder.o -DCN401 += dcn401_dio_stream_encoder.o -DCN401 += dcn401_mpc.o - -AMD_DAL_DCN401 = $(addprefix $(AMDDALPATH)/dc/dcn401/,$(DCN401)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN401) From c8a0222dedf90e9a79b88814e32ffb43ac4beef6 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 14 Jul 2024 21:54:49 -0400 Subject: [PATCH 104/447] drm/amd/display: 3.2.293 Signed-off-by: Aurabindo Pillai Signed-off-by: Aric Cyr Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 272ae1bdc57f..4077c1ddb9c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.292" +#define DC_VER "3.2.293" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 0352e39e7e781fe6a408c70a336d0f321dfe108b Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 18:15:18 +0530 Subject: [PATCH 105/447] drm/amd/display: Add kdoc entry for 'bs_coeffs_updated' in dpp401_dscl_program_isharp Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/../display/dc/dpp/dcn401/dcn401_dpp_dscl.c:961: warning: Function parameter or struct member 'bs_coeffs_updated' not described in 'dpp401_dscl_program_isharp' Fixes: 94beb4ac1b3b ("drm/amd/display: ensure EASF and ISHARP coefficients are programmed together") Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 703d7b51c6c2..3a3745597f0c 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ 
b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -951,6 +951,7 @@ static void dpp401_dscl_set_isharp_filter( * * @dpp_base: High level DPP struct * @scl_data: scalaer_data info + * @bs_coeffs_updated: coeffs update flag * * This is the primary function to program isharp * From bc50b614d59990747dd5aeced9ec22f9258991ff Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 20 Jul 2024 18:44:02 +0530 Subject: [PATCH 106/447] drm/amd/display: Fix index out of bounds in DCN30 degamma hardware format translation This commit addresses a potential index out of bounds issue in the `cm3_helper_translate_curve_to_degamma_hw_format` function in the DCN30 color management module. The issue could occur when the index 'i' exceeds the number of transfer function points (TRANSFER_FUNC_POINTS). The fix adds a check to ensure 'i' is within bounds before accessing the transfer function points. If 'i' is out of bounds, the function returns false to indicate an error. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:338 cm3_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.red' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:339 cm3_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.green' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:340 cm3_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.blue' 1025 <= s32max Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index 685702321d32..e55d7ff346d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -335,6 +335,8 @@ bool cm3_helper_translate_curve_to_degamma_hw_format( i += increment) { if (j == hw_points - 1) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; From b7e99058eb2e86aabd7a10761e76cae33d22b49f Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 20 Jul 2024 17:48:27 +0530 Subject: [PATCH 107/447] drm/amd/display: Fix index out of bounds in degamma hardware format translation Fixes an index out of bounds issue in the `cm_helper_translate_curve_to_degamma_hw_format` function. The issue could occur when the index 'i' exceeds the number of transfer function points (TRANSFER_FUNC_POINTS). The fix adds a check to ensure 'i' is within bounds before accessing the transfer function points. If 'i' is out of bounds, the function returns false to indicate an error.
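[Editor's note: the cm*_helper fixes in this group all guard the same loop shape, where a caller-derived stride can push the read index past the 1025-entry transfer-function tables. A minimal standalone model of the guarded sampling loop, with the driver's types and surrounding logic simplified away:]

#include <stdbool.h>
#include <stdint.h>

#define TRANSFER_FUNC_POINTS 1025	/* matches the driver macro name */

/* Copy every 'increment'-th entry of src[] into dst[0..hw_points-1].
 * Without the bounds guard, a large increment lets 'i' run past the
 * end of src[], which is exactly what smatch reports above. */
static bool sample_curve(const uint16_t src[TRANSFER_FUNC_POINTS],
			 uint16_t *dst, int hw_points, int increment)
{
	int i, j;

	for (i = 0, j = 0; j < hw_points; i += increment, j++) {
		if (i >= TRANSFER_FUNC_POINTS)
			return false;	/* would read out of bounds */
		dst[j] = src[i];
	}
	return true;
}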
Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_cm_common.c:594 cm_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.red' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_cm_common.c:595 cm_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.green' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_cm_common.c:596 cm_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.blue' 1025 <= s32max Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 0b49362f71b0..eaed5d1c398a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -591,6 +591,8 @@ bool cm_helper_translate_curve_to_degamma_hw_format( i += increment) { if (j == hw_points - 1) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; From bdf606810210e8e07a0cdf1af3c467291363b295 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 19 Jul 2024 21:39:57 +0530 Subject: [PATCH 108/447] drm/amd/display: Implement bounds check for stream encoder creation in DCN401 'stream_enc_regs' array is an array of dcn10_stream_enc_registers structures. The array is initialized with four elements, corresponding to the four calls to stream_enc_regs() in the array initializer. This means that valid indices for this array are 0, 1, 2, and 3. The error message 'stream_enc_regs' 4 <= 5 below indicates an attempt to access this array with an index of 5, which is out of bounds. This could lead to undefined behavior. Here, eng_id is used as an index to access the stream_enc_regs array. If eng_id is 5, this would result in an out-of-bounds access on the stream_enc_regs array.
This fixes the buffer overflow in dcn401_stream_encoder_create found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn401/dcn401_resource.c:1209 dcn401_stream_encoder_create() error: buffer overflow 'stream_enc_regs' 4 <= 5 Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index d3808c49d298..5ee20753572e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1190,7 +1190,7 @@ static struct stream_encoder *dcn401_stream_encoder_create( vpg = dcn401_vpg_create(ctx, vpg_inst); afmt = dcn401_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) { + if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { kfree(enc1); kfree(vpg); kfree(afmt); From d81873f9e715b72d4f8d391c8eb243946f784dfc Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 20 Jul 2024 18:05:20 +0530 Subject: [PATCH 109/447] drm/amd/display: Fix index out of bounds in DCN30 color transformation This commit addresses a potential index out of bounds issue in the `cm3_helper_translate_curve_to_hw_format` function in the DCN30 color management module. The issue could occur when the index 'i' exceeds the number of transfer function points (TRANSFER_FUNC_POINTS). The fix adds a check to ensure 'i' is within bounds before accessing the transfer function points. If 'i' is out of bounds, the function returns false to indicate an error.
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:180 cm3_helper_translate_curve_to_hw_format() error: buffer overflow 'output_tf->tf_pts.red' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:181 cm3_helper_translate_curve_to_hw_format() error: buffer overflow 'output_tf->tf_pts.green' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:182 cm3_helper_translate_curve_to_hw_format() error: buffer overflow 'output_tf->tf_pts.blue' 1025 <= s32max Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index e55d7ff346d2..f31f0e3abfc0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -177,6 +177,8 @@ bool cm3_helper_translate_curve_to_hw_format( i += increment) { if (j == hw_points) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; From f22f4754aaa47d8c59f166ba3042182859e5dff7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 21 Jul 2024 19:18:58 +0530 Subject: [PATCH 110/447] drm/amd/display: Add null check for head_pipe in dcn201_acquire_free_pipe_for_layer This commit addresses a potential null pointer dereference issue in the `dcn201_acquire_free_pipe_for_layer` function. The issue could occur when `head_pipe` is null. The fix adds a check to ensure `head_pipe` is not null before asserting it. If `head_pipe` is null, the function returns NULL to prevent a potential null pointer dereference. 
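[Editor's note: the essential point of this fix, and of the dcn32 variant that follows, is that ASSERT() in DC does not stop execution, so on production builds the old code fell through and could dereference the NULL pointer anyway. Schematically:]

/* before: the assertion fires, then execution continues */
if (!head_pipe)
	ASSERT(0);

/* after: still traps on debug builds, but now also bails out */
if (!head_pipe) {
	ASSERT(0);
	return NULL;
}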
Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn201/dcn201_resource.c:1016 dcn201_acquire_free_pipe_for_layer() error: we previously assumed 'head_pipe' could be null (see line 1010) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 131d98025bd4..fc54483b9104 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -1007,8 +1007,10 @@ static struct pipe_ctx *dcn201_acquire_free_pipe_for_layer( struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream); struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!idle_pipe) return NULL; From ac2140449184a26eac99585b7f69814bd3ba8f2d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 21 Jul 2024 19:30:16 +0530 Subject: [PATCH 111/447] drm/amd/display: Add null check for head_pipe in dcn32_acquire_idle_pipe_for_head_pipe_in_layer This commit addresses a potential null pointer dereference issue in the `dcn32_acquire_idle_pipe_for_head_pipe_in_layer` function. The issue could occur when `head_pipe` is null. The fix adds a check to ensure `head_pipe` is not null before asserting it. If `head_pipe` is null, the function returns NULL to prevent a potential null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn32/dcn32_resource.c:2690 dcn32_acquire_idle_pipe_for_head_pipe_in_layer() error: we previously assumed 'head_pipe' could be null (see line 2681) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 6eaf3cfebcb7..a124ad9bd108 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2678,8 +2678,10 @@ static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx; int head_index; - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } /* * Modified from dcn20_acquire_idle_pipe_for_layer From 12fb3e9c88406732749fb2f111911a2438eeb0fc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:29:20 -0400 Subject: [PATCH 112/447] drm/amdgpu/gfx7: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. 
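[Editor's note: these wave-kill patches are one-liners because each ASIC's soft-recovery routine already exists for the gfx ring; the change is only to publish it in the compute ring funcs table. A rough sketch of how a timeout path would consume the hook, assuming the common helper retries the fence after killing the guilty VMID's waves; the names below are illustrative, not the exact amdgpu helper:]

struct amdgpu_ring_sketch;

struct ring_funcs_sketch {
	/* kill in-flight waves of 'vmid'; NULL means no soft recovery */
	void (*soft_recovery)(struct amdgpu_ring_sketch *ring,
			      unsigned int vmid);
};

struct amdgpu_ring_sketch {
	const struct ring_funcs_sketch *funcs;
};

static int try_soft_recovery(struct amdgpu_ring_sketch *ring,
			     unsigned int vmid)
{
	if (!ring->funcs->soft_recovery)
		return 0;	/* caller escalates to a full GPU reset */

	ring->funcs->soft_recovery(ring, vmid);	/* kill the hung waves */
	return 1;	/* caller re-checks whether the fence signals */
}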
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d84589137df9..5fbdef04c9aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5002,6 +5002,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v7_0_ring_emit_wreg, + .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, }; From 7e60ecc2b70adb41b92752cbcd749040e00b57b8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:29:59 -0400 Subject: [PATCH 113/447] drm/amdgpu/gfx8: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b4658c7db0e1..a1963e6c5cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6955,6 +6955,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v8_0_ring_emit_wreg, + .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, .emit_wave_limit = gfx_v8_0_emit_wave_limit, }; From 9c7e69d2e1245fdd5fa5c65cd022530b2a5ef1b7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:21:48 -0400 Subject: [PATCH 114/447] drm/amdgpu/gfx9: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2929c8972ea7..d4e38edc9353 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7244,6 +7244,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, }; From 68e599db7a549f010a329515f3508d8a8c3467a4 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 12:21:57 -0400 Subject: [PATCH 115/447] drm/amdkfd: Validate user queue buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Find user queue rptr, ring buf, eop buffer and cwsr area BOs, and check BOs are mapped on the GPU with correct size and take the BO reference. 
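[Editor's note: the validation added by this patch hinges on the range check in kfd_queue_buffer_get(): the user-supplied buffer must exactly cover one GPU VA mapping, and a size of 0 (used for the CWSR area) skips the size comparison. A standalone model of that check; treating addresses and sizes as already converted to page units is a simplification of the real code:]

#include <stdbool.h>
#include <stdint.h>

/* Model of one GPU VA mapping: inclusive [start, last] page range. */
struct va_mapping {
	uint64_t start;
	uint64_t last;
};

static bool buffer_matches_mapping(const struct va_mapping *m,
				   uint64_t user_addr, uint64_t size)
{
	if (user_addr != m->start)
		return false;			/* must begin at the mapping */
	if (size != 0 && user_addr + size - 1 != m->last)
		return false;			/* must cover it exactly */
	return true;				/* size == 0: any extent ok */
}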
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +++ drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 40 ++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index aba9bcd91f65..80d8080c5764 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -524,6 +524,10 @@ struct queue_properties { uint64_t exception_status; struct amdgpu_bo *wptr_bo; + struct amdgpu_bo *rptr_bo; + struct amdgpu_bo *ring_bo; + struct amdgpu_bo *eop_buf_bo; + struct amdgpu_bo *cwsr_bo; }; #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index b4529ec298a9..0e661160c295 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -97,7 +97,8 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_ if (!mapping) goto out_err; - if (user_addr != mapping->start || user_addr + size - 1 != mapping->last) { + if (user_addr != mapping->start || + (size != 0 && user_addr + size - 1 != mapping->last)) { pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n", expected_size, mapping->start << AMDGPU_GPU_PAGE_SHIFT, (mapping->last - mapping->start + 1) << AMDGPU_GPU_PAGE_SHIFT); @@ -124,18 +125,51 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE); if (err) + goto out_err_unreserve; + + err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE); + if (err) + goto out_err_unreserve; + + err = kfd_queue_buffer_get(vm, (void *)properties->queue_address, + &properties->ring_bo, properties->queue_size); + if (err) + goto out_err_unreserve; + + /* only compute queue requires EOP buffer and CWSR area */ + if (properties->type != KFD_QUEUE_TYPE_COMPUTE) goto out_unreserve; - amdgpu_bo_unreserve(vm->root.bo); - return 0; + /* EOP buffer is not required for all ASICs */ + if (properties->eop_ring_buffer_address) { + err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, + &properties->eop_buf_bo, + properties->eop_ring_buffer_size); + if (err) + goto out_err_unreserve; + } + + err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, + &properties->cwsr_bo, 0); + if (err) + goto out_err_unreserve; out_unreserve: amdgpu_bo_unreserve(vm->root.bo); + return 0; + +out_err_unreserve: + amdgpu_bo_unreserve(vm->root.bo); + kfd_queue_release_buffers(pdd, properties); return err; } int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { amdgpu_bo_unref(&properties->wptr_bo); + amdgpu_bo_unref(&properties->rptr_bo); + amdgpu_bo_unref(&properties->ring_bo); + amdgpu_bo_unref(&properties->eop_buf_bo); + amdgpu_bo_unref(&properties->cwsr_bo); return 0; } From cba7fec864172dadd953daefdd26e01742b71a6a Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 16:21:19 +0530 Subject: [PATCH 116/447] drm/amd/display: Add NULL check for clk_mgr and clk_mgr->funcs in dcn30_init_hw This commit addresses a potential null pointer dereference issue in the `dcn30_init_hw` function. The issue could occur when `dc->clk_mgr` or `dc->clk_mgr->funcs` is null. 
The fix adds a check to ensure `dc->clk_mgr` and `dc->clk_mgr->funcs` are not null before accessing their functions. This prevents a potential null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c:789 dcn30_init_hw() error: we previously assumed 'dc->clk_mgr' could be null (see line 628) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index fc5936460ac2..98a40d46aaae 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -625,7 +625,7 @@ void dcn30_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // Initialize the dccg @@ -786,11 +786,12 @@ void dcn30_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); //if softmax is enabled then hardmax will be set by a different call - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk && + !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) From 4b6377f0e96085cbec96eb7f0b282430ccdd3d75 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 16:58:32 +0530 Subject: [PATCH 117/447] drm/amd/display: Add NULL check for clk_mgr and clk_mgr->funcs in dcn401_init_hw This commit addresses a potential null pointer dereference issue in the `dcn401_init_hw` function. The issue could occur when `dc->clk_mgr` or `dc->clk_mgr->funcs` is null. The fix adds a check to ensure `dc->clk_mgr` and `dc->clk_mgr->funcs` are not null before accessing their functions. This prevents a potential null pointer dereference.
Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn401/dcn401_hwseq.c:416 dcn401_init_hw() error: we previously assumed 'dc->clk_mgr' could be null (see line 225) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 87c5ef579ecb..0fa610590245 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -222,7 +222,7 @@ void dcn401_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) { + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) { dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // mark dcmode limits present if any clock has distinct AC and DC values from SMU @@ -413,7 +413,7 @@ void dcn401_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) @@ -435,7 +435,9 @@ void dcn401_init_hw(struct dc *dc) dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2; if (!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) { /* update bounding box if FAMS2 disabled */ - dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + if (dc->clk_mgr) + dc->res_pool->funcs->update_bw_bounding_box(dc, + dc->clk_mgr->bw_params); } } } From eac3b274aaea11ec4ade8e8f684055db80d5f8b7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 18:45:50 +0530 Subject: [PATCH 118/447] drm/amdgpu: add print support for sdma_v_4_4_2 ip_dump Add print support for ip dump for sdma_v_4_4_2 in devcoredump. 
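[Editor's note: the dump and print callbacks share one flat buffer, one uint32_t per register per SDMA instance, laid out instance-major; instance_offset = i * reg_count is the whole indexing scheme. A toy, compilable illustration with made-up counts:]

#include <stdint.h>
#include <stdio.h>

#define NUM_INSTANCES 2
#define REG_COUNT     3	/* stands in for ARRAY_SIZE(sdma_reg_list_4_4_2) */

int main(void)
{
	uint32_t ip_dump[NUM_INSTANCES * REG_COUNT] = { 0 };

	/* instance-major layout: all registers of instance 0, then 1, ... */
	for (int i = 0; i < NUM_INSTANCES; i++) {
		uint32_t instance_offset = i * REG_COUNT;

		printf("\nInstance:%d\n", i);
		for (int j = 0; j < REG_COUNT; j++)
			printf("reg%d \t 0x%08x\n", j,
			       ip_dump[instance_offset + j]);
	}
	return 0;
}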
Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 67e0e894579a..cb7fedb34fa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1857,6 +1857,27 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_4_2_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_4_2[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + static void sdma_v4_4_2_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1895,6 +1916,7 @@ const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .set_powergating_state = sdma_v4_4_2_set_powergating_state, .get_clockgating_state = sdma_v4_4_2_get_clockgating_state, .dump_ip_state = sdma_v4_4_2_dump_ip_state, + .print_ip_state = sdma_v4_4_2_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { From 22a9d5cbf88a92ac6cd473c3ba1c369aee8fec9a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:27:37 -0400 Subject: [PATCH 119/447] drm/amdgpu/gfx9.4.3: implement wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on gfx9.0 implementation. 
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 20ea6cb01edf..2ac398184e12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2833,6 +2833,19 @@ static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } +static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring, + unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, value); +} + static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( struct amdgpu_device *adev, int me, int pipe, enum amdgpu_interrupt_state state, int xcc_id) @@ -4116,6 +4129,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_wreg = gfx_v9_4_3_ring_emit_wreg, .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_4_3_ring_soft_recovery, .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, }; From 834368eab36922595a402b9e76470f8efa2fac7f Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 12:31:36 -0400 Subject: [PATCH 120/447] drm/amdkfd: Ensure user queue buffers residency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an atomic queue_refcount to struct bo_va and return -EBUSY to fail unmapping a BO from the GPU if the bo_va queue_refcount is not zero. Queue creation increases the bo_va queue_refcount and queue destruction decreases it, ensuring the queue buffers stay mapped on the GPU while the queue is active.
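[Editor's note: the refcount protocol this patch introduces is small enough to state in full; a compressed model, with the VM reservation locking that protects queue_refcount elided:]

#include <errno.h>

struct bo_va_sketch {
	unsigned int queue_refcount;	/* user queues using this mapping */
};

static void queue_create(struct bo_va_sketch *bo_va)
{
	bo_va->queue_refcount++;	/* done in kfd_queue_buffer_get() */
}

static void queue_destroy(struct bo_va_sketch *bo_va)
{
	bo_va->queue_refcount--;	/* done in kfd_queue_buffer_put() */
}

static int unmap_from_gpu(struct bo_va_sketch *bo_va)
{
	if (bo_va->queue_refcount)
		return -EBUSY;	/* an active queue may still access it */
	/* ... proceed with the actual unmap ... */
	return 0;
}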
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 ++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 ++++ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 34 ++++++++++++++++--- 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 0ab37e7aec26..6d5fd371d5ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1252,7 +1252,7 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, return ret; } -static void unmap_bo_from_gpuvm(struct kgd_mem *mem, +static int unmap_bo_from_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync) { @@ -1260,11 +1260,18 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, struct amdgpu_device *adev = entry->adev; struct amdgpu_vm *vm = bo_va->base.vm; + if (bo_va->queue_refcount) { + pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount); + return -EBUSY; + } + amdgpu_vm_bo_unmap(adev, bo_va, entry->va); amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); amdgpu_sync_fence(sync, bo_va->last_pt_update); + + return 0; } static int update_gpuvm_pte(struct kgd_mem *mem, @@ -2191,7 +2198,10 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", entry->va, entry->va + bo_size, entry); - unmap_bo_from_gpuvm(mem, entry, ctx.sync); + ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync); + if (ret) + goto unreserve_out; + entry->is_mapped = false; mem->mapped_to_gpu_memory--; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index bc42ccbde659..d7e27957013f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -90,6 +90,12 @@ struct amdgpu_bo_va { bool cleared; bool is_xgmi; + + /* + * protected by vm reservation lock + * if non-zero, cannot unmap from GPU because user queues may still access it + */ + unsigned int queue_refcount; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 202f24ee4bd7..65a37ac5a0f0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1384,8 +1384,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { - pr_err("Failed to unmap from gpu %d/%d\n", - i, args->n_devices); + pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices); goto unmap_memory_from_gpu_failed; } args->n_success = i+1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 80d8080c5764..c31589043d5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1292,6 +1292,7 @@ void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, u64 expected_size); +void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo); int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); 
int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 0e661160c295..3fd386dcb011 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -106,6 +106,7 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_ } *pbo = amdgpu_bo_ref(mapping->bo_va->base.bo); + mapping->bo_va->queue_refcount++; return 0; out_err: @@ -113,6 +114,19 @@ out_err: return -EINVAL; } +void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo) +{ + if (*bo) { + struct amdgpu_bo_va *bo_va; + + bo_va = amdgpu_vm_bo_find(vm, *bo); + if (bo_va) + bo_va->queue_refcount--; + } + + amdgpu_bo_unref(bo); +} + int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { struct amdgpu_vm *vm; @@ -166,10 +180,20 @@ out_err_unreserve: int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { - amdgpu_bo_unref(&properties->wptr_bo); - amdgpu_bo_unref(&properties->rptr_bo); - amdgpu_bo_unref(&properties->ring_bo); - amdgpu_bo_unref(&properties->eop_buf_bo); - amdgpu_bo_unref(&properties->cwsr_bo); + struct amdgpu_vm *vm; + int err; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + kfd_queue_buffer_put(vm, &properties->wptr_bo); + kfd_queue_buffer_put(vm, &properties->rptr_bo); + kfd_queue_buffer_put(vm, &properties->ring_bo); + kfd_queue_buffer_put(vm, &properties->eop_buf_bo); + kfd_queue_buffer_put(vm, &properties->cwsr_bo); + + amdgpu_bo_unreserve(vm->root.bo); return 0; } From 8284951a6e79c6806c675e5f68a4cd425dd56bc4 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 19 Jul 2024 20:43:04 +0800 Subject: [PATCH 121/447] drm/amdgpu: fix ras UE error injection failure issue The ras command shared memory is allocated from VRAM and the response status of the command buffer will not be zero due to gpu being in fatal error state after ras UE error injection. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7cdff355cedb..189574d53ebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1630,9 +1630,7 @@ static int psp_ras_send_cmd(struct psp_context *psp, switch (cmd) { case TA_RAS_COMMAND__TRIGGER_ERROR: - if (ret || psp->cmd_buf_mem->resp.status) - ret = -EINVAL; - else if (out) + if (!ret && out) memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status)); break; case TA_RAS_COMMAND__QUERY_ADDRESS: From c395fd47d1565bd67671f45cca281b3acc2c31ef Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 16:44:40 +0530 Subject: [PATCH 122/447] drm/amd/display: Add NULL check for clk_mgr in dcn32_init_hw This commit addresses a potential null pointer dereference issue in the `dcn32_init_hw` function. The issue could occur when `dc->clk_mgr` is null. The fix adds a check to ensure `dc->clk_mgr` is not null before accessing its functions. This prevents a potential null pointer dereference. 
Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn32/dcn32_hwseq.c:961 dcn32_init_hw() error: we previously assumed 'dc->clk_mgr' could be null (see line 782) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index a7cb003f1dfb..fcaabad204a2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -779,7 +779,7 @@ void dcn32_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // Initialize the dccg @@ -958,10 +958,11 @@ void dcn32_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk && + !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) From 015b8a2fdf39a4c288ff24e7b715b8d9198e56dc Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Thu, 18 Jul 2024 10:58:04 +0800 Subject: [PATCH 123/447] drm/amdgpu: Fix eeprom max record count The eeprom table is empty before initializing, set eeprom table version first before initializing. Changed from V1: Reuse amdgpu_ras_set_eeprom_table_version function Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index eae0a555df3c..aab8077e5098 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1011,6 +1011,9 @@ Out: uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control) { + /* get available eeprom table version first before eeprom table init */ + amdgpu_ras_set_eeprom_table_version(control); + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) return RAS_MAX_RECORD_COUNT_V2_1; else From a2737c404cb2c1c335db30737925a306a2e7cc11 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:17:18 -0400 Subject: [PATCH 124/447] drm/amdgpu/gfx10: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. 
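[Editor's note: the gfx10 kill routine itself is not shown in this hunk; by analogy with the gfx_v9_4_3_ring_soft_recovery() added in patch 119 above, it packs an SQ_CMD request that kills all waves belonging to one VMID. A sketch of that packing with modeled field offsets; the real driver uses REG_SET_FIELD() with the generated register headers, so the shift values here are illustrative only:]

#include <stdint.h>

/* Illustrative SQ_CMD field positions (not the real bit layout). */
#define SQ_CMD_CMD_SHIFT	0	/* 0x03 = kill waves    */
#define SQ_CMD_MODE_SHIFT	4	/* 0x01 = broadcast     */
#define SQ_CMD_CHECK_VMID_SHIFT	8	/* restrict to one VMID */
#define SQ_CMD_VM_ID_SHIFT	12

static uint32_t sq_cmd_kill_vmid(unsigned int vmid)
{
	uint32_t value = 0;

	value |= 0x03u << SQ_CMD_CMD_SHIFT;		/* CMD = kill */
	value |= 0x01u << SQ_CMD_MODE_SHIFT;		/* MODE = broadcast */
	value |= 1u << SQ_CMD_CHECK_VMID_SHIFT;		/* filter by VMID */
	value |= (uint32_t)vmid << SQ_CMD_VM_ID_SHIFT;

	return value;	/* the driver then writes this to the SQ_CMD register */
}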
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2957702fca0c..c4002db6e569 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9480,6 +9480,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, }; From f53f526f7050041718af84e33fc24f670e7dccdb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:19:42 -0400 Subject: [PATCH 125/447] drm/amdgpu/gfx11: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index dcef39907449..554aae995f41 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6603,6 +6603,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, }; From af4808ac40dbf668183d0b69ef6b31e62e1fc5df Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:20:37 -0400 Subject: [PATCH 126/447] drm/amdgpu/gfx12: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f384be0d1800..567f9196d6a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5081,6 +5081,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, }; From 4b95cec68937a6302c7085b26258cf721d726684 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 1 Jul 2024 11:08:52 -0400 Subject: [PATCH 127/447] drm/amdgpu/gfx10: properly handle error ints on all pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to handle the interrupt enables for all pipes. 
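As background for the loops below, a sketch of how these .set hooks are driven (assuming the standard amdgpu IRQ refcounting; simplified):

/* The first amdgpu_irq_get() on a source calls its .set hook with
 * AMDGPU_IRQ_STATE_ENABLE; the last amdgpu_irq_put() calls it with
 * AMDGPU_IRQ_STATE_DISABLE. The hook must then program the enable bit
 * on every me/pipe that can raise the interrupt, not just ring 0.
 */
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);	/* enable on all pipes */
if (r)
	return r;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);	/* disable on all pipes */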
v2: fix indexing (Jessie) Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 128 +++++++++++++++++++++---- 1 file changed, 108 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c4002db6e569..66d80f3dc661 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5212,26 +5212,74 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) } +static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1); + default: + return 0; + } +} + +static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v10_0_init_csb(struct amdgpu_device *adev) @@ -9073,12 +9121,39 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -9092,12 +9167,25 @@ static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -9121,8 +9209,8 @@ static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; From 2662b7d9d8bc1dda1f89f0dd33422e069f2f861c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 1 Jul 2024 11:18:00 -0400 Subject: [PATCH 128/447] drm/amdgpu/gfx11: properly handle error ints on all pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to handle the interrupt enables for all pipes. v2: fix indexing (Jessie) Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 132 ++++++++++++++++++++----- 1 file changed, 110 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 554aae995f41..02efa475eb7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1953,26 +1953,74 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) gfx_v11_0_init_gds_vmid(adev); } +static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); + default: + return 0; + } +} + +static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. 
+ */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v11_0_init_csb(struct amdgpu_device *adev) @@ -6201,15 +6249,42 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6220,15 +6295,28 @@ static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6252,8 +6340,8 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; From 39879321769cc2d9a690725959ef76af92a38ac1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 1 Jul 2024 17:40:55 -0400 Subject: [PATCH 129/447] drm/amdgpu/gfx12: properly handle error ints on all pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to handle the interrupt enables for all pipes. v2: fix indexing (Jessie) Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 130 ++++++++++++++++++++----- 1 file changed, 106 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 567f9196d6a0..c74c8a60a23a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1680,26 +1680,68 @@ static void gfx_v12_0_constants_init(struct amdgpu_device *adev) gfx_v12_0_init_compute_vmid(adev); } -static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, - bool enable) +static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) { - u32 tmp; + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + default: + return 0; + } +} + +static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. 
+ */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + default: + return 0; + } +} + +static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v12_0_init_csb(struct amdgpu_device *adev) @@ -4745,15 +4787,42 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -4764,15 +4833,28 @@ static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -4796,8 +4878,8 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; From 48695573d2feaf42812c1ad54e01caff0d1c2d71 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 Jul 2024 10:24:59 -0400 Subject: [PATCH 130/447] drm/amdgpu/gfx9: properly handle error ints on all pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to handle the interrupt enables for all pipes. Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 44 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 50 +++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d4e38edc9353..97476fb2ca40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2634,7 +2634,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); - if(adev->gfx.num_gfx_rings) + if (adev->gfx.num_gfx_rings) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); @@ -5929,17 +5929,59 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, } } +static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 2ac398184e12..43a3ef276b5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2899,21 +2899,63 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( } } +static u32 gfx_v9_4_3_get_cpc_int_cntl(struct amdgpu_device *adev, + int xcc_id, int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, enum amdgpu_interrupt_state state) { - int i, num_xcc; + u32 mec_int_cntl_reg, mec_int_cntl; + int i, j, k, num_xcc; num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < num_xcc; i++) + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (j = 0; j < adev->gfx.mec.num_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + /* MECs start at 1 */ + mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k); + + if (mec_int_cntl_reg) { + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? + 1 : 0); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i); + } + } + } + } break; default: break; From acddd5cf70e609e1e1e638ac0422977ea2b4783f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:01:06 -0400 Subject: [PATCH 131/447] drm/amdgpu/gfx: add bad opcode interrupt Add the irq source for bad opcodes. 
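The per-ASIC patches that follow all wire the new source up with the same lifecycle; roughly (gfx9 identifiers shown, taken from the later hunks):

/* sw_init: bind the bad-opcode interrupt id to the new source */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
		      GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
		      &adev->gfx.bad_op_irq);
/* late_init: enable delivery */
r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
/* hw_fini: disable delivery again */
amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);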
Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ddda94e49db4..86d3fa7eef90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -391,6 +391,7 @@ struct amdgpu_gfx { struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; struct amdgpu_irq_src priv_inst_irq; + struct amdgpu_irq_src bad_op_irq; struct amdgpu_irq_src cp_ecc_error_irq; struct amdgpu_irq_src sq_irq; struct amdgpu_irq_src rlc_gc_fed_irq; From a7909022371dc8c70bdc4871a97cc49e34d78a6d Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Thu, 11 Jul 2024 10:38:03 +0800 Subject: [PATCH 132/447] drm/amdgpu/gfx11: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. v2: update irq naming (drop priv) (Alex) Acked-by: Felix Kuehling Signed-off-by: Jesse Zhang Reviewed-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 73 ++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 02efa475eb7e..4a9766635933 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1569,6 +1569,13 @@ static int gfx_v11_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, @@ -4646,6 +4653,7 @@ static int gfx_v11_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -5002,6 +5010,9 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; return 0; } @@ -6293,6 +6304,51 @@ static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -6369,6 +6425,15 @@ static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v11_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -6747,6 +6812,11 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { .process = gfx_v11_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { + .set = gfx_v11_0_set_bad_op_fault_state, + .process = gfx_v11_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { .set = gfx_v11_0_set_priv_inst_fault_state, .process = gfx_v11_0_priv_inst_irq, @@ -6764,6 +6834,9 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; From bc6c2a6f6495668e3cf0acbecf820b93ca03aef7 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 12 Jul 2024 18:14:52 -0400 Subject: [PATCH 133/447] drm/amdgpu/gfx10: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. 
v2: update irq naming (drop priv) (Alex) Acked-by: Felix Kuehling Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 74 ++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 66d80f3dc661..853084a2ce7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4740,6 +4740,13 @@ static int gfx_v10_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -7416,6 +7423,7 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); /* WA added for Vangogh asic fixing the SMU suspend failure * It needs to set power gating again during gfxoff control @@ -7726,6 +7734,10 @@ static int gfx_v10_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + return 0; } @@ -9162,6 +9174,51 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -9237,6 +9294,15 @@ static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v10_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -9624,6 +9690,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = { .process = gfx_v10_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = { + .set = gfx_v10_0_set_bad_op_fault_state, + .process = gfx_v10_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = { .set = gfx_v10_0_set_priv_inst_fault_state, .process = gfx_v10_0_priv_inst_irq, @@ -9645,6 +9716,9 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs; } From 5ebca62eb8ebff67e2c8c4903bdb4f3c07922114 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 12 Jul 2024 18:42:53 -0400 Subject: [PATCH 134/447] drm/amdgpu/gfx12: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. 
v2: update irq naming (drop priv) (Alex) Acked-by: Felix Kuehling Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 74 ++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index c74c8a60a23a..f932c7ff85e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1349,6 +1349,13 @@ static int gfx_v12_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, @@ -3592,6 +3599,7 @@ static int gfx_v12_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -3712,6 +3720,10 @@ static int gfx_v12_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + return 0; } @@ -4831,6 +4843,51 @@ static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -4907,6 +4964,15 @@ static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v12_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -5219,6 +5285,11 @@ static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = { .process = gfx_v12_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = { + .set = gfx_v12_0_set_bad_op_fault_state, + .process = gfx_v12_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = { .set = gfx_v12_0_set_priv_inst_fault_state, .process = gfx_v12_0_priv_inst_irq, @@ -5232,6 +5303,9 @@ static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs; } From 238352b4949bc5c724f6adc1c78d50f1d15e4759 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:50:26 -0400 Subject: [PATCH 135/447] drm/amdgpu/gfx9: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. 
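For context, the recovery is kicked off from the handler visible in the hunk below: drm_sched_fault() forces the scheduler's timeout handling to run immediately for the faulting ring, so the bad job goes through the normal reset path instead of waiting for a hang to be detected. In sketch form:

/* IRQ handler side, simplified: log the fault and hand the ring to the
 * scheduler's fault path, which performs the per-VMID recovery.
 */
DRM_ERROR("Illegal opcode in command stream\n");
drm_sched_fault(&ring->sched);	/* run timeout handling for this ring now */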
Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 65 +++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 97476fb2ca40..675a1a8e2515 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2182,6 +2182,13 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -3937,6 +3944,7 @@ static int gfx_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) @@ -4747,6 +4755,10 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + r = gfx_v9_0_ecc_late_init(handle); if (r) return r; @@ -5990,6 +6002,42 @@ static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + + return 0; +} + static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -6163,6 +6211,15 @@ static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream\n"); + gfx_v9_0_fault(adev, entry); + return 0; +} + static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -7346,6 +7403,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { .process = gfx_v9_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = { + .set = gfx_v9_0_set_bad_op_fault_state, + .process = gfx_v9_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { .set = gfx_v9_0_set_priv_inst_fault_state, .process = gfx_v9_0_priv_inst_irq, @@ -7365,6 +7427,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; From bd4bea5ab2bda37ddb092a978218c4d9b46927e6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:57:14 -0400 Subject: [PATCH 136/447] drm/amdgpu/gfx9.4.3: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. 
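One difference on this multi-XCC part: each GC instance carries its own copy of the CP interrupt controls, so the enable becomes a per-XCC read-modify-write. A sketch of the idiom used in the hunk below (reg stands in for the per-pipe CP_ME1_PIPEx_INT_CNTL offset):

for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); i++) {
	tmp = RREG32_XCC(reg, i);	/* read this XCC's copy */
	tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
			    OPCODE_ERROR_INT_ENABLE, enable);
	WREG32_XCC(reg, tmp, i);	/* write it back */
}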
Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 69 +++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 43a3ef276b5f..98fe6c40da64 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -901,6 +901,13 @@ static int gfx_v9_4_3_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -2162,6 +2169,7 @@ static int gfx_v9_4_3_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { @@ -2327,6 +2335,10 @@ static int gfx_v9_4_3_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + if (adev->gfx.ras && adev->gfx.ras->enable_watchdog_timer) adev->gfx.ras->enable_watchdog_timer(adev); @@ -2964,6 +2976,46 @@ static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v9_4_3_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl_reg, mec_int_cntl; + int i, j, k, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < num_xcc; i++) { + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (j = 0; j < adev->gfx.mec.num_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + /* MECs start at 1 */ + mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k); + + if (mec_int_cntl_reg) { + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
+ 1 : 0); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i); + } + } + } + } + break; + default: + break; + } + + return 0; +} + static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -3116,6 +3168,15 @@ static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_4_3_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream\n"); + gfx_v9_4_3_fault(adev, entry); + return 0; +} + static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -4228,6 +4289,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = { .process = gfx_v9_4_3_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_bad_op_irq_funcs = { + .set = gfx_v9_4_3_set_bad_op_fault_state, + .process = gfx_v9_4_3_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = { .set = gfx_v9_4_3_set_priv_inst_fault_state, .process = gfx_v9_4_3_priv_inst_irq, @@ -4241,6 +4307,9 @@ static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v9_4_3_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs; } From b049504e211e8f4dbcd40434f2dcab2215ea1039 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 12:44:57 -0400 Subject: [PATCH 137/447] drm/amdkfd: Validate user queue svm memory residency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The queue CWSR area may be registered to the GPU as svm memory; queue creation must ensure the svm range is mapped to the GPU with the KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED flag. Add queue_refcount to struct svm_range to track queue CWSR area usage. Because the return value of the unmap mmu notifier callback is ignored, if the application unmaps the CWSR area while the queue is active, print a pr_warn message to the dmesg log and, to be safe, evict the user queue. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 110 ++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 12 +++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 + 3 files changed, 122 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 3fd386dcb011..67242ce051b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -24,6 +24,7 @@ #include #include "kfd_priv.h" +#include "kfd_svm.h" void print_queue_properties(struct queue_properties *q) { @@ -83,6 +84,100 @@ void uninit_queue(struct queue *q) kfree(q); } +static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + struct kfd_process *p = pdd->process; + struct list_head update_list; + struct svm_range *prange; + int ret = -EINVAL; + + INIT_LIST_HEAD(&update_list); + addr >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + + mutex_lock(&p->svms.lock); + + /* + * The range may be split into multiple svm pranges aligned to the granularity boundary.
+ */ + while (size) { + uint32_t gpuid, gpuidx; + int r; + + prange = svm_range_from_addr(&p->svms, addr, NULL); + if (!prange) + break; + + if (!prange->mapped_to_gpu) + break; + + r = kfd_process_gpuid_from_node(p, pdd->dev, &gpuid, &gpuidx); + if (r < 0) + break; + if (!test_bit(gpuidx, prange->bitmap_access) && + !test_bit(gpuidx, prange->bitmap_aip)) + break; + + if (!(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) + break; + + list_add(&prange->update_list, &update_list); + + if (prange->last - prange->start + 1 >= size) { + size = 0; + break; + } + + size -= prange->last - prange->start + 1; + addr += prange->last - prange->start + 1; + } + if (size) { + pr_debug("[0x%llx 0x%llx] not registered\n", addr, addr + size - 1); + goto out_unlock; + } + + list_for_each_entry(prange, &update_list, update_list) + atomic_inc(&prange->queue_refcount); + ret = 0; + +out_unlock: + mutex_unlock(&p->svms.lock); + return ret; +} + +static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + struct kfd_process *p = pdd->process; + struct svm_range *prange, *pchild; + struct interval_tree_node *node; + unsigned long last; + + addr >>= PAGE_SHIFT; + last = addr + (size >> PAGE_SHIFT) - 1; + + mutex_lock(&p->svms.lock); + + node = interval_tree_iter_first(&p->svms.objects, addr, last); + while (node) { + struct interval_tree_node *next_node; + unsigned long next_start; + + prange = container_of(node, struct svm_range, it_node); + next_node = interval_tree_iter_next(node, addr, last); + next_start = min(node->last, last) + 1; + + if (atomic_add_unless(&prange->queue_refcount, -1, 0)) { + list_for_each_entry(pchild, &prange->child_list, child_list) + atomic_add_unless(&pchild->queue_refcount, -1, 0); + } + + node = next_node; + addr = next_start; + } + + mutex_unlock(&p->svms.lock); +} + int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, u64 expected_size) { @@ -165,8 +260,17 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, &properties->cwsr_bo, 0); + if (!err) + goto out_unreserve; + + amdgpu_bo_unreserve(vm->root.bo); + + err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address, + properties->ctx_save_restore_area_size); if (err) - goto out_err_unreserve; + goto out_err_release; + + return 0; out_unreserve: amdgpu_bo_unreserve(vm->root.bo); @@ -174,6 +278,7 @@ out_unreserve: out_err_unreserve: amdgpu_bo_unreserve(vm->root.bo); +out_err_release: kfd_queue_release_buffers(pdd, properties); return err; } @@ -195,5 +300,8 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope kfd_queue_buffer_put(vm, &properties->cwsr_bo); amdgpu_bo_unreserve(vm->root.bo); + + kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, + properties->ctx_save_restore_area_size); return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bd9c2921e0dc..2339bbdf452f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1051,6 +1051,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, new->mapped_to_gpu = old->mapped_to_gpu; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount)); return 0; } @@ -1992,6 +1993,7 @@ 
static struct svm_range *svm_range_clone(struct svm_range *old) new->vram_pages = old->vram_pages; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount)); return new; } @@ -2444,6 +2446,16 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, unsigned long s, l; bool unmap_parent; + if (atomic_read(&prange->queue_refcount)) { + int r; + + pr_warn("Freeing queue vital buffer 0x%lx, queue evicted\n", + prange->start << PAGE_SHIFT); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM); + if (r) + pr_debug("failed %d to quiesce KFD queues\n", r); + } + p = kfd_lookup_process_by_mm(mm); if (!p) return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 70c1776611c4..747325a2ea89 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -137,6 +137,7 @@ struct svm_range { DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); bool mapped_to_gpu; + atomic_t queue_refcount; }; static inline void svm_range_lock(struct svm_range *prange) From 305cd109b761202d71f2f655ea369fe889ba1d01 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 13:00:48 -0400 Subject: [PATCH 138/447] drm/amdkfd: Validate user queue update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure update queue new ring buffer is mapped on GPU with correct size. Decrease queue old ring_bo queue_refcount and increase new ring_bo queue_refcount. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- .../amd/amdkfd/kfd_process_queue_manager.c | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 4947f28b3afb..9995dbb43359 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -549,11 +549,41 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); - if (!pqn) { + if (!pqn || !pqn->q) { pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT; } + /* + * Update with NULL ring address is used to disable the queue + */ + if (p->queue_address && p->queue_size) { + struct kfd_process_device *pdd; + struct amdgpu_vm *vm; + struct queue *q = pqn->q; + int err; + + pdd = kfd_get_process_device_data(q->device, q->process); + if (!pdd) + return -ENODEV; + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo, + p->queue_size)) { + pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", + p->queue_address, p->queue_size); + return -EFAULT; + } + + kfd_queue_buffer_put(vm, &pqn->q->properties.ring_bo); + amdgpu_bo_unreserve(vm->root.bo); + + pqn->q->properties.ring_bo = p->ring_bo; + } + pqn->q->properties.queue_address = p->queue_address; pqn->q->properties.queue_size = p->queue_size; pqn->q->properties.queue_percent = p->queue_percent; From 3b37e2725ab32c9055bec00ef41caa63839efd37 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 18 Jul 2024 13:18:53 +0800 Subject: [PATCH 139/447] 
drm/amdgpu: skip kfd init if GFX is not ready. avoid kfd init crash in that case. Signed-off-by: Yifan Zhang Acked-by: Alex Deucher Tested-by: Jesse Zhang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bcacf2e35eba..730dae77570c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2471,6 +2471,7 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { + struct amdgpu_ip_block *ip_block; struct pci_dev *parent; int i, r; bool total; @@ -2608,7 +2609,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; - amdgpu_amdkfd_device_probe(adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + if (ip_block->status.valid != false) + amdgpu_amdkfd_device_probe(adev); + adev->cg_flags &= amdgpu_cg_mask; adev->pg_flags &= amdgpu_pg_mask; From 0b071245ddd98539d4f7493bdd188417fcf2d629 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 23 Jul 2024 16:54:34 +0800 Subject: [PATCH 140/447] drm/amdgpu: add missed harvest check for VCN IP v4/v5 To prevent below probe failure, add a check for models with VCN IP v4.0.6 where VCN1 may be harvested. v2: Apply the same check to VCN IP v4.0 and v5.0. [ 54.070117] RIP: 0010:vcn_v4_0_5_start_dpg_mode+0x9be/0x36b0 [amdgpu] [ 54.071055] Code: 80 fb ff 8d 82 00 80 fe ff 81 fe 00 06 00 00 0f 43 c2 49 69 d5 38 0d 00 00 48 8d 71 04 c1 e8 02 4c 01 f2 48 89 b2 50 f6 02 00 <89> 01 48 8b 82 50 f6 02 00 48 8d 48 04 48 89 8a 50 f6 02 00 c7 00 [ 54.072408] RSP: 0018:ffffb17985f736f8 EFLAGS: 00010286 [ 54.072793] RAX: 00000000000000d6 RBX: ffff99a82f680000 RCX: 0000000000000000 [ 54.073315] RDX: ffff99a82f680000 RSI: 0000000000000004 RDI: ffff99a82f680000 [ 54.073835] RBP: ffffb17985f73730 R08: 0000000000000001 R09: 0000000000000000 [ 54.074353] R10: 0000000000000008 R11: ffffb17983c05000 R12: 0000000000000000 [ 54.074879] R13: 0000000000000000 R14: ffff99a82f680000 R15: 0000000000000001 [ 54.075400] FS: 00007f8d9c79a000(0000) GS:ffff99ab2f140000(0000) knlGS:0000000000000000 [ 54.075988] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 54.076408] CR2: 0000000000000000 CR3: 0000000140c3a000 CR4: 0000000000750ef0 [ 54.076927] PKRU: 55555554 [ 54.077132] Call Trace: [ 54.077319] [ 54.077484] ? show_regs+0x69/0x80 [ 54.077747] ? __die+0x28/0x70 [ 54.077979] ? page_fault_oops+0x180/0x4b0 [ 54.078286] ? do_user_addr_fault+0x2d2/0x680 [ 54.078610] ? exc_page_fault+0x84/0x190 [ 54.078910] ? asm_exc_page_fault+0x2b/0x30 [ 54.079224] ? vcn_v4_0_5_start_dpg_mode+0x9be/0x36b0 [amdgpu] [ 54.079941] ? 
vcn_v4_0_5_start_dpg_mode+0xe6/0x36b0 [amdgpu] [ 54.080617] vcn_v4_0_5_set_powergating_state+0x82/0x19b0 [amdgpu] [ 54.081316] amdgpu_device_ip_set_powergating_state+0x64/0xc0 [amdgpu] [ 54.082057] amdgpu_vcn_ring_begin_use+0x6f/0x1d0 [amdgpu] [ 54.082727] amdgpu_ring_alloc+0x44/0x70 [amdgpu] [ 54.083351] amdgpu_vcn_dec_sw_ring_test_ring+0x40/0x110 [amdgpu] [ 54.084054] amdgpu_ring_test_helper+0x22/0x90 [amdgpu] [ 54.084698] vcn_v4_0_5_hw_init+0x87/0xc0 [amdgpu] [ 54.085307] amdgpu_device_init+0x1f96/0x2780 [amdgpu] [ 54.085951] amdgpu_driver_load_kms+0x1e/0xc0 [amdgpu] [ 54.086591] amdgpu_pci_probe+0x19f/0x550 [amdgpu] [ 54.087215] local_pci_probe+0x48/0xa0 [ 54.087509] pci_device_probe+0xc9/0x250 [ 54.087812] really_probe+0x1a4/0x3f0 [ 54.088101] __driver_probe_device+0x7d/0x170 [ 54.088443] driver_probe_device+0x24/0xa0 [ 54.088765] __driver_attach+0xdd/0x1d0 [ 54.089068] ? __pfx___driver_attach+0x10/0x10 [ 54.089417] bus_for_each_dev+0x8e/0xe0 [ 54.089718] driver_attach+0x22/0x30 [ 54.090000] bus_add_driver+0x120/0x220 [ 54.090303] driver_register+0x62/0x120 [ 54.090606] ? __pfx_amdgpu_init+0x10/0x10 [amdgpu] [ 54.091255] __pci_register_driver+0x62/0x70 [ 54.091593] amdgpu_init+0x67/0xff0 [amdgpu] [ 54.092190] do_one_initcall+0x5f/0x330 [ 54.092495] do_init_module+0x68/0x240 [ 54.092794] load_module+0x201c/0x2110 [ 54.093093] init_module_from_file+0x97/0xd0 [ 54.093428] ? init_module_from_file+0x97/0xd0 [ 54.093777] idempotent_init_module+0x11c/0x2a0 [ 54.094134] __x64_sys_finit_module+0x64/0xc0 [ 54.094476] do_syscall_64+0x58/0x120 [ 54.094767] entry_SYSCALL_64_after_hwframe+0x6e/0x76 Signed-off-by: Tim Huang Reviewed-by: Saleemkhan Jamadar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index f6d96a44d75f..776c539bfdda 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1045,6 +1045,9 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1498,6 +1501,9 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index f45495de6875..8d75061f9f38 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -958,6 +958,9 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1162,6 +1165,9 @@ static int vcn_v4_0_5_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 070b56610c7d..68c97fcd539b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -721,6 +721,9 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -898,6 +901,9 @@ static int vcn_v5_0_0_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; From 08ae395ea22fb3d9b318c8bde28c0dfd2f5fa4d2 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 17:18:17 +0530 Subject: [PATCH 141/447] drm/amd/display: Add null check for set_output_gamma in dcn30_set_output_transfer_func This commit adds a null check for the set_output_gamma function pointer in the dcn30_set_output_transfer_func function. Previously, set_output_gamma was being checked for nullity at line 386, but then it was being dereferenced without any nullity check at line 401. This could potentially lead to a null pointer dereference error if set_output_gamma is indeed null. To fix this, we now ensure that set_output_gamma is not null before dereferencing it. We do this by adding a nullity check for set_output_gamma before the call to set_output_gamma at line 401. If set_output_gamma is null, we log an error message and do not call the function. This fix prevents a potential null pointer dereference error. 
drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c:401 dcn30_set_output_transfer_func() error: we previously assumed 'mpc->funcs->set_output_gamma' could be null (see line 386) drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c 373 bool dcn30_set_output_transfer_func(struct dc *dc, 374 struct pipe_ctx *pipe_ctx, 375 const struct dc_stream_state *stream) 376 { 377 int mpcc_id = pipe_ctx->plane_res.hubp->inst; 378 struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; 379 const struct pwl_params *params = NULL; 380 bool ret = false; 381 382 /* program OGAM or 3DLUT only for the top pipe*/ 383 if (pipe_ctx->top_pipe == NULL) { 384 /*program rmu shaper and 3dlut in MPC*/ 385 ret = dcn30_set_mpc_shaper_3dlut(pipe_ctx, stream); 386 if (ret == false && mpc->funcs->set_output_gamma) { ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If this is NULL 387 if (stream->out_transfer_func.type == TF_TYPE_HWPWL) 388 params = &stream->out_transfer_func.pwl; 389 else if (pipe_ctx->stream->out_transfer_func.type == 390 TF_TYPE_DISTRIBUTED_POINTS && 391 cm3_helper_translate_curve_to_hw_format( 392 &stream->out_transfer_func, 393 &mpc->blender_params, false)) 394 params = &mpc->blender_params; 395 /* there are no ROM LUTs in OUTGAM */ 396 if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED) 397 BREAK_TO_DEBUGGER(); 398 } 399 } 400 --> 401 mpc->funcs->set_output_gamma(mpc, mpcc_id, params); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Then it will crash 402 return ret; 403 } Fixes: d99f13878d6f ("drm/amd/display: Add DCN3 HWSEQ") Reported-by: Dan Carpenter Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Hersen Wu Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index 98a40d46aaae..42c52284a868 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -398,7 +398,11 @@ bool dcn30_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + else + DC_LOG_ERROR("%s: set_output_gamma function pointer is NULL.\n", __func__); + return ret; } From 47c0388b0589cb481c294dcb857d25a214c46eb3 Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Fri, 19 Jul 2024 16:10:40 +0800 Subject: [PATCH 142/447] drm/amdgpu: reset vm state machine after gpu reset(vram lost) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Page table of compute VM in the VRAM will lost after gpu reset. VRAM won't be restored since compute VM has no shadows. [How] Use higher 32-bit of vm->generation to record a vram_lost_counter. Reset the VM state machine when vm->genertaion is not equal to the new generation token. v2: Check vm->generation instead of calling drm_sched_entity_error in amdgpu_vm_validate. v3: Use new generation token instead of vram_lost_counter for check. 
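As an illustration of the token scheme this change adopts, here is a minimal sketch (the helper and parameter names are illustrative, not the driver's): the upper 32 bits of the 64-bit generation carry a global counter that changes when VRAM is lost, the lower 32 bits carry the per-VM generation, so a single 64-bit compare catches both VRAM loss and page-table errors.

#include <linux/types.h>

static u64 example_vm_generation(u32 vram_lost_counter, u32 vm_generation,
				 bool pt_error)
{
	/* global vram-lost token in the high half, per-VM counter in the low half */
	u64 token = ((u64)vram_lost_counter << 32) | vm_generation;

	/* add one if the page tables will be re-generated on next CS */
	if (pt_error)
		++token;
	return token;
}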
Signed-off-by: ZhenGuo Yin Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3abfa66d72a2..a060c28f0877 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (!vm) return result; - result += vm->generation; + result += lower_32_bits(vm->generation); /* Add one if the page tables will be re-generated on next CS */ if (drm_sched_entity_error(&vm->delayed)) ++result; @@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { + uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; - if (drm_sched_entity_error(&vm->delayed)) { - ++vm->generation; + if (vm->generation != new_vm_generation) { + vm->generation = new_vm_generation; amdgpu_vm_bo_reset_state_machine(vm); amdgpu_vm_fini_entities(vm); r = amdgpu_vm_init_entities(adev, vm); @@ -2439,7 +2440,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_update = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub(); - vm->generation = 0; + vm->generation = amdgpu_vm_generation(adev, NULL); mutex_init(&vm->eviction_lock); vm->evicting = false; From 517fff221c1e6b8a8db69e7a440116caee120ff5 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 26 Jun 2024 14:52:28 -0400 Subject: [PATCH 143/447] drm/amdkfd: Store queue cwsr area size to node properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the queue eop buffer size, cwsr area size, ctl stack size calculation from Thunk, store the value to KFD node properties. Those will be used to validate queue eop buffer size, cwsr area size, ctl stack size when creating KFD user compute queue. Those will be exposed to user space via sysfs KFD node properties, to remove the duplicate calculation code from Thunk. 
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 75 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 4 ++ 4 files changed, 82 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index c31589043d5b..b5cae48dff66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1295,6 +1295,7 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_ void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo); int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_node *dev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 67242ce051b5..adcda9730c9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -24,6 +24,7 @@ #include #include "kfd_priv.h" +#include "kfd_topology.h" #include "kfd_svm.h" void print_queue_properties(struct queue_properties *q) @@ -305,3 +306,77 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope properties->ctx_save_restore_area_size); return 0; } + +#define SGPR_SIZE_PER_CU 0x4000 +#define LDS_SIZE_PER_CU 0x10000 +#define HWREG_SIZE_PER_CU 0x1000 +#define DEBUGGER_BYTES_ALIGN 64 +#define DEBUGGER_BYTES_PER_WAVE 32 + +static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) +{ + u32 vgpr_size = 0x40000; + + if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ + gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ + gfxv == 90008) /* GFX_VERSION_ARCTURUS */ + vgpr_size = 0x80000; + else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */ + gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */ + gfxv == 120000 || /* GFX_VERSION_GFX1200 */ + gfxv == 120001) /* GFX_VERSION_GFX1201 */ + vgpr_size = 0x60000; + + return vgpr_size; +} + +#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \ + (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ + LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) + +#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ + ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/ + +#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40 + +void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) +{ + struct kfd_node_properties *props = &dev->node_props; + u32 gfxv = props->gfx_target_version; + u32 ctl_stack_size; + u32 wg_data_size; + u32 wave_num; + u32 cu_num; + + if (gfxv < 80001) /* GFX_VERSION_CARRIZO */ + return; + + cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask); + wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */ + min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) + : cu_num * 32; + + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE); + ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; + ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, + PAGE_SIZE); + + if ((gfxv / 10000 * 10000) == 100000) { + /* HW design limits control stack size to 0x7000. 
+ * This is insufficient for theoretical PM4 cases + * but sufficient for AQL, limited by SPI events. + */ + ctl_stack_size = min(ctl_stack_size, 0x7000); + } + + props->ctl_stack_size = ctl_stack_size; + props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN); + props->cwsr_size = ctl_stack_size + wg_data_size; + + if (gfxv == 80002) /* GFX_VERSION_TONGA */ + props->eop_buffer_size = 0x8000; + else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */ + props->eop_buffer_size = 4096; + else if (gfxv >= 80000) + props->eop_buffer_size = 4096; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 6f89b06f89d3..a9b3eda65a2c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2120,6 +2120,8 @@ int kfd_topology_add_device(struct kfd_node *gpu) dev->gpu->adev->gmc.xgmi.connected_to_cpu) dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; + kfd_queue_ctx_save_restore_size(dev); + kfd_debug_print_topology(); kfd_notify_gpu_change(gpu_id, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 2d1c9d771bef..43ba0d32e5bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -74,6 +74,10 @@ struct kfd_node_properties { uint32_t num_sdma_xgmi_engines; uint32_t num_sdma_queues_per_engine; uint32_t num_cp_queues; + uint32_t cwsr_size; + uint32_t ctl_stack_size; + uint32_t eop_buffer_size; + uint32_t debug_memory_size; char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; From 629568d25fea8ece4f65073f039aeef4e240ab67 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 26 Jun 2024 15:03:05 -0400 Subject: [PATCH 144/447] drm/amdkfd: Validate queue cwsr area and eop buffer size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating a KFD user compute queue, check that the queue eop buffer size, cwsr area size, and ctl stack size are equal to the sizes stored in the KFD node properties. Check the entire cwsr area, which may be split into multiple svm ranges aligned to the granularity boundary.
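The expected total, as computed in the diff below, is one cwsr area plus debugger scratch per XCC, rounded up to a page. A condensed sketch of that rule, with an illustrative helper name:

#include <linux/kernel.h>
#include <linux/mm.h>

static u32 example_total_cwsr_size(u32 cwsr_size, u32 debug_memory_size,
				   u32 num_xcc)
{
	/* one save area plus debugger memory per XCC, page aligned */
	return ALIGN((cwsr_size + debug_memory_size) * num_xcc, PAGE_SIZE);
}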
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 46 +++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index adcda9730c9f..9807e8adf77d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -225,9 +225,15 @@ void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo) int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { + struct kfd_topology_device *topo_dev; struct amdgpu_vm *vm; + u32 total_cwsr_size; int err; + topo_dev = kfd_topology_device_by_id(pdd->dev->id); + if (!topo_dev) + return -EINVAL; + vm = drm_priv_to_vm(pdd->drm_priv); err = amdgpu_bo_reserve(vm->root.bo, false); if (err) @@ -252,6 +258,12 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope /* EOP buffer is not required for all ASICs */ if (properties->eop_ring_buffer_address) { + if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { + pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", + properties->eop_buf_bo->tbo.base.size, + topo_dev->node_props.eop_buffer_size); + goto out_err_unreserve; + } err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, &properties->eop_buf_bo, properties->eop_ring_buffer_size); @@ -259,15 +271,33 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope goto out_err_unreserve; } + if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) { + pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", + properties->ctl_stack_size, + topo_dev->node_props.ctl_stack_size); + goto out_err_unreserve; + } + + if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { + pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", + properties->ctx_save_restore_area_size, + topo_dev->node_props.cwsr_size); + goto out_err_unreserve; + } + + total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) + * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); + err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, - &properties->cwsr_bo, 0); + &properties->cwsr_bo, total_cwsr_size); if (!err) goto out_unreserve; amdgpu_bo_unreserve(vm->root.bo); err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address, - properties->ctx_save_restore_area_size); + total_cwsr_size); if (err) goto out_err_release; @@ -286,7 +316,9 @@ out_err_release: int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { + struct kfd_topology_device *topo_dev; struct amdgpu_vm *vm; + u32 total_cwsr_size; int err; vm = drm_priv_to_vm(pdd->drm_priv); @@ -302,8 +334,14 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope amdgpu_bo_unreserve(vm->root.bo); - kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, - properties->ctx_save_restore_area_size); + topo_dev = kfd_topology_device_by_id(pdd->dev->id); + if (!topo_dev) + return -EINVAL; + total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) + * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); + + 
kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size); return 0; } From 8155566a26b8d6c1dd914f06a0c652e4e2f2adf1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 23 Jul 2024 13:23:56 -0400 Subject: [PATCH 145/447] drm/amdgpu: properly handle vbios fake edid sizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment in the vbios structure says: // = 128 means EDID length is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128 This fake edid struct has not been used in a long time, so I'm not sure if there were actually any boards out there with a non-128 byte EDID, but align the code with the comment. Reviewed-by: Thomas Weißschuh Reported-by: Thomas Weißschuh Link: https://lists.freedesktop.org/archives/amd-gfx/2024-June/109964.html Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/atombios_encoders.c | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 25feab188dfe..ebf83fee43bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2065,26 +2065,29 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder) fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); + int edid_size; + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], + edid_size, GFP_KERNEL); + if (edid) { if (drm_edid_is_valid(edid)) { adev->mode_info.bios_hardcoded_edid = edid; adev->mode_info.bios_hardcoded_edid_size = edid_size; - } else + } else { kfree(edid); + } } + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; From 17c6baff3d5f65c8da164137a58742541a060b2f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 23 Jul 2024 13:31:58 -0400 Subject: [PATCH 146/447] drm/radeon: properly handle vbios fake edid sizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment in the vbios structure says: // = 128 means EDID length is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128 This fake edid struct has not been used in a long time, so I'm not sure if there were actually any boards out there with a non-128 byte EDID, but align the code with the comment. 
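The sizing rule from the vbios comment, restated as a standalone sketch (the function name is illustrative): a value of exactly 128 means the EDID is 128 bytes, any other value is a count of 128-byte blocks.

static int example_fake_edid_size(unsigned char ucFakeEDIDLength)
{
	if (ucFakeEDIDLength == 128)
		return 128;			/* literal length in bytes */
	return ucFakeEDIDLength * 128;		/* count of 128-byte blocks */
}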
Reviewed-by: Thomas Weißschuh Reported-by: Thomas Weißschuh Link: https://lists.freedesktop.org/archives/amd-gfx/2024-June/109964.html Fixes: c324acd5032f ("drm/radeon/kms: parse the extended LCD info block") Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_atombios.c | 29 +++++++++++++----------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 97c4e10d0550..168f3f94003b 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1717,26 +1717,29 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); + int edid_size; + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], + edid_size, GFP_KERNEL); + if (edid) { if (drm_edid_is_valid(edid)) { rdev->mode_info.bios_hardcoded_edid = edid; rdev->mode_info.bios_hardcoded_edid_size = edid_size; - } else + } else { kfree(edid); + } } + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; From 75c3f06fd900e01a68b8ade17e6b6be64cfdc9ff Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 24 Jul 2024 14:24:28 +0100 Subject: [PATCH 147/447] drm/amd/display: Fix spelling mistake "tolarance" -> "tolerance" There is a spelling mistake in a dml2_printf message. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 0b671c665373..5ba38d51382f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -8267,7 +8267,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); #endif mode_lib->ms.support.OutstandingRequestsSupport = true; From fdedd77b0eb31209c59107de66880ef0be21a77a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 24 Jul 2024 08:49:35 -0700 Subject: [PATCH 148/447] drm/amd/display: Reapply 2fde4fdddc1f Commit 2563391e57b5 ("drm/amd/display: DML2.1 resynchronization") blew away the compiler warning fix from commit 2fde4fdddc1f ("drm/amd/display: Avoid -Wenum-float-conversion in add_margin_and_round_to_dfs_grainularity()"), causing the warning to reappear. drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c:183:58: error: arithmetic between enumeration type 'enum dentist_divider_range' and floating-point type 'double' [-Werror,-Wenum-float-conversion] 183 | divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Apply the fix again to resolve the warning. 
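The fix itself is small: cast the enumerator to int before it meets the double, so the multiplication no longer mixes an enum with a floating-point operand. A minimal reproduction of the pattern (names illustrative):

enum example_divider_range { EXAMPLE_DFS_SCALE_FACTOR = 4 };

static unsigned int example_round_divider(double vco_freq_khz, double clock_khz)
{
	/* the (int) cast silences clang's -Wenum-float-conversion */
	return (unsigned int)((int)EXAMPLE_DFS_SCALE_FACTOR *
			      (vco_freq_khz / clock_khz));
}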
Fixes: 2563391e57b5 ("drm/amd/display: DML2.1 resynchronization") Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index 0021bbaa4b91..f19f6ebaae13 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -180,7 +180,7 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma clock_khz *= 1.0 + margin; - divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); + divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); /* we want to floor here to get higher clock than required rather than lower */ if (divider < DFS_DIVIDER_RANGE_2_START) { From f3c681f0c3b171db923d6147785064962351e043 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 24 Jul 2024 15:37:49 +0800 Subject: [PATCH 149/447] drm/amd/display: use swap() in sort() Use existing swap() function rather than duplicating its implementation. ./drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c:17:29-30: WARNING opportunity for swap(). Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=9573 Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index 717536d7bb30..8e68a8094658 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -7,16 +7,12 @@ static void sort(double *list_a, int list_a_size) { - double temp; // For all elements b[i] in list_b[] for (int i = 0; i < list_a_size - 1; i++) { // Find the first element of list_a that's larger than b[i] for (int j = i; j < list_a_size - 1; j++) { - if (list_a[j] > list_a[j + 1]) { - temp = list_a[j]; - list_a[j] = list_a[j + 1]; - list_a[j + 1] = temp; - } + if (list_a[j] > list_a[j + 1]) + swap(list_a[j], list_a[j + 1]); } } } From 60c30ba7ba2064066ec462236666058cbbf619c1 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 4 Jul 2024 08:14:15 +0800 Subject: [PATCH 150/447] drm/amdgpu/pm: support gpu_metrics sysfs interface for smu v14.0.2/3 support gpu_metrics sysfs interface for smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 86 ++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 98ea58d792ca..e1a27903c80a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -66,6 +66,7 @@ #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000 #define DEBUGSMC_MSG_Mode1Reset 2 +#define LINK_SPEED_MAX 3 static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), @@ -221,7 +222,6 @@ static struct cmn2asic_mapping 
smu_v14_0_2_workload_map[PP_SMC_POWER_PROFILE_COU WORKLOAD_MAP(PP_SMC_POWER_PROFILE_WINDOW3D, WORKLOAD_PPLIB_WINDOW_3D_BIT), }; -#if 0 static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_PPT0_BIT] = (SMU_THROTTLER_PPT0_BIT), [THROTTLER_PPT1_BIT] = (SMU_THROTTLER_PPT1_BIT), @@ -241,7 +241,6 @@ static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_GFX_APCC_PLUS_BIT] = (SMU_THROTTLER_APCC_BIT), [THROTTLER_FIT_BIT] = (SMU_THROTTLER_FIT_BIT), }; -#endif static int smu_v14_0_2_get_allowed_feature_mask(struct smu_context *smu, @@ -1869,6 +1868,88 @@ static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, return ret; } +static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v1_3 *gpu_metrics = + (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table; + SmuMetricsExternal_t metrics_ext; + SmuMetrics_t *metrics = &metrics_ext.SmuMetrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, + &metrics_ext, + true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3); + + gpu_metrics->temperature_edge = metrics->AvgTemperature[TEMP_EDGE]; + gpu_metrics->temperature_hotspot = metrics->AvgTemperature[TEMP_HOTSPOT]; + gpu_metrics->temperature_mem = metrics->AvgTemperature[TEMP_MEM]; + gpu_metrics->temperature_vrgfx = metrics->AvgTemperature[TEMP_VR_GFX]; + gpu_metrics->temperature_vrsoc = metrics->AvgTemperature[TEMP_VR_SOC]; + gpu_metrics->temperature_vrmem = max(metrics->AvgTemperature[TEMP_VR_MEM0], + metrics->AvgTemperature[TEMP_VR_MEM1]); + + gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; + gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; + gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, + metrics->Vcn1ActivityPercentage); + + gpu_metrics->average_socket_power = metrics->AverageSocketPower; + gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + + if (metrics->AverageGfxActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; + else + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; + + if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; + else + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs; + + gpu_metrics->average_vclk0_frequency = metrics->AverageVclk0Frequency; + gpu_metrics->average_dclk0_frequency = metrics->AverageDclk0Frequency; + gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency; + gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; + + gpu_metrics->current_gfxclk = gpu_metrics->average_gfxclk_frequency; + gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; + gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; + gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; + gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk1 = metrics->CurrClock[PPCLK_DCLK_0]; + + gpu_metrics->throttle_status = + smu_v14_0_2_get_throttler_status(metrics); + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status, + smu_v14_0_2_throttler_map); + + gpu_metrics->current_fan_speed = metrics->AvgFanRpm; + + gpu_metrics->pcie_link_width = metrics->PcieWidth; + 
if ((metrics->PcieRate - 1) > LINK_SPEED_MAX) + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(1); + else + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(metrics->PcieRate); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + gpu_metrics->voltage_gfx = metrics->AvgVoltage[SVI_PLANE_VDD_GFX]; + gpu_metrics->voltage_soc = metrics->AvgVoltage[SVI_PLANE_VDD_SOC]; + gpu_metrics->voltage_mem = metrics->AvgVoltage[SVI_PLANE_VDDIO_MEM]; + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v1_3); +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1905,6 +1986,7 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_thermal_alert = smu_v14_0_enable_thermal_alert, .disable_thermal_alert = smu_v14_0_disable_thermal_alert, .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, + .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, From e06b71b2313a00579ba64a1cc43ad29d64cb8d4c Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 21 May 2024 13:22:15 -0400 Subject: [PATCH 151/447] drm/amdkfd: allow users to target recommended SDMA engines Certain GPUs have better copy performance over xGMI on specific SDMA engines depending on the source and destination GPU. Allow users to create SDMA queues on these recommended engines. Close to 2x overall performance has been observed with this optimization. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 38 +++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +- .../amd/amdkfd/kfd_process_queue_manager.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 52 +++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 + include/uapi/linux/kfd_ioctl.h | 6 ++- 7 files changed, 116 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 65a37ac5a0f0..0622ebd7e8ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -255,6 +255,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, args->ctx_save_restore_address; q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; q_properties->ctl_stack_size = args->ctl_stack_size; + q_properties->sdma_engine_id = args->sdma_engine_id; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -262,6 +263,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->type = KFD_QUEUE_TYPE_SDMA; else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID) + q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID; else return -ENOTSUPP; @@ -333,6 +336,18 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } + if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) + + 
kfd_get_num_xgmi_sdma_engines(dev) - 1; + + if (q_properties.sdma_engine_id > max_sdma_eng_id) { + err = -EINVAL; + pr_err("sdma_engine_id %i exceeds maximum id of %i\n", + q_properties.sdma_engine_id, max_sdma_eng_id); + goto err_sdma_engine_id; + } + } + if (!pdd->qpd.proc_doorbells) { err = kfd_alloc_process_doorbells(dev->kfd, pdd); if (err) { @@ -387,6 +402,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, err_create_queue: kfd_queue_release_buffers(pdd, &q_properties); err_acquire_queue_buf: +err_sdma_engine_id: err_bind_process: err_pdd: mutex_unlock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index fdc76c24b2e7..f0bfeb35246f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1532,6 +1532,41 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / kfd_get_num_xgmi_sdma_engines(dqm->dev); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int i, num_queues, num_engines, eng_offset = 0, start_engine; + bool free_bit_found = false, is_xgmi = false; + + if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) { + num_queues = get_num_sdma_queues(dqm); + num_engines = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA; + } else { + num_queues = get_num_xgmi_sdma_queues(dqm); + num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev); + eng_offset = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI; + is_xgmi = true; + } + + /* Scan available bit based on target engine ID. */ + start_engine = q->properties.sdma_engine_id - eng_offset; + for (i = start_engine; i < num_queues; i += num_engines) { + + if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap)) + continue; + + clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap); + q->sdma_id = i; + q->properties.sdma_queue_id = q->sdma_id / num_engines; + free_bit_found = true; + break; + } + + if (!free_bit_found) { + dev_err(dev, "No more SDMA queue to allocate for target ID %i\n", + q->properties.sdma_engine_id); + return -ENOMEM; + } } pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -1784,7 +1819,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } if (q->properties.type == KFD_QUEUE_TYPE_SDMA || - q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || + q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { dqm_lock(dqm); retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); dqm_unlock(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b5cae48dff66..4190fa339913 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -414,13 +414,16 @@ enum kfd_unmap_queues_filter { * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. * * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface. + * + * @KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: SDMA user mode queue with target SDMA engine ID. 
*/ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_HIQ, KFD_QUEUE_TYPE_DIQ, - KFD_QUEUE_TYPE_SDMA_XGMI + KFD_QUEUE_TYPE_SDMA_XGMI, + KFD_QUEUE_TYPE_SDMA_BY_ENG_ID }; enum kfd_queue_format { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9995dbb43359..f732ee35b531 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -366,6 +366,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: + case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: /* SDMA queues are always allocated statically no matter * which scheduler mode is used. We also do not need to * check whether a SDMA queue can be allocated here, because diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index a9b3eda65a2c..40771f8752cb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -292,6 +292,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, iolink->max_bandwidth); sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size", iolink->rec_transfer_size); + sysfs_show_32bit_prop(buffer, offs, "recommended_sdma_engine_id_mask", + iolink->rec_sdma_eng_id_mask); sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); return offs; @@ -1265,6 +1267,55 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, } } +#define REC_SDMA_NUM_GPU 8 +static const int rec_sdma_eng_map[REC_SDMA_NUM_GPU][REC_SDMA_NUM_GPU] = { + { -1, 14, 12, 2, 4, 8, 10, 6 }, + { 14, -1, 2, 10, 8, 4, 6, 12 }, + { 10, 2, -1, 12, 14, 6, 4, 8 }, + { 2, 12, 10, -1, 6, 14, 8, 4 }, + { 4, 8, 14, 6, -1, 10, 12, 2 }, + { 8, 4, 6, 14, 12, -1, 2, 10 }, + { 10, 6, 4, 8, 12, 2, -1, 14 }, + { 6, 12, 8, 4, 2, 10, 14, -1 }}; + +static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, + struct kfd_iolink_properties *outbound_link, + struct kfd_iolink_properties *inbound_link) +{ + struct kfd_node *gpu = outbound_link->gpu; + struct amdgpu_device *adev = gpu->adev; + int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && + adev->aid_mask && num_xgmi_nodes && + (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == + AMDGPU_SPX_PARTITION_MODE) && + (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + + if (support_rec_eng) { + int src_socket_id = adev->gmc.xgmi.physical_node_id; + int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + + outbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + inbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; + } else { + int num_sdma_eng = kfd_get_num_sdma_engines(gpu); + int i, eng_offset = 0; + + if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { + eng_offset = num_sdma_eng; + num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); + } + + for (i = 0; i < num_sdma_eng; i++) { + outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + } + } +} + static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) { struct kfd_iolink_properties *link, *inbound_link; @@ -1303,6 +1354,7 @@ static void 
kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); + kfd_set_recommended_sdma_engines(peer_dev, link, inbound_link); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 43ba0d32e5bd..155b5c410af1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -125,6 +125,7 @@ struct kfd_iolink_properties { uint32_t min_bandwidth; uint32_t max_bandwidth; uint32_t rec_transfer_size; + uint32_t rec_sdma_eng_id_mask; uint32_t flags; struct kfd_node *gpu; struct kobject *kobj; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 285a36601dc9..71a7ce5f2d4c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -42,9 +42,10 @@ * - 1.14 - Update kfd_event_data * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag + * - 1.17 - Add SDMA queue creation with target SDMA engine ID */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 16 +#define KFD_IOCTL_MINOR_VERSION 17 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -56,6 +57,7 @@ struct kfd_ioctl_get_version_args { #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 #define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -78,6 +80,8 @@ struct kfd_ioctl_create_queue_args { __u64 ctx_save_restore_address; /* to KFD */ __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ + __u32 sdma_engine_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_destroy_queue_args { From 8f28c465a455563917aa15bf5ef40016b2a665d6 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 25 Jul 2024 09:57:12 +0800 Subject: [PATCH 152/447] drm/amd/display: remove unneeded semicolon No functional modification involved. ./drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:481:2-3: Unneeded semicolon. ./drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:3783:168-169: Unneeded semicolon. ./drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:3782:166-167: Unneeded semicolon. 
Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=9575 Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 5ba38d51382f..13f2c80bad4c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -478,7 +478,7 @@ static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode default: DML2_ASSERT(0); return 256; - }; + } } static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) @@ -3779,8 +3779,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; } if (p->SwathHeightC[k] == 0) From f3392e662efdc095f10109f588aa4f3be86f7eb5 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 23 Jul 2024 13:08:55 +0530 Subject: [PATCH 153/447] drm/amdgpu: add vcn ip dump ptr in vcn global struct Add a pointer to the vcn ip dump in the vcn global structure so it is accessible for all vcn versions via the global adev. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1a5439abd1a0..f127eccf59d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,6 +330,9 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; bool using_unified_queue; + + /* IP reg dump */ + uint32_t *ip_dump; }; struct amdgpu_fw_shared_rb_ptrs_struct { From 50d10d9271f6c6542196c54275091c7b2c6edf97 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 22:35:56 +0530 Subject: [PATCH 154/447] drm/amdgpu: add macro to calculate offset with instance Add a macro definition which calculates the offset of a register with an instance-index override. This is useful when an array of registers is common to all instances: the registers can be defined once, and the instance index is passed in manually to calculate the proper register offset for each instance.
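A usage sketch of the new macro (assuming the driver's RREG32() helper and a register list shared across instances, as the vcn_v3_0 patch that follows does; the function name is illustrative):

static void example_dump_one_instance(struct amdgpu_device *adev,
				      const struct amdgpu_hwip_reg_entry *list,
				      u32 reg_count, int inst, u32 *buf)
{
	u32 j;

	/* one shared register list, instance chosen at read time via the override */
	for (j = 0; j < reg_count; j++)
		buf[j] = RREG32(SOC15_REG_ENTRY_OFFSET_INST(list[j], inst));
}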
Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 282584a48be0..ef7c603b50ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -93,6 +93,10 @@ struct soc15_ras_field_entry { #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) +/* Over ride the instance id */ +#define SOC15_REG_ENTRY_OFFSET_INST(entry, inst) \ + (adev->reg_offset[entry.hwip][inst][entry.seg] + entry.reg_offset) + #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } From 58d283801d06d4434df6625ed6e6b8d2ba47fe65 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 16:35:41 +0530 Subject: [PATCH 155/447] drm/amdgpu: add vcn_v3_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v3_0. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 149 +++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 24f947751c46..d0bca93f8226 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,6 +60,115 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RBC_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RBC_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_RBC_IB_VMID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE_VMIDS_MULTI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC_VMIDS_MULTI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SOFT_RESET), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SOFT_RESET2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_GATE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, 
mmUVD_CGC_CTRL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_GATE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_CTRL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_STATUS2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_GATE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_GPGPU_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_GPGPU_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_DBW_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_DBW_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CM_COLOC_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CM_COLOC_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD4_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD4_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE4_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE4_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE5_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE5_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE6_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE6_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE7_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE7_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR_64BIT_BAR_HIGH), + 
SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SCRATCH1) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -126,6 +235,8 @@ static int vcn_v3_0_sw_init(void *handle) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; r = amdgpu_vcn_sw_init(adev); @@ -246,6 +357,15 @@ static int vcn_v3_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -284,6 +404,7 @@ static int vcn_v3_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); return r; } @@ -2203,6 +2324,32 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool reg_safe; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + reg_safe = (RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS) & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (reg_safe) + for (j = 0; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2221,7 +2368,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v3_0_dump_ip_state, .print_ip_state = NULL, }; From cd162ae9bc3ba91eb630a1321afd3d1dde5f2000 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 16:48:28 +0530 Subject: [PATCH 156/447] drm/amdgpu: add print support for vcn_v3_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v3_0. 
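Once print_ip_state() is wired up, the formatted register rows land in the devcoredump blob that the kernel exposes under /sys/class/devcoredump. A minimal sketch of a userspace reader, assuming a hypothetical devcd1 node (the instance number is assigned dynamically when a dump is captured):

/* Hypothetical reader for a captured devcoredump. Reading leaves the
 * dump in place; writing to the same "data" attribute dismisses it.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/devcoredump/devcd1/data", "r");
	char line[256];

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* includes the "%-50s \t 0x%08x" register rows */
	fclose(f);
	return 0;
}
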
Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 28 ++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index d0bca93f8226..9e1cbeee10db 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2324,6 +2324,32 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t inst_off; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + drm_printf(p, "\nActive Instance:VCN%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } +} + static void vcn_v3_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2369,7 +2395,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, .dump_ip_state = vcn_v3_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v3_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { From fbfb5f0342253d92c4e446588c428a9d90c3f610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 24 Jul 2024 09:24:02 +0200 Subject: [PATCH 157/447] drm/amdgpu: fix contiguous handling for IB parsing v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we won't get correct access to the IB. v2: keep setting AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS to avoid problems in the VRAM backend. 
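Since e362b7c8f8c7, AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS on its own no longer forces a contiguous TTM placement, so the fix has to set the per-placement TTM_PL_FLAG_CONTIGUOUS as well before revalidating. Condensed into a hypothetical helper (the function name is illustrative; the real change lives inline in amdgpu_cs_find_mapping), the pattern is:

/* Sketch of the pattern this fix applies: request contiguous VRAM at
 * both the GEM and TTM levels, then revalidate the BO.
 */
static int force_contiguous_and_validate(struct amdgpu_bo *bo)
{
	struct ttm_operation_ctx ctx = { false, false };
	unsigned int i;

	/* GEM-level flag: the v2 note, keeps the VRAM backend happy */
	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
	/* TTM-level flag: what actually makes the placement contiguous */
	for (i = 0; i < bo->placement.num_placement; i++)
		bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;

	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

Only after this validation succeeds does the amdgpu_ttm_alloc_gart() call at the end of the function give the CS parser usable access to the IB.
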
Signed-off-by: Christian König Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3501 Fixes: e362b7c8f8c7 ("drm/amdgpu: Modify the contiguous flags behaviour") Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org Tested-by: Dave Airlie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index cde2f4548a62..1e167d925b64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1782,7 +1782,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; - int r; + int i, r; addr /= AMDGPU_GPU_PAGE_SIZE; @@ -1797,13 +1797,13 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; - if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); - if (r) - return r; - } + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); + for (i = 0; i < (*bo)->placement.num_placement; i++) + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); + if (r) + return r; return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } From 25dd25f86eca77b1ce4a611432f57b006fa91615 Mon Sep 17 00:00:00 2001 From: Venkata Narendra Kumar Gutta Date: Tue, 18 Jun 2024 18:16:52 -0700 Subject: [PATCH 158/447] drm/amdgpu: Add MFD support for ISP I2C bus ISP I2C bus device can't be enumerated via ACPI mechanism since it shares the memory map with the AMDGPU. So use the MFD mechanism for registering the ISP I2C device and add the required resources. 
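The registration itself follows the stock MFD pattern: describe the sub-ranges of the shared register BAR as resources, attach them to cells, and add the cells under the GPU's device. A stripped-down sketch using the I2C offsets from this patch (resource starts are isp_base-relative in the real code, shown here as raw offsets):

#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/mfd/core.h>

static struct resource isp_i2c_res[] = {
	/* ISP410_I2C0_OFFSET/SIZE from this patch; the driver adds isp_base */
	DEFINE_RES_MEM_NAMED(0x66400, 0x100, "isp_i2c0_reg"),
};

static const struct mfd_cell isp_i2c_cell = {
	.name		= "amd_isp_i2c_designware",
	.num_resources	= ARRAY_SIZE(isp_i2c_res),
	.resources	= isp_i2c_res,
};

/* registered together with the "amd_isp_capture" cell:
 *	r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2);
 */
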
Signed-off-by: Venkata Narendra Kumar Gutta Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h | 1 + drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c | 57 +++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h | 11 +++++ drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c | 57 +++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h | 11 +++++ 5 files changed, 113 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index 44e2ea8c9728..b03664c66dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -49,6 +49,7 @@ struct amdgpu_isp { const struct isp_funcs *funcs; struct mfd_cell *isp_cell; struct resource *isp_res; + struct resource *isp_i2c_res; struct isp_platform_data *isp_pdata; unsigned int harvest_config; const struct firmware *fw; diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c index aac107898bae..964c29ef25dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c @@ -42,23 +42,23 @@ static const unsigned int isp_4_1_0_int_srcid[MAX_ISP410_INT_SRC] = { static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) { struct amdgpu_device *adev = isp->adev; + int idx, int_idx, num_res, r; u64 isp_base; - int int_idx; - int r; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); goto failure; } - isp->isp_res = kcalloc(MAX_ISP410_INT_SRC + 1, sizeof(struct resource), + num_res = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES + MAX_ISP410_INT_SRC; + isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; @@ -83,22 +83,53 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) isp->isp_res[0].start = isp_base; isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; - for (int_idx = 0; int_idx < MAX_ISP410_INT_SRC; int_idx++) { - isp->isp_res[int_idx + 1].name = "isp_4_1_0_irq"; - isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; - isp->isp_res[int_idx + 1].start = + isp->isp_res[1].name = "isp_4_1_phy0_reg"; + isp->isp_res[1].flags = IORESOURCE_MEM; + isp->isp_res[1].start = isp_base + ISP410_PHY0_OFFSET; + isp->isp_res[1].end = isp_base + ISP410_PHY0_OFFSET + ISP410_PHY0_SIZE; + + isp->isp_res[2].name = "isp_gpio_sensor0_reg"; + isp->isp_res[2].flags = IORESOURCE_MEM; + isp->isp_res[2].start = isp_base + ISP410_GPIO_SENSOR0_OFFSET; + isp->isp_res[2].end = isp_base + ISP410_GPIO_SENSOR0_OFFSET + + ISP410_GPIO_SENSOR0_SIZE; + + for (idx = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES, int_idx = 0; + idx < num_res; idx++, int_idx++) { + isp->isp_res[idx].name = "isp_4_1_0_irq"; + isp->isp_res[idx].flags = IORESOURCE_IRQ; + isp->isp_res[idx].start = amdgpu_irq_create_mapping(adev, isp_4_1_0_int_srcid[int_idx]); - isp->isp_res[int_idx + 1].end = - isp->isp_res[int_idx + 1].start; + isp->isp_res[idx].end = + isp->isp_res[idx].start; } isp->isp_cell[0].name = "amd_isp_capture"; - isp->isp_cell[0].num_resources = MAX_ISP410_INT_SRC + 1; + isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); - r = 
mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), + GFP_KERNEL); + if (!isp->isp_i2c_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_i2c_res[0].name = "isp_i2c0_reg"; + isp->isp_i2c_res[0].flags = IORESOURCE_MEM; + isp->isp_i2c_res[0].start = isp_base + ISP410_I2C0_OFFSET; + isp->isp_i2c_res[0].end = isp_base + ISP410_I2C0_OFFSET + ISP410_I2C0_SIZE; + + isp->isp_cell[1].name = "amd_isp_i2c_designware"; + isp->isp_cell[1].num_resources = 1; + isp->isp_cell[1].resources = &isp->isp_i2c_res[0]; + isp->isp_cell[1].platform_data = isp->isp_pdata; + isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); goto failure; @@ -111,6 +142,7 @@ failure: kfree(isp->isp_pdata); kfree(isp->isp_res); kfree(isp->isp_cell); + kfree(isp->isp_i2c_res); return r; } @@ -122,6 +154,7 @@ static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_pdata); + kfree(isp->isp_i2c_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h index 315f2822410c..7db24c0f1080 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h @@ -32,8 +32,19 @@ #include "ivsrcid/isp/irqsrcs_isp_4_1.h" +#define MAX_ISP410_MEM_RES 2 +#define MAX_ISP410_SENSOR_RES 1 #define MAX_ISP410_INT_SRC 8 +#define ISP410_PHY0_OFFSET 0x66700 +#define ISP410_PHY0_SIZE 0xD30 + +#define ISP410_I2C0_OFFSET 0x66400 +#define ISP410_I2C0_SIZE 0x100 + +#define ISP410_GPIO_SENSOR0_OFFSET 0x6613C +#define ISP410_GPIO_SENSOR0_SIZE 0x4 + void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index 4e17fa03f7b5..b56f27295468 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -42,23 +42,24 @@ static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = { static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) { struct amdgpu_device *adev = isp->adev; + int idx, int_idx, num_res, r; u64 isp_base; - int int_idx; - int r; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); goto failure; } - isp->isp_res = kcalloc(MAX_ISP411_INT_SRC + 1, sizeof(struct resource), + num_res = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES + MAX_ISP411_INT_SRC; + + isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; @@ -83,22 +84,52 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_res[0].start = isp_base; isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; - for (int_idx = 0; int_idx < MAX_ISP411_INT_SRC; int_idx++) { - isp->isp_res[int_idx + 1].name = "isp_4_1_1_irq"; - isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; - isp->isp_res[int_idx + 1].start = + isp->isp_res[1].name = "isp_4_1_1_phy0_reg"; + isp->isp_res[1].flags = IORESOURCE_MEM; + isp->isp_res[1].start = isp_base + ISP411_PHY0_OFFSET; + isp->isp_res[1].end = isp_base + ISP411_PHY0_OFFSET + ISP411_PHY0_SIZE; + + 
isp->isp_res[2].name = "isp_4_1_1_sensor0_reg"; + isp->isp_res[2].flags = IORESOURCE_MEM; + isp->isp_res[2].start = isp_base + ISP411_GPIO_SENSOR0_OFFSET; + isp->isp_res[2].end = isp_base + ISP411_GPIO_SENSOR0_OFFSET + + ISP411_GPIO_SENSOR0_SIZE; + + for (idx = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES, int_idx = 0; + idx < num_res; idx++, int_idx++) { + isp->isp_res[idx].name = "isp_4_1_1_irq"; + isp->isp_res[idx].flags = IORESOURCE_IRQ; + isp->isp_res[idx].start = amdgpu_irq_create_mapping(adev, isp_4_1_1_int_srcid[int_idx]); - isp->isp_res[int_idx + 1].end = - isp->isp_res[int_idx + 1].start; + isp->isp_res[idx].end = + isp->isp_res[idx].start; } isp->isp_cell[0].name = "amd_isp_capture"; - isp->isp_cell[0].num_resources = MAX_ISP411_INT_SRC + 1; + isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); + if (!isp->isp_i2c_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_i2c_res[0].name = "isp_i2c0_reg"; + isp->isp_i2c_res[0].flags = IORESOURCE_MEM; + isp->isp_i2c_res[0].start = isp_base + ISP411_I2C0_OFFSET; + isp->isp_i2c_res[0].end = isp_base + ISP411_I2C0_OFFSET + ISP411_I2C0_SIZE; + + isp->isp_cell[1].name = "amd_isp_i2c_designware"; + isp->isp_cell[1].num_resources = 1; + isp->isp_cell[1].resources = &isp->isp_i2c_res[0]; + isp->isp_cell[1].platform_data = isp->isp_pdata; + isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); goto failure; @@ -111,6 +142,7 @@ failure: kfree(isp->isp_pdata); kfree(isp->isp_res); kfree(isp->isp_cell); + kfree(isp->isp_i2c_res); return r; } @@ -122,6 +154,7 @@ static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_pdata); + kfree(isp->isp_i2c_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h index dfb9522c9d6a..40887ddeb08c 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h @@ -32,8 +32,19 @@ #include "ivsrcid/isp/irqsrcs_isp_4_1.h" +#define MAX_ISP411_MEM_RES 2 +#define MAX_ISP411_SENSOR_RES 1 #define MAX_ISP411_INT_SRC 8 +#define ISP411_PHY0_OFFSET 0x66700 +#define ISP411_PHY0_SIZE 0xD30 + +#define ISP411_I2C0_OFFSET 0x66400 +#define ISP411_I2C0_SIZE 0x100 + +#define ISP411_GPIO_SENSOR0_OFFSET 0x6613C +#define ISP411_GPIO_SENSOR0_SIZE 0x4 + void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp); #endif From 8e4ed3cf1642df0c4456443d865cff61a9598aa8 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 25 Jul 2024 08:14:56 +0530 Subject: [PATCH 159/447] drm/amd/display: Add null check for pipe_ctx->plane_state in dcn20_program_pipe This commit addresses a null pointer dereference issue in the `dcn20_program_pipe` function. The issue could occur when `pipe_ctx->plane_state` is null. The fix adds a check to ensure `pipe_ctx->plane_state` is not null before accessing. This prevents a null pointer dereference. 
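Each touched condition follows the same shape, relying on the left-to-right short-circuit of && so the dereference is only reached once the pointer test has passed; one representative site from the diff below:

/* plane_state is only dereferenced after the NULL test succeeds */
if (pipe_ctx->update_flags.raw ||
    (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) ||
    pipe_ctx->stream->update_flags.raw)
	dcn20_update_dchubp_dpp(dc, pipe_ctx, context);
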
Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn20/dcn20_hwseq.c:1925 dcn20_program_pipe() error: we previously assumed 'pipe_ctx->plane_state' could be null (see line 1877) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 270e337ae27b..5a6064999033 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1922,22 +1922,29 @@ static void dcn20_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || pipe_ctx->plane_state->update_flags.raw || pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->update_flags.raw || + (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || + pipe_ctx->stream->update_flags.raw) dcn20_update_dchubp_dpp(dc, pipe_ctx, context); - if (pipe_ctx->update_flags.bits.enable - || pipe_ctx->plane_state->update_flags.bits.hdr_mult) + if (pipe_ctx->update_flags.bits.enable || + (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); if (hws->funcs.populate_mcm_luts) { - hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, - pipe_ctx->plane_state->lut_bank_a); - pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + if (pipe_ctx->plane_state) { + hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, + pipe_ctx->plane_state->lut_bank_a); + pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + } } if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || - pipe_ctx->plane_state->update_flags.bits.gamma_change || - pipe_ctx->plane_state->update_flags.bits.lut_3d) + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change) || + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.gamma_change) || + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.lut_3d)) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish @@ -1947,7 +1954,8 @@ static void dcn20_program_pipe( if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.plane_changed || pipe_ctx->stream->update_flags.bits.out_tf || - pipe_ctx->plane_state->update_flags.bits.output_tf_change) + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -1971,7 +1979,7 @@ static void dcn20_program_pipe( } /* Set ABM pipe after other pipe configurations done */ - if (pipe_ctx->plane_state->visible) { + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) { if (pipe_ctx->stream_res.abm) { dc->hwss.set_pipe(pipe_ctx); pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm, From 66d71a72539e173a9b00ca0b1852cbaa5f5bf1ad Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 25 Jul 2024 
07:23:48 +0530 Subject: [PATCH 160/447] drm/amd/display: Add null check for top_pipe_to_program in commit_planes_for_stream This commit addresses a null pointer dereference issue in the `commit_planes_for_stream` function at line 4140. The issue could occur when `top_pipe_to_program` is null. The fix adds a check to ensure `top_pipe_to_program` is not null before accessing its stream_res. This prevents a null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:4140 commit_planes_for_stream() error: we previously assumed 'top_pipe_to_program' could be null (see line 3906) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b8a6c062426d..95d6e29d5e47 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4137,7 +4137,8 @@ static void commit_planes_for_stream(struct dc *dc, } if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) - if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { + if (top_pipe_to_program && + top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { top_pipe_to_program->stream_res.tg->funcs->wait_for_state( top_pipe_to_program->stream_res.tg, CRTC_STATE_VACTIVE); From 7c5b344537a143d15385992e41a50a9c5125e93c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 26 Jul 2024 12:17:12 +0530 Subject: [PATCH 161/447] drm/amdkfd: Fix missing error code in kfd_queue_acquire_buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fix involves setting 'err' to '-EINVAL' before each 'goto out_err_unreserve'. Fixes the below: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c:265 kfd_queue_acquire_buffers() warn: missing error code 'err' drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c 226 int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) 227 { 228 struct kfd_topology_device *topo_dev; 229 struct amdgpu_vm *vm; 230 u32 total_cwsr_size; 231 int err; 232 233 topo_dev = kfd_topology_device_by_id(pdd->dev->id); 234 if (!topo_dev) 235 return -EINVAL; 236 237 vm = drm_priv_to_vm(pdd->drm_priv); 238 err = amdgpu_bo_reserve(vm->root.bo, false); 239 if (err) 240 return err; 241 242 err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE); 243 if (err) 244 goto out_err_unreserve; 245 246 err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE); 247 if (err) 248 goto out_err_unreserve; 249 250 err = kfd_queue_buffer_get(vm, (void *)properties->queue_address, 251 &properties->ring_bo, properties->queue_size); 252 if (err) 253 goto out_err_unreserve; 254 255 /* only compute queue requires EOP buffer and CWSR area */ 256 if (properties->type != KFD_QUEUE_TYPE_COMPUTE) 257 goto out_unreserve; This is clearly a success path. 
258 259 /* EOP buffer is not required for all ASICs */ 260 if (properties->eop_ring_buffer_address) { 261 if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { 262 pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", 263 properties->eop_buf_bo->tbo.base.size, 264 topo_dev->node_props.eop_buffer_size); --> 265 goto out_err_unreserve; This has err in the label name. err = -EINVAL? 266 } 267 err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, 268 &properties->eop_buf_bo, 269 properties->eop_ring_buffer_size); 270 if (err) 271 goto out_err_unreserve; 272 } 273 274 if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) { 275 pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", 276 properties->ctl_stack_size, 277 topo_dev->node_props.ctl_stack_size); 278 goto out_err_unreserve; err? 279 } 280 281 if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { 282 pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", 283 properties->ctx_save_restore_area_size, 284 topo_dev->node_props.cwsr_size); 285 goto out_err_unreserve; err? Not sure. 286 } 287 288 total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) 289 * NUM_XCC(pdd->dev->xcc_mask); 290 total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); 291 292 err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, 293 &properties->cwsr_bo, total_cwsr_size); 294 if (!err) 295 goto out_unreserve; 296 297 amdgpu_bo_unreserve(vm->root.bo); 298 299 err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address, 300 total_cwsr_size); 301 if (err) 302 goto out_err_release; 303 304 return 0; 305 306 out_unreserve: 307 amdgpu_bo_unreserve(vm->root.bo); 308 return 0; 309 310 out_err_unreserve: 311 amdgpu_bo_unreserve(vm->root.bo); 312 out_err_release: 313 kfd_queue_release_buffers(pdd, properties); 314 return err; 315 } Fixes: 629568d25fea ("drm/amdkfd: Validate queue cwsr area and eop buffer size") Reported-by: Dan Carpenter Cc: Philip Yang Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 9807e8adf77d..63795f0cd55a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -262,6 +262,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", properties->eop_buf_bo->tbo.base.size, topo_dev->node_props.eop_buffer_size); + err = -EINVAL; goto out_err_unreserve; } err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, @@ -275,6 +276,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", properties->ctl_stack_size, topo_dev->node_props.ctl_stack_size); + err = -EINVAL; goto out_err_unreserve; } @@ -282,6 +284,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", properties->ctx_save_restore_area_size, topo_dev->node_props.cwsr_size); + err = -EINVAL; goto out_err_unreserve; } From 
f9e6759888866748f31b6b6c2142a481d587f51f Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Sun, 14 Jul 2024 16:31:05 -0400 Subject: [PATCH 162/447] drm/amd/display: roll back quality EASF and ISHARP and dc dependency changes [Why] Seeing several regressions related to quality EASF and ISHARP changes and removing dc dependency changes. [How] Roll back SPL changes Signed-off-by: Samson Tam Reviewed-by: Martin Leung Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 2 + .../gpu/drm/amd/display/dc/dc_spl_translate.c | 49 +- .../gpu/drm/amd/display/dc/dc_spl_translate.h | 1 - .../dc/dml2/dml21/dml21_translation_helper.c | 8 - .../display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 646 +++--- .../dc/resource/dcn401/dcn401_resource.c | 7 - drivers/gpu/drm/amd/display/dc/spl/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 1304 +++++-------- .../drm/amd/display/dc/spl/dc_spl_filters.c | 15 - .../drm/amd/display/dc/spl/dc_spl_filters.h | 15 - .../display/dc/spl/dc_spl_isharp_filters.c | 427 +--- .../display/dc/spl/dc_spl_isharp_filters.h | 33 +- .../display/dc/spl/dc_spl_scl_easf_filters.c | 1726 ----------------- .../display/dc/spl/dc_spl_scl_easf_filters.h | 38 - .../amd/display/dc/spl/dc_spl_scl_filters.c | 92 +- .../amd/display/dc/spl/dc_spl_scl_filters.h | 55 +- .../display/dc/spl/dc_spl_scl_filters_old.c | 25 + .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 43 +- .../gpu/drm/amd/display/dc/spl/spl_debug.h | 23 - .../drm/amd/display/dc/spl/spl_fixpt31_32.c | 518 ----- .../drm/amd/display/dc/spl/spl_fixpt31_32.h | 546 ------ 21 files changed, 1025 insertions(+), 4550 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_debug.h delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 5c9091f2a8b2..4f5b23520365 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1511,6 +1511,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha; + spl_out->scl_data.h_active = pipe_ctx->plane_res.scl_data.h_active; + spl_out->scl_data.v_active = pipe_ctx->plane_res.scl_data.v_active; // Convert pipe_ctx to respective input params for SPL translate_SPL_in_params_from_pipe_ctx(pipe_ctx, spl_in); diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 339d092e711c..bcc596724a4f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -42,26 +42,26 @@ static void populate_spltaps_from_taps(struct spl_taps *spl_scaling_quality, static void populate_taps_from_spltaps(struct scaling_taps *scaling_quality, const struct spl_taps *spl_scaling_quality) { - scaling_quality->h_taps_c = 
spl_scaling_quality->h_taps_c + 1; - scaling_quality->h_taps = spl_scaling_quality->h_taps + 1; - scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c + 1; - scaling_quality->v_taps = spl_scaling_quality->v_taps + 1; + scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c; + scaling_quality->h_taps = spl_scaling_quality->h_taps; + scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c; + scaling_quality->v_taps = spl_scaling_quality->v_taps; } static void populate_ratios_from_splratios(struct scaling_ratios *ratios, - const struct ratio *spl_ratios) + const struct spl_ratios *spl_ratios) { - ratios->horz = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio >> 5, 3, 19); - ratios->vert = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio >> 5, 3, 19); - ratios->horz_c = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio_c >> 5, 3, 19); - ratios->vert_c = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio_c >> 5, 3, 19); + ratios->horz = spl_ratios->horz; + ratios->vert = spl_ratios->vert; + ratios->horz_c = spl_ratios->horz_c; + ratios->vert_c = spl_ratios->vert_c; } static void populate_inits_from_splinits(struct scl_inits *inits, - const struct init *spl_inits) + const struct spl_inits *spl_inits) { - inits->h = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int, spl_inits->h_filter_init_frac >> 5, 0, 19); - inits->v = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int, spl_inits->v_filter_init_frac >> 5, 0, 19); - inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19); - inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19); + inits->h = spl_inits->h; + inits->v = spl_inits->v; + inits->h_c = spl_inits->h_c; + inits->v_c = spl_inits->v_c; } /// @brief Translate SPL input parameters from pipe context /// @param pipe_ctx @@ -170,15 +170,6 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl /* Translate transfer function */ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type; spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf; - - spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active; - spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active; - /* Check if it is stream is in fullscreen and if its HDR. 
- * Use this to determine sharpness levels - */ - spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream); - spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream); - } /// @brief Translate SPL output parameters to pipe context @@ -187,15 +178,15 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out) { // Make scaler data recout point to spl output field recout - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->dscl_prog_data->recout); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->scl_data.recout); // Make scaler data ratios point to spl output field ratios - populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->dscl_prog_data->ratios); + populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->scl_data.ratios); // Make scaler data viewport point to spl output field viewport - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->dscl_prog_data->viewport); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->scl_data.viewport); // Make scaler data viewport_c point to spl output field viewport_c - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->dscl_prog_data->viewport_c); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->scl_data.viewport_c); // Make scaler data taps point to spl output field scaling taps - populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->dscl_prog_data->taps); + populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->scl_data.taps); // Make scaler data init point to spl output field init - populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->dscl_prog_data->init); + populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->scl_data.inits); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h index eaa5c5373b28..c73d640c3632 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h @@ -6,7 +6,6 @@ #define __DC_SPL_TRANSLATE_H__ #include "dc.h" #include "resource.h" -#include "dm_helpers.h" /* Map SPL input parameters to pipe context * @pipe_ctx: pipe context diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 4164cda90b2a..0f34688e4058 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -788,14 +788,6 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm * certain cases. Hence do corrective active and disable scaling. */ plane->composition.scaler_info.enabled = false; - } else if ((plane_state->ctx->dc->config.use_spl == true) && - (plane->composition.scaler_info.enabled == false)) { - /* To enable sharpener for 1:1, scaler must be enabled. 
If use_spl is set, then - * allow case where ratio is 1 but taps > 1 - */ - if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) || - (scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1)) - plane->composition.scaler_info.enabled = true; } /* always_scale is only used for debug purposes not used in production but has to be diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 3a3745597f0c..88d24e36fe00 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -280,8 +280,7 @@ static void dpp401_dscl_set_scaler_filter( static void dpp401_dscl_set_scl_filter( struct dcn401_dpp *dpp, const struct scaler_data *scl_data, - bool chroma_coef_mode, - bool force_coeffs_update) + bool chroma_coef_mode) { bool h_2tap_hardcode_coef_en = false; bool v_2tap_hardcode_coef_en = false; @@ -344,7 +343,7 @@ static void dpp401_dscl_set_scl_filter( || (filter_v_c && (filter_v_c != dpp->filter_v_c)); } - if ((filter_updated) || (force_coeffs_update)) { + if (filter_updated) { uint32_t scl_mode = REG_READ(SCL_MODE); if (!h_2tap_hardcode_coef_en && filter_h) { @@ -656,226 +655,6 @@ static void dpp401_dscl_set_recout(struct dcn401_dpp *dpp, /* Number of RECOUT vertical lines */ RECOUT_HEIGHT, recout->height); } -/** - * dpp401_dscl_program_easf_v - Program EASF_V - * - * @dpp_base: High level DPP struct - * @scl_data: scalaer_data info - * - * This is the primary function to program vertical EASF registers - * - */ -static void dpp401_dscl_program_easf_v(struct dpp *dpp_base, const struct scaler_data *scl_data) -{ - struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); - - PERF_TRACE(); - /* DSCL_EASF_V_MODE */ - REG_SET_3(DSCL_EASF_V_MODE, 0, - SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en, - SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor, - SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); - - if (!scl_data->dscl_prog_data.easf_v_en) { - PERF_TRACE(); - return; - } - - /* DSCL_EASF_V_BF_CNTL */ - REG_SET_6(DSCL_EASF_V_BF_CNTL, 0, - SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en, - SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode, - SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode, - SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain, - SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain, - SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain); - /* DSCL_EASF_V_RINGEST_3TAP_CNTLn */ - REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL1, 0, - SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt, - SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); - REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL2, 0, - SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope, - SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); - REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL3, 0, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); - /* DSCL_EASF_V_RINGEST_EVENTAP_REDUCE */ - REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, 0, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, 
scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); - /* DSCL_EASF_V_RINGEST_EVENTAP_GAIN */ - REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, 0, - SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1, - SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); - /* DSCL_EASF_V_BF_FINAL_MAX_MIN */ - REG_SET_4(DSCL_EASF_V_BF_FINAL_MAX_MIN, 0, - SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa, - SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb, - SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina, - SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); - /* DSCL_EASF_V_BF1_PWL_SEGn */ - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG0, 0, - SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0, - SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0, - SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG1, 0, - SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1, - SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1, - SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG2, 0, - SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2, - SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2, - SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG3, 0, - SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3, - SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3, - SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG4, 0, - SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4, - SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4, - SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG5, 0, - SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5, - SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5, - SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG6, 0, - SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6, - SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6, - SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6); - REG_SET_2(DSCL_EASF_V_BF1_PWL_SEG7, 0, - SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7, - SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7); - /* DSCL_EASF_V_BF3_PWL_SEGn */ - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG0, 0, - SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0, - SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0, - SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0); - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG1, 0, - SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1, - SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1, - 
SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1); - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG2, 0, - SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2, - SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2, - SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2); - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG3, 0, - SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3, - SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3, - SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3); - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG4, 0, - SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4, - SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4, - SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4); - REG_SET_2(DSCL_EASF_V_BF3_PWL_SEG5, 0, - SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5, - SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5); - PERF_TRACE(); -} -/** - * dpp401_dscl_program_easf_h - Program EASF_H - * - * @dpp_base: High level DPP struct - * @scl_data: scalaer_data info - * - * This is the primary function to program horizontal EASF registers - * - */ -static void dpp401_dscl_program_easf_h(struct dpp *dpp_base, const struct scaler_data *scl_data) -{ - struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); - - PERF_TRACE(); - /* DSCL_EASF_H_MODE */ - REG_SET_3(DSCL_EASF_H_MODE, 0, - SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en, - SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor, - SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); - - if (!scl_data->dscl_prog_data.easf_h_en) { - PERF_TRACE(); - return; - } - - /* DSCL_EASF_H_BF_CNTL */ - REG_SET_6(DSCL_EASF_H_BF_CNTL, 0, - SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en, - SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode, - SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode, - SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain, - SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain, - SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); - /* DSCL_EASF_H_RINGEST_EVENTAP_REDUCE */ - REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, 0, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); - /* DSCL_EASF_H_RINGEST_EVENTAP_GAIN */ - REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, 0, - SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1, - SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); - /* DSCL_EASF_H_BF_FINAL_MAX_MIN */ - REG_SET_4(DSCL_EASF_H_BF_FINAL_MAX_MIN, 0, - SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa, - SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb, - SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina, - SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); - /* DSCL_EASF_H_BF1_PWL_SEGn */ - REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG0, 0, - SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0, - SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0, - SCL_EASF_H_BF1_PWL_SLOPE_SEG0, 
scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0);
-	REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG1, 0,
-		SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1,
-		SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1,
-		SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1);
-	REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG2, 0,
-		SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2,
-		SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2,
-		SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2);
-	REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG3, 0,
-		SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3,
-		SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3,
-		SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3);
-	REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG4, 0,
-		SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4,
-		SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4,
-		SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4);
-	REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG5, 0,
-		SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5,
-		SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5,
-		SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5);
-	REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG6, 0,
-		SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6,
-		SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6,
-		SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6);
-	REG_SET_2(DSCL_EASF_H_BF1_PWL_SEG7, 0,
-		SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7,
-		SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7);
-	/* DSCL_EASF_H_BF3_PWL_SEGn */
-	REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG0, 0,
-		SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0,
-		SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0,
-		SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0);
-	REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG1, 0,
-		SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1,
-		SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1,
-		SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1);
-	REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG2, 0,
-		SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2,
-		SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2,
-		SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2);
-	REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG3, 0,
-		SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3,
-		SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3,
-		SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3);
-	REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG4, 0,
-		SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4,
-		SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4,
-		SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4);
-	REG_SET_2(DSCL_EASF_H_BF3_PWL_SEG5, 0,
-		SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5,
-		SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5);
-	PERF_TRACE();
-}
 /**
  * dpp401_dscl_program_easf - Program EASF
  *
@@ -890,19 +669,261 @@ static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_d
 	struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
 	PERF_TRACE();
-	/* DSCL_SC_MODE */
-	REG_SET_2(DSCL_SC_MODE, 0,
-		SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode,
+	REG_UPDATE(DSCL_SC_MODE,
+		SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode);
+	REG_UPDATE(DSCL_SC_MODE,
 		SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en);
+	/* DSCL_EASF_V_MODE */
+	REG_UPDATE(DSCL_EASF_V_MODE,
+		SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en);
+	REG_UPDATE(DSCL_EASF_V_MODE,
+		SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor);
+	REG_UPDATE(DSCL_EASF_V_MODE,
+		SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring);
+	REG_UPDATE(DSCL_EASF_V_BF_CNTL,
+		SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en);
+	REG_UPDATE(DSCL_EASF_V_BF_CNTL,
+		SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode);
+	REG_UPDATE(DSCL_EASF_V_BF_CNTL,
+		SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode);
+	REG_UPDATE(DSCL_EASF_V_BF_CNTL,
+		SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain);
+	REG_UPDATE(DSCL_EASF_V_BF_CNTL,
+		SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain);
+	REG_UPDATE(DSCL_EASF_V_BF_CNTL,
+		SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1,
+		SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1,
+		SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2,
+		SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2,
+		SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3,
+		SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3,
+		SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE,
+		SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE,
+		SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN,
+		SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1);
+	REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN,
+		SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2);
+	REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN,
+		SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa);
+	REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN,
+		SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb);
+	REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN,
+		SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina);
+	REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN,
+		SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0,
+		SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0,
+		SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0,
+		SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1,
+		SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1,
+		SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1,
+		SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2,
+		SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2,
+		SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2,
+		SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3,
+		SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3,
+		SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3,
+		SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4,
+		SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4,
+		SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4,
+		SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5,
+		SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5,
+		SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5,
+		SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6,
+		SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6,
+		SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6,
+		SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7,
+		SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7);
+	REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7,
+		SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0,
+		SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0,
+		SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0,
+		SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1,
+		SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1,
+		SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1,
+		SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2,
+		SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2,
+		SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2,
+		SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3,
+		SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3,
+		SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3,
+		SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4,
+		SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4,
+		SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4,
+		SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5,
+		SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5);
+	REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5,
+		SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5);
+	/* DSCL_EASF_H_MODE */
+	REG_UPDATE(DSCL_EASF_H_MODE,
+		SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en);
+	REG_UPDATE(DSCL_EASF_H_MODE,
+		SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor);
+	REG_UPDATE(DSCL_EASF_H_MODE,
+		SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring);
+	REG_UPDATE(DSCL_EASF_H_BF_CNTL,
+		SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en);
+	REG_UPDATE(DSCL_EASF_H_BF_CNTL,
+		SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode);
+	REG_UPDATE(DSCL_EASF_H_BF_CNTL,
+		SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode);
+	REG_UPDATE(DSCL_EASF_H_BF_CNTL,
+		SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain);
+	REG_UPDATE(DSCL_EASF_H_BF_CNTL,
+		SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain);
+	REG_UPDATE(DSCL_EASF_H_BF_CNTL,
+		SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain);
+	REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE,
+		SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1);
+	REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE,
+		SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2);
+	REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN,
+		SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1);
+	REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN,
+		SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2);
+	REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN,
+		SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa);
+	REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN,
+		SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb);
+	REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN,
+		SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina);
+	REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN,
+		SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0,
+		SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0,
+		SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0,
+		SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1,
+		SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1,
+		SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1,
+		SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2,
+		SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2,
+		SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2,
+		SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3,
+		SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3,
+		SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3,
+		SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4,
+		SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4,
+		SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4,
+		SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5,
+		SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5,
+		SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5,
+		SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6,
+		SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6,
+		SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6,
+		SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7,
+		SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7);
+	REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7,
+		SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0,
+		SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0,
+		SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0,
+		SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1,
+		SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1,
+		SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1,
+		SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2,
+		SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2,
+		SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2,
+		SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3,
+		SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3,
+		SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3,
+		SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4,
+		SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4,
+		SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4,
+		SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5,
+		SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5);
+	REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5,
+		SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5);
 	/* DSCL_EASF_SC_MATRIX_C0C1, DSCL_EASF_SC_MATRIX_C2C3 */
-	REG_SET_2(DSCL_SC_MATRIX_C0C1, 0,
-		SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0,
+	REG_UPDATE(DSCL_SC_MATRIX_C0C1,
+		SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0);
+	REG_UPDATE(DSCL_SC_MATRIX_C0C1,
 		SCL_SC_MATRIX_C1, scl_data->dscl_prog_data.easf_matrix_c1);
-	REG_SET_2(DSCL_SC_MATRIX_C2C3, 0,
-		SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2,
+	REG_UPDATE(DSCL_SC_MATRIX_C2C3,
+		SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2);
+	REG_UPDATE(DSCL_SC_MATRIX_C2C3,
 		SCL_SC_MATRIX_C3, scl_data->dscl_prog_data.easf_matrix_c3);
-	dpp401_dscl_program_easf_v(dpp_base, scl_data);
-	dpp401_dscl_program_easf_h(dpp_base, scl_data);
 	PERF_TRACE();
 }
 /**
@@ -937,11 +958,10 @@ static void dpp401_dscl_set_isharp_filter(
 	REG_UPDATE(ISHARP_DELTA_CTRL,
 		ISHARP_DELTA_LUT_HOST_SELECT, 0);
-	/* LUT data write is auto-indexed. Write index once */
-	REG_SET(ISHARP_DELTA_INDEX, 0,
-		ISHARP_DELTA_INDEX, 0);
 	for (level = 0; level < NUM_LEVELS; level++) {
 		filter_data = filter[level];
+		REG_SET(ISHARP_DELTA_INDEX, 0,
+			ISHARP_DELTA_INDEX, level);
 		REG_SET(ISHARP_DELTA_DATA, 0,
 			ISHARP_DELTA_DATA, filter_data);
 	}
@@ -957,74 +977,107 @@
  *
  */
 static void dpp401_dscl_program_isharp(struct dpp *dpp_base,
-	const struct scaler_data *scl_data,
-	bool *bs_coeffs_updated)
+	const struct scaler_data *scl_data)
 {
 	struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
-	*bs_coeffs_updated = false;
 	PERF_TRACE();
-	/* ISHARP_MODE */
-	REG_SET_6(ISHARP_MODE, 0,
-		ISHARP_EN, scl_data->dscl_prog_data.isharp_en,
-		ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable,
-		ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode,
-		ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode,
-		ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode,
-		ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm);
-
-	/* Skip remaining register programming if ISHARP is disabled */
-	if (!scl_data->dscl_prog_data.isharp_en) {
-		PERF_TRACE();
-		return;
-	}
-
-	/* ISHARP_NOISEDET_THRESHOLD */
-	REG_SET_2(ISHARP_NOISEDET_THRESHOLD, 0,
-		ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold,
+	/* ISHARP_EN */
+	REG_UPDATE(ISHARP_MODE,
+		ISHARP_EN, scl_data->dscl_prog_data.isharp_en);
+	/* ISHARP_NOISEDET_EN */
+	REG_UPDATE(ISHARP_MODE,
+		ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable);
+	/* ISHARP_NOISEDET_MODE */
+	REG_UPDATE(ISHARP_MODE,
+		ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode);
+	/* ISHARP_NOISEDET_UTHRE */
+	REG_UPDATE(ISHARP_NOISEDET_THRESHOLD,
+		ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold);
+	/* ISHARP_NOISEDET_DTHRE */
+	REG_UPDATE(ISHARP_NOISEDET_THRESHOLD,
 		ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold);
-
-	/* ISHARP_NOISE_GAIN_PWL */
-	REG_SET_3(ISHARP_NOISE_GAIN_PWL, 0,
-		ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in,
-		ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in,
+	REG_UPDATE(ISHARP_MODE,
+		ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode);
+	/* ISHARP_NOISEDET_UTHRE */
+	REG_UPDATE(ISHARP_NOISEDET_THRESHOLD,
+		ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold);
+	/* ISHARP_NOISEDET_DTHRE */
+	REG_UPDATE(ISHARP_NOISEDET_THRESHOLD,
+		ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold);
+	/* ISHARP_NOISEDET_PWL_START_IN */
+	REG_UPDATE(ISHARP_NOISE_GAIN_PWL,
+		ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in);
+	/* ISHARP_NOISEDET_PWL_END_IN */
+	REG_UPDATE(ISHARP_NOISE_GAIN_PWL,
+		ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in);
+	/* ISHARP_NOISEDET_PWL_SLOPE */
+	REG_UPDATE(ISHARP_NOISE_GAIN_PWL,
 		ISHARP_NOISEDET_PWL_SLOPE, scl_data->dscl_prog_data.isharp_noise_det.pwl_slope);
-
+	/* ISHARP_LBA_MODE */
+	REG_UPDATE(ISHARP_MODE,
+		ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode);
 	/* ISHARP_LBA: IN_SEG, BASE_SEG, SLOPE_SEG */
-	REG_SET_3(ISHARP_LBA_PWL_SEG0, 0,
-		ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0],
-		ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0],
+	REG_UPDATE(ISHARP_LBA_PWL_SEG0,
+		ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG0,
+		ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG0,
 		ISHARP_LBA_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.isharp_lba.slope_seg[0]);
-	REG_SET_3(ISHARP_LBA_PWL_SEG1, 0,
-		ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1],
-		ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1],
+	REG_UPDATE(ISHARP_LBA_PWL_SEG1,
+		ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG1,
+		ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG1,
		ISHARP_LBA_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.isharp_lba.slope_seg[1]);
-	REG_SET_3(ISHARP_LBA_PWL_SEG2, 0,
-		ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2],
-		ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2],
+	REG_UPDATE(ISHARP_LBA_PWL_SEG2,
+		ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG2,
+		ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG2,
 		ISHARP_LBA_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.isharp_lba.slope_seg[2]);
-	REG_SET_3(ISHARP_LBA_PWL_SEG3, 0,
-		ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3],
-		ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3],
+	REG_UPDATE(ISHARP_LBA_PWL_SEG3,
+		ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG3,
+		ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG3,
 		ISHARP_LBA_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.isharp_lba.slope_seg[3]);
-	REG_SET_3(ISHARP_LBA_PWL_SEG4, 0,
-		ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4],
-		ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4],
+	REG_UPDATE(ISHARP_LBA_PWL_SEG4,
+		ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG4,
+		ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG4,
 		ISHARP_LBA_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.isharp_lba.slope_seg[4]);
-	REG_SET_2(ISHARP_LBA_PWL_SEG5, 0,
-		ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5],
+	REG_UPDATE(ISHARP_LBA_PWL_SEG5,
+		ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5]);
+	REG_UPDATE(ISHARP_LBA_PWL_SEG5,
 		ISHARP_LBA_PWL_BASE_SEG5, scl_data->dscl_prog_data.isharp_lba.base_seg[5]);
+	/* ISHARP_FMT_MODE */
+	REG_UPDATE(ISHARP_MODE,
+		ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode);
+	/* ISHARP_FMT_NORM */
+	REG_UPDATE(ISHARP_MODE,
+		ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm);
 	/* ISHARP_DELTA_LUT */
 	dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta);
-
-	/* ISHARP_NLDELTA_SOFT_CLIP */
-	REG_SET_6(ISHARP_NLDELTA_SOFT_CLIP, 0,
-		ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p,
-		ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p,
-		ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p,
-		ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n,
-		ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n,
+	/* ISHARP_NLDELTA_SCLIP_EN_P */
+	REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP,
+		ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p);
+	/* ISHARP_NLDELTA_SCLIP_PIVOT_P */
+	REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP,
+		ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p);
+	/* ISHARP_NLDELTA_SCLIP_SLOPE_P */
+	REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP,
+		ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p);
+	/* ISHARP_NLDELTA_SCLIP_EN_N */
+	REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP,
+		ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n);
+	/* ISHARP_NLDELTA_SCLIP_PIVOT_N */
+	REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP,
+		ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n);
+	/* ISHARP_NLDELTA_SCLIP_SLOPE_N */
+	REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP,
 		ISHARP_NLDELTA_SCLIP_SLOPE_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_n);
 	/* Blur and Scale Coefficients - SCL_COEF_RAM_TAP_SELECT */
@@ -1034,14 +1087,12 @@ static void dpp401_dscl_program_isharp(struct dpp *dpp_base,
 				dpp, scl_data->taps.v_taps,
 				SCL_COEF_VERTICAL_BLUR_SCALE,
 				scl_data->dscl_prog_data.filter_blur_scale_v);
-			*bs_coeffs_updated = true;
 		}
 		if (scl_data->dscl_prog_data.filter_blur_scale_h) {
 			dpp401_dscl_set_scaler_filter(
 				dpp, scl_data->taps.h_taps,
 				SCL_COEF_HORIZONTAL_BLUR_SCALE,
 				scl_data->dscl_prog_data.filter_blur_scale_h);
-			*bs_coeffs_updated = true;
 		}
 	}
 	PERF_TRACE();
@@ -1072,7 +1123,6 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base,
 		dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale);
 	bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN &&
 				scl_data->format <= PIXEL_FORMAT_VIDEO_END;
-	bool bs_coeffs_updated = false;
 	if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0)
 		return;
@@ -1132,7 +1182,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base,
 	if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) {
 		if (dpp->base.ctx->dc->config.prefer_easf)
 			dpp401_dscl_disable_easf(dpp_base, scl_data);
-		dpp401_dscl_program_isharp(dpp_base, scl_data, &bs_coeffs_updated);
+		dpp401_dscl_program_isharp(dpp_base, scl_data);
 		return;
 	}
@@ -1159,18 +1209,12 @@
 		SCL_V_NUM_TAPS_C, v_num_taps_c,
 		SCL_H_NUM_TAPS_C, h_num_taps_c);
-	/* ISharp configuration
-	 * - B&S coeffs are written to same coeff RAM as WB scaler coeffs
-	 * - coeff RAM toggle is in EASF programming
-	 * - if we are only programming B&S coeffs, then need to reprogram
-	 *   WB scaler coeffs and toggle coeff RAM together
-	 */
-	//if (dpp->base.ctx->dc->config.prefer_easf)
-	dpp401_dscl_program_isharp(dpp_base, scl_data, &bs_coeffs_updated);
-
-	dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr, bs_coeffs_updated);
+	dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr);
 	/* Edge adaptive scaler function configuration */
 	if (dpp->base.ctx->dc->config.prefer_easf)
 		dpp401_dscl_program_easf(dpp_base, scl_data);
+	/* isharp configuration */
+	//if (dpp->base.ctx->dc->config.prefer_easf)
+	dpp401_dscl_program_isharp(dpp_base, scl_data);
 	PERF_TRACE();
 }
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
index 5ee20753572e..9fcdf06d6aa4 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
@@ -76,9 +76,6 @@
 #include "dml2/dml2_wrapper.h"
-
-#include "spl/dc_spl_scl_easf_filters.h"
-#include "spl/dc_spl_isharp_filters.h"
-
 #define DC_LOGGER_INIT(logger)
 enum dcn401_clk_src_array_id {
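[Editor's note, between hunks: the dpp401_dscl changes above repeatedly swap multi-field REG_SET_n writes for per-field REG_UPDATE calls. Roughly speaking, in DC's register helpers a REG_SET_n-style access composes the named fields over a known initial value and issues one write, while REG_UPDATE performs a read-modify-write of a single field and preserves everything else in the register. The following C sketch only illustrates that difference under stated assumptions; fake_rreg/fake_wreg and the field layout are hypothetical stand-ins, not the actual DC macros.]

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for one 32-bit MMIO register backed by memory. */
static uint32_t fake_reg;

static uint32_t fake_rreg(void)       { return fake_reg; }
static void     fake_wreg(uint32_t v) { fake_reg = v; }

/* A field is a shift/mask pair, as in the generated register headers. */
struct field { uint32_t shift, mask; };

/* REG_SET-style: start from a known init value, program the field, one write. */
static void set_field(uint32_t init, struct field f, uint32_t val)
{
	fake_wreg((init & ~f.mask) | ((val << f.shift) & f.mask));
}

/* REG_UPDATE-style: read the current value, modify one field, write it back. */
static void update_field(struct field f, uint32_t val)
{
	uint32_t v = fake_rreg();

	v = (v & ~f.mask) | ((val << f.shift) & f.mask);
	fake_wreg(v);
}

int main(void)
{
	const struct field en   = { .shift = 0, .mask = 0x1 };
	const struct field mode = { .shift = 4, .mask = 0x30 };

	set_field(0, en, 1);   /* clobbers every other field of the register */
	update_field(mode, 2); /* preserves the EN bit written just above */
	printf("reg = 0x%08x\n", fake_reg); /* prints reg = 0x00000021 */
	return 0;
}

[The trade-off is one MMIO write for the whole register versus a read plus a write per field; the per-field form is safer when other fields must survive, at the cost of extra register traffic.]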
@@ -2126,10 +2123,6 @@ static bool dcn401_resource_construct(
 	dc->dml2_options.max_segments_per_hubp = 20;
 	dc->dml2_options.det_segment_size = DCN4_01_CRB_SEGMENT_SIZE_KB;
-	/* SPL */
-	spl_init_easf_filter_coeffs();
-	spl_init_blur_scale_coeffs();
-
 	return true;
 create_fail:
diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile
index 05764d4d4604..f8df85ea4d32 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile
@@ -23,7 +23,7 @@
 # Makefile for the 'spl' sub-component of DAL.
 # It provides the scaling library interface.
-SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o spl_fixpt31_32.o
+SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_filters_old.o dc_spl_isharp_filters.o
 AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL))
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
index b8858ea7c776..9eccdb38bed4 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
@@ -4,11 +4,9 @@
 #include "dc_spl.h"
 #include "dc_spl_scl_filters.h"
-#include "dc_spl_scl_easf_filters.h"
 #include "dc_spl_isharp_filters.h"
-#include "spl_debug.h"
-#define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19))
+#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19))
 #define MIN_VIEWPORT_SIZE 12
 static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1)
@@ -109,26 +107,26 @@ static struct spl_rect calculate_plane_rec_in_timing_active(
 	const struct spl_rect *stream_src = &spl_in->basic_out.src_rect;
 	const struct spl_rect *stream_dst = &spl_in->basic_out.dst_rect;
 	struct spl_rect rec_out = {0};
-	struct spl_fixed31_32 temp;
+	struct fixed31_32 temp;
-	temp = spl_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width,
+	temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width,
 			stream_src->width);
-	rec_out.x = stream_dst->x + spl_fixpt_round(temp);
+	rec_out.x = stream_dst->x + dc_fixpt_round(temp);
-	temp = spl_fixpt_from_fraction(
+	temp = dc_fixpt_from_fraction(
 			(rec_in->x + rec_in->width) * (long long)stream_dst->width,
 			stream_src->width);
-	rec_out.width = stream_dst->x + spl_fixpt_round(temp) - rec_out.x;
+	rec_out.width = stream_dst->x + dc_fixpt_round(temp) - rec_out.x;
-	temp = spl_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height,
+	temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height,
 			stream_src->height);
-	rec_out.y = stream_dst->y + spl_fixpt_round(temp);
+	rec_out.y = stream_dst->y + dc_fixpt_round(temp);
-	temp = spl_fixpt_from_fraction(
+	temp = dc_fixpt_from_fraction(
 			(rec_in->y + rec_in->height) * (long long)stream_dst->height,
 			stream_src->height);
-	rec_out.height = stream_dst->y + spl_fixpt_round(temp) - rec_out.y;
+	rec_out.height = stream_dst->y + dc_fixpt_round(temp) - rec_out.y;
 	return rec_out;
 }
@@ -146,7 +144,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active(
 	mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx;
 	mpc_rec.height = plane_clip_rec->height;
 	mpc_rec.y = plane_clip_rec->y;
-	SPL_ASSERT(mpc_slice_count == 1 ||
+	ASSERT(mpc_slice_count == 1 ||
 			spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE ||
 			mpc_rec.width % 2 == 0);
@@ -159,7 +157,7 @@
 	}
 	if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) {
-		SPL_ASSERT(mpc_rec.height % 2 == 0);
+		ASSERT(mpc_rec.height % 2 == 0);
 		mpc_rec.height /= 2;
 	}
 	return mpc_rec;
@@ -199,7 +197,7 @@ static struct spl_rect calculate_odm_slice_in_timing_active(struct spl_in *spl_i
 	return spl_in->basic_out.odm_slice_rect;
 }
-static void spl_calculate_recout(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out)
+static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out)
 {
 	/*
 	 * A plane clip represents the desired plane size and position in Stream
@@ -342,23 +340,20 @@ static void spl_calculate_recout(struct spl_in *spl_in, struct spl_scratch *spl_
 		/* shift the overlapping area so it is with respect to current
 		 * ODM slice's position
 		 */
-		spl_scratch->scl_data.recout = shift_rec(
+		spl_out->scl_data.recout = shift_rec(
 				&overlapping_area,
 				-odm_slice.x, -odm_slice.y);
-		spl_scratch->scl_data.recout.height -=
+		spl_out->scl_data.recout.height -=
 				spl_in->debug.visual_confirm_base_offset;
-		spl_scratch->scl_data.recout.height -=
+		spl_out->scl_data.recout.height -=
 				spl_in->debug.visual_confirm_dpp_offset;
 	} else
 		/* if there is no overlap, zero recout */
-		memset(&spl_scratch->scl_data.recout, 0,
+		memset(&spl_out->scl_data.recout, 0,
 				sizeof(struct spl_rect));
 }
-
 /* Calculate scaling ratios */
-static void spl_calculate_scaling_ratios(struct spl_in *spl_in,
-		struct spl_scratch *spl_scratch,
-		struct spl_out *spl_out)
+static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *spl_out)
 {
 	const int in_w = spl_in->basic_out.src_rect.width;
 	const int in_h = spl_in->basic_out.src_rect.height;
@@ -369,75 +364,59 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in,
 	/*Swap surf_src height and width since scaling ratios are in recout rotation*/
 	if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 ||
 			spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270)
-		spl_swap(surf_src.height, surf_src.width);
+		swap(surf_src.height, surf_src.width);
-	spl_scratch->scl_data.ratios.horz = spl_fixpt_from_fraction(
+	spl_out->scl_data.ratios.horz = dc_fixpt_from_fraction(
 			surf_src.width,
 			spl_in->basic_in.dst_rect.width);
-	spl_scratch->scl_data.ratios.vert = spl_fixpt_from_fraction(
+	spl_out->scl_data.ratios.vert = dc_fixpt_from_fraction(
 			surf_src.height,
 			spl_in->basic_in.dst_rect.height);
 	if (spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE)
-		spl_scratch->scl_data.ratios.horz.value *= 2;
+		spl_out->scl_data.ratios.horz.value *= 2;
 	else if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM)
-		spl_scratch->scl_data.ratios.vert.value *= 2;
+		spl_out->scl_data.ratios.vert.value *= 2;
-	spl_scratch->scl_data.ratios.vert.value = spl_div64_s64(
-		spl_scratch->scl_data.ratios.vert.value * in_h, out_h);
-	spl_scratch->scl_data.ratios.horz.value = spl_div64_s64(
-		spl_scratch->scl_data.ratios.horz.value * in_w, out_w);
+	spl_out->scl_data.ratios.vert.value = div64_s64(
+		spl_out->scl_data.ratios.vert.value * in_h, out_h);
+	spl_out->scl_data.ratios.horz.value = div64_s64(
+		spl_out->scl_data.ratios.horz.value * in_w, out_w);
-	spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz;
-	spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert;
+	spl_out->scl_data.ratios.horz_c = spl_out->scl_data.ratios.horz;
+	spl_out->scl_data.ratios.vert_c = spl_out->scl_data.ratios.vert;
 	if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 ||
 			spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) {
-		spl_scratch->scl_data.ratios.horz_c.value /= 2;
-		spl_scratch->scl_data.ratios.vert_c.value /= 2;
+		spl_out->scl_data.ratios.horz_c.value /= 2;
+		spl_out->scl_data.ratios.vert_c.value /= 2;
 	}
-	spl_scratch->scl_data.ratios.horz = spl_fixpt_truncate(
-			spl_scratch->scl_data.ratios.horz, 19);
-	spl_scratch->scl_data.ratios.vert = spl_fixpt_truncate(
-			spl_scratch->scl_data.ratios.vert, 19);
-	spl_scratch->scl_data.ratios.horz_c = spl_fixpt_truncate(
-			spl_scratch->scl_data.ratios.horz_c, 19);
-	spl_scratch->scl_data.ratios.vert_c = spl_fixpt_truncate(
-			spl_scratch->scl_data.ratios.vert_c, 19);
-
-	/*
-	 * Coefficient table and some registers are different based on ratio
-	 * that is output/input. Currently we calculate input/output
-	 * Store 1/ratio in recip_ratio for those lookups
-	 */
-	spl_scratch->scl_data.recip_ratios.horz = spl_fixpt_recip(
-			spl_scratch->scl_data.ratios.horz);
-	spl_scratch->scl_data.recip_ratios.vert = spl_fixpt_recip(
-			spl_scratch->scl_data.ratios.vert);
-	spl_scratch->scl_data.recip_ratios.horz_c = spl_fixpt_recip(
-			spl_scratch->scl_data.ratios.horz_c);
-	spl_scratch->scl_data.recip_ratios.vert_c = spl_fixpt_recip(
-			spl_scratch->scl_data.ratios.vert_c);
+	spl_out->scl_data.ratios.horz = dc_fixpt_truncate(
+			spl_out->scl_data.ratios.horz, 19);
+	spl_out->scl_data.ratios.vert = dc_fixpt_truncate(
+			spl_out->scl_data.ratios.vert, 19);
+	spl_out->scl_data.ratios.horz_c = dc_fixpt_truncate(
+			spl_out->scl_data.ratios.horz_c, 19);
+	spl_out->scl_data.ratios.vert_c = dc_fixpt_truncate(
+			spl_out->scl_data.ratios.vert_c, 19);
 }
-
 /* Calculate Viewport size */
-static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_scratch *spl_scratch)
+static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *spl_out)
 {
-	spl_scratch->scl_data.viewport.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz,
-			spl_scratch->scl_data.recout.width));
-	spl_scratch->scl_data.viewport.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert,
-			spl_scratch->scl_data.recout.height));
-	spl_scratch->scl_data.viewport_c.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz_c,
-			spl_scratch->scl_data.recout.width));
-	spl_scratch->scl_data.viewport_c.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert_c,
-			spl_scratch->scl_data.recout.height));
+	spl_out->scl_data.viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz,
+			spl_out->scl_data.recout.width));
+	spl_out->scl_data.viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert,
+			spl_out->scl_data.recout.height));
+	spl_out->scl_data.viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz_c,
+			spl_out->scl_data.recout.width));
+	spl_out->scl_data.viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert_c,
+			spl_out->scl_data.recout.height));
 	if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 ||
 			spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) {
-		spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height);
-		spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height);
+		swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height);
+		swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height);
 	}
 }
-
 static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation,
 	bool horizontal_mirror,
 	bool *orthogonal_rotation,
@@ -461,7 +440,6 @@ static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation,
 	if (horizontal_mirror)
 		*flip_horz_scan_dir = !*flip_horz_scan_dir;
 }
-
 /*
  * We completely calculate vp offset, size and inits here based entirely on scaling
  * ratios and recout for pixel perfect pipe combine.
 */
@@ -471,13 +449,13 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir,
 		int recout_size,
 		int src_size,
 		int taps,
-		struct spl_fixed31_32 ratio,
-		struct spl_fixed31_32 init_adj,
-		struct spl_fixed31_32 *init,
+		struct fixed31_32 ratio,
+		struct fixed31_32 init_adj,
+		struct fixed31_32 *init,
 		int *vp_offset,
 		int *vp_size)
 {
-	struct spl_fixed31_32 temp;
+	struct fixed31_32 temp;
 	int int_part;
 	/*
@@ -490,33 +468,33 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir,
 	 * init_bot = init + scaling_ratio
 	 * to get pixel perfect combine add the fraction from calculating vp offset
 	 */
-	temp = spl_fixpt_mul_int(ratio, recout_offset_within_recout_full);
-	*vp_offset = spl_fixpt_floor(temp);
+	temp = dc_fixpt_mul_int(ratio, recout_offset_within_recout_full);
+	*vp_offset = dc_fixpt_floor(temp);
 	temp.value &= 0xffffffff;
-	*init = spl_fixpt_add(spl_fixpt_div_int(spl_fixpt_add_int(ratio, taps + 1), 2), temp);
-	*init = spl_fixpt_add(*init, init_adj);
-	*init = spl_fixpt_truncate(*init, 19);
+	*init = dc_fixpt_add(dc_fixpt_div_int(dc_fixpt_add_int(ratio, taps + 1), 2), temp);
+	*init = dc_fixpt_add(*init, init_adj);
+	*init = dc_fixpt_truncate(*init, 19);
 	/*
 	 * If viewport has non 0 offset and there are more taps than covered by init then
 	 * we should decrease the offset and increase init so we are never sampling
 	 * outside of viewport.
 	 */
-	int_part = spl_fixpt_floor(*init);
+	int_part = dc_fixpt_floor(*init);
 	if (int_part < taps) {
 		int_part = taps - int_part;
 		if (int_part > *vp_offset)
 			int_part = *vp_offset;
 		*vp_offset -= int_part;
-		*init = spl_fixpt_add_int(*init, int_part);
+		*init = dc_fixpt_add_int(*init, int_part);
 	}
 	/*
 	 * If taps are sampling outside of viewport at end of recout and there are more pixels
 	 * available in the surface we should increase the viewport size, regardless set vp to
 	 * only what is used.
 	 */
-	temp = spl_fixpt_add(*init, spl_fixpt_mul_int(ratio, recout_size - 1));
-	*vp_size = spl_fixpt_floor(temp);
+	temp = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_size - 1));
+	*vp_size = dc_fixpt_floor(temp);
 	if (*vp_size + *vp_offset > src_size)
 		*vp_size = src_size - *vp_offset;
@@ -531,16 +509,15 @@
 static bool spl_is_yuv420(enum spl_pixel_format format)
 {
-	if ((format >= SPL_PIXEL_FORMAT_420BPP8) &&
-		(format <= SPL_PIXEL_FORMAT_420BPP10))
+	if ((format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN) &&
+		(format <= SPL_PIXEL_FORMAT_VIDEO_END))
 		return true;
 	return false;
 }
 /*Calculate inits and viewport */
-static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
-		struct spl_scratch *spl_scratch)
+static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_out *spl_out)
 {
 	struct spl_rect src = spl_in->basic_in.src_rect;
 	struct spl_rect recout_dst_in_active_timing;
@@ -551,11 +528,11 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
 	int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8
 			|| spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 2 : 1;
 	bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir;
-	struct spl_fixed31_32 init_adj_h = spl_fixpt_zero;
-	struct spl_fixed31_32 init_adj_v = spl_fixpt_zero;
+	struct fixed31_32 init_adj_h = dc_fixpt_zero;
+	struct fixed31_32 init_adj_v = dc_fixpt_zero;
 	recout_clip_in_active_timing = shift_rec(
-			&spl_scratch->scl_data.recout, odm_slice.x, odm_slice.y);
+			&spl_out->scl_data.recout, odm_slice.x, odm_slice.y);
 	recout_dst_in_active_timing = calculate_plane_rec_in_timing_active(
 			spl_in, &spl_in->basic_in.dst_rect);
 	overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing,
@@ -578,8 +555,8 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
 			&flip_horz_scan_dir);
 	if (orthogonal_rotation) {
-		spl_swap(src.width, src.height);
-		spl_swap(flip_vert_scan_dir, flip_horz_scan_dir);
+		swap(src.width, src.height);
+		swap(flip_vert_scan_dir, flip_horz_scan_dir);
 	}
 	if (spl_is_yuv420(spl_in->basic_in.format)) {
@@ -591,17 +568,17 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
 		switch (spl_in->basic_in.cositing) {
 		case CHROMA_COSITING_LEFT:
-			init_adj_h = spl_fixpt_zero;
-			init_adj_v = spl_fixpt_from_fraction(sign, 4);
+			init_adj_h = dc_fixpt_zero;
+			init_adj_v = dc_fixpt_from_fraction(sign, 2);
 			break;
 		case CHROMA_COSITING_NONE:
-			init_adj_h = spl_fixpt_from_fraction(sign, 4);
-			init_adj_v = spl_fixpt_from_fraction(sign, 4);
+			init_adj_h = dc_fixpt_from_fraction(sign, 2);
+			init_adj_v = dc_fixpt_from_fraction(sign, 2);
 			break;
 		case CHROMA_COSITING_TOPLEFT:
 		default:
-			init_adj_h = spl_fixpt_zero;
-			init_adj_v = spl_fixpt_zero;
+			init_adj_h = dc_fixpt_zero;
+			init_adj_v = dc_fixpt_zero;
 			break;
 		}
 	}
@@ -609,60 +586,59 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
 	spl_calculate_init_and_vp(
 			flip_horz_scan_dir,
 			recout_clip_in_recout_dst.x,
-			spl_scratch->scl_data.recout.width,
+			spl_out->scl_data.recout.width,
 			src.width,
-			spl_scratch->scl_data.taps.h_taps,
-			spl_scratch->scl_data.ratios.horz,
-			spl_fixpt_zero,
-			&spl_scratch->scl_data.inits.h,
-			&spl_scratch->scl_data.viewport.x,
-			&spl_scratch->scl_data.viewport.width);
+			spl_out->scl_data.taps.h_taps,
+			spl_out->scl_data.ratios.horz,
+			dc_fixpt_zero,
+			&spl_out->scl_data.inits.h,
+			&spl_out->scl_data.viewport.x,
+			&spl_out->scl_data.viewport.width);
 	spl_calculate_init_and_vp(
 			flip_horz_scan_dir,
 			recout_clip_in_recout_dst.x,
-			spl_scratch->scl_data.recout.width,
+			spl_out->scl_data.recout.width,
 			src.width / vpc_div,
-			spl_scratch->scl_data.taps.h_taps_c,
-			spl_scratch->scl_data.ratios.horz_c,
+			spl_out->scl_data.taps.h_taps_c,
+			spl_out->scl_data.ratios.horz_c,
 			init_adj_h,
-			&spl_scratch->scl_data.inits.h_c,
-			&spl_scratch->scl_data.viewport_c.x,
-			&spl_scratch->scl_data.viewport_c.width);
+			&spl_out->scl_data.inits.h_c,
+			&spl_out->scl_data.viewport_c.x,
+			&spl_out->scl_data.viewport_c.width);
 	spl_calculate_init_and_vp(
 			flip_vert_scan_dir,
 			recout_clip_in_recout_dst.y,
-			spl_scratch->scl_data.recout.height,
+			spl_out->scl_data.recout.height,
 			src.height,
-			spl_scratch->scl_data.taps.v_taps,
-			spl_scratch->scl_data.ratios.vert,
-			spl_fixpt_zero,
-			&spl_scratch->scl_data.inits.v,
-			&spl_scratch->scl_data.viewport.y,
-			&spl_scratch->scl_data.viewport.height);
+			spl_out->scl_data.taps.v_taps,
+			spl_out->scl_data.ratios.vert,
+			dc_fixpt_zero,
+			&spl_out->scl_data.inits.v,
+			&spl_out->scl_data.viewport.y,
+			&spl_out->scl_data.viewport.height);
 	spl_calculate_init_and_vp(
 			flip_vert_scan_dir,
 			recout_clip_in_recout_dst.y,
-			spl_scratch->scl_data.recout.height,
+			spl_out->scl_data.recout.height,
 			src.height / vpc_div,
-			spl_scratch->scl_data.taps.v_taps_c,
-			spl_scratch->scl_data.ratios.vert_c,
+			spl_out->scl_data.taps.v_taps_c,
+			spl_out->scl_data.ratios.vert_c,
 			init_adj_v,
-			&spl_scratch->scl_data.inits.v_c,
-			&spl_scratch->scl_data.viewport_c.y,
-			&spl_scratch->scl_data.viewport_c.height);
+			&spl_out->scl_data.inits.v_c,
+			&spl_out->scl_data.viewport_c.y,
+			&spl_out->scl_data.viewport_c.height);
 	if (orthogonal_rotation) {
-		spl_swap(spl_scratch->scl_data.viewport.x, spl_scratch->scl_data.viewport.y);
-		spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height);
-		spl_swap(spl_scratch->scl_data.viewport_c.x, spl_scratch->scl_data.viewport_c.y);
-		spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height);
+		swap(spl_out->scl_data.viewport.x, spl_out->scl_data.viewport.y);
+		swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height);
+		swap(spl_out->scl_data.viewport_c.x, spl_out->scl_data.viewport_c.y);
+		swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height);
 	}
-	spl_scratch->scl_data.viewport.x += src.x;
-	spl_scratch->scl_data.viewport.y += src.y;
-	SPL_ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0);
-	spl_scratch->scl_data.viewport_c.x += src.x / vpc_div;
-	spl_scratch->scl_data.viewport_c.y += src.y / vpc_div;
+	spl_out->scl_data.viewport.x += src.x;
+	spl_out->scl_data.viewport.y += src.y;
+	ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0);
+	spl_out->scl_data.viewport_c.x += src.x / vpc_div;
+	spl_out->scl_data.viewport_c.y += src.y / vpc_div;
 }
-
 static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout)
 {
 	/*
@@ -671,7 +647,7 @@ static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout)
 	 * This may break with rotation, good thing we aren't mixing hw rotation and 3d
 	 */
 	if (spl_in->basic_in.mpc_combine_v) {
-		SPL_ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 ||
+		ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 ||
 			(spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM &&
 			 spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE));
 		if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM)
@@ -689,7 +665,6 @@ static void spl_clamp_viewport(struct spl_rect *viewport)
 	if (viewport->width < MIN_VIEWPORT_SIZE)
 		viewport->width = MIN_VIEWPORT_SIZE;
 }
-
 static bool spl_dscl_is_420_format(enum spl_pixel_format format)
 {
 	if (format == SPL_PIXEL_FORMAT_420BPP8 ||
@@ -698,7 +673,6 @@ static bool spl_dscl_is_420_format(enum spl_pixel_format format)
 	else
 		return false;
 }
-
 static bool spl_dscl_is_video_format(enum spl_pixel_format format)
 {
 	if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN
@@ -707,21 +681,17 @@ static bool spl_dscl_is_video_format(enum spl_pixel_format format)
 	else
 		return false;
 }
-
 static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in,
-		const struct spl_scaler_data *data,
-		bool enable_isharp, bool enable_easf)
+		const struct spl_scaler_data *data)
 {
-	const long long one = spl_fixpt_one.value;
+	const long long one = dc_fixpt_one.value;
 	enum spl_pixel_format pixel_format = spl_in->basic_in.format;
-	/* Bypass if ratio is 1:1 with no ISHARP or force scale on */
 	if (data->ratios.horz.value == one
 			&& data->ratios.vert.value == one
 			&& data->ratios.horz_c.value == one
 			&& data->ratios.vert_c.value == one
-			&& !spl_in->basic_out.always_scale
-			&& !enable_isharp)
+			&& !spl_in->basic_out.always_scale)
 		return SCL_MODE_SCALING_444_BYPASS;
 	if (!spl_dscl_is_420_format(pixel_format)) {
@@ -730,204 +700,69 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in,
 		else
 			return SCL_MODE_SCALING_444_RGB_ENABLE;
 	}
-
-	/* Bypass YUV if at 1:1 with no ISHARP or if doing 2:1 YUV
-	 * downscale without EASF
-	 */
-	if ((!enable_isharp) && (!enable_easf)) {
-		if (data->ratios.horz.value == one && data->ratios.vert.value == one)
-			return SCL_MODE_SCALING_420_LUMA_BYPASS;
-		if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
-			return SCL_MODE_SCALING_420_CHROMA_BYPASS;
-	}
+	if (data->ratios.horz.value == one && data->ratios.vert.value == one)
+		return SCL_MODE_SCALING_420_LUMA_BYPASS;
+	if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
+		return SCL_MODE_SCALING_420_CHROMA_BYPASS;
 	return SCL_MODE_SCALING_420_YCBCR_ENABLE;
 }
-
-static bool spl_choose_lls_policy(enum spl_pixel_format format,
-	enum spl_transfer_func_type tf_type,
-	enum spl_transfer_func_predefined tf_predefined_type,
-	enum linear_light_scaling *lls_pref)
-{
-	if (spl_is_yuv420(format)) {
-		*lls_pref = LLS_PREF_NO;
-		if ((tf_type == SPL_TF_TYPE_PREDEFINED) ||
-			(tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS))
-			return true;
-	} else { /* RGB or YUV444 */
-		if ((tf_type == SPL_TF_TYPE_PREDEFINED) ||
-			(tf_type == SPL_TF_TYPE_BYPASS)) {
-			*lls_pref = LLS_PREF_YES;
-			return true;
-		}
-	}
-	*lls_pref = LLS_PREF_NO;
-	return false;
-}
-
-/* Enable EASF ?*/
-static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch)
-{
-	int vratio = 0;
-	int hratio = 0;
-	bool skip_easf = false;
-	bool lls_enable_easf = true;
-
-	/*
-	 * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer
-	 * function to determine whether to use LINEAR or NONLINEAR scaling
-	 */
-	if (spl_in->lls_pref == LLS_PREF_DONT_CARE)
-		lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format,
-			spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type,
-			&spl_in->lls_pref);
-
-	vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert);
-	hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz);
-
-	if (!lls_enable_easf || spl_in->disable_easf)
-		skip_easf = true;
-
-	/*
-	 * No EASF support for downscaling > 2:1
-	 * EASF support for upscaling or downscaling up to 2:1
-	 */
-	if ((vratio > 2) || (hratio > 2))
-		skip_easf = true;
-
-	/* Check for linear scaling or EASF preferred */
-	if (spl_in->lls_pref != LLS_PREF_YES && !spl_in->prefer_easf)
-		skip_easf = true;
-
-	return skip_easf;
-}
-
-/* Check if video is in fullscreen mode */
-static bool spl_is_video_fullscreen(struct spl_in *spl_in)
-{
-	if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen)
-		return true;
-	return false;
-}
-
-static bool spl_get_isharp_en(struct spl_in *spl_in,
-	struct spl_scratch *spl_scratch)
-{
-	bool enable_isharp = false;
-	int vratio = 0;
-	int hratio = 0;
-	struct spl_taps taps = spl_scratch->scl_data.taps;
-	bool fullscreen = spl_is_video_fullscreen(spl_in);
-
-	vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert);
-	hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz);
-
-	/* Return if adaptive sharpness is disabled */
-	if (spl_in->adaptive_sharpness.enable == false)
-		return enable_isharp;
-
-	/* No iSHARP support for downscaling */
-	if (vratio > 1 || hratio > 1)
-		return enable_isharp;
-
-	// Scaling is up to 1:1 (no scaling) or upscaling
-
-	/*
-	 * Apply sharpness to all RGB surfaces and to
-	 * NV12/P010 surfaces if in fullscreen
-	 */
-	if (spl_is_yuv420(spl_in->basic_in.format) && !fullscreen)
-		return enable_isharp;
-
-	/*
-	 * Apply sharpness if supports horizontal taps 4,6 AND
-	 * vertical taps 3, 4, 6
-	 */
-	if ((taps.h_taps == 4 || taps.h_taps == 6) &&
-		(taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6))
-		enable_isharp = true;
-
-	return enable_isharp;
-}
-
 /* Calculate optimal number of taps */
 static bool spl_get_optimal_number_of_taps(
-	int max_downscale_src_width, struct spl_in *spl_in, struct spl_scratch *spl_scratch,
-	const struct spl_taps *in_taps, bool *enable_easf_v, bool *enable_easf_h,
-	bool *enable_isharp)
+	int max_downscale_src_width, struct spl_in *spl_in, struct spl_out *spl_out,
+	const struct spl_taps *in_taps)
 {
 	int num_part_y, num_part_c;
 	int max_taps_y, max_taps_c;
 	int min_taps_y, min_taps_c;
 	enum lb_memory_config lb_config;
-	bool skip_easf = false;
-	if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active &&
+	if (spl_out->scl_data.viewport.width > spl_out->scl_data.h_active &&
 		max_downscale_src_width != 0 &&
-		spl_scratch->scl_data.viewport.width > max_downscale_src_width)
+		spl_out->scl_data.viewport.width > max_downscale_src_width)
 		return false;
-
-	/* Check if we are using EASF or not */
-	skip_easf = enable_easf(spl_in, spl_scratch);
-
 	/*
 	 * Set default taps if none are provided
 	 * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling
 	 * taps = 4 for upscaling
 	 */
-	if (skip_easf) {
-		if (in_taps->h_taps == 0) {
-			if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz) > 1)
-				spl_scratch->scl_data.taps.h_taps = spl_min(2 * spl_fixpt_ceil(
-					spl_scratch->scl_data.ratios.horz), 8);
-			else
-				spl_scratch->scl_data.taps.h_taps = 4;
-		} else
-			spl_scratch->scl_data.taps.h_taps = in_taps->h_taps;
-		if (in_taps->v_taps == 0) {
-			if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1)
-				spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int(
-					spl_scratch->scl_data.ratios.vert, 2)), 8);
-			else
-				spl_scratch->scl_data.taps.v_taps = 4;
-		} else
-			spl_scratch->scl_data.taps.v_taps = in_taps->v_taps;
-		if (in_taps->v_taps_c == 0) {
-			if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1)
-				spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int(
-					spl_scratch->scl_data.ratios.vert_c, 2)), 8);
-			else
-				spl_scratch->scl_data.taps.v_taps_c = 4;
-		} else
-			spl_scratch->scl_data.taps.v_taps_c = in_taps->v_taps_c;
-		if (in_taps->h_taps_c == 0) {
-			if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz_c) > 1)
-				spl_scratch->scl_data.taps.h_taps_c = spl_min(2 * spl_fixpt_ceil(
-					spl_scratch->scl_data.ratios.horz_c), 8);
-			else
-				spl_scratch->scl_data.taps.h_taps_c = 4;
-		} else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1)
-			/* Only 1 and even h_taps_c are supported by hw */
-			spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1;
+	if (in_taps->h_taps == 0) {
+		if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1)
+			spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz), 8);
 		else
-			spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c;
-	} else {
-		if (spl_is_yuv420(spl_in->basic_in.format)) {
-			spl_scratch->scl_data.taps.h_taps = 6;
-			spl_scratch->scl_data.taps.v_taps = 6;
-			spl_scratch->scl_data.taps.h_taps_c = 4;
-			spl_scratch->scl_data.taps.v_taps_c = 4;
-		} else { /* RGB */
-			spl_scratch->scl_data.taps.h_taps = 6;
-			spl_scratch->scl_data.taps.v_taps = 6;
-			spl_scratch->scl_data.taps.h_taps_c = 6;
-			spl_scratch->scl_data.taps.v_taps_c = 6;
-		}
-	}
+			spl_out->scl_data.taps.h_taps = 4;
+	} else
+		spl_out->scl_data.taps.h_taps = in_taps->h_taps;
+	if (in_taps->v_taps == 0) {
+		if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1)
+			spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int(
+				spl_out->scl_data.ratios.vert, 2)), 8);
+		else
+			spl_out->scl_data.taps.v_taps = 4;
+	} else
+		spl_out->scl_data.taps.v_taps = in_taps->v_taps;
+	if (in_taps->v_taps_c == 0) {
+		if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1)
+			spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int(
+				spl_out->scl_data.ratios.vert_c, 2)), 8);
+		else
+			spl_out->scl_data.taps.v_taps_c = 4;
+	} else
+		spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c;
+	if (in_taps->h_taps_c == 0) {
+		if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1)
+			spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c), 8);
+		else
+			spl_out->scl_data.taps.h_taps_c = 4;
+	} else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1)
+		/* Only 1 and even h_taps_c are supported by hw */
+		spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1;
+	else
+		spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c;
 	/*Ensure we can support the requested number of vtaps*/
-	min_taps_y = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert);
-	min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c);
+	min_taps_y = dc_fixpt_ceil(spl_out->scl_data.ratios.vert);
+	min_taps_c = dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c);
 	/* Use LB_MEMORY_CONFIG_3 for 4:2:0 */
 	if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8)
@@ -936,16 +771,16 @@ static bool spl_get_optimal_number_of_taps(
 	else
 		lb_config = LB_MEMORY_CONFIG_0;
 	// Determine max vtap support by calculating how much line buffer can fit
-	spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_scratch->scl_data,
+	spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_out->scl_data,
 			lb_config, &num_part_y, &num_part_c);
 	/* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */
-	if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 2)
-		max_taps_y = num_part_y - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) - 2);
+	if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 2)
+		max_taps_y = num_part_y - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) - 2);
 	else
 		max_taps_y = num_part_y;
-	if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 2)
-		max_taps_c = num_part_c - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) - 2);
+	if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 2)
+		max_taps_c = num_part_c - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) - 2);
 	else
		max_taps_c = num_part_c;
@@ -954,108 +789,48 @@
 	else if (max_taps_c < min_taps_c)
 		return false;
-	if (spl_scratch->scl_data.taps.v_taps > max_taps_y)
-		spl_scratch->scl_data.taps.v_taps = max_taps_y;
+	if (spl_out->scl_data.taps.v_taps > max_taps_y)
+		spl_out->scl_data.taps.v_taps = max_taps_y;
-	if (spl_scratch->scl_data.taps.v_taps_c > max_taps_c)
-		spl_scratch->scl_data.taps.v_taps_c = max_taps_c;
+	if (spl_out->scl_data.taps.v_taps_c > max_taps_c)
+		spl_out->scl_data.taps.v_taps_c = max_taps_c;
+	if (spl_in->prefer_easf) {
+		// EASF can be enabled only for taps 3,4,6
+		// If optimal no of taps is 5, then set it to 4
+		// If optimal no of taps is 7 or 8, then set it to 6
+		if (spl_out->scl_data.taps.v_taps == 5)
+			spl_out->scl_data.taps.v_taps = 4;
+		if (spl_out->scl_data.taps.v_taps == 7 || spl_out->scl_data.taps.v_taps == 8)
+			spl_out->scl_data.taps.v_taps = 6;
-	if (!skip_easf) {
-		/*
-		 * RGB ( L + NL ) and Linear HDR support 6x6, 6x4, 6x3, 4x4, 4x3
-		 * NL YUV420 only supports 6x6, 6x4 for Y and 4x4 for UV
-		 *
-		 * If LB does not support 3, 4, or 6 taps, then disable EASF_V
-		 * and only enable EASF_H.  So for RGB, support 6x2, 4x2
-		 * and for NL YUV420, support 6x2 for Y and 4x2 for UV
-		 *
-		 * All other cases, have to disable EASF_V and EASF_H
-		 *
-		 * If optimal no of taps is 5, then set it to 4
-		 * If optimal no of taps is 7 or 8, then fine since max tap is 6
-		 *
-		 */
-		if (spl_scratch->scl_data.taps.v_taps == 5)
-			spl_scratch->scl_data.taps.v_taps = 4;
+		if (spl_out->scl_data.taps.v_taps_c == 5)
+			spl_out->scl_data.taps.v_taps_c = 4;
+		if (spl_out->scl_data.taps.v_taps_c == 7 || spl_out->scl_data.taps.v_taps_c == 8)
+			spl_out->scl_data.taps.v_taps_c = 6;
-		if (spl_scratch->scl_data.taps.v_taps_c == 5)
-			spl_scratch->scl_data.taps.v_taps_c = 4;
+		if (spl_out->scl_data.taps.h_taps == 5)
+			spl_out->scl_data.taps.h_taps = 4;
+		if (spl_out->scl_data.taps.h_taps == 7 || spl_out->scl_data.taps.h_taps == 8)
+			spl_out->scl_data.taps.h_taps = 6;
-		if (spl_scratch->scl_data.taps.h_taps == 5)
-			spl_scratch->scl_data.taps.h_taps = 4;
+		if (spl_out->scl_data.taps.h_taps_c == 5)
+			spl_out->scl_data.taps.h_taps_c = 4;
+		if (spl_out->scl_data.taps.h_taps_c == 7 || spl_out->scl_data.taps.h_taps_c == 8)
+			spl_out->scl_data.taps.h_taps_c = 6;
-		if (spl_scratch->scl_data.taps.h_taps_c == 5)
-			spl_scratch->scl_data.taps.h_taps_c = 4;
-
-		if (spl_is_yuv420(spl_in->basic_in.format)) {
-			if ((spl_scratch->scl_data.taps.h_taps <= 4) ||
-				(spl_scratch->scl_data.taps.h_taps_c <= 3)) {
-				*enable_easf_v = false;
-				*enable_easf_h = false;
-			} else if ((spl_scratch->scl_data.taps.v_taps <= 3) ||
-				(spl_scratch->scl_data.taps.v_taps_c <= 3)) {
-				*enable_easf_v = false;
-				*enable_easf_h = true;
-			} else {
-				*enable_easf_v = true;
-				*enable_easf_h = true;
-			}
-			SPL_ASSERT((spl_scratch->scl_data.taps.v_taps > 1) &&
-				(spl_scratch->scl_data.taps.v_taps_c > 1));
-		} else { /* RGB */
-			if (spl_scratch->scl_data.taps.h_taps <= 3) {
-				*enable_easf_v = false;
-				*enable_easf_h = false;
-			} else if (spl_scratch->scl_data.taps.v_taps < 3) {
-				*enable_easf_v = false;
-				*enable_easf_h = true;
-			} else {
-				*enable_easf_v = true;
-				*enable_easf_h = true;
-			}
-			SPL_ASSERT(spl_scratch->scl_data.taps.v_taps > 1);
-		}
-	} else {
-		*enable_easf_v = false;
-		*enable_easf_h = false;
-	} // end of if prefer_easf
-
-	/* Sharpener requires scaler to be enabled, including for 1:1
-	 * Check if ISHARP can be enabled
-	 * If ISHARP is not enabled, for 1:1, set taps to 1 and disable
-	 * EASF
-	 * For case of 2:1 YUV where chroma is 1:1, set taps to 1 if
-	 * EASF is not enabled
-	 */
-
-	*enable_isharp = spl_get_isharp_en(spl_in, spl_scratch);
-	if (!*enable_isharp && !spl_in->basic_out.always_scale) {
-		if ((IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) &&
-			(IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) {
-			spl_scratch->scl_data.taps.h_taps = 1;
-			spl_scratch->scl_data.taps.v_taps = 1;
-
-			if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))
-				spl_scratch->scl_data.taps.h_taps_c = 1;
-
-			if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))
-				spl_scratch->scl_data.taps.v_taps_c = 1;
-
-			*enable_easf_v = false;
-			*enable_easf_h = false;
-		} else {
-			if ((!*enable_easf_h) &&
-				(IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)))
-				spl_scratch->scl_data.taps.h_taps_c = 1;
-
-			if ((!*enable_easf_v) &&
-				(IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)))
-				spl_scratch->scl_data.taps.v_taps_c = 1;
-		}
+	if (!spl_in->basic_out.always_scale) {
+		if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz))
+			spl_out->scl_data.taps.h_taps = 1;
+		if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert))
+			spl_out->scl_data.taps.v_taps = 1;
+		if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c))
+			spl_out->scl_data.taps.h_taps_c = 1;
+		if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c))
+			spl_out->scl_data.taps.v_taps_c = 1;
 	}
 	return true;
 }
-
 static void spl_set_black_color_data(enum spl_pixel_format format,
 	struct scl_black_color *scl_black_color)
 {
@@ -1073,38 +848,38 @@ static void spl_set_black_color_data(enum spl_pixel_format format,
 static void spl_set_manual_ratio_init_data(struct dscl_prog_data *dscl_prog_data,
 		const struct spl_scaler_data *scl_data)
 {
-	struct spl_fixed31_32 bot;
+	struct fixed31_32 bot;
-	dscl_prog_data->ratios.h_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.horz) << 5;
-	dscl_prog_data->ratios.v_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.vert) << 5;
-	dscl_prog_data->ratios.h_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.horz_c) << 5;
-	dscl_prog_data->ratios.v_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.vert_c) << 5;
+	dscl_prog_data->ratios.h_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.horz) << 5;
+	dscl_prog_data->ratios.v_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.vert) << 5;
+	dscl_prog_data->ratios.h_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.horz_c) << 5;
+	dscl_prog_data->ratios.v_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.vert_c) << 5;
 	/*
 	 * 0.24 format for fraction, first five bits zeroed
 	 */
 	dscl_prog_data->init.h_filter_init_frac =
-		spl_fixpt_u0d19(scl_data->inits.h) << 5;
+		dc_fixpt_u0d19(scl_data->inits.h) << 5;
 	dscl_prog_data->init.h_filter_init_int =
-		spl_fixpt_floor(scl_data->inits.h);
+		dc_fixpt_floor(scl_data->inits.h);
 	dscl_prog_data->init.h_filter_init_frac_c =
-		spl_fixpt_u0d19(scl_data->inits.h_c) << 5;
+		dc_fixpt_u0d19(scl_data->inits.h_c) << 5;
 	dscl_prog_data->init.h_filter_init_int_c =
-		spl_fixpt_floor(scl_data->inits.h_c);
+		dc_fixpt_floor(scl_data->inits.h_c);
 	dscl_prog_data->init.v_filter_init_frac =
-		spl_fixpt_u0d19(scl_data->inits.v) << 5;
+		dc_fixpt_u0d19(scl_data->inits.v) << 5;
 	dscl_prog_data->init.v_filter_init_int =
-		spl_fixpt_floor(scl_data->inits.v);
+		dc_fixpt_floor(scl_data->inits.v);
 	dscl_prog_data->init.v_filter_init_frac_c =
-		spl_fixpt_u0d19(scl_data->inits.v_c) << 5;
+		dc_fixpt_u0d19(scl_data->inits.v_c) << 5;
 	dscl_prog_data->init.v_filter_init_int_c =
-		spl_fixpt_floor(scl_data->inits.v_c);
+		dc_fixpt_floor(scl_data->inits.v_c);
-	bot = spl_fixpt_add(scl_data->inits.v, scl_data->ratios.vert);
-	dscl_prog_data->init.v_filter_init_bot_frac = spl_fixpt_u0d19(bot) << 5;
-	dscl_prog_data->init.v_filter_init_bot_int = spl_fixpt_floor(bot);
-	bot = spl_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c);
-	dscl_prog_data->init.v_filter_init_bot_frac_c = spl_fixpt_u0d19(bot) << 5;
-	dscl_prog_data->init.v_filter_init_bot_int_c = spl_fixpt_floor(bot);
+	bot = dc_fixpt_add(scl_data->inits.v, scl_data->ratios.vert);
+	dscl_prog_data->init.v_filter_init_bot_frac = dc_fixpt_u0d19(bot) << 5;
+	dscl_prog_data->init.v_filter_init_bot_int = dc_fixpt_floor(bot);
+	bot = dc_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c);
+	dscl_prog_data->init.v_filter_init_bot_frac_c = dc_fixpt_u0d19(bot) << 5;
+	dscl_prog_data->init.v_filter_init_bot_int_c = dc_fixpt_floor(bot);
 }
@@ -1115,28 +890,79 @@ static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data,
 	dscl_prog_data->taps.v_taps_c = scl_data->taps.v_taps_c - 1;
 	dscl_prog_data->taps.h_taps_c = scl_data->taps.h_taps_c - 1;
 }
-
+static const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio)
+{
+	if (taps == 8)
+		return spl_get_filter_8tap_64p(ratio);
+	else if (taps == 7)
+		return spl_get_filter_7tap_64p(ratio);
+	else if (taps == 6)
+		return spl_get_filter_6tap_64p(ratio);
+	else if (taps == 5)
+		return spl_get_filter_5tap_64p(ratio);
+	else if (taps == 4)
+		return spl_get_filter_4tap_64p(ratio);
+	else if (taps == 3)
+		return spl_get_filter_3tap_64p(ratio);
+	else if (taps == 2)
+		return spl_get_filter_2tap_64p();
+	else if (taps == 1)
+		return NULL;
+	else {
+		/* should never happen, bug */
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+}
+static void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data,
+		const struct spl_scaler_data *data)
+{
+	dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p(
+			data->taps.h_taps, data->ratios.horz);
+	dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p(
+			data->taps.v_taps, data->ratios.vert);
+	dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p(
+			data->taps.h_taps_c, data->ratios.horz_c);
+	dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p(
+			data->taps.v_taps_c, data->ratios.vert_c);
+}
+#ifdef CONFIG_DRM_AMD_DC_FP
+static const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps)
+{
+	if ((taps == 3) || (taps == 4) || (taps == 6))
+		return spl_get_filter_isharp_bs_4tap_64p();
+	else {
+		/* should never happen, bug */
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+}
+static void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data,
+		const struct spl_scaler_data *data)
+{
+	dscl_prog_data->filter_blur_scale_h = spl_dscl_get_blur_scale_coeffs_64p(
+			data->taps.h_taps);
+	dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p(
+			data->taps.v_taps);
+}
+#endif
 /* Populate dscl prog data structure from scaler data calculated by SPL */
-static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *spl_scratch,
-		struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, bool enable_isharp)
+static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out)
 {
 	struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data;
-	const struct spl_scaler_data *data = &spl_scratch->scl_data;
+	const struct spl_scaler_data *data = &spl_out->scl_data;
 	struct scl_black_color *scl_black_color = &dscl_prog_data->scl_black_color;
-	bool enable_easf = enable_easf_v || enable_easf_h;
-
 	// Set values for recout
-	dscl_prog_data->recout = spl_scratch->scl_data.recout;
+	dscl_prog_data->recout = spl_out->scl_data.recout;
 	// Set values for MPC Size
-	dscl_prog_data->mpc_size.width = spl_scratch->scl_data.h_active;
-	dscl_prog_data->mpc_size.height = spl_scratch->scl_data.v_active;
+	dscl_prog_data->mpc_size.width = spl_out->scl_data.h_active;
+	dscl_prog_data->mpc_size.height = spl_out->scl_data.v_active;
 	// SCL_MODE - Set SCL_MODE data
-	dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data, enable_isharp,
-		enable_easf);
+	dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data);
 	// SCL_BLACK_COLOR
 	spl_set_black_color_data(spl_in->basic_in.format, scl_black_color);
@@ -1147,101 +973,103 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *sp
 	// Set HTaps/VTaps
 	spl_set_taps_data(dscl_prog_data, data);
 	// Set viewport
-	dscl_prog_data->viewport = spl_scratch->scl_data.viewport;
+
dscl_prog_data->viewport = spl_out->scl_data.viewport; // Set viewport_c - dscl_prog_data->viewport_c = spl_scratch->scl_data.viewport_c; + dscl_prog_data->viewport_c = spl_out->scl_data.viewport_c; // Set filters data - spl_set_filters_data(dscl_prog_data, data, enable_easf_v, enable_easf_h); + spl_set_filters_data(dscl_prog_data, data); } - -/* Set EASF data */ -static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v, - bool enable_easf_h, enum linear_light_scaling lls_pref, - enum spl_pixel_format format, enum system_setup setup) +/* Enable EASF ?*/ +static bool enable_easf(int scale_ratio, int taps, + enum linear_light_scaling lls_pref, bool prefer_easf) { - struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; + // Is downscaling > 6:1 ? + if (scale_ratio > 6) { + // END - No EASF support for downscaling > 6:1 + return false; + } + // Is upscaling or downscaling up to 2:1? + if (scale_ratio <= 2) { + // Is linear scaling or EASF preferred? + if (lls_pref == LLS_PREF_YES || prefer_easf) { + // LB support taps 3, 4, 6 + if (taps == 3 || taps == 4 || taps == 6) { + // END - EASF supported + return true; + } + } + } + // END - EASF not supported + return false; +} +/* Set EASF data */ +static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, + bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, + enum spl_pixel_format format) +{ + if (spl_is_yuv420(format)) /* TODO: 0 = RGB, 1 = YUV */ + dscl_prog_data->easf_matrix_mode = 1; + else + dscl_prog_data->easf_matrix_mode = 0; + if (enable_easf_v) { dscl_prog_data->easf_v_en = true; dscl_prog_data->easf_v_ring = 0; - dscl_prog_data->easf_v_sharp_factor = 0; + dscl_prog_data->easf_v_sharp_factor = 1; dscl_prog_data->easf_v_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - /* 2-bit, BF3 chroma mode correction calculation mode */ - dscl_prog_data->easf_v_bf3_mode = spl_get_v_bf3_mode( - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ minCoef ]*/ + dscl_prog_data->easf_v_bf3_mode = 2; // 2-bit, BF3 chroma mode correction calculation mode + dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt = - spl_get_3tap_dntilt_uptilt_offset(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ upTiltMaxVal ]*/ + 0x9F00;// FP1.5.10 [minCoef] (-0.036109167214271) dscl_prog_data->easf_v_ringest_3tap_uptilt_max = - spl_get_3tap_uptilt_maxval(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ dnTiltSlope ]*/ + 0x24FE; // FP1.5.10 [upTiltMaxVal] ( 0.904556445553545) dscl_prog_data->easf_v_ringest_3tap_dntilt_slope = - spl_get_3tap_dntilt_slope(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ upTilt1Slope ]*/ + 0x3940; // FP1.5.10 [dnTiltSlope] ( 0.910488988173371) dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope = - spl_get_3tap_uptilt1_slope(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ upTilt2Slope ]*/ + 0x359C; // FP1.5.10 [upTilt1Slope] ( 0.125620179040899) dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope = - spl_get_3tap_uptilt2_slope(spl_scratch->scl_data.taps.v_taps, - 
spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ upTilt2Offset ]*/ + 0x359C; // FP1.5.10 [upTilt2Slope] ( 0.006786817723568) dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset = - spl_get_3tap_uptilt2_offset(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ + 0x9F00; // FP1.5.10 [upTilt2Offset] (-0.006139059716651) dscl_prog_data->easf_v_ringest_eventap_reduceg1 = - spl_get_reducer_gain4(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ + 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] dscl_prog_data->easf_v_ringest_eventap_reduceg2 = - spl_get_reducer_gain6(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ + 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] dscl_prog_data->easf_v_ringest_eventap_gain1 = - spl_get_gainRing4(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ + 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 dscl_prog_data->easf_v_ringest_eventap_gain2 = - spl_get_gainRing6(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); + 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 1 dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_minb = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + 
dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { - dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control - - dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 - dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 - dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 - dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 - dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 - dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 - dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 - dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1262,41 +1090,13 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 } else { - dscl_prog_data->easf_v_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control - 
dscl_prog_data->easf_v_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_v_bf2_roc_gain = 14; // U2.2, Rate Of Change control - - dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 - dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 - dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 - dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 - dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 - dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 - dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 - dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1315,11 +1115,11 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 } } else dscl_prog_data->easf_v_en = false; @@ -1327,63 +1127,52 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s if (enable_easf_h) { dscl_prog_data->easf_h_en = true; dscl_prog_data->easf_h_ring = 0; - dscl_prog_data->easf_h_sharp_factor = 0; + dscl_prog_data->easf_h_sharp_factor = 1; dscl_prog_data->easf_h_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_h_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - 
/* 2-bit, BF3 chroma mode correction calculation mode */ - dscl_prog_data->easf_h_bf3_mode = spl_get_h_bf3_mode( - spl_scratch->scl_data.recip_ratios.horz); - /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ + dscl_prog_data->easf_h_bf3_mode = + 2; // 2-bit, BF3 chroma mode correction calculation mode + dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control dscl_prog_data->easf_h_ringest_eventap_reduceg1 = - spl_get_reducer_gain4(spl_scratch->scl_data.taps.h_taps, - spl_scratch->scl_data.recip_ratios.horz); - /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ + 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] dscl_prog_data->easf_h_ringest_eventap_reduceg2 = - spl_get_reducer_gain6(spl_scratch->scl_data.taps.h_taps, - spl_scratch->scl_data.recip_ratios.horz); - /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ + 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] dscl_prog_data->easf_h_ringest_eventap_gain1 = - spl_get_gainRing4(spl_scratch->scl_data.taps.h_taps, - spl_scratch->scl_data.recip_ratios.horz); - /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ + 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 dscl_prog_data->easf_h_ringest_eventap_gain2 = - spl_get_gainRing6(spl_scratch->scl_data.taps.h_taps, - spl_scratch->scl_data.recip_ratios.horz); + 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1 dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_minb = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==1 + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // 
S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { - dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control - - dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 - dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 - dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 - dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 - dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 - dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 - dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 - dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1401,40 +1190,12 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; 
// S0.6, BF3 Base PWL Segment 5 } else { - dscl_prog_data->easf_h_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_h_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_h_bf2_roc_gain = 14; // U2.2, Rate Of Change control - - dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 - dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 - dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 - dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 - dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 - dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 - dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 - dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1452,36 +1213,25 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 } // if (lls_pref == LLS_PREF_YES) } else dscl_prog_data->easf_h_en = false; if (lls_pref == LLS_PREF_YES) { dscl_prog_data->easf_ltonl_en = 1; // Linear input - if (setup == HDR_L) { - dscl_prog_data->easf_matrix_c0 = - 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) - 
dscl_prog_data->easf_matrix_c1 = - 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) - dscl_prog_data->easf_matrix_c2 = - 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) - dscl_prog_data->easf_matrix_c3 = - 0x0; // fp1.5.10, C3 coefficient - } else { // SDR_L - dscl_prog_data->easf_matrix_c0 = - 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720) - dscl_prog_data->easf_matrix_c1 = - 0x55DC; // fp1.5.10, C1 coefficient (LN_rec709: 0.7152 * (2^14)/125 = 93.74269440) - dscl_prog_data->easf_matrix_c2 = - 0x48BB; // fp1.5.10, C2 coefficient (LN_rec709: 0.0722 * (2^14)/125 = 9.46339840) - dscl_prog_data->easf_matrix_c3 = - 0x0; // fp1.5.10, C3 coefficient - } + dscl_prog_data->easf_matrix_c0 = + 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) + dscl_prog_data->easf_matrix_c1 = + 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) + dscl_prog_data->easf_matrix_c2 = + 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient } else { dscl_prog_data->easf_ltonl_en = 0; // Non-Linear input dscl_prog_data->easf_matrix_c0 = @@ -1493,43 +1243,27 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s dscl_prog_data->easf_matrix_c3 = 0x0; // fp1.5.10, C3 coefficient } - - if (spl_is_yuv420(format)) { /* TODO: 0 = RGB, 1 = YUV */ - dscl_prog_data->easf_matrix_mode = 1; - /* - * 2-bit, BF3 chroma mode correction calculation mode - * Needs to be disabled for YUV420 mode - * Override lookup value - */ - dscl_prog_data->easf_v_bf3_mode = 0; - dscl_prog_data->easf_h_bf3_mode = 0; - } else - dscl_prog_data->easf_matrix_mode = 0; - } - /*Set isharp noise detection */ -static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) +static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data) { // ISHARP_NOISEDET_MODE // 0: 3x5 as VxH // 1: 4x5 as VxH // 2: // 3: 5x5 as VxH - if (data->taps.v_taps == 6) - dscl_prog_data->isharp_noise_det.mode = 3; - else if (data->taps.v_taps == 4) - dscl_prog_data->isharp_noise_det.mode = 1; - else if (data->taps.v_taps == 3) - dscl_prog_data->isharp_noise_det.mode = 0; + if (dscl_prog_data->taps.v_taps == 6) + dscl_prog_data->isharp_noise_det.mode = 3; // ISHARP_NOISEDET_MODE + else if (dscl_prog_data->taps.h_taps == 4) + dscl_prog_data->isharp_noise_det.mode = 1; // ISHARP_NOISEDET_MODE + else if (dscl_prog_data->taps.h_taps == 3) + dscl_prog_data->isharp_noise_det.mode = 0; // ISHARP_NOISEDET_MODE }; /* Set Sharpener data */ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, - const struct spl_scaler_data *data, struct spl_fixed31_32 ratio, - enum system_setup setup) + const struct spl_scaler_data *data) { /* Turn off sharpener if not required */ if (!enable_isharp) { @@ -1538,12 +1272,10 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } dscl_prog_data->isharp_en = 1; // ISHARP_EN + dscl_prog_data->isharp_noise_det.enable = 1; // ISHARP_NOISEDET_EN // Set ISHARP_NOISEDET_MODE if htaps = 6-tap - if (data->taps.h_taps == 6) { - dscl_prog_data->isharp_noise_det.enable = 1; /* ISHARP_NOISEDET_EN */ - spl_set_isharp_noise_det_mode(dscl_prog_data, data); /* 
ISHARP_NOISEDET_MODE */ - } else - dscl_prog_data->isharp_noise_det.enable = 0; // ISHARP_NOISEDET_EN + if (dscl_prog_data->taps.h_taps == 6) + spl_set_isharp_noise_det_mode(dscl_prog_data); // ISHARP_NOISEDET_MODE // Program noise detection threshold dscl_prog_data->isharp_noise_det.uthreshold = 24; // ISHARP_NOISEDET_UTHRE dscl_prog_data->isharp_noise_det.dthreshold = 4; // ISHARP_NOISEDET_DTHRE @@ -1552,67 +1284,50 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->isharp_noise_det.pwl_end_in = 13; // ISHARP_NOISEDET_PWL_END_IN dscl_prog_data->isharp_noise_det.pwl_slope = 1623; // ISHARP_NOISEDET_PWL_SLOPE - if (lls_pref == LLS_PREF_NO) /* ISHARP_FMT_MODE */ + if ((lls_pref == LLS_PREF_NO) && !spl_is_yuv420(format)) /* ISHARP_FMT_MODE */ dscl_prog_data->isharp_fmt.mode = 1; else dscl_prog_data->isharp_fmt.mode = 0; dscl_prog_data->isharp_fmt.norm = 0x3C00; // ISHARP_FMT_NORM dscl_prog_data->isharp_lba.mode = 0; // ISHARP_LBA_MODE - if (setup == SDR_L) { - // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 - dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[0] = 62; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 - dscl_prog_data->isharp_lba.in_seg[1] = 130; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 - dscl_prog_data->isharp_lba.in_seg[2] = 312; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[2] = 0x1D9; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -39 - // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 - dscl_prog_data->isharp_lba.in_seg[3] = 520; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 - dscl_prog_data->isharp_lba.in_seg[4] = 520; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 - dscl_prog_data->isharp_lba.in_seg[5] = 520; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format - } else { - // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 - dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. 
SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 - dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 - dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[2] = 0x1EC; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -20 - // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 - dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 - dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 - dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = -20; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. 
SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + switch (adp_sharpness.sharpness) { + case SHARPNESS_LOW: + dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_0p5x(); + break; + case SHARPNESS_MID: + dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_1p0x(); + break; + case SHARPNESS_HIGH: + dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_2p0x(); + break; + default: + BREAK_TO_DEBUGGER(); } - spl_build_isharp_1dlut_from_reference_curve(ratio, setup); - dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut( - adp_sharpness.sharpness); - // Program the nldelta soft clip values if (lls_pref == LLS_PREF_YES) { dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */ @@ -1631,7 +1346,62 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } // Set the values as per lookup table +#ifdef CONFIG_DRM_AMD_DC_FP spl_set_blur_scale_data(dscl_prog_data, data); +#endif +} +static bool spl_get_isharp_en(struct adaptive_sharpness adp_sharpness, + int vscale_ratio, int hscale_ratio, struct spl_taps taps, + enum spl_pixel_format format) +{ + bool enable_isharp = false; + + if (adp_sharpness.enable == false) + return enable_isharp; // Return if adaptive sharpness is disabled + // Is downscaling ? + if (vscale_ratio > 1 || hscale_ratio > 1) { + // END - No iSHARP support for downscaling + return enable_isharp; + } + // Scaling is up to 1:1 (no scaling) or upscaling + + /* Only apply sharpness to NV12 and not P010 */ + if (format != SPL_PIXEL_FORMAT_420BPP8) + return enable_isharp; + + // LB support horizontal taps 4,6 or vertical taps 3, 4, 6 + if (taps.h_taps == 4 || taps.h_taps == 6 || + taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6) { + // END - iSHARP supported + enable_isharp = true; + } + return enable_isharp; +} + +static bool spl_choose_lls_policy(enum spl_pixel_format format, + enum spl_transfer_func_type tf_type, + enum spl_transfer_func_predefined tf_predefined_type, + enum linear_light_scaling *lls_pref) +{ + if (spl_is_yuv420(format)) { + *lls_pref = LLS_PREF_NO; + if ((tf_type == SPL_TF_TYPE_PREDEFINED) || (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) + return true; + } else { /* RGB or YUV444 */ + if (tf_type == SPL_TF_TYPE_PREDEFINED) { + if ((tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG) || + (tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG12)) + *lls_pref = LLS_PREF_NO; + else + *lls_pref = LLS_PREF_YES; + return true; + } else if (tf_type == SPL_TF_TYPE_BYPASS) { + *lls_pref = LLS_PREF_YES; + return true; + } + } + *lls_pref = LLS_PREF_NO; + return false; } /* Calculate scaler parameters */ @@ -1640,71 +1410,67 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool res = false; bool enable_easf_v = false; bool enable_easf_h = false; + bool lls_enable_easf = true; int vratio = 0; int hratio = 0; - struct spl_scratch spl_scratch; - struct spl_fixed31_32 isharp_scale_ratio; - enum system_setup setup; - bool enable_isharp = false; - const struct spl_scaler_data *data = &spl_scratch.scl_data; - - memset(&spl_scratch, 0, sizeof(struct spl_scratch)); - spl_scratch.scl_data.h_active = spl_in->h_active; - spl_scratch.scl_data.v_active = spl_in->v_active; + const struct spl_scaler_data *data = &spl_out->scl_data; // All SPL calls /* 
recout calculation */ /* depends on h_active */ - spl_calculate_recout(spl_in, &spl_scratch, spl_out); + spl_calculate_recout(spl_in, spl_out); /* depends on pixel format */ - spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out); + spl_calculate_scaling_ratios(spl_in, spl_out); /* depends on scaling ratios and recout, does not calculate offset yet */ - spl_calculate_viewport_size(spl_in, &spl_scratch); + spl_calculate_viewport_size(spl_in, spl_out); res = spl_get_optimal_number_of_taps( spl_in->basic_out.max_downscale_src_width, spl_in, - &spl_scratch, &spl_in->scaling_quality, &enable_easf_v, - &enable_easf_h, &enable_isharp); + spl_out, &spl_in->scaling_quality); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario */ if (res) - spl_calculate_inits_and_viewports(spl_in, &spl_scratch); + spl_calculate_inits_and_viewports(spl_in, spl_out); // Handle 3d recout - spl_handle_3d_recout(spl_in, &spl_scratch.scl_data.recout); + spl_handle_3d_recout(spl_in, &spl_out->scl_data.recout); // Clamp - spl_clamp_viewport(&spl_scratch.scl_data.viewport); + spl_clamp_viewport(&spl_out->scl_data.viewport); if (!res) return res; - // Save all calculated parameters in dscl_prog_data structure to program hw registers - spl_set_dscl_prog_data(spl_in, &spl_scratch, spl_out, enable_easf_v, enable_easf_h, enable_isharp); + /* + * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer + * function to determine whether to use LINEAR or NONLINEAR scaling + */ + if (spl_in->lls_pref == LLS_PREF_DONT_CARE) + lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, + spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, + &spl_in->lls_pref); - if (spl_in->lls_pref == LLS_PREF_YES) { - if (spl_in->is_hdr_on) - setup = HDR_L; - else - setup = SDR_L; + // Save all calculated parameters in dscl_prog_data structure to program hw registers + spl_set_dscl_prog_data(spl_in, spl_out); + + vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + if (!lls_enable_easf || spl_in->disable_easf) { + enable_easf_v = false; + enable_easf_h = false; } else { - if (spl_in->is_hdr_on) - setup = HDR_NL; - else - setup = SDR_NL; + /* Enable EASF on vertical? */ + enable_easf_v = enable_easf(vratio, spl_out->scl_data.taps.v_taps, spl_in->lls_pref, spl_in->prefer_easf); + /* Enable EASF on horizontal? 
*/ + enable_easf_h = enable_easf(hratio, spl_out->scl_data.taps.h_taps, spl_in->lls_pref, spl_in->prefer_easf); } // Set EASF - spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, - spl_in->basic_in.format, setup); + spl_set_easf_data(spl_out->dscl_prog_data, enable_easf_v, enable_easf_h, spl_in->lls_pref, + spl_in->basic_in.format); // Set iSHARP - vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert); - hratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.horz); - if (vratio <= hratio) - isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.vert; - else - isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz; - + bool enable_isharp = spl_get_isharp_en(spl_in->adaptive_sharpness, vratio, hratio, + spl_out->scl_data.taps, spl_in->basic_in.format); spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, - spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); + spl_in->lls_pref, spl_in->basic_in.format, data); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c deleted file mode 100644 index 99238644e0a1..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dc_spl_filters.h" - -void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, - uint16_t *s1_12_filter, int num_taps) -{ - int num_entries = NUM_PHASES_COEFF * num_taps; - int i; - - for (i = 0; i < num_entries; i++) - *(s1_12_filter + i) = *(s1_10_filter + i) * 4; -} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h deleted file mode 100644 index 20439cdbdb10..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: MIT */ - -/* Copyright 2024 Advanced Micro Devices, Inc. */ - -#ifndef __DC_SPL_FILTERS_H__ -#define __DC_SPL_FILTERS_H__ - -#include "dc_spl_types.h" - -#define NUM_PHASES_COEFF 33 - -void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, - uint16_t *s1_12_filter, int num_taps); - -#endif /* __DC_SPL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index d483f259512e..a5d9a6223d06 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -2,9 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
-#include "dc_spl_types.h" -#include "spl_debug.h" -#include "dc_spl_filters.h" #include "dc_spl_isharp_filters.h" //======================================== @@ -233,53 +230,6 @@ static const uint32_t filter_isharp_1D_lut_2p0x[32] = { 0x080B0D0E, 0x00020406, }; -//======================================== -// Delta Gain 1DLUT -// LUT content is packed as 4-bytes into one DWORD/entry -// A_start = 0.000000 -// A_end = 10.000000 -// A_gain = 3.000000 -// B_start = 11.000000 -// B_end = 127.000000 -// C_start = 40.000000 -// C_end = 127.000000 -//======================================== -static const uint32_t filter_isharp_1D_lut_3p0x[32] = { -0x03010000, -0x0F0B0805, -0x211E1813, -0x2B292624, -0x3533302E, -0x3E3C3A37, -0x46444240, -0x4D4B4A48, -0x5352504F, -0x59575655, -0x5D5C5B5A, -0x61605F5E, -0x64646362, -0x66666565, -0x68686767, -0x68686868, -0x68686868, -0x67676868, -0x65656666, -0x62636464, -0x5E5F6061, -0x5A5B5C5D, -0x55565759, -0x4F505253, -0x484A4B4D, -0x40424446, -0x373A3C3E, -0x2E303335, -0x2426292B, -0x191B1E21, -0x0D101316, -0x0003060A, -}; - -//======================================== // Wide scaler coefficients //======================================================== // gen_scaler_coeffs.m @@ -334,7 +284,7 @@ static const uint16_t filter_isharp_wide_6tap_64p[198] = { // Blur & Scale LPF // S1.10 //======================================================== -static const uint16_t filter_isharp_bs_4tap_in_6_64p[198] = { +static const uint16_t filter_isharp_bs_4tap_64p[198] = { 0x0000, 0x00E5, 0x0237, 0x00E4, 0x0000, 0x0000, 0x0000, 0x00DE, 0x0237, 0x00EB, 0x0000, 0x0000, 0x0000, 0x00D7, 0x0236, 0x00F2, 0x0001, 0x0000, @@ -369,228 +319,6 @@ static const uint16_t filter_isharp_bs_4tap_in_6_64p[198] = { 0x0000, 0x003B, 0x01CF, 0x01C2, 0x0034, 0x0000, 0x0000, 0x0037, 0x01C9, 0x01C9, 0x0037, 0x0000 }; -//======================================================== -// gen_BlurScale_coeffs.m -// 25-Apr-2022 -// 4 -// 64 -// Blur & Scale LPF -// S1.10 -//======================================================== -static const uint16_t filter_isharp_bs_4tap_64p[132] = { -0x00E5, 0x0237, 0x00E4, 0x0000, -0x00DE, 0x0237, 0x00EB, 0x0000, -0x00D7, 0x0236, 0x00F2, 0x0001, -0x00D0, 0x0235, 0x00FA, 0x0001, -0x00C9, 0x0234, 0x0101, 0x0002, -0x00C2, 0x0233, 0x0108, 0x0003, -0x00BB, 0x0232, 0x0110, 0x0003, -0x00B5, 0x0230, 0x0117, 0x0004, -0x00AE, 0x022E, 0x011F, 0x0005, -0x00A8, 0x022C, 0x0126, 0x0006, -0x00A2, 0x022A, 0x012D, 0x0007, -0x009C, 0x0228, 0x0134, 0x0008, -0x0096, 0x0225, 0x013C, 0x0009, -0x0090, 0x0222, 0x0143, 0x000B, -0x008A, 0x021F, 0x014B, 0x000C, -0x0085, 0x021C, 0x0151, 0x000E, -0x007F, 0x0218, 0x015A, 0x000F, -0x007A, 0x0215, 0x0160, 0x0011, -0x0074, 0x0211, 0x0168, 0x0013, -0x006F, 0x020D, 0x016F, 0x0015, -0x006A, 0x0209, 0x0176, 0x0017, -0x0065, 0x0204, 0x017E, 0x0019, -0x0060, 0x0200, 0x0185, 0x001B, -0x005C, 0x01FB, 0x018C, 0x001D, -0x0057, 0x01F6, 0x0193, 0x0020, -0x0053, 0x01F1, 0x019A, 0x0022, -0x004E, 0x01EC, 0x01A1, 0x0025, -0x004A, 0x01E6, 0x01A8, 0x0028, -0x0046, 0x01E1, 0x01AF, 0x002A, -0x0042, 0x01DB, 0x01B6, 0x002D, -0x003F, 0x01D5, 0x01BB, 0x0031, -0x003B, 0x01CF, 0x01C2, 0x0034, -0x0037, 0x01C9, 0x01C9, 0x0037, -}; -//======================================================== -// gen_BlurScale_coeffs.m -// 09-Jun-2022 -// 3 -// 64 -// Blur & Scale LPF -// S1.10 -//======================================================== -static const uint16_t filter_isharp_bs_3tap_64p[99] = { -0x0200, 0x0200, 0x0000, -0x01F6, 0x0206, 0x0004, -0x01EC, 0x020B, 0x0009, -0x01E2, 0x0211, 
0x000D, -0x01D8, 0x0216, 0x0012, -0x01CE, 0x021C, 0x0016, -0x01C4, 0x0221, 0x001B, -0x01BA, 0x0226, 0x0020, -0x01B0, 0x022A, 0x0026, -0x01A6, 0x022F, 0x002B, -0x019C, 0x0233, 0x0031, -0x0192, 0x0238, 0x0036, -0x0188, 0x023C, 0x003C, -0x017E, 0x0240, 0x0042, -0x0174, 0x0244, 0x0048, -0x016A, 0x0248, 0x004E, -0x0161, 0x024A, 0x0055, -0x0157, 0x024E, 0x005B, -0x014D, 0x0251, 0x0062, -0x0144, 0x0253, 0x0069, -0x013A, 0x0256, 0x0070, -0x0131, 0x0258, 0x0077, -0x0127, 0x025B, 0x007E, -0x011E, 0x025C, 0x0086, -0x0115, 0x025E, 0x008D, -0x010B, 0x0260, 0x0095, -0x0102, 0x0262, 0x009C, -0x00F9, 0x0263, 0x00A4, -0x00F0, 0x0264, 0x00AC, -0x00E7, 0x0265, 0x00B4, -0x00DF, 0x0264, 0x00BD, -0x00D6, 0x0265, 0x00C5, -0x00CD, 0x0266, 0x00CD, -}; - -/* Converted Blur & Scale coeff tables from S1.10 to S1.12 */ -static uint16_t filter_isharp_bs_4tap_in_6_64p_s1_12[198]; -static uint16_t filter_isharp_bs_4tap_64p_s1_12[132]; -static uint16_t filter_isharp_bs_3tap_64p_s1_12[99]; - -struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_sdr_nl[3][6] = { - { /* LOW */ - {1125, 1000, 75, 100}, - {11, 10, 6, 10}, - {1075, 1000, 45, 100}, - {105, 100, 3, 10}, - {1025, 1000, 15, 100}, - {1, 1, 0, 1}, - }, - { /* MID */ - {1125, 1000, 2, 1}, - {11, 10, 175, 100}, - {1075, 1000, 15, 10}, - {105, 100, 125, 100}, - {1025, 1000, 1, 1}, - {1, 1, 75, 100}, - }, - { /* HIGH */ - {1125, 1000, 35, 10}, - {11, 10, 32, 10}, - {1075, 1000, 29, 10}, - {105, 100, 26, 10}, - {1025, 1000, 23, 10}, - {1, 1, 2, 1}, - }, -}; - -struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_sdr_l[3][6] = { - { /* LOW */ - {1125, 1000, 75, 100}, - {11, 10, 6, 10}, - {1075, 1000, 45, 100}, - {105, 100, 3, 10}, - {1025, 1000, 15, 100}, - {1, 1, 0, 1}, - }, - { /* MID */ - {1125, 1000, 15, 10}, - {11, 10, 135, 100}, - {1075, 1000, 12, 10}, - {105, 100, 105, 100}, - {1025, 1000, 9, 10}, - {1, 1, 75, 100}, - }, - { /* HIGH */ - {1125, 1000, 25, 10}, - {11, 10, 23, 10}, - {1075, 1000, 21, 10}, - {105, 100, 19, 10}, - {1025, 1000, 17, 10}, - {1, 1, 15, 10}, - }, -}; - -struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_hdr_nl[3][6] = { - { /* LOW */ - {1125, 1000, 5, 10}, - {11, 10, 4, 10}, - {1075, 1000, 3, 10}, - {105, 100, 2, 10}, - {1025, 1000, 1, 10}, - {1, 1, 0, 1}, - }, - { /* MID */ - {1125, 1000, 1, 1}, - {11, 10, 9, 10}, - {1075, 1000, 8, 10}, - {105, 100, 7, 10}, - {1025, 1000, 6, 10}, - {1, 1, 5, 10}, - }, - { /* HIGH */ - {1125, 1000, 15, 10}, - {11, 10, 14, 10}, - {1075, 1000, 13, 10}, - {105, 100, 12, 10}, - {1025, 1000, 11, 10}, - {1, 1, 1, 1}, - }, -}; - -struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_hdr_l[3][6] = { - { /* LOW */ - {1125, 1000, 75, 100}, - {11, 10, 6, 10}, - {1075, 1000, 45, 100}, - {105, 100, 3, 10}, - {1025, 1000, 15, 100}, - {1, 1, 0, 1}, - }, - { /* MID */ - {1125, 1000, 15, 10}, - {11, 10, 135, 100}, - {1075, 1000, 12, 10}, - {105, 100, 105, 100}, - {1025, 1000, 9, 10}, - {1, 1, 75, 100}, - }, - { /* HIGH */ - {1125, 1000, 25, 10}, - {11, 10, 23, 10}, - {1075, 1000, 21, 10}, - {105, 100, 19, 10}, - {1025, 1000, 17, 10}, - {1, 1, 15, 10}, - }, -}; - -/* Pre-generated 1DLUT for LOW for given setup and sharpness level */ -uint32_t filter_isharp_1D_lut_pregen[3][32] = { - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 
0, 0, 0, 0, - }, -}; - const uint32_t *spl_get_filter_isharp_1D_lut_0(void) { return filter_isharp_1D_lut_0; @@ -611,160 +339,11 @@ const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void) { return filter_isharp_1D_lut_2p0x; } -const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void) -{ - return filter_isharp_1D_lut_3p0x; -} const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void) { return filter_isharp_wide_6tap_64p; } -uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void) +const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) { - return filter_isharp_bs_4tap_in_6_64p_s1_12; + return filter_isharp_bs_4tap_64p; } -uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) -{ - return filter_isharp_bs_4tap_64p_s1_12; -} -uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) -{ - return filter_isharp_bs_3tap_64p_s1_12; -} - -void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup) -{ - uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; - struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level, ratio_level; - int i, j; - struct scale_ratio_to_sharpness_level_lookup *setup_lookup_ptr; - int num_sharp_ramp_levels; - int size_1dlut; - int sharp_calc_int; - uint32_t filter_pregen_store[32]; - - /* - * Given scaling ratio and current system setup, build pregenerated - * 1DLUT tables for three sharpness levels - LOW, MID, HIGH - */ - for (i = 0; i < 3; i++) { - /* - * Based on setup ( HDR/SDR, L/NL ), get base scale ratio to - * sharpness curve - */ - switch (setup) { - case HDR_L: - setup_lookup_ptr = scale_to_sharp_hdr_l[i]; - num_sharp_ramp_levels = sizeof(scale_to_sharp_hdr_l[i])/ - sizeof(struct scale_ratio_to_sharpness_level_lookup); - break; - case HDR_NL: - setup_lookup_ptr = scale_to_sharp_hdr_nl[i]; - num_sharp_ramp_levels = sizeof(scale_to_sharp_hdr_nl[i])/ - sizeof(struct scale_ratio_to_sharpness_level_lookup); - break; - case SDR_L: - setup_lookup_ptr = scale_to_sharp_sdr_l[i]; - num_sharp_ramp_levels = sizeof(scale_to_sharp_sdr_l[i])/ - sizeof(struct scale_ratio_to_sharpness_level_lookup); - break; - case SDR_NL: - default: - setup_lookup_ptr = scale_to_sharp_sdr_nl[i]; - num_sharp_ramp_levels = sizeof(scale_to_sharp_sdr_nl[i])/ - sizeof(struct scale_ratio_to_sharpness_level_lookup); - break; - } - - /* - * Compare desired scaling ratio and find adjusted sharpness from - * base scale ratio to sharpness curve - */ - j = 0; - sharp_level = spl_fixpt_zero; - while (j < num_sharp_ramp_levels) { - ratio_level = spl_fixpt_from_fraction(setup_lookup_ptr->ratio_numer, - setup_lookup_ptr->ratio_denom); - if (ratio.value >= ratio_level.value) { - sharp_level = spl_fixpt_from_fraction(setup_lookup_ptr->sharpness_numer, - setup_lookup_ptr->sharpness_denom); - break; - } - setup_lookup_ptr++; - j++; - } - - /* - * Calculate LUT_128_gained with this equation: - * - * LUT_128_gained[i] = (uint8)(0.5 + min(255,(double)(LUT_128[i])*sharpLevel/iGain)) - * where LUT_128[i] is contents of 3p0x isharp 1dlut - * where sharpLevel is desired sharpness level - * where iGain is base sharpness level 3.0 - * where LUT_128_gained[i] is adjusted 1dlut value based on desired sharpness level - */ - byte_ptr_1dlut_src = (uint8_t *)filter_isharp_1D_lut_3p0x; - byte_ptr_1dlut_dst = (uint8_t *)filter_pregen_store; - size_1dlut = sizeof(filter_isharp_1D_lut_3p0x); - memset(byte_ptr_1dlut_dst, 0, size_1dlut); - for (j = 0; j < size_1dlut; j++) { - sharp_base = spl_fixpt_from_int((int)*byte_ptr_1dlut_src); - sharp_calc = spl_fixpt_mul(sharp_base, sharp_level); - sharp_calc = 
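/*
 * Worked example of the LUT_128_gained equation above: for a 3p0x LUT
 * entry of 0x68 (104) and a desired sharp_level of 1.5, the gained value
 * is 104 * 1.5 / 3.0 = 52 (0x34); adding 0.5 and flooring still yields
 * 52, comfortably below the 255 clamp.
 */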
spl_fixpt_div(sharp_calc, spl_fixpt_from_int(3)); - sharp_calc = spl_fixpt_min(spl_fixpt_from_int(255), sharp_calc); - sharp_calc = spl_fixpt_add(sharp_calc, spl_fixpt_from_fraction(1, 2)); - sharp_calc_int = spl_fixpt_floor(sharp_calc); - if (sharp_calc_int > 255) - sharp_calc_int = 255; - *byte_ptr_1dlut_dst = (uint8_t)sharp_calc_int; - - byte_ptr_1dlut_src++; - byte_ptr_1dlut_dst++; - } - - /* Compare if filter has change, if so update */ - if (memcmp((void *)filter_isharp_1D_lut_pregen[i], (void *)filter_pregen_store, size_1dlut) != 0) - memcpy((void *)filter_isharp_1D_lut_pregen[i], (void *)filter_pregen_store, size_1dlut); - } -} - -uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness) -{ - return filter_isharp_1D_lut_pregen[sharpness]; -} - -void spl_init_blur_scale_coeffs(void) -{ - convert_filter_s1_10_to_s1_12(filter_isharp_bs_3tap_64p, - filter_isharp_bs_3tap_64p_s1_12, 3); - convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_64p, - filter_isharp_bs_4tap_64p_s1_12, 4); - convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_in_6_64p, - filter_isharp_bs_4tap_in_6_64p_s1_12, 6); -} - -uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) -{ - if (taps == 3) - return spl_get_filter_isharp_bs_3tap_64p(); - else if (taps == 4) - return spl_get_filter_isharp_bs_4tap_64p(); - else if (taps == 6) - return spl_get_filter_isharp_bs_4tap_in_6_64p(); - else { - /* should never happen, bug */ - SPL_BREAK_TO_DEBUGGER(); - return NULL; - } -} - -void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_blur_scale_h = - spl_dscl_get_blur_scale_coeffs_64p(data->taps.h_taps); - - dscl_prog_data->filter_blur_scale_v = - spl_dscl_get_blur_scale_coeffs_64p(data->taps.v_taps); -} - diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index 6cb000bf9d53..1aaf4c50c1bc 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -12,37 +12,6 @@ const uint32_t *spl_get_filter_isharp_1D_lut_0p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p0x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void); -const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void); -uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void); -uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); -uint16_t *spl_get_filter_isharp_bs_3tap_64p(void); +const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void); -uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps); - -struct scale_ratio_to_sharpness_level_lookup { - unsigned int ratio_numer; - unsigned int ratio_denom; - unsigned int sharpness_numer; - unsigned int sharpness_denom; -}; - -struct sharpness_level_mapping { - unsigned int level; - unsigned int level_numer; - unsigned int level_denom; -}; - -enum system_setup { - SDR_NL = 0, - SDR_L, - HDR_NL, - HDR_L -}; - -void spl_init_blur_scale_coeffs(void); -void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data); - -void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup); -uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git 
a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c deleted file mode 100644 index 09bf82f7d468..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c +++ /dev/null @@ -1,1726 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "spl_debug.h" -#include "dc_spl_filters.h" -#include "dc_spl_scl_filters.h" -#include "dc_spl_scl_easf_filters.h" - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.3_p_10qb_ -// 3 -// 64 -// input/output = 0.300000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_30[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F6, 0x0206, 0x0004, - 0x01EC, 0x020B, 0x0009, - 0x01E2, 0x0211, 0x000D, - 0x01D8, 0x0216, 0x0012, - 0x01CE, 0x021C, 0x0016, - 0x01C4, 0x0221, 0x001B, - 0x01BA, 0x0226, 0x0020, - 0x01B0, 0x022A, 0x0026, - 0x01A6, 0x022F, 0x002B, - 0x019C, 0x0233, 0x0031, - 0x0192, 0x0238, 0x0036, - 0x0188, 0x023C, 0x003C, - 0x017E, 0x0240, 0x0042, - 0x0174, 0x0244, 0x0048, - 0x016A, 0x0248, 0x004E, - 0x0161, 0x024A, 0x0055, - 0x0157, 0x024E, 0x005B, - 0x014D, 0x0251, 0x0062, - 0x0144, 0x0253, 0x0069, - 0x013A, 0x0256, 0x0070, - 0x0131, 0x0258, 0x0077, - 0x0127, 0x025B, 0x007E, - 0x011E, 0x025C, 0x0086, - 0x0115, 0x025E, 0x008D, - 0x010B, 0x0260, 0x0095, - 0x0102, 0x0262, 0x009C, - 0x00F9, 0x0263, 0x00A4, - 0x00F0, 0x0264, 0x00AC, - 0x00E7, 0x0265, 0x00B4, - 0x00DF, 0x0264, 0x00BD, - 0x00D6, 0x0265, 0x00C5, - 0x00CD, 0x0266, 0x00CD, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.4_p_10qb_ -// 3 -// 64 -// input/output = 0.400000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_40[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F6, 0x0206, 0x0004, - 0x01EB, 0x020E, 0x0007, - 0x01E1, 0x0214, 0x000B, - 0x01D7, 0x021A, 0x000F, - 0x01CD, 0x0220, 0x0013, - 0x01C2, 0x0226, 0x0018, - 0x01B8, 0x022C, 0x001C, - 0x01AE, 0x0231, 0x0021, - 0x01A3, 0x0237, 0x0026, - 0x0199, 0x023C, 0x002B, - 0x018F, 0x0240, 0x0031, - 0x0185, 0x0245, 0x0036, - 0x017A, 0x024A, 0x003C, - 0x0170, 0x024F, 0x0041, - 0x0166, 0x0253, 0x0047, - 0x015C, 0x0257, 0x004D, - 0x0152, 0x025A, 0x0054, - 0x0148, 0x025E, 0x005A, - 0x013E, 0x0261, 0x0061, - 0x0134, 0x0264, 0x0068, - 0x012B, 0x0266, 0x006F, - 0x0121, 0x0269, 0x0076, - 0x0117, 0x026C, 0x007D, - 0x010E, 0x026E, 0x0084, - 0x0104, 0x0270, 0x008C, - 0x00FB, 0x0271, 0x0094, - 0x00F2, 0x0272, 0x009C, - 0x00E9, 0x0273, 0x00A4, - 0x00E0, 0x0274, 0x00AC, - 0x00D7, 0x0275, 0x00B4, - 0x00CE, 0x0275, 0x00BD, - 0x00C5, 0x0276, 0x00C5, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.5_p_10qb_ -// 3 -// 64 -// input/output = 0.500000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_50[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F5, 0x0209, 0x0002, - 0x01EA, 0x0211, 0x0005, - 0x01DF, 0x021A, 0x0007, - 0x01D4, 0x0222, 0x000A, - 0x01C9, 0x022A, 0x000D, - 0x01BE, 0x0232, 0x0010, - 0x01B3, 0x0239, 0x0014, - 0x01A8, 0x0241, 0x0017, - 0x019D, 
0x0248, 0x001B, - 0x0192, 0x024F, 0x001F, - 0x0187, 0x0255, 0x0024, - 0x017C, 0x025C, 0x0028, - 0x0171, 0x0262, 0x002D, - 0x0166, 0x0268, 0x0032, - 0x015B, 0x026E, 0x0037, - 0x0150, 0x0273, 0x003D, - 0x0146, 0x0278, 0x0042, - 0x013B, 0x027D, 0x0048, - 0x0130, 0x0282, 0x004E, - 0x0126, 0x0286, 0x0054, - 0x011B, 0x028A, 0x005B, - 0x0111, 0x028D, 0x0062, - 0x0107, 0x0290, 0x0069, - 0x00FD, 0x0293, 0x0070, - 0x00F3, 0x0296, 0x0077, - 0x00E9, 0x0298, 0x007F, - 0x00DF, 0x029A, 0x0087, - 0x00D5, 0x029C, 0x008F, - 0x00CC, 0x029D, 0x0097, - 0x00C3, 0x029E, 0x009F, - 0x00BA, 0x029E, 0x00A8, - 0x00B1, 0x029E, 0x00B1, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.6_p_10qb_ -// 3 -// 64 -// input/output = 0.600000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_60[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F4, 0x020B, 0x0001, - 0x01E8, 0x0216, 0x0002, - 0x01DC, 0x0221, 0x0003, - 0x01D0, 0x022B, 0x0005, - 0x01C4, 0x0235, 0x0007, - 0x01B8, 0x0240, 0x0008, - 0x01AC, 0x0249, 0x000B, - 0x01A0, 0x0253, 0x000D, - 0x0194, 0x025C, 0x0010, - 0x0188, 0x0265, 0x0013, - 0x017C, 0x026E, 0x0016, - 0x0170, 0x0277, 0x0019, - 0x0164, 0x027F, 0x001D, - 0x0158, 0x0287, 0x0021, - 0x014C, 0x028F, 0x0025, - 0x0140, 0x0297, 0x0029, - 0x0135, 0x029D, 0x002E, - 0x0129, 0x02A4, 0x0033, - 0x011D, 0x02AB, 0x0038, - 0x0112, 0x02B0, 0x003E, - 0x0107, 0x02B5, 0x0044, - 0x00FC, 0x02BA, 0x004A, - 0x00F1, 0x02BF, 0x0050, - 0x00E6, 0x02C3, 0x0057, - 0x00DB, 0x02C7, 0x005E, - 0x00D1, 0x02CA, 0x0065, - 0x00C7, 0x02CC, 0x006D, - 0x00BD, 0x02CE, 0x0075, - 0x00B3, 0x02D0, 0x007D, - 0x00A9, 0x02D2, 0x0085, - 0x00A0, 0x02D2, 0x008E, - 0x0097, 0x02D2, 0x0097, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.7_p_10qb_ -// 3 -// 64 -// input/output = 0.700000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_70[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F3, 0x020D, 0x0000, - 0x01E5, 0x021B, 0x0000, - 0x01D8, 0x0228, 0x0000, - 0x01CB, 0x0235, 0x0000, - 0x01BD, 0x0243, 0x0000, - 0x01B0, 0x024F, 0x0001, - 0x01A2, 0x025C, 0x0002, - 0x0195, 0x0268, 0x0003, - 0x0187, 0x0275, 0x0004, - 0x017A, 0x0280, 0x0006, - 0x016D, 0x028C, 0x0007, - 0x015F, 0x0298, 0x0009, - 0x0152, 0x02A2, 0x000C, - 0x0145, 0x02AD, 0x000E, - 0x0138, 0x02B7, 0x0011, - 0x012B, 0x02C0, 0x0015, - 0x011E, 0x02CA, 0x0018, - 0x0111, 0x02D3, 0x001C, - 0x0105, 0x02DB, 0x0020, - 0x00F8, 0x02E3, 0x0025, - 0x00EC, 0x02EA, 0x002A, - 0x00E0, 0x02F1, 0x002F, - 0x00D5, 0x02F6, 0x0035, - 0x00C9, 0x02FC, 0x003B, - 0x00BE, 0x0301, 0x0041, - 0x00B3, 0x0305, 0x0048, - 0x00A8, 0x0309, 0x004F, - 0x009E, 0x030C, 0x0056, - 0x0094, 0x030E, 0x005E, - 0x008A, 0x0310, 0x0066, - 0x0081, 0x0310, 0x006F, - 0x0077, 0x0312, 0x0077, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.8_p_10qb_ -// 3 -// 64 -// input/output = 0.800000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_80[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F1, 0x0210, 0x0FFF, - 0x01E2, 0x0220, 0x0FFE, - 0x01D2, 0x0232, 0x0FFC, - 0x01C3, 0x0241, 
0x0FFC, - 0x01B4, 0x0251, 0x0FFB, - 0x01A4, 0x0262, 0x0FFA, - 0x0195, 0x0271, 0x0FFA, - 0x0186, 0x0281, 0x0FF9, - 0x0176, 0x0291, 0x0FF9, - 0x0167, 0x02A0, 0x0FF9, - 0x0158, 0x02AE, 0x0FFA, - 0x0149, 0x02BD, 0x0FFA, - 0x013A, 0x02CB, 0x0FFB, - 0x012C, 0x02D7, 0x0FFD, - 0x011D, 0x02E5, 0x0FFE, - 0x010F, 0x02F1, 0x0000, - 0x0101, 0x02FD, 0x0002, - 0x00F3, 0x0308, 0x0005, - 0x00E5, 0x0313, 0x0008, - 0x00D8, 0x031D, 0x000B, - 0x00CB, 0x0326, 0x000F, - 0x00BE, 0x032F, 0x0013, - 0x00B2, 0x0337, 0x0017, - 0x00A6, 0x033E, 0x001C, - 0x009A, 0x0345, 0x0021, - 0x008F, 0x034A, 0x0027, - 0x0084, 0x034F, 0x002D, - 0x0079, 0x0353, 0x0034, - 0x006F, 0x0356, 0x003B, - 0x0065, 0x0358, 0x0043, - 0x005C, 0x0359, 0x004B, - 0x0053, 0x035A, 0x0053, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.9_p_10qb_ -// 3 -// 64 -// input/output = 0.900000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_90[99] = { - 0x0200, 0x0200, 0x0000, - 0x01EE, 0x0214, 0x0FFE, - 0x01DC, 0x0228, 0x0FFC, - 0x01CA, 0x023C, 0x0FFA, - 0x01B9, 0x024F, 0x0FF8, - 0x01A7, 0x0262, 0x0FF7, - 0x0195, 0x0276, 0x0FF5, - 0x0183, 0x028A, 0x0FF3, - 0x0172, 0x029C, 0x0FF2, - 0x0160, 0x02AF, 0x0FF1, - 0x014F, 0x02C2, 0x0FEF, - 0x013E, 0x02D4, 0x0FEE, - 0x012D, 0x02E5, 0x0FEE, - 0x011C, 0x02F7, 0x0FED, - 0x010C, 0x0307, 0x0FED, - 0x00FB, 0x0318, 0x0FED, - 0x00EC, 0x0327, 0x0FED, - 0x00DC, 0x0336, 0x0FEE, - 0x00CD, 0x0344, 0x0FEF, - 0x00BE, 0x0352, 0x0FF0, - 0x00B0, 0x035E, 0x0FF2, - 0x00A2, 0x036A, 0x0FF4, - 0x0095, 0x0375, 0x0FF6, - 0x0088, 0x037F, 0x0FF9, - 0x007B, 0x0388, 0x0FFD, - 0x006F, 0x0391, 0x0000, - 0x0064, 0x0397, 0x0005, - 0x0059, 0x039D, 0x000A, - 0x004E, 0x03A3, 0x000F, - 0x0045, 0x03A6, 0x0015, - 0x003B, 0x03A9, 0x001C, - 0x0033, 0x03AA, 0x0023, - 0x002A, 0x03AC, 0x002A, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_1_p_10qb_ -// 3 -// 64 -// input/output = 1.000000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_1_00[99] = { - 0x0200, 0x0200, 0x0000, - 0x01EB, 0x0217, 0x0FFE, - 0x01D5, 0x022F, 0x0FFC, - 0x01C0, 0x0247, 0x0FF9, - 0x01AB, 0x025E, 0x0FF7, - 0x0196, 0x0276, 0x0FF4, - 0x0181, 0x028D, 0x0FF2, - 0x016C, 0x02A5, 0x0FEF, - 0x0158, 0x02BB, 0x0FED, - 0x0144, 0x02D1, 0x0FEB, - 0x0130, 0x02E8, 0x0FE8, - 0x011C, 0x02FE, 0x0FE6, - 0x0109, 0x0313, 0x0FE4, - 0x00F6, 0x0328, 0x0FE2, - 0x00E4, 0x033C, 0x0FE0, - 0x00D2, 0x034F, 0x0FDF, - 0x00C0, 0x0363, 0x0FDD, - 0x00B0, 0x0374, 0x0FDC, - 0x009F, 0x0385, 0x0FDC, - 0x0090, 0x0395, 0x0FDB, - 0x0081, 0x03A4, 0x0FDB, - 0x0072, 0x03B3, 0x0FDB, - 0x0064, 0x03C0, 0x0FDC, - 0x0057, 0x03CC, 0x0FDD, - 0x004B, 0x03D6, 0x0FDF, - 0x003F, 0x03E0, 0x0FE1, - 0x0034, 0x03E8, 0x0FE4, - 0x002A, 0x03EF, 0x0FE7, - 0x0020, 0x03F5, 0x0FEB, - 0x0017, 0x03FA, 0x0FEF, - 0x000F, 0x03FD, 0x0FF4, - 0x0007, 0x03FF, 0x0FFA, - 0x0000, 0x0400, 0x0000, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.3_p_10qb_ -// 4 -// 64 -// input/output = 0.300000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_30[132] = { - 
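/*
 * In each row below, the four S1.10 taps sum to 0x400 (1.0), e.g.
 * 0x0104 + 0x01F8 + 0x0104 + 0x0000; the same holds once entries such
 * as 0x0FFF are read as negative two's-complement values, so every
 * phase keeps unity DC gain.
 */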
0x0104, 0x01F8, 0x0104, 0x0000, - 0x00FE, 0x01F7, 0x010A, 0x0001, - 0x00F8, 0x01F6, 0x010F, 0x0003, - 0x00F2, 0x01F5, 0x0114, 0x0005, - 0x00EB, 0x01F4, 0x011B, 0x0006, - 0x00E5, 0x01F3, 0x0120, 0x0008, - 0x00DF, 0x01F2, 0x0125, 0x000A, - 0x00DA, 0x01F0, 0x012A, 0x000C, - 0x00D4, 0x01EE, 0x0130, 0x000E, - 0x00CE, 0x01ED, 0x0135, 0x0010, - 0x00C8, 0x01EB, 0x013A, 0x0013, - 0x00C2, 0x01E9, 0x0140, 0x0015, - 0x00BD, 0x01E7, 0x0145, 0x0017, - 0x00B7, 0x01E5, 0x014A, 0x001A, - 0x00B1, 0x01E2, 0x0151, 0x001C, - 0x00AC, 0x01E0, 0x0155, 0x001F, - 0x00A7, 0x01DD, 0x015A, 0x0022, - 0x00A1, 0x01DB, 0x015F, 0x0025, - 0x009C, 0x01D8, 0x0165, 0x0027, - 0x0097, 0x01D5, 0x016A, 0x002A, - 0x0092, 0x01D2, 0x016E, 0x002E, - 0x008C, 0x01CF, 0x0174, 0x0031, - 0x0087, 0x01CC, 0x0179, 0x0034, - 0x0083, 0x01C9, 0x017D, 0x0037, - 0x007E, 0x01C5, 0x0182, 0x003B, - 0x0079, 0x01C2, 0x0187, 0x003E, - 0x0074, 0x01BE, 0x018C, 0x0042, - 0x0070, 0x01BA, 0x0190, 0x0046, - 0x006B, 0x01B7, 0x0195, 0x0049, - 0x0066, 0x01B3, 0x019A, 0x004D, - 0x0062, 0x01AF, 0x019E, 0x0051, - 0x005E, 0x01AB, 0x01A2, 0x0055, - 0x005A, 0x01A6, 0x01A6, 0x005A, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.4_p_10qb_ -// 4 -// 64 -// input/output = 0.400000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_40[132] = { - 0x00FB, 0x0209, 0x00FC, 0x0000, - 0x00F5, 0x0209, 0x0101, 0x0001, - 0x00EE, 0x0208, 0x0108, 0x0002, - 0x00E8, 0x0207, 0x010E, 0x0003, - 0x00E2, 0x0206, 0x0114, 0x0004, - 0x00DB, 0x0205, 0x011A, 0x0006, - 0x00D5, 0x0204, 0x0120, 0x0007, - 0x00CF, 0x0203, 0x0125, 0x0009, - 0x00C9, 0x0201, 0x012C, 0x000A, - 0x00C3, 0x01FF, 0x0132, 0x000C, - 0x00BD, 0x01FD, 0x0138, 0x000E, - 0x00B7, 0x01FB, 0x013E, 0x0010, - 0x00B1, 0x01F9, 0x0144, 0x0012, - 0x00AC, 0x01F7, 0x0149, 0x0014, - 0x00A6, 0x01F4, 0x0150, 0x0016, - 0x00A0, 0x01F2, 0x0156, 0x0018, - 0x009B, 0x01EF, 0x015C, 0x001A, - 0x0095, 0x01EC, 0x0162, 0x001D, - 0x0090, 0x01E9, 0x0168, 0x001F, - 0x008B, 0x01E6, 0x016D, 0x0022, - 0x0085, 0x01E3, 0x0173, 0x0025, - 0x0080, 0x01DF, 0x0179, 0x0028, - 0x007B, 0x01DC, 0x017E, 0x002B, - 0x0076, 0x01D8, 0x0184, 0x002E, - 0x0071, 0x01D4, 0x018A, 0x0031, - 0x006D, 0x01D1, 0x018E, 0x0034, - 0x0068, 0x01CD, 0x0193, 0x0038, - 0x0063, 0x01C8, 0x019A, 0x003B, - 0x005F, 0x01C4, 0x019E, 0x003F, - 0x005B, 0x01C0, 0x01A3, 0x0042, - 0x0056, 0x01BB, 0x01A9, 0x0046, - 0x0052, 0x01B7, 0x01AD, 0x004A, - 0x004E, 0x01B2, 0x01B2, 0x004E, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.5_p_10qb_ -// 4 -// 64 -// input/output = 0.500000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_50[132] = { - 0x00E5, 0x0236, 0x00E5, 0x0000, - 0x00DE, 0x0235, 0x00ED, 0x0000, - 0x00D7, 0x0235, 0x00F4, 0x0000, - 0x00D0, 0x0235, 0x00FB, 0x0000, - 0x00C9, 0x0234, 0x0102, 0x0001, - 0x00C2, 0x0233, 0x010A, 0x0001, - 0x00BC, 0x0232, 0x0111, 0x0001, - 0x00B5, 0x0230, 0x0119, 0x0002, - 0x00AE, 0x022F, 0x0121, 0x0002, - 0x00A8, 0x022D, 0x0128, 0x0003, - 0x00A2, 0x022B, 0x012F, 0x0004, - 0x009B, 0x0229, 0x0137, 0x0005, - 0x0095, 0x0226, 0x013F, 0x0006, - 0x008F, 0x0224, 0x0146, 0x0007, - 0x0089, 0x0221, 0x014E, 0x0008, - 0x0083, 0x021E, 0x0155, 0x000A, - 0x007E, 0x021B, 0x015C, 
0x000B, - 0x0078, 0x0217, 0x0164, 0x000D, - 0x0072, 0x0213, 0x016D, 0x000E, - 0x006D, 0x0210, 0x0173, 0x0010, - 0x0068, 0x020C, 0x017A, 0x0012, - 0x0063, 0x0207, 0x0182, 0x0014, - 0x005E, 0x0203, 0x0189, 0x0016, - 0x0059, 0x01FE, 0x0191, 0x0018, - 0x0054, 0x01F9, 0x0198, 0x001B, - 0x0050, 0x01F4, 0x019F, 0x001D, - 0x004B, 0x01EF, 0x01A6, 0x0020, - 0x0047, 0x01EA, 0x01AC, 0x0023, - 0x0043, 0x01E4, 0x01B3, 0x0026, - 0x003F, 0x01DF, 0x01B9, 0x0029, - 0x003B, 0x01D9, 0x01C0, 0x002C, - 0x0037, 0x01D3, 0x01C6, 0x0030, - 0x0033, 0x01CD, 0x01CD, 0x0033, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.6_p_10qb_ -// 4 -// 64 -// input/output = 0.600000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_60[132] = { - 0x00C8, 0x026F, 0x00C9, 0x0000, - 0x00C0, 0x0270, 0x00D1, 0x0FFF, - 0x00B8, 0x0270, 0x00D9, 0x0FFF, - 0x00B1, 0x0270, 0x00E1, 0x0FFE, - 0x00A9, 0x026F, 0x00EB, 0x0FFD, - 0x00A2, 0x026E, 0x00F3, 0x0FFD, - 0x009A, 0x026D, 0x00FD, 0x0FFC, - 0x0093, 0x026C, 0x0105, 0x0FFC, - 0x008C, 0x026A, 0x010F, 0x0FFB, - 0x0085, 0x0268, 0x0118, 0x0FFB, - 0x007E, 0x0265, 0x0122, 0x0FFB, - 0x0078, 0x0263, 0x012A, 0x0FFB, - 0x0071, 0x0260, 0x0134, 0x0FFB, - 0x006B, 0x025C, 0x013E, 0x0FFB, - 0x0065, 0x0259, 0x0147, 0x0FFB, - 0x005F, 0x0255, 0x0151, 0x0FFB, - 0x0059, 0x0251, 0x015A, 0x0FFC, - 0x0054, 0x024D, 0x0163, 0x0FFC, - 0x004E, 0x0248, 0x016D, 0x0FFD, - 0x0049, 0x0243, 0x0176, 0x0FFE, - 0x0044, 0x023E, 0x017F, 0x0FFF, - 0x003F, 0x0238, 0x0189, 0x0000, - 0x003A, 0x0232, 0x0193, 0x0001, - 0x0036, 0x022C, 0x019C, 0x0002, - 0x0031, 0x0226, 0x01A5, 0x0004, - 0x002D, 0x021F, 0x01AF, 0x0005, - 0x0029, 0x0218, 0x01B8, 0x0007, - 0x0025, 0x0211, 0x01C1, 0x0009, - 0x0022, 0x020A, 0x01C9, 0x000B, - 0x001E, 0x0203, 0x01D2, 0x000D, - 0x001B, 0x01FB, 0x01DA, 0x0010, - 0x0018, 0x01F3, 0x01E3, 0x0012, - 0x0015, 0x01EB, 0x01EB, 0x0015, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.7_p_10qb_ -// 4 -// 64 -// input/output = 0.700000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_70[132] = { - 0x00A3, 0x02B9, 0x00A4, 0x0000, - 0x009A, 0x02BA, 0x00AD, 0x0FFF, - 0x0092, 0x02BA, 0x00B6, 0x0FFE, - 0x0089, 0x02BA, 0x00C1, 0x0FFC, - 0x0081, 0x02B9, 0x00CB, 0x0FFB, - 0x0079, 0x02B8, 0x00D5, 0x0FFA, - 0x0071, 0x02B7, 0x00DF, 0x0FF9, - 0x0069, 0x02B5, 0x00EA, 0x0FF8, - 0x0062, 0x02B3, 0x00F4, 0x0FF7, - 0x005B, 0x02B0, 0x00FF, 0x0FF6, - 0x0054, 0x02AD, 0x010B, 0x0FF4, - 0x004D, 0x02A9, 0x0117, 0x0FF3, - 0x0046, 0x02A5, 0x0123, 0x0FF2, - 0x0040, 0x02A1, 0x012D, 0x0FF2, - 0x003A, 0x029C, 0x0139, 0x0FF1, - 0x0034, 0x0297, 0x0145, 0x0FF0, - 0x002F, 0x0292, 0x0150, 0x0FEF, - 0x0029, 0x028C, 0x015C, 0x0FEF, - 0x0024, 0x0285, 0x0169, 0x0FEE, - 0x001F, 0x027F, 0x0174, 0x0FEE, - 0x001B, 0x0278, 0x017F, 0x0FEE, - 0x0016, 0x0270, 0x018D, 0x0FED, - 0x0012, 0x0268, 0x0199, 0x0FED, - 0x000E, 0x0260, 0x01A4, 0x0FEE, - 0x000B, 0x0258, 0x01AF, 0x0FEE, - 0x0007, 0x024F, 0x01BC, 0x0FEE, - 0x0004, 0x0246, 0x01C7, 0x0FEF, - 0x0001, 0x023D, 0x01D3, 0x0FEF, - 0x0FFE, 0x0233, 0x01DF, 0x0FF0, - 0x0FFC, 0x0229, 0x01EA, 0x0FF1, - 0x0FFA, 0x021F, 0x01F4, 0x0FF3, - 0x0FF8, 0x0215, 0x01FF, 0x0FF4, - 0x0FF6, 0x020A, 0x020A, 0x0FF6, -}; - 
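/*
 * The banner above each table records how it was generated: the MATLAB
 * scripts, date, tap count, phase count (64), and the input/output ratio
 * it targets, with all coefficients in S1.10 fixed point (0x400 == 1.0).
 *
 * Minimal sketch of the S1.10 -> S1.12 widening applied to these tables
 * at init time, assuming convert_filter_s1_10_to_s1_12() (defined
 * outside this hunk) is a plain element-wise rescale; the helper name
 * below is hypothetical:
 */
static inline uint16_t s1_10_to_s1_12_sketch(uint16_t coeff)
{
	/* Sign-extend the 12-bit S1.10 field without UB-prone shifts. */
	int32_t v = (int32_t)((coeff & 0xfff) ^ 0x800) - 0x800;

	/* Add two fraction bits (multiply by 4), repack into 14 bits. */
	return (uint16_t)(((uint32_t)v << 2) & 0x3fff);
}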
-//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.8_p_10qb_ -// 4 -// 64 -// input/output = 0.800000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_80[132] = { - 0x0075, 0x0315, 0x0076, 0x0000, - 0x006C, 0x0316, 0x007F, 0x0FFF, - 0x0062, 0x0316, 0x008A, 0x0FFE, - 0x0059, 0x0315, 0x0096, 0x0FFC, - 0x0050, 0x0314, 0x00A1, 0x0FFB, - 0x0048, 0x0312, 0x00AD, 0x0FF9, - 0x0040, 0x0310, 0x00B8, 0x0FF8, - 0x0038, 0x030D, 0x00C5, 0x0FF6, - 0x0030, 0x030A, 0x00D1, 0x0FF5, - 0x0029, 0x0306, 0x00DE, 0x0FF3, - 0x0022, 0x0301, 0x00EB, 0x0FF2, - 0x001C, 0x02FC, 0x00F8, 0x0FF0, - 0x0015, 0x02F7, 0x0106, 0x0FEE, - 0x0010, 0x02F1, 0x0112, 0x0FED, - 0x000A, 0x02EA, 0x0121, 0x0FEB, - 0x0005, 0x02E3, 0x012F, 0x0FE9, - 0x0000, 0x02DB, 0x013D, 0x0FE8, - 0x0FFB, 0x02D3, 0x014C, 0x0FE6, - 0x0FF7, 0x02CA, 0x015A, 0x0FE5, - 0x0FF3, 0x02C1, 0x0169, 0x0FE3, - 0x0FF0, 0x02B7, 0x0177, 0x0FE2, - 0x0FEC, 0x02AD, 0x0186, 0x0FE1, - 0x0FE9, 0x02A2, 0x0196, 0x0FDF, - 0x0FE7, 0x0297, 0x01A4, 0x0FDE, - 0x0FE4, 0x028C, 0x01B3, 0x0FDD, - 0x0FE2, 0x0280, 0x01C2, 0x0FDC, - 0x0FE0, 0x0274, 0x01D0, 0x0FDC, - 0x0FDF, 0x0268, 0x01DE, 0x0FDB, - 0x0FDD, 0x025B, 0x01EE, 0x0FDA, - 0x0FDC, 0x024E, 0x01FC, 0x0FDA, - 0x0FDB, 0x0241, 0x020A, 0x0FDA, - 0x0FDB, 0x0233, 0x0218, 0x0FDA, - 0x0FDA, 0x0226, 0x0226, 0x0FDA, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.9_p_10qb_ -// 4 -// 64 -// input/output = 0.900000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_90[132] = { - 0x003F, 0x0383, 0x003E, 0x0000, - 0x0034, 0x0383, 0x004A, 0x0FFF, - 0x002B, 0x0383, 0x0054, 0x0FFE, - 0x0021, 0x0381, 0x0061, 0x0FFD, - 0x0019, 0x037F, 0x006C, 0x0FFC, - 0x0010, 0x037C, 0x0079, 0x0FFB, - 0x0008, 0x0378, 0x0086, 0x0FFA, - 0x0001, 0x0374, 0x0093, 0x0FF8, - 0x0FFA, 0x036E, 0x00A1, 0x0FF7, - 0x0FF3, 0x0368, 0x00B0, 0x0FF5, - 0x0FED, 0x0361, 0x00BF, 0x0FF3, - 0x0FE8, 0x035A, 0x00CD, 0x0FF1, - 0x0FE2, 0x0352, 0x00DC, 0x0FF0, - 0x0FDE, 0x0349, 0x00EB, 0x0FEE, - 0x0FD9, 0x033F, 0x00FC, 0x0FEC, - 0x0FD5, 0x0335, 0x010D, 0x0FE9, - 0x0FD2, 0x032A, 0x011D, 0x0FE7, - 0x0FCF, 0x031E, 0x012E, 0x0FE5, - 0x0FCC, 0x0312, 0x013F, 0x0FE3, - 0x0FCA, 0x0305, 0x0150, 0x0FE1, - 0x0FC8, 0x02F8, 0x0162, 0x0FDE, - 0x0FC6, 0x02EA, 0x0174, 0x0FDC, - 0x0FC5, 0x02DC, 0x0185, 0x0FDA, - 0x0FC4, 0x02CD, 0x0197, 0x0FD8, - 0x0FC3, 0x02BE, 0x01AA, 0x0FD5, - 0x0FC3, 0x02AF, 0x01BB, 0x0FD3, - 0x0FC3, 0x029F, 0x01CD, 0x0FD1, - 0x0FC3, 0x028E, 0x01E0, 0x0FCF, - 0x0FC3, 0x027E, 0x01F2, 0x0FCD, - 0x0FC4, 0x026D, 0x0203, 0x0FCC, - 0x0FC5, 0x025C, 0x0215, 0x0FCA, - 0x0FC6, 0x024B, 0x0227, 0x0FC8, - 0x0FC7, 0x0239, 0x0239, 0x0FC7, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_1_p_10qb_ -// 4 -// 64 -// input/output = 1.000000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_1_00[132] = { - 0x0000, 0x0400, 0x0000, 0x0000, - 0x0FF6, 0x03FF, 0x000B, 0x0000, - 0x0FED, 0x03FE, 0x0015, 0x0000, - 0x0FE4, 0x03FB, 0x0022, 0x0FFF, - 0x0FDC, 0x03F7, 0x002E, 0x0FFF, - 0x0FD5, 0x03F2, 0x003B, 0x0FFE, - 0x0FCE, 
0x03EC, 0x0048, 0x0FFE, - 0x0FC8, 0x03E5, 0x0056, 0x0FFD, - 0x0FC3, 0x03DC, 0x0065, 0x0FFC, - 0x0FBE, 0x03D3, 0x0075, 0x0FFA, - 0x0FB9, 0x03C9, 0x0085, 0x0FF9, - 0x0FB6, 0x03BE, 0x0094, 0x0FF8, - 0x0FB2, 0x03B2, 0x00A6, 0x0FF6, - 0x0FB0, 0x03A5, 0x00B7, 0x0FF4, - 0x0FAD, 0x0397, 0x00CA, 0x0FF2, - 0x0FAB, 0x0389, 0x00DC, 0x0FF0, - 0x0FAA, 0x0379, 0x00EF, 0x0FEE, - 0x0FA9, 0x0369, 0x0102, 0x0FEC, - 0x0FA9, 0x0359, 0x0115, 0x0FE9, - 0x0FA9, 0x0348, 0x0129, 0x0FE6, - 0x0FA9, 0x0336, 0x013D, 0x0FE4, - 0x0FA9, 0x0323, 0x0153, 0x0FE1, - 0x0FAA, 0x0310, 0x0168, 0x0FDE, - 0x0FAC, 0x02FD, 0x017C, 0x0FDB, - 0x0FAD, 0x02E9, 0x0192, 0x0FD8, - 0x0FAF, 0x02D5, 0x01A7, 0x0FD5, - 0x0FB1, 0x02C0, 0x01BD, 0x0FD2, - 0x0FB3, 0x02AC, 0x01D2, 0x0FCF, - 0x0FB5, 0x0296, 0x01E9, 0x0FCC, - 0x0FB8, 0x0281, 0x01FE, 0x0FC9, - 0x0FBA, 0x026C, 0x0214, 0x0FC6, - 0x0FBD, 0x0256, 0x022A, 0x0FC3, - 0x0FC0, 0x0240, 0x0240, 0x0FC0, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.3_p_10qb_ -// 6 -// 64 -// input/output = 0.300000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_30[198] = { - 0x004B, 0x0100, 0x0169, 0x0101, 0x004B, 0x0000, - 0x0049, 0x00FD, 0x0169, 0x0103, 0x004E, 0x0000, - 0x0047, 0x00FA, 0x0169, 0x0106, 0x0050, 0x0000, - 0x0045, 0x00F7, 0x0168, 0x0109, 0x0052, 0x0001, - 0x0043, 0x00F5, 0x0168, 0x010B, 0x0054, 0x0001, - 0x0040, 0x00F2, 0x0168, 0x010E, 0x0057, 0x0001, - 0x003E, 0x00EF, 0x0168, 0x0110, 0x0059, 0x0002, - 0x003C, 0x00EC, 0x0167, 0x0113, 0x005C, 0x0002, - 0x003A, 0x00E9, 0x0167, 0x0116, 0x005E, 0x0002, - 0x0038, 0x00E6, 0x0166, 0x0118, 0x0061, 0x0003, - 0x0036, 0x00E3, 0x0165, 0x011C, 0x0063, 0x0003, - 0x0034, 0x00E0, 0x0165, 0x011D, 0x0066, 0x0004, - 0x0033, 0x00DD, 0x0164, 0x0120, 0x0068, 0x0004, - 0x0031, 0x00DA, 0x0163, 0x0122, 0x006B, 0x0005, - 0x002F, 0x00D7, 0x0163, 0x0125, 0x006D, 0x0005, - 0x002D, 0x00D3, 0x0162, 0x0128, 0x0070, 0x0006, - 0x002B, 0x00D0, 0x0161, 0x012A, 0x0073, 0x0007, - 0x002A, 0x00CD, 0x0160, 0x012D, 0x0075, 0x0007, - 0x0028, 0x00CA, 0x015F, 0x012F, 0x0078, 0x0008, - 0x0026, 0x00C7, 0x015E, 0x0131, 0x007B, 0x0009, - 0x0025, 0x00C4, 0x015D, 0x0133, 0x007E, 0x0009, - 0x0023, 0x00C1, 0x015C, 0x0136, 0x0080, 0x000A, - 0x0022, 0x00BE, 0x015A, 0x0138, 0x0083, 0x000B, - 0x0020, 0x00BB, 0x0159, 0x013A, 0x0086, 0x000C, - 0x001F, 0x00B8, 0x0158, 0x013B, 0x0089, 0x000D, - 0x001E, 0x00B5, 0x0156, 0x013E, 0x008C, 0x000D, - 0x001C, 0x00B2, 0x0155, 0x0140, 0x008F, 0x000E, - 0x001B, 0x00AF, 0x0153, 0x0143, 0x0091, 0x000F, - 0x0019, 0x00AC, 0x0152, 0x0145, 0x0094, 0x0010, - 0x0018, 0x00A9, 0x0150, 0x0147, 0x0097, 0x0011, - 0x0017, 0x00A6, 0x014F, 0x0148, 0x009A, 0x0012, - 0x0016, 0x00A3, 0x014D, 0x0149, 0x009D, 0x0014, - 0x0015, 0x00A0, 0x014B, 0x014B, 0x00A0, 0x0015, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.4_p_10qb_ -// 6 -// 64 -// input/output = 0.400000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_40[198] = { - 0x0028, 0x0106, 0x01A3, 0x0107, 0x0028, 0x0000, - 0x0026, 0x0102, 0x01A3, 0x010A, 0x002B, 0x0000, - 0x0024, 0x00FE, 0x01A3, 0x010F, 0x002D, 0x0FFF, - 0x0022, 0x00FA, 0x01A3, 0x0113, 0x002F, 0x0FFF, - 0x0021, 0x00F6, 0x01A3, 0x0116, 0x0031, 0x0FFF, - 0x001F, 
0x00F2, 0x01A2, 0x011B, 0x0034, 0x0FFE, - 0x001D, 0x00EE, 0x01A2, 0x011F, 0x0036, 0x0FFE, - 0x001B, 0x00EA, 0x01A1, 0x0123, 0x0039, 0x0FFE, - 0x0019, 0x00E6, 0x01A1, 0x0127, 0x003B, 0x0FFE, - 0x0018, 0x00E2, 0x01A0, 0x012A, 0x003E, 0x0FFE, - 0x0016, 0x00DE, 0x01A0, 0x012E, 0x0041, 0x0FFD, - 0x0015, 0x00DA, 0x019F, 0x0132, 0x0043, 0x0FFD, - 0x0013, 0x00D6, 0x019E, 0x0136, 0x0046, 0x0FFD, - 0x0012, 0x00D2, 0x019D, 0x0139, 0x0049, 0x0FFD, - 0x0010, 0x00CE, 0x019C, 0x013D, 0x004C, 0x0FFD, - 0x000F, 0x00CA, 0x019A, 0x0141, 0x004F, 0x0FFD, - 0x000E, 0x00C6, 0x0199, 0x0144, 0x0052, 0x0FFD, - 0x000D, 0x00C2, 0x0197, 0x0148, 0x0055, 0x0FFD, - 0x000B, 0x00BE, 0x0196, 0x014C, 0x0058, 0x0FFD, - 0x000A, 0x00BA, 0x0195, 0x014F, 0x005B, 0x0FFD, - 0x0009, 0x00B6, 0x0193, 0x0153, 0x005E, 0x0FFD, - 0x0008, 0x00B2, 0x0191, 0x0157, 0x0061, 0x0FFD, - 0x0007, 0x00AE, 0x0190, 0x015A, 0x0064, 0x0FFD, - 0x0006, 0x00AA, 0x018E, 0x015D, 0x0068, 0x0FFD, - 0x0005, 0x00A6, 0x018C, 0x0161, 0x006B, 0x0FFD, - 0x0005, 0x00A2, 0x0189, 0x0164, 0x006F, 0x0FFD, - 0x0004, 0x009E, 0x0187, 0x0167, 0x0072, 0x0FFE, - 0x0003, 0x009A, 0x0185, 0x016B, 0x0075, 0x0FFE, - 0x0002, 0x0096, 0x0183, 0x016E, 0x0079, 0x0FFE, - 0x0002, 0x0093, 0x0180, 0x016F, 0x007D, 0x0FFF, - 0x0001, 0x008F, 0x017E, 0x0173, 0x0080, 0x0FFF, - 0x0001, 0x008B, 0x017B, 0x0175, 0x0084, 0x0000, - 0x0000, 0x0087, 0x0179, 0x0179, 0x0087, 0x0000, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.5_p_10qb_ -// 6 -// 64 -// input/output = 0.500000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_50[198] = { - 0x0000, 0x0107, 0x01F3, 0x0106, 0x0000, 0x0000, - 0x0FFE, 0x0101, 0x01F3, 0x010D, 0x0002, 0x0FFF, - 0x0FFD, 0x00FB, 0x01F3, 0x0113, 0x0003, 0x0FFF, - 0x0FFC, 0x00F6, 0x01F3, 0x0118, 0x0005, 0x0FFE, - 0x0FFA, 0x00F0, 0x01F3, 0x011E, 0x0007, 0x0FFE, - 0x0FF9, 0x00EB, 0x01F2, 0x0124, 0x0009, 0x0FFD, - 0x0FF8, 0x00E5, 0x01F2, 0x0129, 0x000B, 0x0FFD, - 0x0FF7, 0x00E0, 0x01F1, 0x012F, 0x000D, 0x0FFC, - 0x0FF6, 0x00DA, 0x01F0, 0x0135, 0x0010, 0x0FFB, - 0x0FF5, 0x00D4, 0x01EF, 0x013B, 0x0012, 0x0FFB, - 0x0FF4, 0x00CF, 0x01EE, 0x0141, 0x0014, 0x0FFA, - 0x0FF3, 0x00C9, 0x01ED, 0x0147, 0x0017, 0x0FF9, - 0x0FF2, 0x00C4, 0x01EB, 0x014C, 0x001A, 0x0FF9, - 0x0FF1, 0x00BF, 0x01EA, 0x0152, 0x001C, 0x0FF8, - 0x0FF1, 0x00B9, 0x01E8, 0x0157, 0x001F, 0x0FF8, - 0x0FF0, 0x00B4, 0x01E6, 0x015D, 0x0022, 0x0FF7, - 0x0FF0, 0x00AE, 0x01E4, 0x0163, 0x0025, 0x0FF6, - 0x0FEF, 0x00A9, 0x01E2, 0x0168, 0x0028, 0x0FF6, - 0x0FEF, 0x00A4, 0x01DF, 0x016E, 0x002B, 0x0FF5, - 0x0FEF, 0x009F, 0x01DD, 0x0172, 0x002E, 0x0FF5, - 0x0FEE, 0x009A, 0x01DA, 0x0178, 0x0032, 0x0FF4, - 0x0FEE, 0x0094, 0x01D8, 0x017E, 0x0035, 0x0FF3, - 0x0FEE, 0x008F, 0x01D5, 0x0182, 0x0039, 0x0FF3, - 0x0FEE, 0x008A, 0x01D2, 0x0188, 0x003C, 0x0FF2, - 0x0FEE, 0x0085, 0x01CF, 0x018C, 0x0040, 0x0FF2, - 0x0FEE, 0x0081, 0x01CB, 0x0191, 0x0044, 0x0FF1, - 0x0FEE, 0x007C, 0x01C8, 0x0196, 0x0047, 0x0FF1, - 0x0FEE, 0x0077, 0x01C4, 0x019C, 0x004B, 0x0FF0, - 0x0FEE, 0x0072, 0x01C1, 0x01A0, 0x004F, 0x0FF0, - 0x0FEE, 0x006E, 0x01BD, 0x01A4, 0x0053, 0x0FF0, - 0x0FEE, 0x0069, 0x01B9, 0x01A9, 0x0058, 0x0FEF, - 0x0FEE, 0x0065, 0x01B5, 0x01AD, 0x005C, 0x0FEF, - 0x0FEF, 0x0060, 0x01B1, 0x01B1, 0x0060, 0x0FEF, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 
6t_64p_LanczosEd_p_0.6_p_10qb_ -// 6 -// 64 -// input/output = 0.600000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_60[198] = { - 0x0FD9, 0x00FB, 0x0258, 0x00FB, 0x0FD9, 0x0000, - 0x0FD9, 0x00F3, 0x0258, 0x0102, 0x0FDA, 0x0000, - 0x0FD8, 0x00EB, 0x0258, 0x010B, 0x0FDB, 0x0FFF, - 0x0FD8, 0x00E3, 0x0258, 0x0112, 0x0FDC, 0x0FFF, - 0x0FD8, 0x00DC, 0x0257, 0x011B, 0x0FDC, 0x0FFE, - 0x0FD7, 0x00D4, 0x0256, 0x0123, 0x0FDE, 0x0FFE, - 0x0FD7, 0x00CD, 0x0255, 0x012B, 0x0FDF, 0x0FFD, - 0x0FD7, 0x00C5, 0x0254, 0x0133, 0x0FE0, 0x0FFD, - 0x0FD7, 0x00BE, 0x0252, 0x013C, 0x0FE1, 0x0FFC, - 0x0FD7, 0x00B6, 0x0251, 0x0143, 0x0FE3, 0x0FFC, - 0x0FD8, 0x00AF, 0x024F, 0x014B, 0x0FE4, 0x0FFB, - 0x0FD8, 0x00A8, 0x024C, 0x0154, 0x0FE6, 0x0FFA, - 0x0FD8, 0x00A1, 0x024A, 0x015B, 0x0FE8, 0x0FFA, - 0x0FD9, 0x009A, 0x0247, 0x0163, 0x0FEA, 0x0FF9, - 0x0FD9, 0x0093, 0x0244, 0x016C, 0x0FEC, 0x0FF8, - 0x0FD9, 0x008C, 0x0241, 0x0174, 0x0FEF, 0x0FF7, - 0x0FDA, 0x0085, 0x023E, 0x017B, 0x0FF1, 0x0FF7, - 0x0FDB, 0x007F, 0x023A, 0x0183, 0x0FF3, 0x0FF6, - 0x0FDB, 0x0078, 0x0237, 0x018B, 0x0FF6, 0x0FF5, - 0x0FDC, 0x0072, 0x0233, 0x0192, 0x0FF9, 0x0FF4, - 0x0FDD, 0x006C, 0x022F, 0x0199, 0x0FFC, 0x0FF3, - 0x0FDD, 0x0065, 0x022A, 0x01A3, 0x0FFF, 0x0FF2, - 0x0FDE, 0x005F, 0x0226, 0x01AA, 0x0002, 0x0FF1, - 0x0FDF, 0x005A, 0x0221, 0x01B0, 0x0006, 0x0FF0, - 0x0FE0, 0x0054, 0x021C, 0x01B7, 0x0009, 0x0FF0, - 0x0FE1, 0x004E, 0x0217, 0x01BE, 0x000D, 0x0FEF, - 0x0FE2, 0x0048, 0x0212, 0x01C6, 0x0010, 0x0FEE, - 0x0FE3, 0x0043, 0x020C, 0x01CD, 0x0014, 0x0FED, - 0x0FE4, 0x003E, 0x0207, 0x01D3, 0x0018, 0x0FEC, - 0x0FE5, 0x0039, 0x0200, 0x01DA, 0x001D, 0x0FEB, - 0x0FE6, 0x0034, 0x01FA, 0x01E1, 0x0021, 0x0FEA, - 0x0FE7, 0x002F, 0x01F5, 0x01E7, 0x0025, 0x0FE9, - 0x0FE8, 0x002A, 0x01EE, 0x01EE, 0x002A, 0x0FE8, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.7_p_10qb_ -// 6 -// 64 -// input/output = 0.700000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_70[198] = { - 0x0FC0, 0x00DA, 0x02CC, 0x00DA, 0x0FC0, 0x0000, - 0x0FC1, 0x00D0, 0x02CC, 0x00E4, 0x0FBF, 0x0000, - 0x0FC2, 0x00C6, 0x02CB, 0x00EF, 0x0FBE, 0x0000, - 0x0FC3, 0x00BC, 0x02CA, 0x00F9, 0x0FBE, 0x0000, - 0x0FC4, 0x00B2, 0x02C9, 0x0104, 0x0FBD, 0x0000, - 0x0FC5, 0x00A8, 0x02C7, 0x010F, 0x0FBD, 0x0000, - 0x0FC7, 0x009F, 0x02C5, 0x0119, 0x0FBC, 0x0000, - 0x0FC8, 0x0095, 0x02C3, 0x0124, 0x0FBC, 0x0000, - 0x0FC9, 0x008C, 0x02C0, 0x012F, 0x0FBC, 0x0000, - 0x0FCB, 0x0083, 0x02BD, 0x0139, 0x0FBC, 0x0000, - 0x0FCC, 0x007A, 0x02BA, 0x0144, 0x0FBC, 0x0000, - 0x0FCE, 0x0072, 0x02B6, 0x014D, 0x0FBD, 0x0000, - 0x0FD0, 0x0069, 0x02B2, 0x0159, 0x0FBD, 0x0FFF, - 0x0FD1, 0x0061, 0x02AD, 0x0164, 0x0FBE, 0x0FFF, - 0x0FD3, 0x0059, 0x02A9, 0x016E, 0x0FBF, 0x0FFE, - 0x0FD4, 0x0051, 0x02A4, 0x017A, 0x0FBF, 0x0FFE, - 0x0FD6, 0x0049, 0x029E, 0x0184, 0x0FC1, 0x0FFE, - 0x0FD8, 0x0042, 0x0299, 0x018E, 0x0FC2, 0x0FFD, - 0x0FD9, 0x003A, 0x0293, 0x019B, 0x0FC3, 0x0FFC, - 0x0FDB, 0x0033, 0x028D, 0x01A4, 0x0FC5, 0x0FFC, - 0x0FDC, 0x002D, 0x0286, 0x01AF, 0x0FC7, 0x0FFB, - 0x0FDE, 0x0026, 0x0280, 0x01BA, 0x0FC8, 0x0FFA, - 0x0FE0, 0x001F, 0x0279, 0x01C4, 0x0FCB, 0x0FF9, - 0x0FE1, 0x0019, 0x0272, 0x01CE, 0x0FCD, 0x0FF9, - 0x0FE3, 0x0013, 0x026A, 0x01D9, 0x0FCF, 0x0FF8, - 0x0FE4, 0x000D, 0x0263, 0x01E3, 0x0FD2, 0x0FF7, - 0x0FE6, 
0x0008, 0x025B, 0x01EC, 0x0FD5, 0x0FF6, - 0x0FE7, 0x0002, 0x0253, 0x01F7, 0x0FD8, 0x0FF5, - 0x0FE9, 0x0FFD, 0x024A, 0x0202, 0x0FDB, 0x0FF3, - 0x0FEA, 0x0FF8, 0x0242, 0x020B, 0x0FDF, 0x0FF2, - 0x0FEC, 0x0FF3, 0x0239, 0x0215, 0x0FE2, 0x0FF1, - 0x0FED, 0x0FEF, 0x0230, 0x021E, 0x0FE6, 0x0FF0, - 0x0FEF, 0x0FEB, 0x0226, 0x0226, 0x0FEB, 0x0FEF, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.8_p_10qb_ -// 6 -// 64 -// input/output = 0.800000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_80[198] = { - 0x0FBF, 0x00A1, 0x0340, 0x00A1, 0x0FBF, 0x0000, - 0x0FC1, 0x0095, 0x0340, 0x00AD, 0x0FBC, 0x0001, - 0x0FC4, 0x0089, 0x033E, 0x00BA, 0x0FBA, 0x0001, - 0x0FC6, 0x007D, 0x033D, 0x00C6, 0x0FB8, 0x0002, - 0x0FC9, 0x0072, 0x033A, 0x00D3, 0x0FB6, 0x0002, - 0x0FCC, 0x0067, 0x0338, 0x00DF, 0x0FB3, 0x0003, - 0x0FCE, 0x005C, 0x0334, 0x00EE, 0x0FB1, 0x0003, - 0x0FD1, 0x0051, 0x0331, 0x00FA, 0x0FAF, 0x0004, - 0x0FD3, 0x0047, 0x032D, 0x0108, 0x0FAD, 0x0004, - 0x0FD6, 0x003D, 0x0328, 0x0116, 0x0FAB, 0x0004, - 0x0FD8, 0x0033, 0x0323, 0x0123, 0x0FAA, 0x0005, - 0x0FDB, 0x002A, 0x031D, 0x0131, 0x0FA8, 0x0005, - 0x0FDD, 0x0021, 0x0317, 0x013F, 0x0FA7, 0x0005, - 0x0FDF, 0x0018, 0x0311, 0x014D, 0x0FA5, 0x0006, - 0x0FE2, 0x0010, 0x030A, 0x015A, 0x0FA4, 0x0006, - 0x0FE4, 0x0008, 0x0302, 0x0169, 0x0FA3, 0x0006, - 0x0FE6, 0x0000, 0x02FB, 0x0177, 0x0FA2, 0x0006, - 0x0FE8, 0x0FF9, 0x02F3, 0x0185, 0x0FA1, 0x0006, - 0x0FEB, 0x0FF1, 0x02EA, 0x0193, 0x0FA1, 0x0006, - 0x0FED, 0x0FEB, 0x02E1, 0x01A1, 0x0FA0, 0x0006, - 0x0FEE, 0x0FE4, 0x02D8, 0x01B0, 0x0FA0, 0x0006, - 0x0FF0, 0x0FDE, 0x02CE, 0x01BE, 0x0FA0, 0x0006, - 0x0FF2, 0x0FD8, 0x02C5, 0x01CB, 0x0FA0, 0x0006, - 0x0FF4, 0x0FD3, 0x02BA, 0x01D8, 0x0FA1, 0x0006, - 0x0FF6, 0x0FCD, 0x02B0, 0x01E7, 0x0FA1, 0x0005, - 0x0FF7, 0x0FC8, 0x02A5, 0x01F5, 0x0FA2, 0x0005, - 0x0FF9, 0x0FC4, 0x029A, 0x0202, 0x0FA3, 0x0004, - 0x0FFA, 0x0FC0, 0x028E, 0x0210, 0x0FA4, 0x0004, - 0x0FFB, 0x0FBC, 0x0283, 0x021D, 0x0FA6, 0x0003, - 0x0FFD, 0x0FB8, 0x0276, 0x022A, 0x0FA8, 0x0003, - 0x0FFE, 0x0FB4, 0x026B, 0x0237, 0x0FAA, 0x0002, - 0x0FFF, 0x0FB1, 0x025E, 0x0245, 0x0FAC, 0x0001, - 0x0000, 0x0FAE, 0x0252, 0x0252, 0x0FAE, 0x0000, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.9_p_10qb_ -// 6 -// 64 -// input/output = 0.900000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_90[198] = { - 0x0FD8, 0x0055, 0x03A7, 0x0054, 0x0FD8, 0x0000, - 0x0FDB, 0x0047, 0x03A7, 0x0063, 0x0FD4, 0x0000, - 0x0FDF, 0x003B, 0x03A5, 0x006F, 0x0FD1, 0x0001, - 0x0FE2, 0x002E, 0x03A3, 0x007E, 0x0FCD, 0x0002, - 0x0FE5, 0x0022, 0x03A0, 0x008D, 0x0FCA, 0x0002, - 0x0FE8, 0x0017, 0x039D, 0x009B, 0x0FC6, 0x0003, - 0x0FEB, 0x000C, 0x0398, 0x00AC, 0x0FC2, 0x0003, - 0x0FEE, 0x0001, 0x0394, 0x00BA, 0x0FBF, 0x0004, - 0x0FF1, 0x0FF7, 0x038E, 0x00CA, 0x0FBB, 0x0005, - 0x0FF4, 0x0FED, 0x0388, 0x00DA, 0x0FB8, 0x0005, - 0x0FF6, 0x0FE4, 0x0381, 0x00EB, 0x0FB4, 0x0006, - 0x0FF9, 0x0FDB, 0x037A, 0x00FA, 0x0FB1, 0x0007, - 0x0FFB, 0x0FD3, 0x0372, 0x010B, 0x0FAD, 0x0008, - 0x0FFD, 0x0FCB, 0x0369, 0x011D, 0x0FAA, 0x0008, - 0x0000, 0x0FC3, 0x0360, 0x012E, 0x0FA6, 0x0009, - 0x0002, 0x0FBC, 0x0356, 0x013F, 0x0FA3, 0x000A, - 0x0003, 0x0FB6, 0x034C, 
0x0150, 0x0FA0, 0x000B, - 0x0005, 0x0FB0, 0x0341, 0x0162, 0x0F9D, 0x000B, - 0x0007, 0x0FAA, 0x0336, 0x0173, 0x0F9A, 0x000C, - 0x0008, 0x0FA5, 0x032A, 0x0185, 0x0F97, 0x000D, - 0x000A, 0x0FA0, 0x031E, 0x0197, 0x0F94, 0x000D, - 0x000B, 0x0F9B, 0x0311, 0x01A9, 0x0F92, 0x000E, - 0x000C, 0x0F97, 0x0303, 0x01BC, 0x0F8F, 0x000F, - 0x000D, 0x0F94, 0x02F6, 0x01CD, 0x0F8D, 0x000F, - 0x000E, 0x0F91, 0x02E8, 0x01DE, 0x0F8B, 0x0010, - 0x000F, 0x0F8E, 0x02D9, 0x01F1, 0x0F89, 0x0010, - 0x0010, 0x0F8B, 0x02CA, 0x0202, 0x0F88, 0x0011, - 0x0010, 0x0F89, 0x02BB, 0x0214, 0x0F87, 0x0011, - 0x0011, 0x0F87, 0x02AB, 0x0226, 0x0F86, 0x0011, - 0x0011, 0x0F86, 0x029C, 0x0236, 0x0F85, 0x0012, - 0x0011, 0x0F85, 0x028B, 0x0249, 0x0F84, 0x0012, - 0x0012, 0x0F84, 0x027B, 0x0259, 0x0F84, 0x0012, - 0x0012, 0x0F84, 0x026A, 0x026A, 0x0F84, 0x0012, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_1_p_10qb_ -// 6 -// 64 -// input/output = 1.000000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_1_00[198] = { - 0x0000, 0x0000, 0x0400, 0x0000, 0x0000, 0x0000, - 0x0003, 0x0FF3, 0x0400, 0x000D, 0x0FFD, 0x0000, - 0x0006, 0x0FE7, 0x03FE, 0x001C, 0x0FF9, 0x0000, - 0x0009, 0x0FDB, 0x03FC, 0x002B, 0x0FF5, 0x0000, - 0x000C, 0x0FD0, 0x03F9, 0x003A, 0x0FF1, 0x0000, - 0x000E, 0x0FC5, 0x03F5, 0x004A, 0x0FED, 0x0001, - 0x0011, 0x0FBB, 0x03F0, 0x005A, 0x0FE9, 0x0001, - 0x0013, 0x0FB2, 0x03EB, 0x006A, 0x0FE5, 0x0001, - 0x0015, 0x0FA9, 0x03E4, 0x007B, 0x0FE1, 0x0002, - 0x0017, 0x0FA1, 0x03DD, 0x008D, 0x0FDC, 0x0002, - 0x0018, 0x0F99, 0x03D4, 0x00A0, 0x0FD8, 0x0003, - 0x001A, 0x0F92, 0x03CB, 0x00B2, 0x0FD3, 0x0004, - 0x001B, 0x0F8C, 0x03C1, 0x00C6, 0x0FCE, 0x0004, - 0x001C, 0x0F86, 0x03B7, 0x00D9, 0x0FC9, 0x0005, - 0x001D, 0x0F80, 0x03AB, 0x00EE, 0x0FC4, 0x0006, - 0x001E, 0x0F7C, 0x039F, 0x0101, 0x0FBF, 0x0007, - 0x001F, 0x0F78, 0x0392, 0x0115, 0x0FBA, 0x0008, - 0x001F, 0x0F74, 0x0385, 0x012B, 0x0FB5, 0x0008, - 0x0020, 0x0F71, 0x0376, 0x0140, 0x0FB0, 0x0009, - 0x0020, 0x0F6E, 0x0367, 0x0155, 0x0FAB, 0x000B, - 0x0020, 0x0F6C, 0x0357, 0x016B, 0x0FA6, 0x000C, - 0x0020, 0x0F6A, 0x0347, 0x0180, 0x0FA2, 0x000D, - 0x0020, 0x0F69, 0x0336, 0x0196, 0x0F9D, 0x000E, - 0x0020, 0x0F69, 0x0325, 0x01AB, 0x0F98, 0x000F, - 0x001F, 0x0F68, 0x0313, 0x01C3, 0x0F93, 0x0010, - 0x001F, 0x0F69, 0x0300, 0x01D8, 0x0F8F, 0x0011, - 0x001E, 0x0F69, 0x02ED, 0x01EF, 0x0F8B, 0x0012, - 0x001D, 0x0F6A, 0x02D9, 0x0205, 0x0F87, 0x0014, - 0x001D, 0x0F6C, 0x02C5, 0x021A, 0x0F83, 0x0015, - 0x001C, 0x0F6E, 0x02B1, 0x0230, 0x0F7F, 0x0016, - 0x001B, 0x0F70, 0x029C, 0x0247, 0x0F7B, 0x0017, - 0x001A, 0x0F72, 0x0287, 0x025D, 0x0F78, 0x0018, - 0x0019, 0x0F75, 0x0272, 0x0272, 0x0F75, 0x0019, -}; - -/* Converted scaler coeff tables from S1.10 to S1.12 */ -static uint16_t easf_filter_3tap_64p_ratio_0_30_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_40_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_50_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_60_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_70_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_80_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_90_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_1_00_s1_12[99]; -static uint16_t easf_filter_4tap_64p_ratio_0_30_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_40_s1_12[132]; -static uint16_t 
easf_filter_4tap_64p_ratio_0_50_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_60_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_70_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_80_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_90_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_1_00_s1_12[132]; -static uint16_t easf_filter_6tap_64p_ratio_0_30_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_40_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_50_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_60_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_70_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_80_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_90_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_1_00_s1_12[198]; - -struct scale_ratio_to_reg_value_lookup easf_v_bf3_mode_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x0000}, - {1, 1, 0x0000}, - {-1, -1, 0x0002}, -}; - -struct scale_ratio_to_reg_value_lookup easf_h_bf3_mode_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x0000}, - {1, 1, 0x0000}, - {-1, -1, 0x0002}, -}; - -struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_6tap_lookup[] = { - {3, 10, 0x4100}, - {4, 10, 0x4100}, - {5, 10, 0x4100}, - {6, 10, 0x4100}, - {7, 10, 0x4100}, - {8, 10, 0x4100}, - {9, 10, 0x4100}, - {1, 1, 0x4100}, - {-1, -1, 0x4100}, -}; - -struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_6tap_lookup[] = { - {3, 10, 0x4000}, - {4, 10, 0x4000}, - {5, 10, 0x4000}, - {6, 10, 0x4000}, - {7, 10, 0x4000}, - {8, 10, 0x4000}, - {9, 10, 0x4000}, - {1, 1, 0x4000}, - {-1, -1, 0x4000}, -}; - -struct scale_ratio_to_reg_value_lookup easf_gain_ring6_6tap_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x251F}, - {5, 10, 0x291F}, - {6, 10, 0xA51F}, - {7, 10, 0xA51F}, - {8, 10, 0xAA66}, - {9, 10, 0xA51F}, - {1, 1, 0xA640}, - {-1, -1, 0xA640}, -}; - -struct scale_ratio_to_reg_value_lookup easf_gain_ring4_6tap_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x9600}, - {5, 10, 0xA460}, - {6, 10, 0xA8E0}, - {7, 10, 0xAC00}, - {8, 10, 0xAD20}, - {9, 10, 0xAFC0}, - {1, 1, 0xB058}, - {-1, -1, 0xB058}, -}; - -struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_4tap_lookup[] = { - {3, 10, 0x4100}, - {4, 10, 0x4100}, - {5, 10, 0x4100}, - {6, 10, 0x4100}, - {7, 10, 0x4100}, - {8, 10, 0x4100}, - {9, 10, 0x4100}, - {1, 1, 0x4100}, - {-1, -1, 0x4100}, -}; - -struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_4tap_lookup[] = { - {3, 10, 0x4000}, - {4, 10, 0x4000}, - {5, 10, 0x4000}, - {6, 10, 0x4000}, - {7, 10, 0x4000}, - {8, 10, 0x4000}, - {9, 10, 0x4000}, - {1, 1, 0x4000}, - {-1, -1, 0x4000}, -}; - -struct scale_ratio_to_reg_value_lookup easf_gain_ring6_4tap_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x0000}, - {1, 1, 0x0000}, - {-1, -1, 0x0000}, -}; - -struct scale_ratio_to_reg_value_lookup easf_gain_ring4_4tap_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x9900}, - {7, 10, 0xA100}, - {8, 10, 0xA8C0}, - {9, 10, 0xAB20}, - {1, 1, 0xAC00}, - {-1, -1, 0xAC00}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_uptilt_offset_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x4100}, - {9, 
10, 0x9F00}, - {1, 1, 0xA4C0}, - {-1, -1, 0xA8D8}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt_maxval_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x4000}, - {9, 10, 0x24FE}, - {1, 1, 0x2D64}, - {-1, -1, 0x3ADB}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_slope_lookup[] = { - {3, 10, 0x3800}, - {4, 10, 0x3800}, - {5, 10, 0x3800}, - {6, 10, 0x3800}, - {7, 10, 0x3800}, - {8, 10, 0x3886}, - {9, 10, 0x3940}, - {1, 1, 0x3A4E}, - {-1, -1, 0x3B66}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt1_slope_lookup[] = { - {3, 10, 0x3800}, - {4, 10, 0x3800}, - {5, 10, 0x3800}, - {6, 10, 0x3800}, - {7, 10, 0x3800}, - {8, 10, 0x36F4}, - {9, 10, 0x359C}, - {1, 1, 0x3360}, - {-1, -1, 0x2F20}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_slope_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x359C}, - {1, 1, 0x31F0}, - {-1, -1, 0x1F00}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_offset_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x9F00}, - {1, 1, 0xA400}, - {-1, -1, 0x9E00}, -}; - -void spl_init_easf_filter_coeffs(void) -{ - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_30, - easf_filter_3tap_64p_ratio_0_30_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_40, - easf_filter_3tap_64p_ratio_0_40_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_50, - easf_filter_3tap_64p_ratio_0_50_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_60, - easf_filter_3tap_64p_ratio_0_60_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_70, - easf_filter_3tap_64p_ratio_0_70_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_80, - easf_filter_3tap_64p_ratio_0_80_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_90, - easf_filter_3tap_64p_ratio_0_90_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_1_00, - easf_filter_3tap_64p_ratio_1_00_s1_12, 3); - - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_30, - easf_filter_4tap_64p_ratio_0_30_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_40, - easf_filter_4tap_64p_ratio_0_40_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_50, - easf_filter_4tap_64p_ratio_0_50_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_60, - easf_filter_4tap_64p_ratio_0_60_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_70, - easf_filter_4tap_64p_ratio_0_70_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_80, - easf_filter_4tap_64p_ratio_0_80_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_90, - easf_filter_4tap_64p_ratio_0_90_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_1_00, - easf_filter_4tap_64p_ratio_1_00_s1_12, 4); - - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_30, - easf_filter_6tap_64p_ratio_0_30_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_40, - easf_filter_6tap_64p_ratio_0_40_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_50, - easf_filter_6tap_64p_ratio_0_50_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_60, - 
easf_filter_6tap_64p_ratio_0_60_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_70, - easf_filter_6tap_64p_ratio_0_70_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_80, - easf_filter_6tap_64p_ratio_0_80_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_90, - easf_filter_6tap_64p_ratio_0_90_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_1_00, - easf_filter_6tap_64p_ratio_1_00_s1_12, 6); -} - -uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio) -{ - if (ratio.value < spl_fixpt_from_fraction(3, 10).value) - return easf_filter_3tap_64p_ratio_0_30_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) - return easf_filter_3tap_64p_ratio_0_40_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) - return easf_filter_3tap_64p_ratio_0_50_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) - return easf_filter_3tap_64p_ratio_0_60_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) - return easf_filter_3tap_64p_ratio_0_70_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) - return easf_filter_3tap_64p_ratio_0_80_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) - return easf_filter_3tap_64p_ratio_0_90_s1_12; - else - return easf_filter_3tap_64p_ratio_1_00_s1_12; -} - -uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio) -{ - if (ratio.value < spl_fixpt_from_fraction(3, 10).value) - return easf_filter_4tap_64p_ratio_0_30_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) - return easf_filter_4tap_64p_ratio_0_40_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) - return easf_filter_4tap_64p_ratio_0_50_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) - return easf_filter_4tap_64p_ratio_0_60_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) - return easf_filter_4tap_64p_ratio_0_70_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) - return easf_filter_4tap_64p_ratio_0_80_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) - return easf_filter_4tap_64p_ratio_0_90_s1_12; - else - return easf_filter_4tap_64p_ratio_1_00_s1_12; -} - -uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio) -{ - if (ratio.value < spl_fixpt_from_fraction(3, 10).value) - return easf_filter_6tap_64p_ratio_0_30_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) - return easf_filter_6tap_64p_ratio_0_40_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) - return easf_filter_6tap_64p_ratio_0_50_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) - return easf_filter_6tap_64p_ratio_0_60_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) - return easf_filter_6tap_64p_ratio_0_70_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) - return easf_filter_6tap_64p_ratio_0_80_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) - return easf_filter_6tap_64p_ratio_0_90_s1_12; - else - return easf_filter_6tap_64p_ratio_1_00_s1_12; -} - -uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) -{ - if (taps == 6) - return spl_get_easf_filter_6tap_64p(ratio); - else if (taps == 4) - return spl_get_easf_filter_4tap_64p(ratio); - else if (taps == 3) - return spl_get_easf_filter_3tap_64p(ratio); - else { - /* should never happen, bug */ - 
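/*
 * (Selection example for the helpers above: a ratio of 0.65 falls past
 * the < 0.6 test and matches < 0.7, so the _0_70_s1_12 table is
 * returned; ratios of 0.9 and above fall through to the 1.00 table.)
 */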
SPL_BREAK_TO_DEBUGGER(); - return NULL; - } -} - -void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data, bool enable_easf_v, - bool enable_easf_h) -{ - /* - * Old coefficients calculated scaling ratio = input / output - * New coefficients are calculated based on = output / input - */ - if (enable_easf_h) { - dscl_prog_data->filter_h = spl_dscl_get_easf_filter_coeffs_64p( - data->taps.h_taps, data->recip_ratios.horz); - - dscl_prog_data->filter_h_c = spl_dscl_get_easf_filter_coeffs_64p( - data->taps.h_taps_c, data->recip_ratios.horz_c); - } else { - dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps, data->ratios.horz); - - dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps_c, data->ratios.horz_c); - } - if (enable_easf_v) { - dscl_prog_data->filter_v = spl_dscl_get_easf_filter_coeffs_64p( - data->taps.v_taps, data->recip_ratios.vert); - - dscl_prog_data->filter_v_c = spl_dscl_get_easf_filter_coeffs_64p( - data->taps.v_taps_c, data->recip_ratios.vert_c); - } else { - dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps, data->ratios.vert); - - dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps_c, data->ratios.vert_c); - } -} - -static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct spl_fixed31_32 ratio, - struct scale_ratio_to_reg_value_lookup *lookup_table_base_ptr, - unsigned int num_entries) -{ - unsigned int count = 0; - uint32_t value = 0; - struct scale_ratio_to_reg_value_lookup *lookup_table_index_ptr; - - lookup_table_index_ptr = (lookup_table_base_ptr + num_entries - 1); - value = lookup_table_index_ptr->reg_value; - - while (count < num_entries) { - - lookup_table_index_ptr = (lookup_table_base_ptr + count); - if (lookup_table_index_ptr->numer < 0) - break; - - if (ratio.value < spl_fixpt_from_fraction( - lookup_table_index_ptr->numer, - lookup_table_index_ptr->denom).value) { - value = lookup_table_index_ptr->reg_value; - break; - } - - count++; - } - return value; -} -uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries = sizeof(easf_v_bf3_mode_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_v_bf3_mode_lookup, num_entries); - return value; -} -uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries = sizeof(easf_h_bf3_mode_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_h_bf3_mode_lookup, num_entries); - return value; -} -uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 4) { - num_entries = sizeof(easf_reducer_gain6_4tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_reducer_gain6_4tap_lookup, num_entries); - } else if (taps == 6) { - num_entries = sizeof(easf_reducer_gain6_6tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_reducer_gain6_6tap_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 4) { - num_entries = sizeof(easf_reducer_gain4_4tap_lookup) / - sizeof(struct 
scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_reducer_gain4_4tap_lookup, num_entries); - } else if (taps == 6) { - num_entries = sizeof(easf_reducer_gain4_6tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_reducer_gain4_6tap_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 4) { - num_entries = sizeof(easf_gain_ring6_4tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_gain_ring6_4tap_lookup, num_entries); - } else if (taps == 6) { - num_entries = sizeof(easf_gain_ring6_6tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_gain_ring6_6tap_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 4) { - num_entries = sizeof(easf_gain_ring4_4tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_gain_ring4_4tap_lookup, num_entries); - } else if (taps == 6) { - num_entries = sizeof(easf_gain_ring4_6tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_gain_ring4_6tap_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_dntilt_uptilt_offset_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_dntilt_uptilt_offset_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_uptilt_maxval_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_uptilt_maxval_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_dntilt_slope_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_dntilt_slope_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_uptilt1_slope_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_uptilt1_slope_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_uptilt2_slope_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - 
easf_3tap_uptilt2_slope_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_uptilt2_offset_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_uptilt2_offset_lookup, num_entries); - } else - value = 0; - return value; -} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h deleted file mode 100644 index 8bb2b8108e38..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: MIT */ - -/* Copyright 2024 Advanced Micro Devices, Inc. */ - -#ifndef __DC_SPL_SCL_EASF_FILTERS_H__ -#define __DC_SPL_SCL_EASF_FILTERS_H__ - -#include "dc_spl_types.h" - -struct scale_ratio_to_reg_value_lookup { - int numer; - int denom; - const uint32_t reg_value; -}; - -void spl_init_easf_filter_coeffs(void); -uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio); -uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio); -uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio); -uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); -void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data, bool enable_easf_v, - bool enable_easf_h); - -uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio); -uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio); -uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio); - -#endif /* __DC_SPL_SCL_EASF_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index b9a7b77a7167..e2baaf584139 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -2,8 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
-#include "dc_spl_types.h" -#include "spl_debug.h" #include "dc_spl_scl_filters.h" //========================================= // = 2 @@ -1319,97 +1317,97 @@ static const uint16_t filter_8tap_64p_183[264] = { 0x3FD4, 0x3F84, 0x0214, 0x0694, 0x0694, 0x0214, 0x3F84, 0x3FD4 }; -const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_3tap_16p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_3tap_16p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_3tap_16p_149; else return filter_3tap_16p_183; } -const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_3tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_3tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_3tap_64p_149; else return filter_3tap_64p_183; } -const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_4tap_16p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_4tap_16p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_4tap_16p_149; else return filter_4tap_16p_183; } -const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_4tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_4tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_4tap_64p_149; else return filter_4tap_64p_183; } -const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_5tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_5tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_5tap_64p_149; else return filter_5tap_64p_183; } -const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_6tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < 
dc_fixpt_from_fraction(4, 3).value) return filter_6tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_6tap_64p_149; else return filter_6tap_64p_183; } -const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_7tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_7tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_7tap_64p_149; else return filter_7tap_64p_183; } -const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_8tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_8tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_8tap_64p_149; else return filter_8tap_64p_183; @@ -1424,29 +1422,3 @@ const uint16_t *spl_get_filter_2tap_64p(void) { return filter_2tap_64p; } - -const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) -{ - if (taps == 8) - return spl_get_filter_8tap_64p(ratio); - else if (taps == 7) - return spl_get_filter_7tap_64p(ratio); - else if (taps == 6) - return spl_get_filter_6tap_64p(ratio); - else if (taps == 5) - return spl_get_filter_5tap_64p(ratio); - else if (taps == 4) - return spl_get_filter_4tap_64p(ratio); - else if (taps == 3) - return spl_get_filter_3tap_64p(ratio); - else if (taps == 2) - return spl_get_filter_2tap_64p(); - else if (taps == 1) - return NULL; - else { - /* should never happen, bug */ - SPL_BREAK_TO_DEBUGGER(); - return NULL; - } -} - diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h index 48202bc4f81e..6d96aca53b24 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h @@ -7,16 +7,53 @@ #include "dc_spl_types.h" -const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio); +const 
uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio); const uint16_t *spl_get_filter_2tap_16p(void); const uint16_t *spl_get_filter_2tap_64p(void); -const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_16p_upscale(void); +const uint16_t *spl_get_filter_3tap_16p_116(void); +const uint16_t *spl_get_filter_3tap_16p_149(void); +const uint16_t *spl_get_filter_3tap_16p_183(void); +const uint16_t *spl_get_filter_4tap_16p_upscale(void); +const uint16_t *spl_get_filter_4tap_16p_116(void); +const uint16_t *spl_get_filter_4tap_16p_149(void); +const uint16_t *spl_get_filter_4tap_16p_183(void); + +const uint16_t *spl_get_filter_3tap_64p_upscale(void); +const uint16_t *spl_get_filter_3tap_64p_116(void); +const uint16_t *spl_get_filter_3tap_64p_149(void); +const uint16_t *spl_get_filter_3tap_64p_183(void); + +const uint16_t *spl_get_filter_4tap_64p_upscale(void); +const uint16_t *spl_get_filter_4tap_64p_116(void); +const uint16_t *spl_get_filter_4tap_64p_149(void); +const uint16_t *spl_get_filter_4tap_64p_183(void); + +const uint16_t *spl_get_filter_5tap_64p_upscale(void); +const uint16_t *spl_get_filter_5tap_64p_116(void); +const uint16_t *spl_get_filter_5tap_64p_149(void); +const uint16_t *spl_get_filter_5tap_64p_183(void); + +const uint16_t *spl_get_filter_6tap_64p_upscale(void); +const uint16_t *spl_get_filter_6tap_64p_116(void); +const uint16_t *spl_get_filter_6tap_64p_149(void); +const uint16_t *spl_get_filter_6tap_64p_183(void); + +const uint16_t *spl_get_filter_7tap_64p_upscale(void); +const uint16_t *spl_get_filter_7tap_64p_116(void); +const uint16_t *spl_get_filter_7tap_64p_149(void); +const uint16_t *spl_get_filter_7tap_64p_183(void); + +const uint16_t *spl_get_filter_8tap_64p_upscale(void); +const uint16_t *spl_get_filter_8tap_64p_116(void); +const uint16_t *spl_get_filter_8tap_64p_149(void); +const uint16_t *spl_get_filter_8tap_64p_183(void); #endif /* __DC_SPL_SCL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c new file mode 100644 index 000000000000..bb0e1b80ec3c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c @@ -0,0 +1,25 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 1438a86826a4..caaa9ced2ec4 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -2,15 +2,14 @@ // // Copyright 2024 Advanced Micro Devices, Inc. +#include "os_types.h" // swap +#ifndef ASSERT +#define ASSERT(_bool) ((void *)0) +#endif +#include "include/fixed31_32.h" // fixed31_32 and related functions #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ -#include "spl_os_types.h" // swap -#ifndef SPL_ASSERT -#define SPL_ASSERT(_bool) ((void *)0) -#endif -#include "spl_fixpt31_32.h" // fixed31_32 and related functions - enum lb_memory_config { /* Enable all 3 pieces of memory */ LB_MEMORY_CONFIG_0 = 0, @@ -39,16 +38,16 @@ struct spl_rect { }; struct spl_ratios { - struct spl_fixed31_32 horz; - struct spl_fixed31_32 vert; - struct spl_fixed31_32 horz_c; - struct spl_fixed31_32 vert_c; + struct fixed31_32 horz; + struct fixed31_32 vert; + struct fixed31_32 horz_c; + struct fixed31_32 vert_c; }; struct spl_inits { - struct spl_fixed31_32 h; - struct spl_fixed31_32 h_c; - struct spl_fixed31_32 v; - struct spl_fixed31_32 v_c; + struct fixed31_32 h; + struct fixed31_32 h_c; + struct fixed31_32 v; + struct fixed31_32 v_c; }; struct spl_taps { @@ -81,8 +80,6 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_420BPP10, /*end of pixel format definition*/ SPL_PIXEL_FORMAT_INVALID, - SPL_PIXEL_FORMAT_422BPP8, - SPL_PIXEL_FORMAT_422BPP10, SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16, SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, @@ -138,7 +135,6 @@ struct spl_scaler_data { struct spl_rect viewport_c; struct spl_rect recout; struct spl_ratios ratios; - struct spl_ratios recip_ratios; struct spl_inits inits; }; @@ -408,16 +404,11 @@ struct dscl_prog_data { const uint16_t *filter_blur_scale_h; }; -/* SPL input and output definitions */ -// SPL scratch struct -struct spl_scratch { - // Pack all SPL outputs in scl_data - struct spl_scaler_data scl_data; -}; - /* SPL input and output definitions */ // SPL outputs struct struct spl_out { + // Pack all SPL outputs in scl_data + struct spl_scaler_data scl_data; // Pack all output need to program hw registers struct dscl_prog_data *dscl_prog_data; }; @@ -500,10 +491,6 @@ struct spl_in { bool prefer_easf; bool disable_easf; struct spl_debug debug; - bool is_fullscreen; - bool is_hdr_on; - int h_active; - int v_active; }; // end of SPL inputs diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h deleted file mode 100644 index a36239ab8d1c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright © 1997-2004 Advanced Micro Devices, Inc. All rights reserved. 
*/ - -#ifndef SPL_DEBUG_H -#define SPL_DEBUG_H - -#ifdef SPL_ASSERT -#undef SPL_ASSERT -#endif -#define SPL_ASSERT(b) - -#define SPL_ASSERT_CRITICAL(expr) do {if (expr)/* Do nothing */; } while (0) - -#ifdef SPL_DALMSG -#undef SPL_DALMSG -#endif -#define SPL_DALMSG(b) - -#ifdef SPL_DAL_ASSERT_MSG -#undef SPL_DAL_ASSERT_MSG -#endif -#define SPL_DAL_ASSERT_MSG(b, m) - -#endif // SPL_DEBUG_H diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c deleted file mode 100644 index 2bb1de88aef7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "spl_fixpt31_32.h" - -static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL }; -static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL }; -static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL }; - -static inline unsigned long long abs_i64( - long long arg) -{ - if (arg > 0) - return (unsigned long long)arg; - else - return (unsigned long long)(-arg); -} - -/* - * @brief - * result = dividend / divisor - * *remainder = dividend % divisor - */ -static inline unsigned long long complete_integer_division_u64( - unsigned long long dividend, - unsigned long long divisor, - unsigned long long *remainder) -{ - unsigned long long result; - - ASSERT(divisor); - - result = spl_div64_u64_rem(dividend, divisor, remainder); - - return result; -} - - -#define FRACTIONAL_PART_MASK \ - ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) - -#define GET_INTEGER_PART(x) \ - ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) - -#define GET_FRACTIONAL_PART(x) \ - (FRACTIONAL_PART_MASK & (x)) - -struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator) -{ - struct spl_fixed31_32 res; - - bool arg1_negative = numerator < 0; - bool arg2_negative = denominator < 0; - - unsigned long long arg1_value = arg1_negative ? -numerator : numerator; - unsigned long long arg2_value = arg2_negative ? 
-denominator : denominator; - - unsigned long long remainder; - - /* determine integer part */ - - unsigned long long res_value = complete_integer_division_u64( - arg1_value, arg2_value, &remainder); - - ASSERT(res_value <= LONG_MAX); - - /* determine fractional part */ - { - unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; - - do { - remainder <<= 1; - - res_value <<= 1; - - if (remainder >= arg2_value) { - res_value |= 1; - remainder -= arg2_value; - } - } while (--i != 0); - } - - /* round up LSB */ - { - unsigned long long summand = (remainder << 1) >= arg2_value; - - ASSERT(res_value <= LLONG_MAX - summand); - - res_value += summand; - } - - res.value = (long long)res_value; - - if (arg1_negative ^ arg2_negative) - res.value = -res.value; - - return res; -} - -struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - struct spl_fixed31_32 res; - - bool arg1_negative = arg1.value < 0; - bool arg2_negative = arg2.value < 0; - - unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; - unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; - - unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); - unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); - - unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); - unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); - - unsigned long long tmp; - - res.value = arg1_int * arg2_int; - - ASSERT(res.value <= (long long)LONG_MAX); - - res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; - - tmp = arg1_int * arg2_fra; - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - tmp = arg2_int * arg1_fra; - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - tmp = arg1_fra * arg2_fra; - - tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + - (tmp >= (unsigned long long)spl_fixpt_half.value); - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - if (arg1_negative ^ arg2_negative) - res.value = -res.value; - - return res; -} - -struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg) -{ - struct spl_fixed31_32 res; - - unsigned long long arg_value = abs_i64(arg.value); - - unsigned long long arg_int = GET_INTEGER_PART(arg_value); - - unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); - - unsigned long long tmp; - - res.value = arg_int * arg_int; - - ASSERT(res.value <= (long long)LONG_MAX); - - res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; - - tmp = arg_int * arg_fra; - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - tmp = arg_fra * arg_fra; - - tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + - (tmp >= (unsigned long long)spl_fixpt_half.value); - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - return res; -} - -struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) -{ - /* - * @note - * Good idea to use Newton's method - */ - - ASSERT(arg.value); - - return spl_fixpt_from_fraction( - spl_fixpt_one.value, - arg.value); -} - -struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg) -{ - struct spl_fixed31_32 square; - - struct spl_fixed31_32 res = spl_fixpt_one; - - int n = 27; - - struct spl_fixed31_32 arg_norm = arg; - - if (spl_fixpt_le( - spl_fixpt_two_pi, - spl_fixpt_abs(arg))) { - arg_norm = spl_fixpt_sub( - arg_norm, - 
spl_fixpt_mul_int( - spl_fixpt_two_pi, - (int)spl_div64_s64( - arg_norm.value, - spl_fixpt_two_pi.value))); - } - - square = spl_fixpt_sqr(arg_norm); - - do { - res = spl_fixpt_sub( - spl_fixpt_one, - spl_fixpt_div_int( - spl_fixpt_mul( - square, - res), - n * (n - 1))); - - n -= 2; - } while (n > 2); - - if (arg.value != arg_norm.value) - res = spl_fixpt_div( - spl_fixpt_mul(res, arg_norm), - arg); - - return res; -} - -struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg) -{ - return spl_fixpt_mul( - arg, - spl_fixpt_sinc(arg)); -} - -struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg) -{ - /* TODO implement argument normalization */ - - const struct spl_fixed31_32 square = spl_fixpt_sqr(arg); - - struct spl_fixed31_32 res = spl_fixpt_one; - - int n = 26; - - do { - res = spl_fixpt_sub( - spl_fixpt_one, - spl_fixpt_div_int( - spl_fixpt_mul( - square, - res), - n * (n - 1))); - - n -= 2; - } while (n != 0); - - return res; -} - -/* - * @brief - * result = exp(arg), - * where abs(arg) < 1 - * - * Calculated as Taylor series. - */ -static struct spl_fixed31_32 fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg) -{ - unsigned int n = 9; - - struct spl_fixed31_32 res = spl_fixpt_from_fraction( - n + 2, - n + 1); - /* TODO find correct res */ - - ASSERT(spl_fixpt_lt(arg, spl_fixpt_one)); - - do - res = spl_fixpt_add( - spl_fixpt_one, - spl_fixpt_div_int( - spl_fixpt_mul( - arg, - res), - n)); - while (--n != 1); - - return spl_fixpt_add( - spl_fixpt_one, - spl_fixpt_mul( - arg, - res)); -} - -struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg) -{ - /* - * @brief - * Main equation is: - * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), - * where m = round(x / ln(2)), r = x - m * ln(2) - */ - - if (spl_fixpt_le( - spl_fixpt_ln2_div_2, - spl_fixpt_abs(arg))) { - int m = spl_fixpt_round( - spl_fixpt_div( - arg, - spl_fixpt_ln2)); - - struct spl_fixed31_32 r = spl_fixpt_sub( - arg, - spl_fixpt_mul_int( - spl_fixpt_ln2, - m)); - - ASSERT(m != 0); - - ASSERT(spl_fixpt_lt( - spl_fixpt_abs(r), - spl_fixpt_one)); - - if (m > 0) - return spl_fixpt_shl( - fixed31_32_exp_from_taylor_series(r), - (unsigned char)m); - else - return spl_fixpt_div_int( - fixed31_32_exp_from_taylor_series(r), - 1LL << -m); - } else if (arg.value != 0) - return fixed31_32_exp_from_taylor_series(arg); - else - return spl_fixpt_one; -} - -struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg) -{ - struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one); - /* TODO improve 1st estimation */ - - struct spl_fixed31_32 error; - - ASSERT(arg.value > 0); - /* TODO if arg is negative, return NaN */ - /* TODO if arg is zero, return -INF */ - - do { - struct spl_fixed31_32 res1 = spl_fixpt_add( - spl_fixpt_sub( - res, - spl_fixpt_one), - spl_fixpt_div( - arg, - spl_fixpt_exp(res))); - - error = spl_fixpt_sub( - res, - res1); - - res = res1; - /* TODO determine max_allowed_error based on quality of exp() */ - } while (abs_i64(error.value) > 100ULL); - - return res; -} - - -/* this function is a generic helper to translate fixed point value to - * specified integer format that will consist of integer_bits integer part and - * fractional_bits fractional part. For example it is used in - * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional - * part in 32 bits. It is used in hw programming (scaler) - */ - -static inline unsigned int ux_dy( - long long value, - unsigned int integer_bits, - unsigned int fractional_bits) -{ - /* 1. 
create mask of integer part */ - unsigned int result = (1 << integer_bits) - 1; - /* 2. mask out fractional part */ - unsigned int fractional_part = FRACTIONAL_PART_MASK & value; - /* 3. shrink fixed point integer part to be of integer_bits width*/ - result &= GET_INTEGER_PART(value); - /* 4. make space for fractional part to be filled in after integer */ - result <<= fractional_bits; - /* 5. shrink fixed point fractional part to of fractional_bits width*/ - fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; - /* 6. merge the result */ - return result | fractional_part; -} - -static inline unsigned int clamp_ux_dy( - long long value, - unsigned int integer_bits, - unsigned int fractional_bits, - unsigned int min_clamp) -{ - unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); - - if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) - return (1 << (integer_bits + fractional_bits)) - 1; - else if (truncated_val > min_clamp) - return truncated_val; - else - return min_clamp; -} - -unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg) -{ - return ux_dy(arg.value, 4, 19); -} - -unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg) -{ - return ux_dy(arg.value, 3, 19); -} - -unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg) -{ - return ux_dy(arg.value, 2, 19); -} - -unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg) -{ - return ux_dy(arg.value, 0, 19); -} - -unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg) -{ - return clamp_ux_dy(arg.value, 0, 14, 1); -} - -unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg) -{ - return clamp_ux_dy(arg.value, 0, 10, 1); -} - -int spl_fixpt_s4d19(struct spl_fixed31_32 arg) -{ - if (arg.value < 0) - return -(int)ux_dy(spl_fixpt_abs(arg).value, 4, 19); - else - return ux_dy(arg.value, 4, 19); -} - -struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, - unsigned int integer_bits, - unsigned int fractional_bits) -{ - struct spl_fixed31_32 fixpt_value = spl_fixpt_zero; - struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero; - long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; - - fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); - frac_mask = frac_mask << fractional_bits; - fixpt_int_value.value = value & frac_mask; - fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); - fixpt_value.value |= fixpt_int_value.value; - return fixpt_value; -} - -struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, - unsigned int frac_value, - unsigned int integer_bits, - unsigned int fractional_bits) -{ - struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value); - - fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); - return fixpt_value; -} diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h deleted file mode 100644 index 27ec6d416b7c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h +++ /dev/null @@ -1,546 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __SPL_FIXED31_32_H__ -#define __SPL_FIXED31_32_H__ - -#include "os_types.h" -#include "spl_os_types.h" // swap -#ifndef ASSERT -#define ASSERT(_bool) ((void *)0) -#endif - -#ifndef LLONG_MAX -#define LLONG_MAX 9223372036854775807ll -#endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1ll) -#endif - -#define FIXED31_32_BITS_PER_FRACTIONAL_PART 32 -#ifndef LLONG_MIN -#define LLONG_MIN (1LL<<63) -#endif -#ifndef LLONG_MAX -#define LLONG_MAX (-1LL>>1) -#endif - -/* - * @brief - * Arithmetic operations on real numbers - * represented as fixed-point numbers. - * There are: 1 bit for sign, - * 31 bit for integer part, - * 32 bits for fractional part. - * - * @note - * Currently, overflows and underflows are asserted; - * no special result returned. - */ - -struct spl_fixed31_32 { - long long value; -}; - - -/* - * @brief - * Useful constants - */ - -static const struct spl_fixed31_32 spl_fixpt_zero = { 0 }; -static const struct spl_fixed31_32 spl_fixpt_epsilon = { 1LL }; -static const struct spl_fixed31_32 spl_fixpt_half = { 0x80000000LL }; -static const struct spl_fixed31_32 spl_fixpt_one = { 0x100000000LL }; - -/* - * @brief - * Initialization routines - */ - -/* - * @brief - * result = numerator / denominator - */ -struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator); - -/* - * @brief - * result = arg - */ -static inline struct spl_fixed31_32 spl_fixpt_from_int(int arg) -{ - struct spl_fixed31_32 res; - - res.value = (long long) arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; - - return res; -} - -/* - * @brief - * Unary operators - */ - -/* - * @brief - * result = -arg - */ -static inline struct spl_fixed31_32 spl_fixpt_neg(struct spl_fixed31_32 arg) -{ - struct spl_fixed31_32 res; - - res.value = -arg.value; - - return res; -} - -/* - * @brief - * result = abs(arg) := (arg >= 0) ? 
arg : -arg - */ -static inline struct spl_fixed31_32 spl_fixpt_abs(struct spl_fixed31_32 arg) -{ - if (arg.value < 0) - return spl_fixpt_neg(arg); - else - return arg; -} - -/* - * @brief - * Binary relational operators - */ - -/* - * @brief - * result = arg1 < arg2 - */ -static inline bool spl_fixpt_lt(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - return arg1.value < arg2.value; -} - -/* - * @brief - * result = arg1 <= arg2 - */ -static inline bool spl_fixpt_le(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - return arg1.value <= arg2.value; -} - -/* - * @brief - * result = arg1 == arg2 - */ -static inline bool spl_fixpt_eq(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - return arg1.value == arg2.value; -} - -/* - * @brief - * result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_min(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - if (arg1.value <= arg2.value) - return arg1; - else - return arg2; -} - -/* - * @brief - * result = max(arg1, arg2) := (arg1 <= arg2) ? arg2 : arg1 - */ -static inline struct spl_fixed31_32 spl_fixpt_max(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - if (arg1.value <= arg2.value) - return arg2; - else - return arg1; -} - -/* - * @brief - * | min_value, when arg <= min_value - * result = | arg, when min_value < arg < max_value - * | max_value, when arg >= max_value - */ -static inline struct spl_fixed31_32 spl_fixpt_clamp( - struct spl_fixed31_32 arg, - struct spl_fixed31_32 min_value, - struct spl_fixed31_32 max_value) -{ - if (spl_fixpt_le(arg, min_value)) - return min_value; - else if (spl_fixpt_le(max_value, arg)) - return max_value; - else - return arg; -} - -/* - * @brief - * Binary shift operators - */ - -/* - * @brief - * result = arg << shift - */ -static inline struct spl_fixed31_32 spl_fixpt_shl(struct spl_fixed31_32 arg, unsigned char shift) -{ - ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || - ((arg.value < 0) && (arg.value >= ~(LLONG_MAX >> shift)))); - - arg.value = arg.value << shift; - - return arg; -} - -/* - * @brief - * result = arg >> shift - */ -static inline struct spl_fixed31_32 spl_fixpt_shr(struct spl_fixed31_32 arg, unsigned char shift) -{ - bool negative = arg.value < 0; - - if (negative) - arg.value = -arg.value; - arg.value = arg.value >> shift; - if (negative) - arg.value = -arg.value; - return arg; -} - -/* - * @brief - * Binary additive operators - */ - -/* - * @brief - * result = arg1 + arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_add(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - struct spl_fixed31_32 res; - - ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || - ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); - - res.value = arg1.value + arg2.value; - - return res; -} - -/* - * @brief - * result = arg1 + arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_add_int(struct spl_fixed31_32 arg1, int arg2) -{ - return spl_fixpt_add(arg1, spl_fixpt_from_int(arg2)); -} - -/* - * @brief - * result = arg1 - arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_sub(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - struct spl_fixed31_32 res; - - ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || - ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); - - res.value = arg1.value - arg2.value; - - return res; -} - -/* - * @brief - * result = arg1 - arg2 - */ -static inline struct spl_fixed31_32 
spl_fixpt_sub_int(struct spl_fixed31_32 arg1, int arg2) -{ - return spl_fixpt_sub(arg1, spl_fixpt_from_int(arg2)); -} - - -/* - * @brief - * Binary multiplicative operators - */ - -/* - * @brief - * result = arg1 * arg2 - */ -struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2); - - -/* - * @brief - * result = arg1 * arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_mul_int(struct spl_fixed31_32 arg1, int arg2) -{ - return spl_fixpt_mul(arg1, spl_fixpt_from_int(arg2)); -} - -/* - * @brief - * result = square(arg) := arg * arg - */ -struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg); - -/* - * @brief - * result = arg1 / arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_div_int(struct spl_fixed31_32 arg1, long long arg2) -{ - return spl_fixpt_from_fraction(arg1.value, spl_fixpt_from_int((int)arg2).value); -} - -/* - * @brief - * result = arg1 / arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_div(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - return spl_fixpt_from_fraction(arg1.value, arg2.value); -} - -/* - * @brief - * Reciprocal function - */ - -/* - * @brief - * result = reciprocal(arg) := 1 / arg - * - * @note - * No special actions taken in case argument is zero. - */ -struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg); - -/* - * @brief - * Trigonometric functions - */ - -/* - * @brief - * result = sinc(arg) := sin(arg) / arg - * - * @note - * Argument specified in radians, - * internally it's normalized to [-2pi...2pi] range. - */ -struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg); - -/* - * @brief - * result = sin(arg) - * - * @note - * Argument specified in radians, - * internally it's normalized to [-2pi...2pi] range. - */ -struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg); - -/* - * @brief - * result = cos(arg) - * - * @note - * Argument specified in radians - * and should be in [-2pi...2pi] range - - * passing arguments outside that range - * will cause incorrect result! - */ -struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg); - -/* - * @brief - * Transcendent functions - */ - -/* - * @brief - * result = exp(arg) - * - * @note - * Currently, function is verified for abs(arg) <= 1. - */ -struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg); - -/* - * @brief - * result = log(arg) - * - * @note - * Currently, abs(arg) should be less than 1. - * No normalization is done. - * Currently, no special actions taken - * in case of invalid argument(s). Take care! - */ -struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg); - -/* - * @brief - * Power function - */ - -/* - * @brief - * result = pow(arg1, arg2) - * - * @note - * Currently, abs(arg1) should be less than 1. Take care! - */ -static inline struct spl_fixed31_32 spl_fixpt_pow(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - if (arg1.value == 0) - return arg2.value == 0 ? spl_fixpt_one : spl_fixpt_zero; - - return spl_fixpt_exp( - spl_fixpt_mul( - spl_fixpt_log(arg1), - arg2)); -} - -/* - * @brief - * Rounding functions - */ - -/* - * @brief - * result = floor(arg) := greatest integer lower than or equal to arg - */ -static inline int spl_fixpt_floor(struct spl_fixed31_32 arg) -{ - unsigned long long arg_value = arg.value > 0 ? 
arg.value : -arg.value; - - if (arg.value >= 0) - return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); - else - return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); -} - -/* - * @brief - * result = round(arg) := integer nearest to arg - */ -static inline int spl_fixpt_round(struct spl_fixed31_32 arg) -{ - unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; - - const long long summand = spl_fixpt_half.value; - - ASSERT(LLONG_MAX - (long long)arg_value >= summand); - - arg_value += summand; - - if (arg.value >= 0) - return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); - else - return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); -} - -/* - * @brief - * result = ceil(arg) := lowest integer greater than or equal to arg - */ -static inline int spl_fixpt_ceil(struct spl_fixed31_32 arg) -{ - unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; - - const long long summand = spl_fixpt_one.value - - spl_fixpt_epsilon.value; - - ASSERT(LLONG_MAX - (long long)arg_value >= summand); - - arg_value += summand; - - if (arg.value >= 0) - return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); - else - return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); -} - -/* the following two function are used in scaler hw programming to convert fixed - * point value to format 2 bits from integer part and 19 bits from fractional - * part. The same applies for u0d19, 0 bits from integer part and 19 bits from - * fractional - */ - -unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg); - -int spl_fixpt_s4d19(struct spl_fixed31_32 arg); - -static inline struct spl_fixed31_32 spl_fixpt_truncate(struct spl_fixed31_32 arg, unsigned int frac_bits) -{ - bool negative = arg.value < 0; - - if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) { - ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART); - return arg; - } - - if (negative) - arg.value = -arg.value; - arg.value &= (~0ULL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits); - if (negative) - arg.value = -arg.value; - return arg; -} - -struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits); -struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, - unsigned int frac_value, - unsigned int integer_bits, - unsigned int fractional_bits); - -#endif From 7fb363c57522b704e156fc4c5fbcb7ee133fe304 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 11 Jul 2024 14:31:27 -0400 Subject: [PATCH 163/447] drm/amd/display: Let drm_crtc_vblank_on/off manage interrupts [Why] We manage interrupts for CRTCs in two places: 1. In manage_dm_interrupts(), when CRTCs get enabled or disabled 2. When drm_vblank_get/put() starts or kills the vblank counter, calling into amdgpu_dm_crtc_set_vblank() The interrupts managed by these two places should be identical. [How] Since manage_dm_interrupts() already uses drm_crtc_vblank_on/off(), just move all CRTC interrupt management into amdgpu_dm_crtc_set_vblank(). This has the added benefit of disabling all CRTC and HUBP interrupts when there are no vblank requestors.
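In rough outline, the consolidated helper behaves like the sketch below (a simplified illustration, not the literal diff - error paths, the VRR/vupdate handling, and the debug prints are elided; see the hunks below for the real code):

	/* amdgpu_dm_crtc_set_vblank(), abridged: all per-CRTC interrupts
	 * are now taken or dropped together with the vblank request. */
	irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id);
	if (enable) {
		rc = amdgpu_irq_get(adev, &adev->crtc_irq, irq_type);
		if (!rc)
			rc = amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type);
	} else {
		rc = amdgpu_irq_put(adev, &adev->crtc_irq, irq_type);
		if (!rc)
			rc = amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type);
	}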
Note that there is a TODO item - unchanged from when it was first introduced - to properly identify the HUBP instance from the OTG instance, rather than just assume direct mapping. Signed-off-by: Leo Li Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 +------------- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 48 +++++++++++++++++-- 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index fa8d455de7f5..42ca9b52fe5e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8246,42 +8246,10 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, bool enable) { - /* - * We have no guarantee that the frontend index maps to the same - * backend index - some even map to more than one. - * - * TODO: Use a different interrupt or check DC itself for the mapping. - */ - int irq_type = - amdgpu_display_crtc_idx_to_irq_type( - adev, - acrtc->crtc_id); - - if (enable) { + if (enable) drm_crtc_vblank_on(&acrtc->base); - amdgpu_irq_get( - adev, - &adev->pageflip_irq, - irq_type); -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_get( - adev, - &adev->vline0_irq, - irq_type); -#endif - } else { -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_put( - adev, - &adev->vline0_irq, - irq_type); -#endif - amdgpu_irq_put( - adev, - &adev->pageflip_irq, - irq_type); + else drm_crtc_vblank_off(&acrtc->base); - } } static void dm_update_pflip_irq_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 99014339aaa3..1fe038616d9f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -286,11 +286,14 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); struct amdgpu_display_manager *dm = &adev->dm; struct vblank_control_work *work; + int irq_type; int rc = 0; if (acrtc->otg_inst == -1) goto skip; + irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); + if (enable) { /* vblank irq on -> Only need vupdate irq in vrr mode */ if (amdgpu_dm_crtc_vrr_active(acrtc_state)) @@ -303,13 +306,52 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) if (rc) return rc; - rc = (enable) - ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id) - : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id); + /* crtc vblank or vstartup interrupt */ + if (enable) { + rc = amdgpu_irq_get(adev, &adev->crtc_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get crtc_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->crtc_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put crtc_irq ret=%d\n", rc); + } if (rc) return rc; + /* + * hubp surface flip interrupt + * + * We have no guarantee that the frontend index maps to the same + * backend index - some even map to more than one. + * + * TODO: Use a different interrupt or check DC itself for the mapping. 
+ */ + if (enable) { + rc = amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get pageflip_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put pageflip_irq ret=%d\n", rc); + } + + if (rc) + return rc; + +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + /* crtc vline0 interrupt, only available on DCN+ */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) != 0) { + if (enable) { + rc = amdgpu_irq_get(adev, &adev->vline0_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get vline0_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->vline0_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put vline0_irq ret=%d\n", rc); + } + + if (rc) + return rc; + } +#endif skip: if (amdgpu_in_reset(adev)) return 0; From 17e68f89132b9ee4b144358b49e5df404b314181 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 11 Jul 2024 14:38:11 -0400 Subject: [PATCH 164/447] drm/amd/display: Run idle optimizations at end of vblank handler [Why & How] 1. After allowing idle optimizations, hw programming is disallowed. 2. Before hw programming, we need to disallow idle optimizations. Otherwise, in scenario 1, we will immediately kick hw out of idle optimizations with register access. Scenario 2 is less of a concern, since any register access will kick hw out of idle optimizations. But we'll do it early for correctness. Signed-off-by: Leo Li Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 1fe038616d9f..a2cf2c066a76 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -251,9 +251,10 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) else if (dm->active_vblank_irq_count) dm->active_vblank_irq_count--; - dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0); - - DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); + if (dm->active_vblank_irq_count > 0) { + DRM_DEBUG_KMS("Allow idle optimizations (MALL): false\n"); + dc_allow_idle_optimizations(dm->dc, false); + } /* * Control PSR based on vblank requirements from OS @@ -272,6 +273,11 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->stream->link->replay_settings.replay_feature_enabled); } + if (dm->active_vblank_irq_count == 0) { + DRM_DEBUG_KMS("Allow idle optimizations (MALL): true\n"); + dc_allow_idle_optimizations(dm->dc, true); + } + mutex_unlock(&dm->dc_lock); dc_stream_release(vblank_work->stream); From 0e4b858285e633ea6bc7335fd66529955d023af9 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Fri, 12 Jul 2024 12:50:48 -0400 Subject: [PATCH 165/447] drm/amd/display: Add two dummy I2C entries for GPIO port mapping issue [Why] When only 4 I2C lines are declared, two dummies are required to correctly map the GPIO ports. [How] Add one more I2C dummy entry to match the GPIO ports.
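(Aside, for illustration only and not part of the change: the ddc register tables are indexed by DDC line, so every line up to the highest GPIO port needs a slot, real or dummy. A hypothetical accessor makes the assumption visible:)

	/* Hypothetical helper, sketching the indexing assumption: the array
	 * index tracks the DDC line, so lines with no physical port still
	 * need dummy entries to keep the line-to-register mapping aligned. */
	static const struct ddc_registers *ddc_regs_for_line(unsigned int line)
	{
		return &ddc_data_regs_dcn[line];
	}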
Signed-off-by: Chris Park Reviewed-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../display/dc/gpio/dcn401/hw_factory_dcn401.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c index 46415cab23ab..928abca18a18 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c @@ -86,7 +86,13 @@ static const struct ddc_registers ddc_data_regs_dcn[] = { ddc_data_regs_dcn2(2), ddc_data_regs_dcn2(3), ddc_data_regs_dcn2(4), -// ddc_data_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, { // add a dummy entry for cases no such port {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, @@ -107,7 +113,13 @@ static const struct ddc_registers ddc_clk_regs_dcn[] = { ddc_clk_regs_dcn2(2), ddc_clk_regs_dcn2(3), ddc_clk_regs_dcn2(4), -// ddc_clk_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, { // add a dummy entry for cases no such port {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, From 076362d931d0d5ed01a3d1cd4d066f2e6e7f86f8 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 26 Jul 2024 15:09:59 +0530 Subject: [PATCH 166/447] drm/amdgpu: print VCN instance dump for valid instance VCN dump is dependent on power state of the ip. Dump is valid if VCN was powered up at the time of ip dump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 28 +++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 9e1cbeee10db..c2278cc49dd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2329,7 +2329,7 @@ static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, j; uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t inst_off; + uint32_t inst_off, is_powered; if (!adev->vcn.ip_dump) return; @@ -2342,11 +2342,17 @@ static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) } inst_off = i * reg_count; - drm_printf(p, "\nActive Instance:VCN%d\n", i); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } } } @@ -2354,7 +2360,7 @@ static void vcn_v3_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, j; - bool reg_safe; + bool is_powered; uint32_t inst_off; uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); @@ -2366,11 +2372,13 @@ static void vcn_v3_0_dump_ip_state(void *handle) continue; inst_off = i * reg_count; - reg_safe = (RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS) & 
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - if (reg_safe) - for (j = 0; j < reg_count; j++) + if (is_powered) + for (j = 1; j < reg_count; j++) adev->vcn.ip_dump[inst_off + j] = RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); } From fcb3a4fb8255149a73afeb3d8f2397eaac3a46b0 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 15 Jul 2024 15:52:46 -0400 Subject: [PATCH 167/447] drm/amd/display: Request 0MHz dispclk for zero display case [Why] If we aren't entering RCG/IPS2 or CLKSTOP is not supported by PMFW, then we should be requesting a dispclk value of 0MHz to PMFW. Currently we run at max clock since there's an assumption in APU clock table formulation where we can run at any DISPCLK at any state, so the real clock value ends up as 1200MHz - the maximum. [How] Set to 0 instead of the minimum value in the state array. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Duncan Ma Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index d5dcc8b77281..866b0abcff1b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -575,7 +575,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s unsigned int lowest_state_idx = 0; out_clks.p_state_supported = true; - out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dispclk_mhz * 1000; + out_clks.dispclk_khz = 0; /* No requirement, and lowest index will generally be maximum dispclk. */ out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000; out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000; out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts; From 680caca6feee720d0a6cb00f892c0d2d46dc4075 Mon Sep 17 00:00:00 2001 From: Bhuvanachandra Pinninti Date: Tue, 16 Jul 2024 18:53:03 +0530 Subject: [PATCH 168/447] drm/amd/display: Refactor for dio Moved files to their respective folders to improve the DIO code structure.
Signed-off-by: Bhuvanachandra Pinninti Reviewed-by: Martin Leung Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/Makefile | 1 - drivers/gpu/drm/amd/display/dc/dcn301/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn314/Makefile | 10 ---------- drivers/gpu/drm/amd/display/dc/dio/Makefile | 18 ++++++++++++++++++ .../{ => dio}/dcn301/dcn301_dio_link_encoder.c | 0 .../{ => dio}/dcn301/dcn301_dio_link_encoder.h | 0 .../dcn314/dcn314_dio_stream_encoder.c | 0 .../dcn314/dcn314_dio_stream_encoder.h | 0 .../dc/dio/dcn321/dcn321_dio_link_encoder.c | 1 - .../dc/dio/dcn35/dcn35_dio_stream_encoder.c | 1 - 10 files changed, 19 insertions(+), 14 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn314/Makefile rename drivers/gpu/drm/amd/display/dc/{ => dio}/dcn301/dcn301_dio_link_encoder.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => dio}/dcn301/dcn301_dio_link_encoder.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => dio}/dcn314/dcn314_dio_stream_encoder.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => dio}/dcn314/dcn314_dio_stream_encoder.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 80069651def3..8992e697759f 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -35,7 +35,6 @@ DC_LIBS += dcn201 DC_LIBS += dcn30 DC_LIBS += dcn301 DC_LIBS += dcn31 -DC_LIBS += dcn314 DC_LIBS += dml DC_LIBS += dml2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index dc37dbf870df..fb4814ab3f05 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -3,7 +3,7 @@ # # Makefile for dcn30. -DCN301 = dcn301_dio_link_encoder.o dcn301_panel_cntl.o +DCN301 = dcn301_panel_cntl.o AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile deleted file mode 100644 index 15fdcf7c6466..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved. -# -# Makefile for dcn314. 
- -DCN314 = dcn314_dio_stream_encoder.o - -AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN314) diff --git a/drivers/gpu/drm/amd/display/dc/dio/Makefile b/drivers/gpu/drm/amd/display/dc/dio/Makefile index 67840e474d7a..0dfd480976f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dio/Makefile @@ -51,6 +51,15 @@ AMD_DAL_DIO_DCN30 = $(addprefix $(AMDDALPATH)/dc/dio/dcn30/,$(DIO_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN30) +############################################################################### +# DCN301 +############################################################################### +DIO_DCN301 = dcn301_dio_link_encoder.o + +AMD_DAL_DIO_DCN301 = $(addprefix $(AMDDALPATH)/dc/dio/dcn301/,$(DIO_DCN301)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN301) + ############################################################################### # DCN31 ############################################################################### @@ -60,6 +69,15 @@ AMD_DAL_DIO_DCN31 = $(addprefix $(AMDDALPATH)/dc/dio/dcn31/,$(DIO_DCN31)) AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN31) +############################################################################### +# DCN314 +############################################################################### +DIO_DCN314 = dcn314_dio_stream_encoder.o + +AMD_DAL_DIO_DCN314 = $(addprefix $(AMDDALPATH)/dc/dio/dcn314/,$(DIO_DCN314)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN314) + ############################################################################### # DCN32 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c rename to drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h rename to drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c rename to drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h rename to drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c index 05783daa62ac..2ed382a8e79c 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c @@ -23,7 +23,6 @@ * */ - #include "reg_helper.h" #include "core_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c 
index fcc88ef83e6a..19e50fbf908d 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -22,7 +22,6 @@ * */ - #include "dc_bios_types.h" #include "dcn30/dcn30_dio_stream_encoder.h" #include "dcn314/dcn314_dio_stream_encoder.h" From d5022deb4a526009fdc20e4d62528b25b05112dc Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 15 Jul 2024 13:54:18 -0400 Subject: [PATCH 169/447] drm/amd/display: Add option to disable unbounded req in DML21 Use the debug option to disable unbounded requesting in DML21. Signed-off-by: Alvin Lee Reviewed-by: Austin Zheng Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 0f34688e4058..65776602648d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -959,6 +959,11 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->minimize_det_reallocation = true; dml_dispcfg->overrides.enable_subvp_implicit_pmo = true; + if (in_dc->debug.disable_unbounded_requesting) { + dml_dispcfg->overrides.hw.force_unbounded_requesting.enable = true; + dml_dispcfg->overrides.hw.force_unbounded_requesting.value = false; + } + for (stream_index = 0; stream_index < context->stream_count; stream_index++) { disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]); From 3f96f545f877ac59d0c967f52d760b4b2b3b9a47 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 7 Jun 2024 22:09:53 -0600 Subject: [PATCH 170/447] drm/amd/display: Fix possible overflow in integer multiplication [WHAT & HOW] Multiplying two integers may overflow in a context that expects an expression of unsigned long long (64 bits). This can be fixed by casting one operand to unsigned long long to force a 64-bit result. This fixes 2 OVERFLOW_BEFORE_WIDEN issues reported by Coverity.
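For illustration only (not part of this patch): a minimal standalone C sketch of why Coverity flags this as OVERFLOW_BEFORE_WIDEN. With two 32-bit operands the multiply is performed in 32-bit arithmetic, and only the already-truncated product is widened to 64 bits; casting one operand first forces the whole multiply into 64-bit arithmetic, as the hunks below do. The timing values here are invented purely so the product exceeds 32 bits.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t v_total = 90000, h_total = 60000;   /* made-up values */

		/* 32-bit multiply: the product wraps before the widening. */
		uint64_t broken = v_total * h_total;

		/* Cast one operand: the multiply itself is done in 64 bits. */
		uint64_t fixed = v_total * (unsigned long long)h_total;

		printf("broken: %llu\n", (unsigned long long)broken); /* 1105032704 */
		printf("fixed:  %llu\n", (unsigned long long)fixed);  /* 5400000000 */
		return 0;
	}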
Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/resource/dcn32/dcn32_resource_helpers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c index 47c8a9fbe754..f5a4e97c40ce 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c @@ -663,7 +663,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } @@ -724,7 +724,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } From 722e96c99f1d7532fdfbb557f50a399f6cc57d82 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 15 Jul 2024 10:24:58 -0600 Subject: [PATCH 171/447] drm/amd/display: Check null pointers before using them [WHAT & HOW] dc_link is null checked previously in the same function, indicating it might be null as reported by Coverity. This fixes 1 FORWARD_NULL issue reported by Coverity. Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 42ca9b52fe5e..b76579815bdf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12083,7 +12083,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, } } - as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link); + if (amdgpu_dm_connector->dc_link) + as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link); if (as_type == FREESYNC_TYPE_PCON_IN_WHITELIST) { i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); From 58a8ee96f84d2c21abb85ad8c22d2bbdf59bd7a9 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 15 Jul 2024 10:37:28 -0600 Subject: [PATCH 172/447] drm/amd/display: Check stream_status before it is used [WHAT & HOW] dc_state_get_stream_status can return null, and therefore null must be checked before stream_status is used. This fixes 1 NULL_RETURNS issue reported by Coverity. 
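As a general note (a sketch, not the driver code; the names below are hypothetical stand-ins): the shape of this class of Coverity fix is to capture the possibly-NULL return value once and branch on it, instead of dereferencing it unconditionally.

	#include <stddef.h>
	#include <stdio.h>

	struct stream_status { int plane_count; };

	/* Hypothetical lookup that may legitimately fail. */
	static struct stream_status *get_status(int stream_id)
	{
		static struct stream_status ok = { .plane_count = 2 };
		return stream_id == 0 ? &ok : NULL;
	}

	static void handle_stream(int stream_id)
	{
		struct stream_status *status = get_status(stream_id);

		if (status)  /* branch on the pointer before any dereference */
			printf("stream %d: %d plane(s)\n", stream_id, status->plane_count);
		else
			printf("stream %d: no status, skipping fast path\n", stream_id);
	}

	int main(void)
	{
		handle_stream(0);
		handle_stream(1);
		return 0;
	}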
Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 95d6e29d5e47..4c9e420742a2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3748,7 +3748,7 @@ static void commit_planes_for_stream_fast(struct dc *dc, surface_count, stream, context); - } else { + } else if (stream_status) { build_dmub_cmd_list(dc, srf_updates, surface_count, From 739d0f3e1f36738d4cd84166784a8f7a58d69612 Mon Sep 17 00:00:00 2001 From: Michael Chen Date: Tue, 23 Jul 2024 17:45:23 -0400 Subject: [PATCH 173/447] drm/amdgpu: increase mes log buffer size for gfx12 MES firmware requires larger log buffer for gfx12. Allocate proper buffer respectively for gfx11 and gfx12. Signed-off-by: Michael Chen Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 6 +++--- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 8 ++++++-- drivers/gpu/drm/amd/include/mes_v11_api_def.h | 3 +++ drivers/gpu/drm/amd/include/mes_v12_api_def.h | 3 +++ 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index e499d6ba306b..dac88d2dd70d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -103,7 +103,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) if (!amdgpu_mes_log_enable) return 0; - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -113,7 +113,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) return r; } - memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE); + memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size); return 0; @@ -1573,7 +1573,7 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); + mem, adev->mes.event_log_size, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index e11051271f71..2d659c612f03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,7 +52,6 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ -#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; @@ -135,8 +134,9 @@ struct amdgpu_mes { unsigned long *doorbell_bitmap; /* MES event log buffer */ - struct amdgpu_bo *event_log_gpu_obj; - uint64_t event_log_gpu_addr; + uint32_t event_log_size; + struct amdgpu_bo *event_log_gpu_obj; + uint64_t event_log_gpu_addr; void *event_log_cpu_addr; /* ip specific functions */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 8ce51b9236c1..f9343642ae7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1163,6 +1163,8 @@ static int mes_v11_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index c9f74231ad59..0713bc3eb263 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -551,8 +551,10 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.oversubscription_timer = 50; mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 0; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + } return mes_v12_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), @@ -1237,6 +1239,8 @@ static int mes_v12_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index b72d5d362251..21ceafce1f9b 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 1 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 + /* Driver submits one API(cmd) as a single Frame and this command size is same * for all API to ease the debugging and parsing of ring buffer. */ diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index ffd67c6ed9b3..4cf2c9f30b3d 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 0x14 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG_12 */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0xC000 + /* Driver submits one API(cmd) as a single Frame and this command size is same for all API * to ease the debugging and parsing of ring buffer. */ From 9724b8494d3e85cb01c377f201bfaf25fd7d38ff Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 16 Jul 2024 13:47:43 -0400 Subject: [PATCH 174/447] drm/amd/display: restore immediate_disable_crtc for w/a [why] immediate_disable_crtc does not reset ODM, whereas switching to disable_crtc would disable ODM as well; i.e. the ODM memory config needs to be restored and re-enabled at the end of the w/a.
Signed-off-by: Charlene Liu Reviewed-by: Xi (Alex) Liu Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 4 ++-- drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 2d5bd5c7ab94..e075b2720f96 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -149,8 +149,8 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * !pipe->stream->link_enc) && !stream_changed_otg_dig_on) { /* This w/a should not trigger when we have a dig active */ if (disable) { - if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->disable_crtc) - pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c index 6bbbf313b2bb..4b6446ed4ce4 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c @@ -149,7 +149,9 @@ static bool optc31_disable_crtc(struct timing_generator *optc) return true; } - +/* + * Immediate_Disable_Crtc - this is to temp disable Timing generator without reset ODM. + */ bool optc31_immediate_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -162,10 +164,12 @@ bool optc31_immediate_disable_crtc(struct timing_generator *optc) VTG0_ENABLE, 0); /* CRTC disabled, so disable clock. */ - REG_WAIT(OTG_CLOCK_CONTROL, + if (optc->ctx->dce_environment != DCE_ENV_DIAG) + REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); + /* clear the false state */ optc1_clear_optc_underflow(optc); From 093b79d034c59a3b66f6312863502378a422496d Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 16 Jul 2024 15:58:35 -0400 Subject: [PATCH 175/447] drm/amd/display: sync dmub output event type. [why] dmubfw added a new event type, update amdgpu to avoid "notify type 6 invalid" Signed-off-by: Charlene Liu Reviewed-by: Chris Park Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b76579815bdf..5b1e313a48e8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -877,6 +877,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) "HPD_IRQ", "SET_CONFIGC_REPLY", "DPIA_NOTIFICATION", + "HPD_SENSE_NOTIFY", }; do { From 4981bd4749fa769b3d6acbc82fe8059c030d0920 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 12 Jul 2024 18:02:30 +0800 Subject: [PATCH 176/447] drm/amd/display: Add a missing PSR state [Why & How] Add a missing PSR state to make the dmub_psr_get_state() return a correct PSR state. 
Signed-off-by: Tom Chung Reviewed-by: Sun peng Li Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index c550e8997033..97279b080f3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -590,6 +590,7 @@ enum dc_psr_state { PSR_STATE5c, PSR_STATE_HWLOCK_MGR, PSR_STATE_POLLVUPDATE, + PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME, PSR_STATE_INVALID = 0xFF }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 0f3d15126a1e..cae18f8c1c9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -94,6 +94,8 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state) state = PSR_STATE_HWLOCK_MGR; else if (raw_state == 0x61) state = PSR_STATE_POLLVUPDATE; + else if (raw_state == 0x62) + state = PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME; else state = PSR_STATE_INVALID; From df18a4de9e77ad92c472fd1eb0fb1255d52dd4cd Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 12 Jul 2024 17:29:07 +0800 Subject: [PATCH 177/447] drm/amd/display: Reset VRR config during resume [Why] After resume the system, the new_crtc_state->vrr_infopacket does not synchronize with the current state. It will affect the update_freesync_state_on_stream() does not update the state correctly. The previous patch causes a PSR SU regression that cannot let panel go into self-refresh mode. [How] Reset the VRR config during resume to force update the VRR config later. Fixes: eb6dfbb7a9c6 ("drm/amd/display: Reset freesync config before update new state") Signed-off-by: Tom Chung Reviewed-by: Sun peng Li Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5b1e313a48e8..b4fbccbf2f29 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -176,6 +176,7 @@ MODULE_FIRMWARE(FIRMWARE_DCN_401_DMUB); static int amdgpu_dm_init(struct amdgpu_device *adev); static void amdgpu_dm_fini(struct amdgpu_device *adev); static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector); +static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state); static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link) { @@ -3213,8 +3214,11 @@ static int dm_resume(void *handle) drm_connector_list_iter_end(&iter); /* Force mode set in atomic commit */ - for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { new_crtc_state->active_changed = true; + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + reset_freesync_config_for_crtc(dm_new_crtc_state); + } /* * atomic_check is expected to create the dc states. 
We need to release From bd870cfd21489d28195fda157710ebd4cecaa8ca Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 16 Jul 2024 17:41:54 -0400 Subject: [PATCH 178/447] drm/amd/display: Add seamless boot support for more DIG operation modes [Why] When pre-OS firmware enables display support for displays that operate the DIG in 2 pixels per cycle processing modes the inferred pixel rate from get_pixel_clk_frequency_100hz does not account for the true pixel rate since we're outputting 2 per cycle past the stream encoder. This causes seamless boot validation to abort early. [How] Add a new stream encoder function for getting pixels per cycle from the stream encoder. If the pixels per cycle is greater than 1 and the driver policy is to enable 2 pixels per cycle for post-OS then allow seamless boot to continue. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Duncan Ma Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 12 ++++++++++-- .../dc/dio/dcn35/dcn35_dio_stream_encoder.c | 19 +++++++++++++++++++ .../amd/display/dc/inc/hw/stream_encoder.h | 1 + 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 4c9e420742a2..b1253e4c81a8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1823,10 +1823,18 @@ bool dc_validate_boot_timing(const struct dc *dc, tg->funcs->get_optc_source(tg, &numOdmPipes, &id_src[0], &id_src[1]); - if (numOdmPipes == 2) + if (numOdmPipes == 2) { pix_clk_100hz *= 2; - if (numOdmPipes == 4) + } else if (numOdmPipes == 4) { pix_clk_100hz *= 4; + } else if (se && se->funcs->get_pixels_per_cycle) { + uint32_t pixels_per_cycle = se->funcs->get_pixels_per_cycle(se); + + if (pixels_per_cycle != 1 && !dc->debug.enable_dp_dig_pixel_rate_div_policy) + return false; + + pix_clk_100hz *= pixels_per_cycle; + } // Note: In rare cases, HW pixclk may differ from crtc's pixclk // slightly due to rounding issues in 10 kHz units. 
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index 19e50fbf908d..6ab2a218b769 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -422,6 +422,24 @@ void enc35_enable_fifo(struct stream_encoder *enc) REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); } +static uint32_t enc35_get_pixels_per_cycle(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t value; + + REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, &value); + + switch (value) { + case 0: + return 1; + case 1: + return 2; + default: + ASSERT_CRITICAL(false); + return 1; + } +} + static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .dp_set_odm_combine = enc314_dp_set_odm_combine, @@ -474,6 +492,7 @@ static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .disable_fifo = enc35_disable_fifo, .is_fifo_enabled = enc35_is_fifo_enabled, .map_stream_to_link = enc35_stream_encoder_map_to_link, + .get_pixels_per_cycle = enc35_get_pixels_per_cycle, }; void dcn35_dio_stream_encoder_construct( diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 6fe42120738d..fe7f3137f228 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -273,6 +273,7 @@ struct stream_encoder_funcs { void (*disable_fifo)(struct stream_encoder *enc); bool (*is_fifo_enabled)(struct stream_encoder *enc); void (*map_stream_to_link)(struct stream_encoder *enc, uint32_t stream_enc_inst, uint32_t link_enc_inst); + uint32_t (*get_pixels_per_cycle)(struct stream_encoder *enc); }; struct hpo_dp_stream_encoder_state { From cead9ac8b0992ae2659b637e86a0da7cfeb5e267 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 16 Jul 2024 13:39:10 -0400 Subject: [PATCH 179/447] drm/amd/display: Use correct cm_helper function Need to use cm3_helper function with DCN401 to avoid cases where high RGB component values can get set to zero if using the TF curve generated by cm_helper. 
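A note on the recurring pattern in this patch (shown as a minimal sketch with hypothetical names, not the DC code): the helper's boolean result is captured in rval and the output buffer is published only on success, so a failed curve translation hands the MPC a NULL lut_params rather than a stale or partially written table.

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	struct pwl_params { int num_points; };

	/* Hypothetical translator that can fail. */
	static bool translate_curve(struct pwl_params *out, bool fail)
	{
		if (fail)
			return false;
		out->num_points = 33;
		return true;
	}

	static const struct pwl_params *build_lut(struct pwl_params *scratch, bool fail)
	{
		bool rval = translate_curve(scratch, fail);

		return rval ? scratch : NULL; /* publish only on success */
	}

	int main(void)
	{
		struct pwl_params scratch;

		printf("ok:   %p\n", (void *)build_lut(&scratch, false));
		printf("fail: %p\n", (void *)build_lut(&scratch, true));
		return 0;
	}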
Signed-off-by: Ilya Bakoulin Reviewed-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 0fa610590245..8e1ca709d304 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -497,6 +497,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, enum MCM_LUT_XABLE lut3d_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut1d_xable = MCM_LUT_DISABLE; bool is_17x17x17 = true; + bool rval; dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); @@ -506,11 +507,10 @@ void dcn401_populate_mcm_luts(struct dc *dc, if (mcm_luts.lut1d_func->type == TF_TYPE_HWPWL) m_lut_params.pwl = &mcm_luts.lut1d_func->pwl; else if (mcm_luts.lut1d_func->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format( - dc->ctx, + rval = cm3_helper_translate_curve_to_hw_format( mcm_luts.lut1d_func, &dpp_base->regamma_params, false); - m_lut_params.pwl = &dpp_base->regamma_params; + m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { if (mpc->funcs->populate_lut) @@ -527,11 +527,10 @@ void dcn401_populate_mcm_luts(struct dc *dc, m_lut_params.pwl = &mcm_luts.shaper->pwl; else if (mcm_luts.shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { ASSERT(false); - cm_helper_translate_curve_to_hw_format( - dc->ctx, + rval = cm3_helper_translate_curve_to_hw_format( mcm_luts.shaper, &dpp_base->regamma_params, true); - m_lut_params.pwl = &dpp_base->regamma_params; + m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { if (mpc->funcs->populate_lut) @@ -669,6 +668,7 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; bool result = true; const struct pwl_params *lut_params = NULL; + bool rval; mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE; @@ -677,10 +677,9 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, if (plane_state->blend_tf.type == TF_TYPE_HWPWL) lut_params = &plane_state->blend_tf.pwl; else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - &plane_state->blend_tf, + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf, &dpp_base->regamma_params, false); - lut_params = &dpp_base->regamma_params; + lut_params = rval ? &dpp_base->regamma_params : NULL; } result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); lut_params = NULL; @@ -693,10 +692,9 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { // TODO: dpp_base replace ASSERT(false); - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - &plane_state->in_shaper_func, + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func, &dpp_base->shaper_params, true); - lut_params = &dpp_base->shaper_params; + lut_params = rval ? 
&dpp_base->shaper_params : NULL; } result = mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); From a15268787b79fd183dd526cc16bec9af4f4e49a1 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 17 Jul 2024 09:17:56 -0600 Subject: [PATCH 180/447] drm/amd/display: Avoid overflow assignment in link_dp_cts sampling_rate is a uint8_t but is assigned an unsigned int, and thus it can overflow. As a result, sampling_rate is changed to uint32_t. Similarly, LINK_QUAL_PATTERN_SET has a size of 2 bits, and it should only be assigned a value less than or equal to 4. This fixes 2 INTEGER_OVERFLOW issues reported by Coverity. Signed-off-by: Alex Hung Reviewed-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 2 +- drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c | 3 ++- drivers/gpu/drm/amd/display/include/dpcd_defs.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 519c3df78ee5..95c275bf649b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -727,7 +727,7 @@ struct dp_audio_test_data_flags { struct dp_audio_test_data { struct dp_audio_test_data_flags flags; - uint8_t sampling_rate; + uint32_t sampling_rate; uint8_t channel_count; uint8_t pattern_type; uint8_t pattern_period[8]; diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index df3781081da7..32d5a4b14333 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -775,7 +775,8 @@ bool dp_set_test_pattern( core_link_read_dpcd(link, DP_TRAINING_PATTERN_SET, &training_pattern.raw, sizeof(training_pattern)); - training_pattern.v1_3.LINK_QUAL_PATTERN_SET = pattern; + if (pattern <= PHY_TEST_PATTERN_END_DP11) + training_pattern.v1_3.LINK_QUAL_PATTERN_SET = pattern; core_link_write_dpcd(link, DP_TRAINING_PATTERN_SET, &training_pattern.raw, sizeof(training_pattern)); diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h index aee5170f5fb2..c246235e4afe 100644 --- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h +++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h @@ -76,6 +76,7 @@ enum dpcd_phy_test_patterns { PHY_TEST_PATTERN_D10_2, PHY_TEST_PATTERN_SYMBOL_ERROR, PHY_TEST_PATTERN_PRBS7, + PHY_TEST_PATTERN_END_DP11 = PHY_TEST_PATTERN_PRBS7, PHY_TEST_PATTERN_80BIT_CUSTOM,/* For DP1.2 only */ PHY_TEST_PATTERN_CP2520_1, PHY_TEST_PATTERN_CP2520_2, From 13d8850a3387635c051c5ed1c8a8b6c1e9bd1341 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 26 Jul 2024 18:07:41 +0530 Subject: [PATCH 181/447] drm/amdgpu: trigger ip dump before suspend of IP's Problem: The IP dump is currently done after all IPs are suspended, which for some IPs can also change the power state and software state; we do not want that reflected in the dump, since it may no longer match the state at the time of the hang. Solution: The IPs should be dumped as close as possible to the HW state in which the GPU hung, without trying to reinitialize any resource.
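To make the reordering concrete (a simplified, hedged sketch; these function names are stand-ins for the amdgpu_device_pre_asic_reset() flow, not the driver's API): the reset path conceptually becomes capture-then-teardown, so the devcoredump reflects the hung hardware rather than a partially suspended one.

	#include <stdio.h>

	static void stop_schedulers(void)   { puts("quiesce pending work"); }
	static void dump_ip_state(void)     { puts("dump IP state while HW still holds the hang"); }
	static void suspend_ip_blocks(void) { puts("suspend IPs (may change power/sw state)"); }

	int main(void)
	{
		stop_schedulers();
		dump_ip_state();     /* moved before suspend by this patch */
		suspend_ip_blocks();
		return 0;
	}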
Acked-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 60 +++++++++++----------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 730dae77570c..a2a1a3da17e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5277,11 +5277,29 @@ mode1_reset_failed: return ret; } +static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) +{ + int i; + + lockdep_assert_held(&adev->reset_domain->sem); + + for (i = 0; i < adev->reset_info.num_regs; i++) { + adev->reset_info.reset_dump_reg_value[i] = + RREG32(adev->reset_info.reset_dump_reg_list[i]); + + trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], + adev->reset_info.reset_dump_reg_value[i]); + } + + return 0; +} + int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { int i, r = 0; struct amdgpu_job *job = NULL; + struct amdgpu_device *tmp_adev = reset_context->reset_req_dev; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); @@ -5340,6 +5358,18 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, } } + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { + amdgpu_reset_reg_dumps(tmp_adev); + + dev_info(tmp_adev->dev, "Dumping IP State\n"); + /* Trigger ip dump before we reset the asic */ + for (i = 0; i < tmp_adev->num_ip_blocks; i++) + if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) + tmp_adev->ip_blocks[i].version->funcs + ->dump_ip_state((void *)tmp_adev); + dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); + } + if (need_full_reset) r = amdgpu_device_ip_suspend(adev); if (need_full_reset) @@ -5352,47 +5382,17 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, return r; } -static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) -{ - int i; - - lockdep_assert_held(&adev->reset_domain->sem); - - for (i = 0; i < adev->reset_info.num_regs; i++) { - adev->reset_info.reset_dump_reg_value[i] = - RREG32(adev->reset_info.reset_dump_reg_list[i]); - - trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], - adev->reset_info.reset_dump_reg_value[i]); - } - - return 0; -} - int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; - uint32_t i; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { - amdgpu_reset_reg_dumps(tmp_adev); - - dev_info(tmp_adev->dev, "Dumping IP State\n"); - /* Trigger ip dump before we reset the asic */ - for (i = 0; i < tmp_adev->num_ip_blocks; i++) - if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) - tmp_adev->ip_blocks[i].version->funcs - ->dump_ip_state((void *)tmp_adev); - dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); - } - reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ From 608d886c978cd5f3d8650630568d96c231845227 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 25 Jul 2024 17:30:37 -0400 Subject: [PATCH 182/447] drm/amdgpu: Fix APU handling in amdgpu_pm_load_smu_firmware() We only need 
to skip this on modern APUs. It's required on older APUs as it's where start_smu gets called from. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3502 Fixes: 064d92436b69 ("drm/amd/pm: avoid to load smu firmware for APUs") Reviewed-by: Tim Huang Signed-off-by: Alex Deucher Cc: Tim Huang --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index a1b8a82d77cf..8b7d6ed7e2ed 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -618,7 +618,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; int r = 0; - if (!pp_funcs || !pp_funcs->load_firmware || adev->flags & AMD_IS_APU) + if (!pp_funcs || !pp_funcs->load_firmware || + (is_support_sw_smu(adev) && (adev->flags & AMD_IS_APU))) return 0; mutex_lock(&adev->pm.mutex); From aeb81b62c7fe4782198e9dd79c7d6cdf04d92586 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 26 Jul 2024 15:40:15 +0200 Subject: [PATCH 183/447] drm/amdgpu: convert bios_hardcoded_edid to drm_edid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of manually passing around 'struct edid *' and its size, use 'struct drm_edid', which encapsulates a validated combination of both. As the drm_edid_ can handle NULL gracefully, the explicit checks can be dropped. Also save a few characters by transforming '&array[0]' to the equivalent 'array' and using 'max_t(int, ...)' instead of manual casts. Signed-off-by: Thomas Weißschuh Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 6 +----- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 2 +- drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 17 ++++++----------- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 2 +- 8 files changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index bd0fbdc5f55d..344e0a9ee08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -249,11 +249,7 @@ amdgpu_connector_find_encoder(struct drm_connector *connector, static struct edid * amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev) { - if (adev->mode_info.bios_hardcoded_edid) { - return kmemdup((unsigned char *)adev->mode_info.bios_hardcoded_edid, - adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); - } - return NULL; + return drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid)); } static void amdgpu_connector_get_edid(struct drm_connector *connector) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d002b845d8ac..5e3faefc5510 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -51,6 +51,7 @@ struct amdgpu_encoder; struct amdgpu_router; struct amdgpu_hpd; struct edid; +struct drm_edid; #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base) #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) @@ -326,8 +327,7 @@ struct amdgpu_mode_info { /* FMT dithering */ struct drm_property 
*dither_property; /* hardcoded DFP edid from BIOS */ - struct edid *bios_hardcoded_edid; - int bios_hardcoded_edid_size; + const struct drm_edid *bios_hardcoded_edid; /* firmware flags */ u32 firmware_flags; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 6415d0d039e1..e5f508d34ed8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -549,7 +549,7 @@ static int amdgpu_vkms_sw_fini(void *handle) adev->mode_info.mode_config_initialized = false; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); kfree(adev->amdgpu_vkms_output); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index ebf83fee43bb..8defca3705d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2064,23 +2064,18 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder) case LCD_FAKE_EDID_PATCH_RECORD_TYPE: fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { - struct edid *edid; + const struct drm_edid *edid; int edid_size; if (fake_edid_record->ucFakeEDIDLength == 128) edid_size = fake_edid_record->ucFakeEDIDLength; else edid_size = fake_edid_record->ucFakeEDIDLength * 128; - edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], - edid_size, GFP_KERNEL); - if (edid) { - if (drm_edid_is_valid(edid)) { - adev->mode_info.bios_hardcoded_edid = edid; - adev->mode_info.bios_hardcoded_edid_size = edid_size; - } else { - kfree(edid); - } - } + edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size); + if (drm_edid_valid(edid)) + adev->mode_info.bios_hardcoded_edid = edid; + else + drm_edid_free(edid); record += struct_size(fake_edid_record, ucFakeEDIDString, edid_size); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index dddb5fe16f2c..742adbc460c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2846,7 +2846,7 @@ static int dce_v10_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 11780e4d7e9f..8d46ebadfa46 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2973,7 +2973,7 @@ static int dce_v11_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 05c0df97f01d..f08dc6a3886f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2745,7 +2745,7 @@ static int dce_v6_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index dc73e301d937..a6a3adf2ae13 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2766,7 +2766,7 @@ static int dce_v8_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); From c6bb3acf1cdeed5d01ebde70b769a8e25993965d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 26 Jul 2024 15:40:16 +0200 Subject: [PATCH 184/447] drm/radeon: convert bios_hardcoded_edid to drm_edid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of manually passing around 'struct edid *' and its size, use 'struct drm_edid', which encapsulates a validated combination of both. As the drm_edid_ can handle NULL gracefully, the explicit checks can be dropped. Also save a few characters by transforming '&array[0]' to the equivalent 'array' and using 'max_t(int, ...)' instead of manual casts. Signed-off-by: Thomas Weißschuh Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_atombios.c | 17 +++++--------- drivers/gpu/drm/radeon/radeon_combios.c | 26 +++++----------------- drivers/gpu/drm/radeon/radeon_connectors.c | 4 ++-- drivers/gpu/drm/radeon/radeon_display.c | 2 +- drivers/gpu/drm/radeon/radeon_mode.h | 4 ++-- 5 files changed, 16 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 168f3f94003b..81a0a91921b9 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1716,23 +1716,18 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct case LCD_FAKE_EDID_PATCH_RECORD_TYPE: fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { - struct edid *edid; + const struct drm_edid *edid; int edid_size; if (fake_edid_record->ucFakeEDIDLength == 128) edid_size = fake_edid_record->ucFakeEDIDLength; else edid_size = fake_edid_record->ucFakeEDIDLength * 128; - edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], - edid_size, GFP_KERNEL); - if (edid) { - if (drm_edid_is_valid(edid)) { - rdev->mode_info.bios_hardcoded_edid = edid; - rdev->mode_info.bios_hardcoded_edid_size = edid_size; - } else { - kfree(edid); - } - } + edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size); + if (drm_edid_valid(edid)) + rdev->mode_info.bios_hardcoded_edid = edid; + else + drm_edid_free(edid); record += struct_size(fake_edid_record, ucFakeEDIDString, edid_size); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 41ddc576f8f8..df8d7f56b028 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -370,7 +370,7 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) { int edid_info, size; - struct edid *edid; + const struct drm_edid *edid; unsigned char *raw; edid_info = combios_get_table_offset(rdev_to_drm(rdev), COMBIOS_HARDCODED_EDID_TABLE); if (!edid_info) @@ -378,19 +378,14 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) raw = rdev->bios + edid_info; size = EDID_LENGTH * (raw[0x7e] + 1); - edid = kmalloc(size, GFP_KERNEL); - if (edid == NULL) - return false; + edid = drm_edid_alloc(raw, size); - memcpy((unsigned char *)edid, raw, size); - - if (!drm_edid_is_valid(edid)) { - kfree(edid); 
+ if (!drm_edid_valid(edid)) { + drm_edid_free(edid); return false; } rdev->mode_info.bios_hardcoded_edid = edid; - rdev->mode_info.bios_hardcoded_edid_size = size; return true; } @@ -398,18 +393,7 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) struct edid * radeon_bios_get_hardcoded_edid(struct radeon_device *rdev) { - struct edid *edid; - - if (rdev->mode_info.bios_hardcoded_edid) { - edid = kmalloc(rdev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); - if (edid) { - memcpy((unsigned char *)edid, - (unsigned char *)rdev->mode_info.bios_hardcoded_edid, - rdev->mode_info.bios_hardcoded_edid_size); - return edid; - } - } - return NULL; + return drm_edid_duplicate(drm_edid_raw(rdev->mode_info.bios_hardcoded_edid)); } static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 880edabfc9e3..528a8f3677c2 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1059,7 +1059,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force) */ if ((!rdev->is_atom_bios) && (ret == connector_status_disconnected) && - rdev->mode_info.bios_hardcoded_edid_size) { + rdev->mode_info.bios_hardcoded_edid) { ret = connector_status_connected; } @@ -1392,7 +1392,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) out: if ((!rdev->is_atom_bios) && (ret == connector_status_disconnected) && - rdev->mode_info.bios_hardcoded_edid_size) { + rdev->mode_info.bios_hardcoded_edid) { radeon_connector->use_digital = true; ret = connector_status_connected; } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 10fd58f400bc..8f5f8abcb1b4 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1658,7 +1658,7 @@ void radeon_modeset_fini(struct radeon_device *rdev) rdev->mode_info.mode_config_initialized = false; } - kfree(rdev->mode_info.bios_hardcoded_edid); + drm_edid_free(rdev->mode_info.bios_hardcoded_edid); /* free i2c buses */ radeon_i2c_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index e0a5af180801..421c83fc70dc 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -39,6 +39,7 @@ #include struct edid; +struct drm_edid; struct radeon_bo; struct radeon_device; @@ -262,8 +263,7 @@ struct radeon_mode_info { /* Output CSC */ struct drm_property *output_csc_property; /* hardcoded DFP edid from BIOS */ - struct edid *bios_hardcoded_edid; - int bios_hardcoded_edid_size; + const struct drm_edid *bios_hardcoded_edid; /* firmware flags */ u16 firmware_flags; From b5126ba85beadfa1f3cfdc9e7a8533ad444ca210 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Wed, 17 Jul 2024 18:29:04 -0400 Subject: [PATCH 185/447] drm/amd/display: Add new enable and disable functions Add new enable and disable functions based on DCCG spec. 
Signed-off-by: Hansen Dsouza Reviewed-by: Muhammad Ahmed Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 212 ++++++++++++++++++ 1 file changed, 212 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index b698b773338a..7d88b0ae241c 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -732,6 +732,206 @@ static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum physymclk_fe_so } } +static uint32_t dccg35_is_fe_rcg(struct dccg *dccg, int inst) +{ + uint32_t enable = 0; + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_FE_ROOT_GATE_DISABLE, &enable); + break; + case 1: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_FE_ROOT_GATE_DISABLE, &enable); + break; + case 2: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_FE_ROOT_GATE_DISABLE, &enable); + break; + case 3: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_FE_ROOT_GATE_DISABLE, &enable); + break; + case 4: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_FE_ROOT_GATE_DISABLE, &enable); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } + return enable; +} + +static uint32_t dccg35_is_symclk32_se_rcg(struct dccg *dccg, int inst) +{ + uint32_t disable_l1 = 0; + uint32_t disable_l2 = 0; + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, &disable_l2); + break; + case 1: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, &disable_l2); + break; + case 2: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, &disable_l2); + break; + case 3: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, &disable_l2); + break; + default: + BREAK_TO_DEBUGGER(); + return 0; + } + + /* return true if either block level or DCCG level gating is active */ + return (disable_l1 | disable_l2); +} + +static void dccg35_enable_symclk_fe_new( + struct dccg *dccg, + int inst, + enum physymclk_fe_source src) +{ + dccg35_set_physymclk_fe_rcg(dccg, inst, false); + dccg35_set_symclk_fe_src_new(dccg, src, inst); +} + +static void dccg35_disable_symclk_fe_new( + struct dccg *dccg, + int inst) +{ + dccg35_set_symclk_fe_src_new(dccg, PHYSYMCLK_FE_REFCLK, inst); + dccg35_set_physymclk_fe_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk_be_new( + struct dccg *dccg, + int inst, + enum physymclk_source src) +{ + dccg35_set_physymclk_rcg(dccg, inst, false); + dccg35_set_physymclk_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk_be_new( + struct dccg *dccg, + int inst) +{ + int i; + + /* Switch from functional clock to refclock */ + dccg35_set_physymclk_src_new(dccg, inst, PHYSYMCLK_REFCLK); + + /* Check if any other SE connected LE and disable them */ + for (i = 0; i < 4; i++) { + /* Make sure FE is not already in RCG */ + if (dccg35_is_fe_rcg(dccg, i) == 0) { + if (dccg35_is_symclk_fe_src_functional_be(dccg, i, inst)) + dccg35_disable_symclk_fe_new(dccg, i); + } + } + /* Safe to RCG SYMCLK*/ + dccg35_set_physymclk_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk32_se_new( + struct dccg *dccg, + int inst, 
+ enum symclk32_se_clk_source src) +{ + dccg35_set_symclk32_se_rcg(dccg, inst, false); + dccg35_set_symclk32_se_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk32_se_new( + struct dccg *dccg, + int inst) +{ + dccg35_set_symclk32_se_src_new(dccg, SYMCLK32_SE_REFCLK, inst); + dccg35_set_symclk32_se_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk32_le_new( + struct dccg *dccg, + int inst, + enum symclk32_le_clk_source src) +{ + dccg35_set_symclk32_le_rcg(dccg, inst, false); + dccg35_set_symclk32_le_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk32_le_new( + struct dccg *dccg, + int inst) +{ + int i; + + /* Switch from functional clock to refclock */ + dccg35_set_symclk32_le_src_new(dccg, inst, SYMCLK32_LE_REFCLK); + + /* Check if any SE are connected and disable SE as well */ + for (i = 0; i < 4; i++) { + /* Make sure FE is not already in RCG */ + if (dccg35_is_symclk32_se_rcg(dccg, i) == 0) { + /* Disable and SE connected to this LE before RCG */ + if (dccg35_is_symclk32_se_src_functional_le_new(dccg, i, inst)) + dccg35_disable_symclk32_se_new(dccg, i); + } + } + /* Safe to RCG SYM32_LE*/ + dccg35_set_symclk32_le_rcg(dccg, inst, true); +} + +static void dccg35_enable_dpp_new( + struct dccg *dccg, + int inst, + enum dppclk_clock_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* Sanitize inst before use in array de-ref */ + if (inst < 0) { + BREAK_TO_DEBUGGER(); + return; + } + dccg35_set_dppclk_rcg(dccg, inst, false); + dcn35_set_dppclk_src_new(dccg, inst, src); + /* Switch DPP clock to DTO */ + REG_SET_2(DPPCLK_DTO_PARAM[inst], 0, + DPPCLK0_DTO_PHASE, 0xFF, + DPPCLK0_DTO_MODULO, 0xFF); +} + +static void dccg35_disable_dpp_new( + struct dccg *dccg, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* Sanitize inst before use in array de-ref */ + if (inst < 0) { + BREAK_TO_DEBUGGER(); + return; + } + dcn35_set_dppclk_src_new(dccg, inst, DPP_REFCLK); + REG_SET_2(DPPCLK_DTO_PARAM[inst], 0, + DPPCLK0_DTO_PHASE, 0, + DPPCLK0_DTO_MODULO, 1); + dccg35_set_dppclk_rcg(dccg, inst, true); +} + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -1753,6 +1953,18 @@ struct dccg *dccg35_create( (void)&dccg35_set_physymclk_src_new; (void)&dccg35_is_symclk_fe_src_functional_be; (void)&dccg35_set_symclk_fe_src_new; + (void)&dccg35_is_fe_rcg; + (void)&dccg35_is_symclk32_se_rcg; + (void)&dccg35_enable_symclk_fe_new; + (void)&dccg35_disable_symclk_fe_new; + (void)&dccg35_enable_symclk_be_new; + (void)&dccg35_disable_symclk_be_new; + (void)&dccg35_enable_symclk32_se_new; + (void)&dccg35_disable_symclk32_se_new; + (void)&dccg35_enable_symclk32_le_new; + (void)&dccg35_disable_symclk32_le_new; + (void)&dccg35_enable_dpp_new; + (void)&dccg35_disable_dpp_new; base = &dccg_dcn->base; base->ctx = ctx; From c3f15273721f2ee60d32fc7d4f2c233a1eff47a8 Mon Sep 17 00:00:00 2001 From: Cruise Date: Thu, 18 Jul 2024 12:55:02 +0800 Subject: [PATCH 186/447] drm/amd/display: Add logs for debugging outbox The DP tunnel AUX reply is received through Outbox1. Print the Outbox1 status if an issue occurs. 
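Some background on why these three registers are useful together (an illustrative sketch, not the DMUB API): Outbox1 is a ring buffer, so a snapshot of its read pointer, write pointer, and size immediately shows whether replies are piling up unconsumed. Data is pending exactly when the write pointer has advanced past the read pointer, modulo the ring size; the values below are invented for illustration.

	#include <stdint.h>
	#include <stdio.h>

	/* Bytes pending in a ring buffer given its read/write pointers. */
	static uint32_t ring_pending(uint32_t rptr, uint32_t wptr, uint32_t size)
	{
		return (wptr >= rptr) ? (wptr - rptr) : (size - rptr + wptr);
	}

	int main(void)
	{
		uint32_t rptr = 0x40, wptr = 0x10, size = 0x100;

		printf("outbox1 pending: 0x%x bytes\n", ring_pending(rptr, wptr, size));
		return 0;
	}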
Signed-off-by: Cruise Reviewed-by: Nicholas Kazlauskas Reviewed-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 3 +++ drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 3 +++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 4 ++++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c | 4 ++++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c | 4 ++++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c | 4 ++++ 6 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index fb3391854eed..41270fade5f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -979,6 +979,9 @@ void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv) DC_LOG_DEBUG(" inbox0_rptr : %08x", diag_data.inbox0_rptr); DC_LOG_DEBUG(" inbox0_wptr : %08x", diag_data.inbox0_wptr); DC_LOG_DEBUG(" inbox0_size : %08x", diag_data.inbox0_size); + DC_LOG_DEBUG(" outbox1_rptr : %08x", diag_data.outbox1_rptr); + DC_LOG_DEBUG(" outbox1_wptr : %08x", diag_data.outbox1_wptr); + DC_LOG_DEBUG(" outbox1_size : %08x", diag_data.outbox1_size); DC_LOG_DEBUG(" is_enabled : %d", diag_data.is_dmcub_enabled); DC_LOG_DEBUG(" is_soft_reset : %d", diag_data.is_dmcub_soft_reset); DC_LOG_DEBUG(" is_secure_reset : %d", diag_data.is_dmcub_secure_reset); diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 6589bb9aea6b..cd70453aeae0 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -330,6 +330,9 @@ struct dmub_diagnostic_data { uint32_t inbox0_rptr; uint32_t inbox0_wptr; uint32_t inbox0_size; + uint32_t outbox1_rptr; + uint32_t outbox1_wptr; + uint32_t outbox1_size; uint32_t gpint_datain0; struct dmub_srv_debug timeout_info; uint8_t is_dmcub_enabled : 1; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 662c34e9495c..d9f31b191c69 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -449,6 +449,10 @@ void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c index e1da270502cc..9600b7f858b0 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c @@ -459,6 +459,10 @@ void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c 
b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 916ed022e96b..746696b6f09a 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -502,6 +502,10 @@ void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index cf139e9cc20e..39a8cb6d7523 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -444,6 +444,10 @@ void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnost diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; From 6eb1fe53ee94cffd7187844d08d46ba8659d667a Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 16 Jul 2024 14:58:06 -0600 Subject: [PATCH 187/447] drm/amd/display: Check null values from functions Functions get_per_method_common_meta and get_expanded_strategy_list can return null and thus it is necessary to check their returned values before dereferencing. This fixes 3 NULL_RETURNS issues reported by Coverity. 
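The pattern being enforced is simple but easy to miss when a helper "usually" succeeds: capture the returned pointer and bail out before the first dereference, picking a conservative fallback such as "not schedulable". A self-contained C sketch of the same shape (hypothetical types and names, not the DML2 structures):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct meta { int allow_start_otg_vline; };

static struct meta table[1] = { { .allow_start_otg_vline = 100 } };

/* stand-in for get_per_method_common_meta(); NULL means "no usable meta" */
static struct meta *lookup_meta(int idx)
{
        return (idx >= 0 && idx < 1) ? &table[idx] : NULL;
}

static bool is_group_schedulable(int idx)
{
        struct meta *m = lookup_meta(idx);

        if (!m)                 /* check before the first dereference */
                return false;   /* treat "no metadata" as "not schedulable" */
        return m->allow_start_otg_vline > 0;
}

int main(void)
{
        printf("stream 0 schedulable: %d\n", is_group_schedulable(0));
        printf("stream 7 schedulable: %d\n", is_group_schedulable(7));
        return 0;
}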
Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index dddb21818f8a..9331a8fe77c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1199,12 +1199,17 @@ static bool is_timing_group_schedulable( /* init allow start and end lines for timing group */ stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); + if (!stream_method_fams2_meta) + return false; + group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline; group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline; group_fams2_meta->period_us = stream_method_fams2_meta->period_us; for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); + if (!stream_method_fams2_meta) + return false; if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { /* set group allow start to larger otg vline */ @@ -1768,6 +1773,9 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp build_synchronized_timing_groups(pmo, display_config); strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); + if (!strategy_list) + return false; + strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); if (strategy_list_size == 0) From 4067f4fa0423a89fb19a30b57231b384d77d2610 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 15 Jul 2024 09:57:01 -0600 Subject: [PATCH 188/447] drm/amd/display: Initialize get_bytes_per_element's default to 1 Variables that are used as denominators and may never be assigned another value must not be 0. bytes_per_element_y & bytes_per_element_c are initialized by get_bytes_per_element() which should never return 0. This fixes 10 DIVIDE_BY_ZERO issues reported by Coverity.
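The reason a fallback of 1 is safe while 0 is not: the returned value is used directly as a divisor in the request/swath sizing math, so an unrecognized format with a fallback of 1 merely yields a pessimistic size instead of undefined behavior. A toy reproduction of the failure mode (assumed arithmetic, not the real DML formulas):

#include <stdio.h>

/* stand-in for get_bytes_per_element(): unknown formats fall back to 1,
 * never 0, so downstream divisions stay defined */
static unsigned int bytes_per_element(int format)
{
        switch (format) {
        case 16: return 2;   /* e.g. a 16bpp format */
        case 32: return 4;   /* e.g. a 32bpp format */
        default: return 1;   /* unknown: pessimistic, but safe as a divisor */
        }
}

int main(void)
{
        unsigned int request_bytes = 256;
        int fmt = 99;   /* hypothetical unrecognized format id */

        /* with a 0 fallback this division would be undefined behavior */
        printf("elements per request: %u\n",
               request_bytes / bytes_per_element(fmt));
        return 0;
}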
Signed-off-by: Alex Hung Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c | 2 +- .../gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 3d95bfa5aca2..ae5251041728 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 98502a4f0567..9e1c18b90805 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -53,7 +53,7 @@ static void calculate_ttu_cursor( static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) From 31663521ede2edb622ee1b397ae3ac666d6351c5 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 18 Jul 2024 11:53:31 -0400 Subject: [PATCH 189/447] drm/amd/display: Use gpuvm_min_page_size_kbytes for DML2 surfaces [Why] It's currently hard coded to 256 when it should be using the SOC provided values. This can result in corruption with linear surfaces where we prefetch more PTE than the buffer can hold. [How] Update the min page size correctly for the plane. 
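The over-prefetch follows from simple rounding: the prefetch window is rounded up to the assumed GPUVM page size, so advertising 256 KB pages for a surface actually mapped at a smaller granularity makes the window overshoot the allocation. A back-of-envelope illustration (hypothetical sizes, not the DML2 prefetch math):

#include <stdint.h>
#include <stdio.h>

/* round a byte count up to the assumed GPUVM page size (in KiB) */
static uint64_t round_up_to_page(uint64_t bytes, uint64_t page_kb)
{
        uint64_t page = page_kb * 1024;
        return (bytes + page - 1) / page * page;
}

int main(void)
{
        uint64_t surface_bytes = 100 * 1024;  /* hypothetical 100 KiB linear surface */

        /* hardcoded 256 KB pages: prefetch window reaches past the buffer */
        printf("assumed 256K pages -> prefetch %llu bytes\n",
               (unsigned long long)round_up_to_page(surface_bytes, 256));
        /* SOC-provided 4 KB pages: window matches the allocation */
        printf("assumed   4K pages -> prefetch %llu bytes\n",
               (unsigned long long)round_up_to_page(surface_bytes, 4));
        return 0;
}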
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml2_translation_helper.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 25d4ef040173..7e39873832bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -954,7 +954,9 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); } -static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in) +static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_stream_state *in, + const struct soc_bounding_box_st *soc) { dml_uint_t width, height; @@ -971,7 +973,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; - out->GPUVMMinPageSizeKBytes[location] = 256; + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; out->ViewportWidth[location] = width; out->ViewportHeight[location] = height; @@ -1008,7 +1010,9 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->ScalerEnabled[location] = false; } -static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context) +static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_plane_state *in, struct dc_state *context, + const struct soc_bounding_box_st *soc) { struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); if (!scaler_data) @@ -1019,7 +1023,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; - out->GPUVMMinPageSizeKBytes[location] = 256; + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; out->ViewportWidth[location] = scaler_data->viewport.width; out->ViewportHeight[location] = scaler_data->viewport.height; @@ -1332,7 +1336,8 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat disp_cfg_plane_location = dml_dispcfg->num_surfaces++; populate_dummy_dml_surface_cfg(&dml_dispcfg->surface, disp_cfg_plane_location, context->streams[i]); - populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location, context->streams[i]); + populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location, + context->streams[i], &dml2->v20.dml_core_ctx.soc); dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location; @@ -1348,7 +1353,10 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); - populate_dml_plane_cfg_from_plane_state(&dml_dispcfg->plane, disp_cfg_plane_location, 
context->stream_status[i].plane_states[j], context); + populate_dml_plane_cfg_from_plane_state( + &dml_dispcfg->plane, disp_cfg_plane_location, + context->stream_status[i].plane_states[j], context, + &dml2->v20.dml_core_ctx.soc); if (stream_mall_type == SUBVP_MAIN) { dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; From c9bfc37f085aa180b3c49b9c95756b9ef032243e Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Thu, 18 Jul 2024 22:42:06 -0400 Subject: [PATCH 190/447] drm/amd/display: Add new enable and disable functions for DCN35 Add new enable and disable functions based on DCCG spec. Signed-off-by: Hansen Dsouza Reviewed-by: Muhammad Ahmed Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 7d88b0ae241c..bd3757de51c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -932,6 +932,53 @@ static void dccg35_disable_dpp_new( dccg35_set_dppclk_rcg(dccg, inst, true); } +static void dccg35_disable_dscclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_dsc_clk_src_new(dccg, inst, DSC_CLK_REF_CLK); + dccg35_set_dsc_clk_rcg(dccg, inst, true); +} + +static void dccg35_enable_dscclk_new(struct dccg *dccg, + int inst, + enum dsc_clk_source src) +{ + dccg35_set_dsc_clk_rcg(dccg, inst, false); + dccg35_set_dsc_clk_src_new(dccg, inst, src); +} + +static void dccg35_enable_dtbclk_p_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dtbclk_p_rcg(dccg, inst, false); + dccg35_set_dtbclk_p_src_new(dccg, src, inst); +} + +static void dccg35_disable_dtbclk_p_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dtbclk_p_src_new(dccg, DTBCLK_REFCLK, inst); + dccg35_set_dtbclk_p_rcg(dccg, inst, true); +} + +static void dccg35_enable_dpstreamclk_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dpstreamclk_src_new(dccg, DP_STREAM_REFCLK, inst); + dccg35_set_dpstreamclk_rcg(dccg, inst, true); +} + +static void dccg35_disable_dpstreamclk_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dpstreamclk_rcg(dccg, inst, false); + dccg35_set_dtbclk_p_src_new(dccg, src, inst); +} + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -1965,7 +2012,12 @@ struct dccg *dccg35_create( (void)&dccg35_disable_symclk32_le_new; (void)&dccg35_enable_dpp_new; (void)&dccg35_disable_dpp_new; - + (void)&dccg35_disable_dscclk_new; + (void)&dccg35_enable_dscclk_new; + (void)&dccg35_enable_dtbclk_p_new; + (void)&dccg35_disable_dtbclk_p_new; + (void)&dccg35_enable_dpstreamclk_new; + (void)&dccg35_disable_dpstreamclk_new; base = &dccg_dcn->base; base->ctx = ctx; base->funcs = &dccg35_funcs; From 9a72570491b524c9dc4c1caa7323b2297c27b0b7 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Wed, 10 Jul 2024 12:58:20 -0400 Subject: [PATCH 191/447] drm/amd/display: Enable aux transfer path via dmub for dp tunneling [Why] Aux transfer retries path does not support dp tunneling. [How] Based on ddc pin check, aux will be issued in legacy path or DMUB. 
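The routing rule reduces to a two-way dispatch: take the DMUB path when the debug override requests it or when the link has no physical DDC pin (the DP-tunneling/DPIA case), otherwise keep the legacy DCE path. A condensed sketch of that predicate (illustrative wrapper only; the real code calls dce_aux_transfer_dmub_raw()/dce_aux_transfer_raw() as in the diff below):

#include <stdbool.h>
#include <stdio.h>

/* condensed form of the routing check added to
 * dce_aux_transfer_with_retries(); names are illustrative */
static const char *pick_aux_path(bool force_dmub_aux, bool has_ddc_pin)
{
        if (force_dmub_aux || !has_ddc_pin)
                return "dmub";     /* tunneled (DPIA) links have no ddc_pin */
        return "legacy";
}

int main(void)
{
        printf("dpia link -> %s aux\n", pick_aux_path(false, false));
        printf("native dp -> %s aux\n", pick_aux_path(false, true));
        return 0;
}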
Signed-off-by: Meenakshikumar Somasundaram Reviewed-by: Eric Yang Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index b8996d285f00..bb4ac5042c80 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -735,7 +735,15 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, (unsigned int) payload->mot); if (payload->write) dce_aux_log_payload(" write", payload->data, payload->length, 16); - ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + + /* Check whether aux to be processed via dmub or dcn directly */ + if (ddc->ctx->dc->debug.enable_dmub_aux_for_legacy_ddc + || ddc->ddc_pin == NULL) { + ret = dce_aux_transfer_dmub_raw(ddc, payload, &operation_result); + } else { + ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + } + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, LOG_FLAG_I2cAux_DceAux, "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d payload->reply=%u", From ca0fb243c3bb53dbbd71d16c76f319bf923ee3d4 Mon Sep 17 00:00:00 2001 From: Daniel Sa Date: Fri, 19 Jul 2024 13:39:09 -0400 Subject: [PATCH 192/447] drm/amd/display: Underflow Seen on DCN401 eGPU [WHY] In dcn401 we read clock values before FW is loaded. These incorrect values cause the driver to believe that we are running higher clocks than what we actually have. This then causes corruption/underflow for the eGPU. [HOW] When new values are read from HW, update internal structures to propagate the new/correct value. 
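The essence of the fix is to cache the reference frequency read before firmware load, re-read it afterwards, and refresh the bandwidth bounding box whenever the two disagree. Reduced to its skeleton (illustrative values; the real check lives in dcn401_init_hw() in the diff below):

#include <stdbool.h>
#include <stdio.h>

/* decide whether the bw bounding box must be rebuilt: either FAMS2 is
 * disabled, or the dchub reference clock changed after fw load */
static bool need_bounding_box_update(bool fams2_enabled,
                                     int cached_khz, int current_khz)
{
        return !fams2_enabled || (cached_khz / 1000 != current_khz / 1000);
}

int main(void)
{
        int before_fw = 50000;    /* hypothetical pre-firmware readback, kHz */
        int after_fw = 100000;    /* corrected value once fw is loaded, kHz */

        printf("update needed: %d\n",
               need_bounding_box_update(true, before_fw, after_fw));
        return 0;
}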
Fixes issue Signed-off-by: Daniel Sa Reviewed-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 8e1ca709d304..ceaaa8df3641 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -221,6 +221,7 @@ void dcn401_init_hw(struct dc *dc) int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; + int current_dchub_ref_freq = 0; if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) { dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -264,6 +265,8 @@ void dcn401_init_hw(struct dc *dc) dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency, &res_pool->ref_clocks.dccg_ref_clock_inKhz); + current_dchub_ref_freq = res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; + (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub, res_pool->ref_clocks.dccg_ref_clock_inKhz, &res_pool->ref_clocks.dchub_ref_clock_inKhz); @@ -433,8 +436,9 @@ void dcn401_init_hw(struct dc *dc) dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0; dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2; - if (!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) { - /* update bounding box if FAMS2 disabled */ + if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) + || res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) { + /* update bounding box if FAMS2 disabled, or if dchub clk has changed */ if (dc->clk_mgr) dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); From 9330af0af3d54df71b6b752a260dadef05a4fc44 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 16 Jul 2024 14:05:12 -0600 Subject: [PATCH 193/447] drm/amd/display: Check UnboundedRequestEnabled's value CalculateSwathAndDETConfiguration_params_st's UnboundedRequestEnabled is a pointer (i.e. dml_bool_t *UnboundedRequestEnabled), and thus p->UnboundedRequestEnabled checks its address, not bool value. To check value, *p->UnboundedRequestEnabled is used instead. This fixes 1 REVERSE_INULL issue reported by Coverity. 
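This is the classic pointer-truthiness pitfall in C: for a pointer-typed member, !p->UnboundedRequestEnabled only asks whether the pointer itself is NULL, so for an always-populated parameter struct the test is constantly false and the intended condition silently drops out. A minimal standalone reproduction:

#include <stdbool.h>
#include <stdio.h>

/* mirrors the shape of dml_bool_t *UnboundedRequestEnabled */
struct params { bool *unbounded; };

int main(void)
{
        bool enabled = false;
        struct params p = { .unbounded = &enabled };

        if (!p.unbounded)      /* wrong: tests the address, always false here */
                printf("never reached\n");
        if (!*p.unbounded)     /* right: tests the pointed-to value */
                printf("unbounded requests disabled\n");
        return 0;
}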
Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 13f2c80bad4c..c54f1af1845c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -3851,7 +3851,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *p->hw_debug5 = false; for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!(p->mrq_present) && (!p->UnboundedRequestEnabled) && (TotalActiveDPP == 1) + if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) && p->display_cfg->plane_descriptors[k].surface.dcc.enable && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) From 29d0732f8f61ed028d642034e5323b8cdf6a1905 Mon Sep 17 00:00:00 2001 From: Cruise Date: Mon, 22 Jul 2024 19:15:53 +0800 Subject: [PATCH 194/447] drm/amd/display: Get link index for AUX reply notification The link index wasn't updated for the AUX reply notification. Get link index based on DPIA instance for AUX reply notification. Signed-off-by: Cruise Reviewed-by: Meenakshikumar Somasundaram Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c index cd6570a1e20e..fe9f99f1bdf9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c @@ -61,6 +61,7 @@ void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification /* For HPD/HPD RX, convert dpia port index into link index */ if (notify->type == DMUB_NOTIFICATION_HPD || notify->type == DMUB_NOTIFICATION_HPD_IRQ || + notify->type == DMUB_NOTIFICATION_AUX_REPLY || notify->type == DMUB_NOTIFICATION_DPIA_NOTIFICATION || notify->type == DMUB_NOTIFICATION_SET_CONFIG_REPLY) { notify->link_index = From 85ecfdda063b6f148335c354c8b7200a49640510 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 16 Jul 2024 13:24:48 -0600 Subject: [PATCH 195/447] drm/amd/display: Re-order enum in a header file Move the lb_memory_config close to the pixel format enums to improve the code readability. 
Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index caaa9ced2ec4..36d10b0f2eed 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -10,22 +10,6 @@ #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ -enum lb_memory_config { - /* Enable all 3 pieces of memory */ - LB_MEMORY_CONFIG_0 = 0, - - /* Enable only the first piece of memory */ - LB_MEMORY_CONFIG_1 = 1, - - /* Enable only the second piece of memory */ - LB_MEMORY_CONFIG_2 = 2, - - /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the - * last piece of chroma memory used for the luma storage - */ - LB_MEMORY_CONFIG_3 = 3 -}; - struct spl_size { uint32_t width; uint32_t height; @@ -87,6 +71,22 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_UNKNOWN }; +enum lb_memory_config { + /* Enable all 3 pieces of memory */ + LB_MEMORY_CONFIG_0 = 0, + + /* Enable only the first piece of memory */ + LB_MEMORY_CONFIG_1 = 1, + + /* Enable only the second piece of memory */ + LB_MEMORY_CONFIG_2 = 2, + + /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the + * last piece of chroma memory used for the luma storage + */ + LB_MEMORY_CONFIG_3 = 3 +}; + /* Rotation angle */ enum spl_rotation_angle { SPL_ROTATION_ANGLE_0 = 0, From 5d6a620875a04e70c51d8366eccae74d9cef0308 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 15:32:21 -0600 Subject: [PATCH 196/447] drm/amd/display: Setup two pixel per container SPL has a control field for controlling the two pixels per container that is not in use yet. This commit adds a proper initialization for this feature. 
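Background on why the field matters: when two pixels share one clock container, each container cycle carries two pixels, so the per-container rate for a given pixel rate is halved; that is why the value should come from the timing generator's is_two_pixels_per_container() query rather than a constant. A hedged illustration of the assumed relationship (example numbers only, not driver code):

#include <stdbool.h>
#include <stdio.h>

/* with two pixels per container, each container cycle moves two pixels,
 * halving the container clock needed for a given pixel rate */
static unsigned int container_clock_khz(unsigned int pixel_rate_khz,
                                        bool two_pixels_per_container)
{
        return two_pixels_per_container ? pixel_rate_khz / 2 : pixel_rate_khz;
}

int main(void)
{
        unsigned int pclk = 1188000;   /* hypothetical high pixel rate, kHz */

        printf("1 px/container: %u kHz\n", container_clock_khz(pclk, false));
        printf("2 px/container: %u kHz\n", container_clock_khz(pclk, true));
        return 0;
}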
Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_spl_translate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index bcc596724a4f..8f85a1db5eba 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -128,6 +128,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->basic_out.always_scale = pipe_ctx->stream->ctx->dc->debug.always_scale; // Make spl input basic output info alpha_en field point to plane res scl_data lb_params alpha_en spl_in->basic_out.alpha_en = pipe_ctx->plane_res.scl_data.lb_params.alpha_en; + spl_in->basic_out.use_two_pixels_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing); // Make spl input basic input info scaling quality field point to plane state scaling_quality populate_spltaps_from_taps(&spl_in->scaling_quality, &plane_state->scaling_quality); // Translate edge adaptive scaler preference From 6cc213b9aa34bc3213e20f9256345c5cc1495b0b Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 4 Dec 2023 16:35:04 -0500 Subject: [PATCH 197/447] drm/amd/display: Replace dm_execute_dmub_cmd with dc_wake_and_execute_dmub_cmd Commit c2cec7a872b6 ("drm/amd/display: Wake DMCUB before sending a command for replay feature") replaced dm_execute_dmub_cmd with dc_wake_and_execute_dmub_cmd in multiple areas, but due to merge issues the replacement of this function in dmub_replay_copy_settings was missed. This commit replaces the old dm_execute_dmub_cmd with dc_wake_and_execute_dmub_cmd. Fixes: 3601a35a2e9d ("drm/amd/display: Wake DMCUB before sending a command for replay feature") Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 44df9e2351c2..14f935961672 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -187,8 +187,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; - - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } From c7b3569b3ebc53e997500be09eb612b6c852525a Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Thu, 4 Apr 2024 10:25:21 -0400 Subject: [PATCH 198/447] drm/amd/display: Fix Cursor Offset in Scaled Scenarios [WHY] Cursor position code had improper offsets in scaled modes. [HOW] Adjust cursor scaling to account for cursor offsets properly.
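The underlying issue: cursor coordinates arrive in stream space, but the HUBP consumes them in scaled plane space, so the value written to CURSOR_POSITION has to be translated through the destination offset and the viewport-to-destination ratio; the diff below switches the register write from the raw pos->x/pos->y to the pre-adjusted x_pos/y_pos computed earlier in the function. A simplified model of such a translation (assumed form, not the exact dcn401 math):

#include <stdio.h>

/* map a stream-space coordinate into plane space given the plane's
 * destination offset and the viewport:destination scaling ratio */
static int to_plane_space(int stream_pos, int dst_offset,
                          int viewport_dim, int dst_dim)
{
        return (stream_pos - dst_offset) * viewport_dim / dst_dim;
}

int main(void)
{
        /* hypothetical: 1920-wide viewport scaled into a 960-wide recout at x=100 */
        int x_pos = to_plane_space(580, 100, 1920, 960);

        printf("cursor x in plane space: %d\n", x_pos);   /* (580-100)*2 = 960 */
        return 0;
}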
Reviewed-by: Rodrigo Siqueira Signed-off-by: Sung Lee Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index eb0da6c6b87c..846c183fe3a8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -725,8 +725,8 @@ void hubp401_cursor_set_position( CURSOR_ENABLE, cur_en); REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, pos->x, - CURSOR_Y_POSITION, pos->y); + CURSOR_X_POSITION, x_pos, + CURSOR_Y_POSITION, y_pos); REG_SET_2(CURSOR_HOT_SPOT, 0, CURSOR_HOT_SPOT_X, pos->x_hotspot, From ab799c16c9d537fa2f070283f1ca63a4425502e9 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 26 Apr 2024 15:24:06 -0400 Subject: [PATCH 199/447] drm/amd/display: For FAMS2 don't program P-State force from driver P-State force programming is handled entirely by FW in FAMS2. Remove any programming from driver side to prevent incorrect programming from driver side (which may override FW programming) Signed-off-by: Alvin Lee Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c | 1 - drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index 846c183fe3a8..b1ebf5053b4f 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -990,7 +990,6 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_soft_reset = hubp31_soft_reset, .hubp_set_flip_int = hubp401_set_flip_int, .hubp_in_blank = hubp401_in_blank, - .hubp_update_force_pstate_disallow = hubp32_update_force_pstate_disallow, .phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable, .hubp_update_mall_sel = hubp401_update_mall_sel, .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 2533f16510ba..457f4167e848 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -132,7 +132,6 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .dccg_init = dcn20_dccg_init, .set_mcm_luts = dcn401_set_mcm_luts, .program_mall_pipe_config = dcn32_program_mall_pipe_config, - .update_force_pstate = dcn32_update_force_pstate, .update_mall_sel = dcn32_update_mall_sel, .calculate_dccg_k1_k2_values = NULL, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, From 675d9ac9d0de765531e94f9fdc536989a997a324 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 16:53:42 -0600 Subject: [PATCH 200/447] drm/amd/display: Add missing DET segments programming The commit 5034b935f62a ("drm/amd/display: Modify DHCUB waterwark structures and functions") introduced a code refactor for DCHUB, but during the merge process into amd-staging-drm-next, the program det segments were removed. This commit adds the DET segment programming for DCN35. 
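Both hooks zero the same resource from two angles: program_det_size hands a pipe its DET share in kilobytes while program_det_segments does it in segment units, and each call is guarded because a given DCN generation may implement only one of them. A generic sketch of that guarded-callback pattern (hypothetical ops struct, not the real hubbub interface):

#include <stdio.h>

struct hubbub_ops {
        /* either hook may be absent on a given DCN generation */
        void (*program_det_size)(int inst, unsigned int kb);
        void (*program_det_segments)(int inst, unsigned int segs);
};

static void det_size(int inst, unsigned int kb)   { printf("pipe %d: %u KB\n", inst, kb); }
static void det_segs(int inst, unsigned int segs) { printf("pipe %d: %u segs\n", inst, segs); }

int main(void)
{
        struct hubbub_ops ops = { det_size, det_segs };

        /* init: drive every pipe's DET allocation to zero in both units */
        for (int inst = 0; inst < 4; inst++) {
                if (ops.program_det_size)        /* guard: hook may be NULL */
                        ops.program_det_size(inst, 0);
                if (ops.program_det_segments)
                        ops.program_det_segments(inst, 0);
        }
        return 0;
}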
Fixes: 5034b935f62a ("drm/amd/display: Modify DHCUB waterwark structures and functions") Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index a9dc7cf12dac..899e239352aa 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -771,6 +771,8 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } From 24ffa5bb6d363c8164be6af974e318f5752797e1 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:30:21 -0600 Subject: [PATCH 201/447] drm/amd/display: Remove duplicated code DCN_MINIMUM_DISPCLK_Khz and DCN_MINIMUM_DPPCLK_Khz are declared twice. This commit removes that duplication. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index c55d7279fe51..2d06067ff36d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -29,9 +29,6 @@ #include "dc.h" #include "dm_pp_smu.h" -#define DCN_MINIMUM_DISPCLK_Khz 100000 -#define DCN_MINIMUM_DPPCLK_Khz 100000 - /* Constants */ #define DDR4_DRAM_WIDTH 64 #define WM_A 0 From a00a177055cced5cd2bb057a1ace9a95a286bc49 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:33:40 -0600 Subject: [PATCH 202/447] drm/amd/display: Add missing mcache registers Add missing register programming for mcache in DCN401. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 106008593464..514d1ce20df9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -138,7 +138,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ - HUBP_3DLUT_FL_REG_LIST_DCN401(id) + HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ + SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ From 74bad61c5d83f5af8a855c8b7dc8e20377c74d46 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:40:22 -0600 Subject: [PATCH 203/447] drm/amd/display: Add dcc propagation value Initialize the field dcc_meta_propagation_delay_us with 10 us.
Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 9fcdf06d6aa4..3e76732ac0dc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -723,6 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { .min_prefetch_in_strobe_ns = 60000, // 60us .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, + .dcc_meta_propagation_delay_us = 10, .fams2_config = { .bits = { .enable = true, From d91f93c7a7fb9589e62814c1e229943e1259b48c Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:43:43 -0600 Subject: [PATCH 204/447] drm/amd/display: Add missing registers for dcn32 Add missing debug registers for DCN32. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index fee67fbab8e2..7901792afb7b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -505,6 +505,8 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SRI_ARR(CM_POST_CSC_B_C11_C12, CM, id), \ SRI_ARR(CM_POST_CSC_B_C33_C34, CM, id), \ SRI_ARR(CM_MEM_PWR_CTRL, CM, id), SRI_ARR(CM_CONTROL, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_INDEX, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_DATA, CM, id), \ SRI_ARR(FORMAT_CONTROL, CNVC_CFG, id), \ SRI_ARR(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ SRI_ARR(CURSOR0_CONTROL, CNVC_CUR, id), \ @@ -761,6 +763,7 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SRI_ARR(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id), \ SRI_ARR(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id), \ SRI_ARR(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_TEST_DEBUG_BUS_ROTATE, DSCC, id), \ SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \ SRI_ARR(DSCCIF_CONFIG1, DSCCIF, id), \ SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) @@ -1185,6 +1188,8 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL), \ SR(DCHUBBUB_ARB_DRAM_STATE_CNTL), SR(DCHUBBUB_ARB_SAT_LEVEL), \ SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND), SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_TEST_DEBUG_INDEX), \ + SR(DCHUBBUB_TEST_DEBUG_DATA), \ SR(DCHUBBUB_SOFT_RESET), SR(DCHUBBUB_CRC_CTRL), \ SR(DCN_VM_FB_LOCATION_BASE), SR(DCN_VM_FB_LOCATION_TOP), \ SR(DCN_VM_FB_OFFSET), SR(DCN_VM_AGP_BOT), SR(DCN_VM_AGP_TOP), \ From 946e2c5be80b2cf93be34e28b3a6bdadc8ca419b Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:46:25 -0600 Subject: [PATCH 205/447] drm/amd/display: Remove unused code Remove function pointers that were never used. 
Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/inc/hw/transform.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 28da1dddf0a0..45262cba675e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -245,16 +245,6 @@ struct transform_funcs { void (*set_cursor_attributes)( struct transform *xfm_base, const struct dc_cursor_attributes *attr); - - bool (*transform_program_blnd_lut)( - struct transform *xfm, - const struct pwl_params *params); - bool (*transform_program_shaper_lut)( - struct transform *xfm, - const struct pwl_params *params); - bool (*transform_program_3dlut)( - struct transform *xfm, - struct tetrahedral_params *params); }; const uint16_t *get_filter_2tap_16p(void); From 3e048c8846a658098d935df83050170c8a8fb104 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 21 Jul 2024 21:46:07 -0400 Subject: [PATCH 206/447] drm/amd/display: 3.2.294 This version brings along the following: - SPL improvements. - Address coverity issues. - DML2 fixes. - Code cleanup. - DIO and DCCG refactor. - Improve the PSR state. Signed-off-by: Aric Cyr Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4077c1ddb9c1..250d5d48c2d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.293" +#define DC_VER "3.2.294" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 1cb62da0802c8f08e26443a5409edba99b8a1f6e Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 25 Jul 2024 19:10:43 -0400 Subject: [PATCH 207/447] drm/amdkfd: Fix compile error if HMM support not enabled Fixes the build errors below when the kernel config does not enable HMM support: >> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c:107:26: error: implicit declaration of function 'svm_range_from_addr' >> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c:107:24: error: assignment to 'struct svm_range *' from 'int' makes pointer from integer without a cast [-Wint-conversion] >> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c:111:28: error: invalid use of undefined type 'struct svm_range' Fixes: b049504e211e ("drm/amdkfd: Validate user queue svm memory residency") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407252127.zvnxaKRA-lkp@intel.com/ Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 63795f0cd55a..e0a073ae4a49 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -85,6 +85,8 @@ void uninit_queue(struct queue *q) kfree(q); } +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) { struct kfd_process *p = pdd->process; @@ -178,6 +180,18 @@ static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64
addr, u mutex_unlock(&p->svms.lock); } +#else + +static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + return -EINVAL; +} + +static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size) +{ +} + +#endif int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, u64 expected_size) From f905d0c328b440fabaaf265350bf4187ccd5f59b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Mon, 29 Jul 2024 09:24:20 +0800 Subject: [PATCH 208/447] drm/amd/pm: update powerplay structure on smu v14.0.2/3 update powerplay structure on smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../amd/pm/swsmu/inc/smu_v14_0_2_pptable.h | 52 ++++++++++++++++--- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h index 4a3fde89aed7..75c921e87360 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h @@ -27,7 +27,8 @@ #pragma pack(push, 1) -#define SMU_14_0_2_TABLE_FORMAT_REVISION 3 +#define SMU_14_0_2_TABLE_FORMAT_REVISION 23 +#define SMU_14_0_2_CUSTOM_TABLE_FORMAT_REVISION 1 // POWERPLAYTABLE::ulPlatformCaps #define SMU_14_0_2_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. @@ -43,6 +44,7 @@ #define SMU_14_0_2_PP_THERMALCONTROLLER_NONE 0 #define SMU_14_0_2_PP_OVERDRIVE_VERSION 0x1 // TODO: FIX OverDrive Version TBD +#define SMU_14_0_2_PP_CUSTOM_OVERDRIVE_VERSION 0x1 #define SMU_14_0_2_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 enum SMU_14_0_2_OD_SW_FEATURE_CAP @@ -107,6 +109,7 @@ enum SMU_14_0_2_PWRMODE_SETTING SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, + SMU_14_0_2_PMSETTING_COUNT }; #define SMU_14_0_2_MAX_PMSETTING 32 // Maximum Number of PowerMode Settings @@ -127,17 +130,24 @@ struct smu_14_0_2_overdrive_table int16_t pm_setting[SMU_14_0_2_MAX_PMSETTING]; // Optimized power mode feature settings }; +enum smu_14_0_3_pptable_source { + PPTABLE_SOURCE_IFWI = 0, + PPTABLE_SOURCE_DRIVER_HARDCODED = 1, + PPTABLE_SOURCE_PPGEN_REGISTRY = 2, + PPTABLE_SOURCE_MAX = PPTABLE_SOURCE_PPGEN_REGISTRY, +}; + struct smu_14_0_2_powerplay_table { struct atom_common_table_header header; // header.format_revision = 3 (HAS TO MATCH SMU_14_0_2_TABLE_FORMAT_REVISION), header.content_revision = ? structuresize is calculated by PPGen. uint8_t table_revision; // PPGen use only: table_revision = 3 - uint8_t padding; // Padding 1 byte to align table_size offset to 6 bytes (pmfw_start_offset, for PMFW to know the starting offset of PPTable_t). + uint8_t pptable_source; // PPGen UI dropdown box uint16_t pmfw_pptable_start_offset; // The start offset of the pmfw portion. i.e. start of PPTable_t (start of SkuTable_t) uint16_t pmfw_pptable_size; // The total size of pmfw_pptable, i.e PPTable_t. - uint16_t pmfw_pfe_table_start_offset; // The start offset of the PFE_Settings_t within pmfw_pptable. - uint16_t pmfw_pfe_table_size; // The size of PFE_Settings_t. - uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t within pmfw_pptable. - uint16_t pmfw_board_table_size; // The size of BoardTable_t. 
+ uint16_t pmfw_sku_table_start_offset; // DO NOT CHANGE ORDER; The absolute start offset of the SkuTable_t (within smu_14_0_3_powerplay_table). + uint16_t pmfw_sku_table_size; // DO NOT CHANGE ORDER; The size of SkuTable_t. + uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t + uint16_t pmfw_board_table_size; // The size of BoardTable_t. uint16_t pmfw_custom_sku_table_start_offset; // The start offset of the CustomSkuTable_t within pmfw_pptable. uint16_t pmfw_custom_sku_table_size; // The size of the CustomSkuTable_t. uint32_t golden_pp_id; // PPGen use only: PP Table ID on the Golden Data Base @@ -159,6 +169,36 @@ struct smu_14_0_2_powerplay_table PPTable_t smc_pptable; // PPTable_t in driver_if.h -- as requested by PMFW, this offset should start at a 32-byte boundary, and the table_size above should remain at offset=6 bytes }; +enum SMU_14_0_2_CUSTOM_OD_SW_FEATURE_CAP { + SMU_14_0_2_CUSTOM_ODCAP_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODCAP_COUNT +}; + +enum SMU_14_0_2_CUSTOM_OD_FEATURE_SETTING_ID { + SMU_14_0_2_CUSTOM_ODSETTING_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODSETTING_COUNT, +}; + +struct smu_14_0_2_custom_overdrive_table { + uint8_t revision; + uint8_t reserve[3]; + uint8_t cap[SMU_14_0_2_CUSTOM_ODCAP_COUNT]; + int32_t max[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int32_t min[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int16_t pm_setting[SMU_14_0_2_PMSETTING_COUNT]; +}; + +struct smu_14_0_3_custom_powerplay_table { + uint8_t custom_table_revision; + uint16_t custom_table_size; + uint16_t custom_sku_table_offset; + uint32_t custom_platform_caps; + uint16_t software_shutdown_temp; + struct smu_14_0_2_custom_overdrive_table custom_overdrive_table; + uint32_t reserve[8]; + CustomSkuTable_t custom_sku_table_pmfw; +}; + #pragma pack(pop) #endif From 8141f21b941710ecebe49220b69822cab3abd23d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 26 Jul 2024 19:31:55 +0530 Subject: [PATCH 209/447] drm/amd/display: Handle null 'stream_status' in 'planes_changed_for_existing_stream' This commit adds a null check for 'stream_status' in the function 'planes_changed_for_existing_stream'. Previously, the code assumed 'stream_status' could be null, but did not handle the case where it was actually null. This could lead to a null pointer dereference. 
Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c:3784 planes_changed_for_existing_stream() error: we previously assumed 'stream_status' could be null (see line 3774) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 4f5b23520365..1c379a6b1b4c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3773,8 +3773,10 @@ static bool planes_changed_for_existing_stream(struct dc_state *context, } } - if (!stream_status) + if (!stream_status) { ASSERT(0); + return false; + } for (i = 0; i < set_count; i++) if (set[i].stream == stream) From 9710b84e2a6afde2db20cd33435038eb75b91200 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 30 Jul 2024 09:43:11 +0800 Subject: [PATCH 210/447] drm/amd/pm: add overdrive support on smu v14.0.2/3 add overdrive support on smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 11 +- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 886 +++++++++++++++++- 2 files changed, 894 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 09973615f210..865e916fc425 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -452,17 +452,26 @@ int smu_v14_0_init_smc_tables(struct smu_context *smu) ret = -ENOMEM; goto err3_out; } + + smu_table->user_overdrive_table = + kzalloc(tables[SMU_TABLE_OVERDRIVE].size, GFP_KERNEL); + if (!smu_table->user_overdrive_table) { + ret = -ENOMEM; + goto err4_out; + } } smu_table->combo_pptable = kzalloc(tables[SMU_TABLE_COMBO_PPTABLE].size, GFP_KERNEL); if (!smu_table->combo_pptable) { ret = -ENOMEM; - goto err4_out; + goto err5_out; } return 0; +err5_out: + kfree(smu_table->user_overdrive_table); err4_out: kfree(smu_table->boot_overdrive_table); err3_out: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e1a27903c80a..5913f9c60fe0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -68,6 +68,18 @@ #define DEBUGSMC_MSG_Mode1Reset 2 #define LINK_SPEED_MAX 3 +#define PP_OD_FEATURE_GFXCLK_FMIN 0 +#define PP_OD_FEATURE_GFXCLK_FMAX 1 +#define PP_OD_FEATURE_UCLK_FMIN 2 +#define PP_OD_FEATURE_UCLK_FMAX 3 +#define PP_OD_FEATURE_GFX_VF_CURVE 4 +#define PP_OD_FEATURE_FAN_CURVE_TEMP 5 +#define PP_OD_FEATURE_FAN_CURVE_PWM 6 +#define PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT 7 +#define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 +#define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 +#define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 + static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -204,6 +216,7 @@ static struct cmn2asic_mapping smu_v14_0_2_table_map[SMU_TABLE_COUNT] = { [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, TAB_MAP(I2C_COMMANDS), TAB_MAP(ECCINFO), + TAB_MAP(OVERDRIVE), }; static 
struct cmn2asic_mapping smu_v14_0_2_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -1029,16 +1042,97 @@ static int smu_v14_0_2_get_current_clk_freq_by_table(struct smu_context *smu, value); } +static bool smu_v14_0_2_is_od_feature_supported(struct smu_context *smu, + int od_feature_bit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + const OverDriveLimits_t * const overdrive_upperlimits = + &pptable->SkuTable.OverDriveLimitsBasicMax; + + return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); +} + +static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, + int32_t *max) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + const OverDriveLimits_t * const overdrive_upperlimits = + &pptable->SkuTable.OverDriveLimitsBasicMax; + const OverDriveLimits_t * const overdrive_lowerlimits = + &pptable->SkuTable.OverDriveLimitsBasicMin; + int32_t od_min_setting, od_max_setting; + + switch (od_feature_bit) { + case PP_OD_FEATURE_GFXCLK_FMIN: + od_min_setting = overdrive_lowerlimits->GfxclkFmin; + od_max_setting = overdrive_upperlimits->GfxclkFmin; + break; + case PP_OD_FEATURE_GFXCLK_FMAX: + od_min_setting = overdrive_lowerlimits->GfxclkFmax; + od_max_setting = overdrive_upperlimits->GfxclkFmax; + break; + case PP_OD_FEATURE_UCLK_FMIN: + od_min_setting = overdrive_lowerlimits->UclkFmin; + od_max_setting = overdrive_upperlimits->UclkFmin; + break; + case PP_OD_FEATURE_UCLK_FMAX: + od_min_setting = overdrive_lowerlimits->UclkFmax; + od_max_setting = overdrive_upperlimits->UclkFmax; + break; + case PP_OD_FEATURE_GFX_VF_CURVE: + od_min_setting = overdrive_lowerlimits->VoltageOffsetPerZoneBoundary[0]; + od_max_setting = overdrive_upperlimits->VoltageOffsetPerZoneBoundary[0]; + break; + case PP_OD_FEATURE_FAN_CURVE_TEMP: + od_min_setting = overdrive_lowerlimits->FanLinearTempPoints[0]; + od_max_setting = overdrive_upperlimits->FanLinearTempPoints[0]; + break; + case PP_OD_FEATURE_FAN_CURVE_PWM: + od_min_setting = overdrive_lowerlimits->FanLinearPwmPoints[0]; + od_max_setting = overdrive_upperlimits->FanLinearPwmPoints[0]; + break; + case PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT: + od_min_setting = overdrive_lowerlimits->AcousticLimitRpmThreshold; + od_max_setting = overdrive_upperlimits->AcousticLimitRpmThreshold; + break; + case PP_OD_FEATURE_FAN_ACOUSTIC_TARGET: + od_min_setting = overdrive_lowerlimits->AcousticTargetRpmThreshold; + od_max_setting = overdrive_upperlimits->AcousticTargetRpmThreshold; + break; + case PP_OD_FEATURE_FAN_TARGET_TEMPERATURE: + od_min_setting = overdrive_lowerlimits->FanTargetTemperature; + od_max_setting = overdrive_upperlimits->FanTargetTemperature; + break; + case PP_OD_FEATURE_FAN_MINIMUM_PWM: + od_min_setting = overdrive_lowerlimits->FanMinimumPwm; + od_max_setting = overdrive_upperlimits->FanMinimumPwm; + break; + default: + od_min_setting = od_max_setting = INT_MAX; + break; + } + + if (min) + *min = od_min_setting; + if (max) + *max = od_max_setting; +} + static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; struct smu_14_0_dpm_context *dpm_context = smu_dpm->dpm_context; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)smu->smu_table.overdrive_table; struct smu_14_0_dpm_table *single_dpm_table; struct smu_14_0_pcie_table *pcie_table; uint32_t gen_speed, lane_width; int i, curr_freq, size = 0; + int32_t min_value, max_value; int ret = 0; smu_cmn_get_sysfs_buf(&buf, &size); @@ 
-1159,6 +1253,183 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, "*" : ""); break; + case SMU_OD_SCLK: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_GFXCLK_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_SCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", + od_table->OverDriveTable.GfxclkFmin, + od_table->OverDriveTable.GfxclkFmax); + break; + + case SMU_OD_MCLK: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_UCLK_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_MCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMHz\n", + od_table->OverDriveTable.UclkFmin, + od_table->OverDriveTable.UclkFmax); + break; + + case SMU_OD_VDDGFX_OFFSET: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_GFX_VF_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_VDDGFX_OFFSET:\n"); + size += sysfs_emit_at(buf, size, "%dmV\n", + od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[0]); + break; + + case SMU_OD_FAN_CURVE: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_FAN_CURVE:\n"); + for (i = 0; i < NUM_OD_FAN_MAX_POINTS - 1; i++) + size += sysfs_emit_at(buf, size, "%d: %dC %d%%\n", + i, + (int)od_table->OverDriveTable.FanLinearTempPoints[i], + (int)od_table->OverDriveTable.FanLinearPwmPoints[i]); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "FAN_CURVE(hotspot temp): %uC %uC\n", + min_value, max_value); + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "FAN_CURVE(fan speed): %u%% %u%%\n", + min_value, max_value); + + break; + + case SMU_OD_ACOUSTIC_LIMIT: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_ACOUSTIC_LIMIT:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.AcousticLimitRpmThreshold); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ACOUSTIC_LIMIT: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_ACOUSTIC_TARGET: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_ACOUSTIC_TARGET:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.AcousticTargetRpmThreshold); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_TARGET, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ACOUSTIC_TARGET: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_FAN_TARGET_TEMPERATURE: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_TARGET_TEMPERATURE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanTargetTemperature); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_TARGET_TEMPERATURE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "TARGET_TEMPERATURE: %u %u\n", + min_value, max_value); + break; + + case 
SMU_OD_FAN_MINIMUM_PWM: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_MINIMUM_PWM:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanMinimumPwm); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_MINIMUM_PWM, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "MINIMUM_PWM: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_RANGE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && + !smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && + !smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMIN, + &min_value, + NULL); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + NULL, + &max_value); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMIN, + &min_value, + NULL); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMAX, + NULL, + &max_value); + size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFX_VF_CURVE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "VDDGFX_OFFSET: %7dmv %10dmv\n", + min_value, max_value); + } + break; + default: break; } @@ -1400,7 +1671,27 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *max_power_limit, uint32_t *min_power_limit) { - // TODO + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + CustomSkuTable_t *skutable = &pptable->CustomSkuTable; + uint32_t power_limit; + uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + + if (smu_v14_0_get_current_power_limit(smu, &power_limit)) + power_limit = smu->adev->pm.ac_power ? 
+ skutable->SocketPowerLimitAc[PPT_THROTTLER_PPT0] : + skutable->SocketPowerLimitDc[PPT_THROTTLER_PPT0]; + + if (current_power_limit) + *current_power_limit = power_limit; + if (default_power_limit) + *default_power_limit = power_limit; + + if (max_power_limit) + *max_power_limit = msg_limit; + + if (min_power_limit) + *min_power_limit = 0; return 0; } @@ -1950,6 +2241,594 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v1_3); } +static void smu_v14_0_2_dump_od_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + struct amdgpu_device *adev = smu->adev; + + dev_dbg(adev->dev, "OD: Gfxclk: (%d, %d)\n", od_table->OverDriveTable.GfxclkFmin, + od_table->OverDriveTable.GfxclkFmax); + dev_dbg(adev->dev, "OD: Uclk: (%d, %d)\n", od_table->OverDriveTable.UclkFmin, + od_table->OverDriveTable.UclkFmax); +} + +static int smu_v14_0_2_upload_overdrive_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + int ret; + ret = smu_cmn_update_table(smu, + SMU_TABLE_OVERDRIVE, + 0, + (void *)od_table, + true); + if (ret) + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + + return ret; +} + +static void smu_v14_0_2_set_supported_od_feature_mask(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + adev->pm.od_feature_mask |= OD_OPS_SUPPORT_FAN_CURVE_RETRIEVE | + OD_OPS_SUPPORT_FAN_CURVE_SET | + OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_RETRIEVE | + OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_SET | + OD_OPS_SUPPORT_ACOUSTIC_TARGET_THRESHOLD_RETRIEVE | + OD_OPS_SUPPORT_ACOUSTIC_TARGET_THRESHOLD_SET | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; +} + +static int smu_v14_0_2_get_overdrive_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + int ret; + ret = smu_cmn_update_table(smu, + SMU_TABLE_OVERDRIVE, + 0, + (void *)od_table, + false); + if (ret) + dev_err(smu->adev->dev, "Failed to get overdrive table!\n"); + + return ret; +} + +static int smu_v14_0_2_set_default_od_settings(struct smu_context *smu) +{ + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)smu->smu_table.overdrive_table; + OverDriveTableExternal_t *boot_od_table = + (OverDriveTableExternal_t *)smu->smu_table.boot_overdrive_table; + OverDriveTableExternal_t *user_od_table = + (OverDriveTableExternal_t *)smu->smu_table.user_overdrive_table; + OverDriveTableExternal_t user_od_table_bak; + int ret; + int i; + + ret = smu_v14_0_2_get_overdrive_table(smu, boot_od_table); + if (ret) + return ret; + + smu_v14_0_2_dump_od_table(smu, boot_od_table); + + memcpy(od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + + /* + * For S3/S4/Runpm resume, we need to set up those overdrive tables again, + * but we have to preserve user-defined values in "user_od_table". 
+ */ + if (!smu->adev->in_suspend) { + memcpy(user_od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + smu->user_dpm_profile.user_od = false; + } else if (smu->user_dpm_profile.user_od) { + memcpy(&user_od_table_bak, + user_od_table, + sizeof(OverDriveTableExternal_t)); + memcpy(user_od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + user_od_table->OverDriveTable.GfxclkFmin = + user_od_table_bak.OverDriveTable.GfxclkFmin; + user_od_table->OverDriveTable.GfxclkFmax = + user_od_table_bak.OverDriveTable.GfxclkFmax; + user_od_table->OverDriveTable.UclkFmin = + user_od_table_bak.OverDriveTable.UclkFmin; + user_od_table->OverDriveTable.UclkFmax = + user_od_table_bak.OverDriveTable.UclkFmax; + for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) + user_od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = + user_od_table_bak.OverDriveTable.VoltageOffsetPerZoneBoundary[i]; + for (i = 0; i < NUM_OD_FAN_MAX_POINTS - 1; i++) { + user_od_table->OverDriveTable.FanLinearTempPoints[i] = + user_od_table_bak.OverDriveTable.FanLinearTempPoints[i]; + user_od_table->OverDriveTable.FanLinearPwmPoints[i] = + user_od_table_bak.OverDriveTable.FanLinearPwmPoints[i]; + } + user_od_table->OverDriveTable.AcousticLimitRpmThreshold = + user_od_table_bak.OverDriveTable.AcousticLimitRpmThreshold; + user_od_table->OverDriveTable.AcousticTargetRpmThreshold = + user_od_table_bak.OverDriveTable.AcousticTargetRpmThreshold; + user_od_table->OverDriveTable.FanTargetTemperature = + user_od_table_bak.OverDriveTable.FanTargetTemperature; + user_od_table->OverDriveTable.FanMinimumPwm = + user_od_table_bak.OverDriveTable.FanMinimumPwm; + } + + smu_v14_0_2_set_supported_od_feature_mask(smu); + + return 0; +} + +static int smu_v14_0_2_restore_user_od_settings(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = table_context->overdrive_table; + OverDriveTableExternal_t *user_od_table = table_context->user_overdrive_table; + int res; + + user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | + BIT(PP_OD_FEATURE_UCLK_BIT) | + BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | + BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + res = smu_v14_0_2_upload_overdrive_table(smu, user_od_table); + user_od_table->OverDriveTable.FeatureCtrlMask = 0; + if (res == 0) + memcpy(od_table, user_od_table, sizeof(OverDriveTableExternal_t)); + + return res; +} + +static int smu_v14_0_2_od_restore_table_single(struct smu_context *smu, long input) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *boot_overdrive_table = + (OverDriveTableExternal_t *)table_context->boot_overdrive_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + struct amdgpu_device *adev = smu->adev; + int i; + + switch (input) { + case PP_OD_EDIT_FAN_CURVE: + for (i = 0; i < NUM_OD_FAN_MAX_POINTS; i++) { + od_table->OverDriveTable.FanLinearTempPoints[i] = + boot_overdrive_table->OverDriveTable.FanLinearTempPoints[i]; + od_table->OverDriveTable.FanLinearPwmPoints[i] = + boot_overdrive_table->OverDriveTable.FanLinearPwmPoints[i]; + } + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_ACOUSTIC_LIMIT: + od_table->OverDriveTable.AcousticLimitRpmThreshold = + boot_overdrive_table->OverDriveTable.AcousticLimitRpmThreshold; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + 
od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_ACOUSTIC_TARGET: + od_table->OverDriveTable.AcousticTargetRpmThreshold = + boot_overdrive_table->OverDriveTable.AcousticTargetRpmThreshold; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_FAN_TARGET_TEMPERATURE: + od_table->OverDriveTable.FanTargetTemperature = + boot_overdrive_table->OverDriveTable.FanTargetTemperature; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_FAN_MINIMUM_PWM: + od_table->OverDriveTable.FanMinimumPwm = + boot_overdrive_table->OverDriveTable.FanMinimumPwm; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + default: + dev_info(adev->dev, "Invalid table index: %ld\n", input); + return -EINVAL; + } + + return 0; +} + +static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long input[], + uint32_t size) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + struct amdgpu_device *adev = smu->adev; + uint32_t offset_of_voltageoffset; + int32_t minimum, maximum; + uint32_t feature_ctrlmask; + int i, ret = 0; + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { + dev_warn(adev->dev, "GFXCLK_LIMITS setting not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMIN, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "GfxclkFmin (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.GfxclkFmin = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; + break; + + case 1: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "GfxclkFmax (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.GfxclkFmax = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; + break; + + default: + dev_info(adev->dev, "Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + } + + if (od_table->OverDriveTable.GfxclkFmin > od_table->OverDriveTable.GfxclkFmax) { + dev_err(adev->dev, + "Invalid setting: GfxclkFmin(%u) is bigger than GfxclkFmax(%u)\n", + (uint32_t)od_table->OverDriveTable.GfxclkFmin, + (uint32_t)od_table->OverDriveTable.GfxclkFmax); + return -EINVAL; + } + break; + + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT)) { + dev_warn(adev->dev, "UCLK_LIMITS setting not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(adev->dev, "invalid 
number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMIN, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "UclkFmin (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.UclkFmin = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_UCLK_BIT; + break; + + case 1: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMAX, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "UclkFmax (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.UclkFmax = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_UCLK_BIT; + break; + + default: + dev_info(adev->dev, "Invalid MCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + } + + if (od_table->OverDriveTable.UclkFmin > od_table->OverDriveTable.UclkFmax) { + dev_err(adev->dev, + "Invalid setting: UclkFmin(%u) is bigger than UclkFmax(%u)\n", + (uint32_t)od_table->OverDriveTable.UclkFmin, + (uint32_t)od_table->OverDriveTable.UclkFmax); + return -EINVAL; + } + break; + + case PP_OD_EDIT_VDDGFX_OFFSET: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { + dev_warn(adev->dev, "Gfx offset setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFX_VF_CURVE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "Voltage offset (%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) + od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_CURVE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + if (input[0] >= NUM_OD_FAN_MAX_POINTS - 1 || + input[0] < 0) + return -EINVAL; + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_TEMP, + &minimum, + &maximum); + if (input[1] < minimum || + input[1] > maximum) { + dev_info(adev->dev, "Fan curve temp setting(%ld) must be within [%d, %d]!\n", + input[1], minimum, maximum); + return -EINVAL; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_PWM, + &minimum, + &maximum); + if (input[2] < minimum || + input[2] > maximum) { + dev_info(adev->dev, "Fan curve pwm setting(%ld) must be within [%d, %d]!\n", + input[2], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanLinearTempPoints[input[0]] = input[1]; + od_table->OverDriveTable.FanLinearPwmPoints[input[0]] = input[2]; + od_table->OverDriveTable.FanMode = FAN_MODE_MANUAL_LINEAR; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_ACOUSTIC_LIMIT: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT, + &minimum, + &maximum); + if 
(input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "acoustic limit threshold setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.AcousticLimitRpmThreshold = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_ACOUSTIC_TARGET: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_TARGET, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "acoustic target threshold setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.AcousticTargetRpmThreshold = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_TARGET_TEMPERATURE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_TARGET_TEMPERATURE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "fan target temperature setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanTargetTemperature = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_MINIMUM_PWM: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_MINIMUM_PWM, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "fan minimum pwm setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanMinimumPwm = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_RESTORE_DEFAULT_TABLE: + if (size == 1) { + ret = smu_v14_0_2_od_restore_table_single(smu, input[0]); + if (ret) + return ret; + } else { + feature_ctrlmask = od_table->OverDriveTable.FeatureCtrlMask; + memcpy(od_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTableExternal_t)); + od_table->OverDriveTable.FeatureCtrlMask = feature_ctrlmask; + } + fallthrough; + case PP_OD_COMMIT_DPM_TABLE: + /* + * The member below instructs PMFW which settings are targeted by + * this single operation. + * `uint32_t FeatureCtrlMask;` + * It does not contain actual information about the user's custom + * settings. Thus we do not cache it. 
+ */ + offset_of_voltageoffset = offsetof(OverDriveTable_t, VoltageOffsetPerZoneBoundary); + if (memcmp((u8 *)od_table + offset_of_voltageoffset, + table_context->user_overdrive_table + offset_of_voltageoffset, + sizeof(OverDriveTableExternal_t) - offset_of_voltageoffset)) { + smu_v14_0_2_dump_od_table(smu, od_table); + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + od_table->OverDriveTable.FeatureCtrlMask = 0; + memcpy(table_context->user_overdrive_table + offset_of_voltageoffset, + (u8 *)od_table + offset_of_voltageoffset, + sizeof(OverDriveTableExternal_t) - offset_of_voltageoffset); + + if (!memcmp(table_context->user_overdrive_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTableExternal_t))) + smu->user_dpm_profile.user_od = false; + else + smu->user_dpm_profile.user_od = true; + } + break; + + default: + return -ENOSYS; + } + + return ret; +} + +static int smu_v14_0_2_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v14_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v14_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1988,13 +2867,16 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, + .set_default_od_settings = smu_v14_0_2_set_default_od_settings, + .restore_user_od_settings = smu_v14_0_2_restore_user_od_settings, + .od_edit_dpm_table = smu_v14_0_2_od_edit_dpm_table, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, .set_performance_level = smu_v14_0_set_performance_level, .gfx_off_control = smu_v14_0_gfx_off_control, .get_unique_id = smu_v14_0_2_get_unique_id, .get_power_limit = smu_v14_0_2_get_power_limit, - .set_power_limit = smu_v14_0_set_power_limit, + .set_power_limit = smu_v14_0_2_set_power_limit, .set_power_source = smu_v14_0_set_power_source, .get_power_profile_mode 
= smu_v14_0_2_get_power_profile_mode, .set_power_profile_mode = smu_v14_0_2_set_power_profile_mode, From 17277da26623d4aa8bdda628d0024cf2f2e39ae6 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 30 Jul 2024 11:00:06 +0530 Subject: [PATCH 211/447] drm/amdgpu: Remove debugfs amdgpu_reset_dump_register_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some problems with the existing amdgpu_reset_dump_register_list debugfs node. It is supposed to read a list of registers, but there could be cases where the IP is not in the correct power state. A register read in such cases could lead to more problems. We are taking care of all such power states in devcoredump and dumping the registers needed for debugging. So clean up this code; we don't need this functionality via debugfs anymore. Reviewed-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 96 --------------------- 1 file changed, 96 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0e1a11b6b989..cbef720de779 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2026,100 +2026,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL, amdgpu_debugfs_sclk_set, "%llu\n"); -static ssize_t amdgpu_reset_dump_register_list_read(struct file *f, - char __user *buf, size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; - char reg_offset[12]; - int i, ret, len = 0; - - if (*pos) - return 0; - - memset(reg_offset, 0, 12); - ret = down_read_killable(&adev->reset_domain->sem); - if (ret) - return ret; - - for (i = 0; i < adev->reset_info.num_regs; i++) { - sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]); - up_read(&adev->reset_domain->sem); - if (copy_to_user(buf + len, reg_offset, strlen(reg_offset))) - return -EFAULT; - - len += strlen(reg_offset); - ret = down_read_killable(&adev->reset_domain->sem); - if (ret) - return ret; - } - - up_read(&adev->reset_domain->sem); - *pos += len; - - return len; -} - -static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, - const char __user *buf, size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; - char reg_offset[11]; - uint32_t *new = NULL, *tmp = NULL; - unsigned int len = 0; - int ret, i = 0; - - do { - memset(reg_offset, 0, 11); - if (copy_from_user(reg_offset, buf + len, - min(10, (size-len)))) { - ret = -EFAULT; - goto error_free; - } - - new = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto error_free; - } - tmp = new; - if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) { - ret = -EINVAL; - goto error_free; - } - - len += ret; - i++; - } while (len < size); - - new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto error_free; - } - ret = down_write_killable(&adev->reset_domain->sem); - if (ret) - goto error_free; - - swap(adev->reset_info.reset_dump_reg_list, tmp); - swap(adev->reset_info.reset_dump_reg_value, new); - adev->reset_info.num_regs = i; - up_write(&adev->reset_domain->sem); - ret = size; - -error_free: - if (tmp != new) - kfree(tmp); - kfree(new); - return ret; -} - -static const struct file_operations amdgpu_reset_dump_register_list = { - .owner = THIS_MODULE, - .read = 
amdgpu_reset_dump_register_list_read, - .write = amdgpu_reset_dump_register_list_write, - .llseek = default_llseek -}; - int amdgpu_debugfs_init(struct amdgpu_device *adev) { struct dentry *root = adev_to_drm(adev)->primary->debugfs_root; @@ -2204,8 +2110,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) &amdgpu_debugfs_vm_info_fops); debugfs_create_file("amdgpu_benchmark", 0200, root, adev, &amdgpu_benchmark_fops); - debugfs_create_file("amdgpu_reset_dump_register_list", 0644, root, adev, - &amdgpu_reset_dump_register_list); adev->debugfs_vbios_blob.data = adev->bios; adev->debugfs_vbios_blob.size = adev->bios_size; From 836af5be1b6d8e93d736c252e711a20db7dbde9d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 30 Jul 2024 11:19:53 +0530 Subject: [PATCH 212/447] drm/amdgpu: Clean up the register dump via debugfs list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The debugfs register list for dumping has been removed as it had some issues related to ensuring the proper power state of the IP before a register read. Since the above-mentioned interface is removed, we no longer want these registers dumped as part of the devcoredump, so remove that code as well. Reviewed-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ------------- .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 10 +--------- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 ------------------- 3 files changed, 1 insertion(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 137a88b8de45..c54ddd3e68aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -823,17 +823,6 @@ struct amdgpu_mqd { struct amdgpu_reset_domain; struct amdgpu_fru_info; -struct amdgpu_reset_info { - /* reset dump register */ - u32 *reset_dump_reg_list; - u32 *reset_dump_reg_value; - int num_regs; - -#ifdef CONFIG_DEV_COREDUMP - struct amdgpu_coredump_info *coredump_info; -#endif -}; - /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. 
*/ @@ -1157,8 +1146,6 @@ struct amdgpu_device { struct mutex benchmark_mutex; - struct amdgpu_reset_info reset_info; - bool scpm_enabled; uint32_t scpm_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index f6806ae1c061..cf2b4dd4d865 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -203,7 +203,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, struct amdgpu_coredump_info *coredump = data; struct drm_print_iterator iter; struct amdgpu_vm_fault_info *fault_info; - int i, ver; + int ver; iter.data = buffer; iter.offset = 0; @@ -317,14 +317,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->reset_info.num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->reset_info.num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_info.reset_dump_reg_list[i], - coredump->adev->reset_info.reset_dump_reg_value[i]); - } return count - iter.remain; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a2a1a3da17e3..3a43754e7f10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5277,23 +5277,6 @@ mode1_reset_failed: return ret; } -static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) -{ - int i; - - lockdep_assert_held(&adev->reset_domain->sem); - - for (i = 0; i < adev->reset_info.num_regs; i++) { - adev->reset_info.reset_dump_reg_value[i] = - RREG32(adev->reset_info.reset_dump_reg_list[i]); - - trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], - adev->reset_info.reset_dump_reg_value[i]); - } - - return 0; -} - int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { @@ -5359,8 +5342,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, } if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { - amdgpu_reset_reg_dumps(tmp_adev); - dev_info(tmp_adev->dev, "Dumping IP State\n"); /* Trigger ip dump before we reset the asic */ for (i = 0; i < tmp_adev->num_ip_blocks; i++) From 4a4c815b08dc774dde67fb90a0286925f98204af Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 30 Jul 2024 09:39:14 +0530 Subject: [PATCH 213/447] drm/amd/display: Align 'dpp401_dscl_program_isharp' with actual function parameters This commit corrects the function comment for 'dpp401_dscl_program_isharp' in 'dcn401_dpp_dscl.c'. The comment previously included a description for a non-existent parameter 'bs_coeffs_updated'. This parameter description has been removed to reflect the function's actual parameters. 
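For reference, kernel-doc expects exactly one @parameter line per parameter the function actually takes; with the excess line dropped, the comment reduces to the shape below (illustrative excerpt, abbreviated and lightly cleaned from the actual comment in dcn401_dpp_dscl.c, not part of the patch itself):

/**
 * dpp401_dscl_program_isharp() - Program isharp
 * @dpp_base: High level DPP struct
 * @scl_data: scaler data info
 *
 * This is the primary function to program isharp.
 */
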
Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/../display/dc/dpp/dcn401/dcn401_dpp_dscl.c:981: warning: Excess function parameter 'bs_coeffs_updated' description in 'dpp401_dscl_program_isharp' Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 88d24e36fe00..505929800426 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -971,7 +971,6 @@ static void dpp401_dscl_set_isharp_filter( * * @dpp_base: High level DPP struct * @scl_data: scalaer_data info - * @bs_coeffs_updated: coeffs update flag * * This is the primary function to program isharp * From e89d2fec4cde967445e16e02e406481bac380cc4 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 30 Jul 2024 00:24:44 +0530 Subject: [PATCH 214/447] drm/amdgpu: optimize the padding for gfx10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one to the ring does not use the CP efficiently. Solution: Use the CP optimization when adding NOP packets so that the PFP can discard NOP packets based on the count in the header instead of fetching all NOP packets one by one. Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Reviewed-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 853084a2ce7f..1b88528b512b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9397,6 +9397,24 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization up to 0x3ffe, followed by the remaining NOPs one at a time */ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nop - 1 NOP packets */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v10_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -9588,7 +9606,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, @@ -9629,7 +9647,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = 
amdgpu_ring_insert_nop, + .insert_nop = gfx_v10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, From ee0a469cf9175aeb6131c0476c4a4a8eb5997dfa Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 25 Jun 2024 11:22:50 -0400 Subject: [PATCH 215/447] drm/amdkfd: support per-queue reset on gfx9 Support per-queue reset for GFX9. The recommendation is for the driver to target reset the HW queue via a SPI MMIO register write. Since this requires pipe and HW queue info and MEC FW is limited to doorbell reports of hung queues after an unmap failure, scan the HW queue slots defined by SET_RESOURCES first to identify the user queue candidates to reset. Only signal reset events to processes that have had a queue reset. If queue reset fails, fall back to GPU reset. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 2 + .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 4 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 4 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 16 ++ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 9 + .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 4 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c | 18 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 85 ++++++++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 9 + .../drm/amd/amdkfd/kfd_device_queue_manager.c | 184 +++++++++++++++++- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 12 ++ drivers/gpu/drm/amd/amdkfd/kfd_events.c | 22 +++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 + .../gpu/drm/amd/include/kgd_kfd_interface.h | 6 + 16 files changed, 373 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index aff08321e976..8dfdb18197c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -191,4 +191,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 3a3f3ce09f00..017e8a3013aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -418,5 +418,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, - .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index a5c7259cf2a3..e2ae714a700f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -541,5 +541,7 @@ const struct kfd2kgd_calls 
gc_9_4_3_kfd2kgd = { kgd_gfx_v9_4_3_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode, .set_address_watch = kgd_gfx_v9_4_3_set_address_watch, - .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch + .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 3ab6c3aa0ad1..62176d607bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -1070,6 +1070,20 @@ static void program_trap_handler_settings(struct amdgpu_device *adev, unlock_srbm(adev); } +uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + return 0; +} + +uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -1097,4 +1111,6 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, .program_trap_handler_settings = program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v10_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index 67bcaa3d4226..9efd2dd4fdd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -56,3 +56,12 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); +uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst); +uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst, + unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 8c8437a4383f..c718bedda0ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -680,5 +680,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode, .set_address_watch = kgd_gfx_v10_set_address_watch, - .clear_address_watch = kgd_gfx_v10_clear_address_watch + .clear_address_watch = kgd_gfx_v10_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v10_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index b61a32d6af4b..a4ba49cb22db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -786,6 +786,20 @@ static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev, return 0; } +static uint64_t kgd_gfx_v11_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + return 0; +} 
+ +static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .program_sh_mem_settings = program_sh_mem_settings_v11, .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11, @@ -808,5 +822,7 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode, .set_address_watch = kgd_gfx_v11_set_address_watch, - .clear_address_watch = kgd_gfx_v11_clear_address_watch + .clear_address_watch = kgd_gfx_v11_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v11_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 5a35a8ca8922..32f28c12077b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1144,6 +1144,89 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, kgd_gfx_v9_unlock_srbm(adev, inst); } +uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + uint32_t low, high; + uint64_t queue_addr = 0; + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + amdgpu_gfx_rlc_enter_safe_mode(adev, inst); + + if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE)) + goto unlock_out; + + low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE); + high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI); + + /* only concerned with user queues. */ + if (!high) + goto unlock_out; + + queue_addr = (((queue_addr | high) << 32) | low) << 8; + +unlock_out: + amdgpu_gfx_rlc_exit_safe_mode(adev, inst); + kgd_gfx_v9_release_queue(adev, inst); + + return queue_addr; +} + +uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + uint32_t low, high, temp; + unsigned long end_jiffies; + uint64_t queue_addr = 0; + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + amdgpu_gfx_rlc_enter_safe_mode(adev, inst); + + if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE)) + goto unlock_out; + + low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE); + high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI); + + /* only concerned with user queues. */ + if (!high) + goto unlock_out; + + queue_addr = (((queue_addr | high) << 32) | low) << 8; + + pr_debug("Attempting queue reset on XCC %i pipe id %i queue id %i\n", + inst, pipe_id, queue_id); + + /* assume previous dequeue request issued will take effect after reset */ + WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); + + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + + if (time_after(jiffies, end_jiffies)) { + queue_addr = 0; + break; + } + + usleep_range(500, 1000); + } + + pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n", + inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" 
: "failed!"); + +unlock_out: + amdgpu_gfx_rlc_exit_safe_mode(adev, inst); + kgd_gfx_v9_release_queue(adev, inst); + + return queue_addr; +} + const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -1172,4 +1255,6 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index ce424615f59b..988c50ac3be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -101,3 +101,12 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); +uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst); +uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst, + unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f0bfeb35246f..f6e211070299 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -153,6 +153,20 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, static void kfd_hws_hang(struct device_queue_manager *dqm) { + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + + /* Mark all device queues as reset. 
*/ + list_for_each_entry(cur, &dqm->queues, list) { + qpd = cur->qpd; + list_for_each_entry(q, &qpd->queues_list, list) { + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + + pdd->has_reset_queue = true; + } + } + /* * Issue a GPU reset if HWS is unresponsive */ @@ -878,6 +892,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, else if (prev_active) retval = remove_queue_mes(dqm, q, &pdd->qpd); + /* queue is reset so inaccessible */ + if (pdd->has_reset_queue) { + retval = -EACCES; + goto out_unlock; + } + if (retval) { dev_err(dev, "unmap queue failed\n"); goto out_unlock; @@ -1662,7 +1682,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) static int start_cpsch(struct device_queue_manager *dqm) { struct device *dev = dqm->dev->adev->dev; - int retval; + int retval, num_hw_queue_slots; retval = 0; @@ -1715,9 +1735,24 @@ static int start_cpsch(struct device_queue_manager *dqm) &dqm->wait_times); } + /* set up per-queue reset detection buffer */ + num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe * + dqm->dev->kfd->shared_resources.num_pipe_per_mec * + NUM_XCC(dqm->dev->xcc_mask); + + dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info); + dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); + + if (!dqm->detect_hang_info) { + retval = -ENOMEM; + goto fail_detect_hang_buffer; + } + dqm_unlock(dqm); return 0; +fail_detect_hang_buffer: + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); fail_allocate_vidmem: fail_set_sched_resources: if (!dqm->dev->kfd->shared_resources.enable_mes) @@ -1748,6 +1783,8 @@ static int stop_cpsch(struct device_queue_manager *dqm) kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); if (!dqm->dev->kfd->shared_resources.enable_mes) pm_uninit(&dqm->packet_mgr); + kfree(dqm->detect_hang_info); + dqm->detect_hang_info = NULL; dqm_unlock(dqm); return 0; @@ -1965,6 +2002,135 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) return retval; } +static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + + dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid 0x%0x is reset\n", + q->properties.queue_id, q->process->pasid); + + pdd->has_reset_queue = true; + if (q->properties.is_active) { + q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + } +} + +static int detect_queue_hang(struct device_queue_manager *dqm) +{ + int i; + + /* detect should be used only in dqm locked queue reset */ + if (WARN_ON(dqm->detect_hang_count > 0)) + return 0; + + memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); + + for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { + uint32_t mec, pipe, queue; + int xcc_id; + + mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) + / dqm->dev->kfd->shared_resources.num_pipe_per_mec; + + if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) + continue; + + amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); + + for_each_inst(xcc_id, dqm->dev->xcc_mask) { + uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( + dqm->dev->adev, pipe, queue, xcc_id); + struct dqm_detect_hang_info hang_info; + + if (!queue_addr) + continue; + + hang_info.pipe_id = pipe; + hang_info.queue_id = queue; + hang_info.xcc_id = xcc_id; + hang_info.queue_address = queue_addr; + + dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; + dqm->detect_hang_count++; + } + } + + 
return dqm->detect_hang_count; +} + +static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) +{ + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + + list_for_each_entry(cur, &dqm->queues, list) { + qpd = cur->qpd; + list_for_each_entry(q, &qpd->queues_list, list) { + if (queue_address == q->properties.queue_address) + return q; + } + } + + return NULL; +} + +/* only for compute queue */ +static int reset_queues_on_hws_hang(struct device_queue_manager *dqm) +{ + int r = 0, reset_count = 0, i; + + if (!dqm->detect_hang_info || dqm->is_hws_hang) + return -EIO; + + /* assume dqm locked. */ + if (!detect_queue_hang(dqm)) + return -ENOTRECOVERABLE; + + for (i = 0; i < dqm->detect_hang_count; i++) { + struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; + struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); + struct kfd_process_device *pdd; + uint64_t queue_addr = 0; + + if (!q) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + pdd = kfd_get_process_device_data(dqm->dev, q->process); + if (!pdd) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, + hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, + KFD_UNMAP_LATENCY_MS); + + /* either reset failed or we reset an unexpected queue. */ + if (queue_addr != q->properties.queue_address) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + set_queue_as_reset(dqm, q, &pdd->qpd); + reset_count++; + } + + if (reset_count == dqm->detect_hang_count) + kfd_signal_reset_event(dqm->dev); + else + r = -ENOTRECOVERABLE; + +reset_fail: + dqm->detect_hang_count = 0; + + return r; +} + /* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, @@ -2015,11 +2181,14 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { - while (halt_if_hws_hang) - schedule(); - kfd_hws_hang(dqm); - retval = -ETIME; - goto out; + if (reset_queues_on_hws_hang(dqm)) { + while (halt_if_hws_hang) + schedule(); + dqm->is_hws_hang = true; + kfd_hws_hang(dqm); + retval = -ETIME; + goto out; + } } /* We need to reset the grace period value for this device */ @@ -2038,8 +2207,7 @@ out: } /* only for compute queue */ -static int reset_queues_cpsch(struct device_queue_manager *dqm, - uint16_t pasid) +static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) { int retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 3b9b8eabaacc..dfb36a246637 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -210,6 +210,13 @@ struct device_queue_manager_asic_ops { struct kfd_node *dev); }; +struct dqm_detect_hang_info { + int pipe_id; + int queue_id; + int xcc_id; + uint64_t queue_address; +}; + /** * struct device_queue_manager * @@ -264,6 +271,11 @@ struct device_queue_manager { uint32_t wait_times; wait_queue_head_t destroy_wait; + + /* for per-queue reset support */ + struct dqm_detect_hang_info *detect_hang_info; + size_t detect_hang_info_size; + int detect_hang_count; }; void device_queue_manager_init_cik( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 9b33d9d2c9ad..ea3792249209 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -31,6 +31,7 @@ #include #include "kfd_priv.h" #include "kfd_events.h" +#include "kfd_device_queue_manager.h" #include /* @@ -1244,12 +1245,33 @@ void kfd_signal_reset_event(struct kfd_node *dev) idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id); + struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p); if (unlikely(user_gpu_id == -EINVAL)) { WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id); continue; } + if (unlikely(!pdd)) { + WARN_ONCE(1, "Could not get device data from pasid:0x%x\n", p->pasid); + continue; + } + + if (dev->dqm->detect_hang_count && !pdd->has_reset_queue) + continue; + + if (dev->dqm->detect_hang_count) { + struct amdgpu_task_info *ti; + + ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid); + if (ti) { + dev_err(dev->adev->dev, + "Queues reset on process %s tid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); + amdgpu_vm_put_task_info(ti); + } + } + rcu_read_lock(); id = KFD_FIRST_NONSIGNAL_EVENT_ID; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 66c73825c0a0..84e8ea3a8a0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -321,8 +321,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, static bool check_preemption_failed(struct mqd_manager *mm, void *mqd) { struct v9_mqd *m = (struct v9_mqd *)mqd; + uint32_t doorbell_id = m->queue_doorbell_id0; - return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0); + m->queue_doorbell_id0 = 0; + + return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0); } static int get_wave_state(struct mqd_manager *mm, void *mqd, @@ -624,6 +627,7 @@ static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd) m = get_mqd(mqd + hiq_mqd_size * inst); ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, inst); + m->queue_doorbell_id0 = 0; ++inst; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4190fa339913..a5d47048c147 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -846,6 +846,9 @@ struct kfd_process_device { void *proc_ctx_bo; uint64_t proc_ctx_gpu_addr; void *proc_ctx_cpu_ptr; + + /* Tracks queue reset status */ + bool has_reset_queue; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9e29b92eb523..a902950cc060 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1851,6 +1851,8 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) goto fail; } n_evicted++; + + pdd->dev->dqm->is_hws_hang = false; } return r; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 6d094cf3587d..7744ca3ef4b1 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -318,6 +318,12 @@ struct kfd2kgd_calls { void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t 
tba_addr, uint64_t tma_addr, uint32_t inst); + uint64_t (*hqd_get_pq_addr)(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst); + uint64_t (*hqd_reset)(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout); }; #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ From 67c4ca9f794951482c54a7006c8b3c367d6c3efc Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 13:36:15 +0530 Subject: [PATCH 216/447] drm/amdgpu: do not call insert_nop fn for zero count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not make a function call for a zero-size NOP, as it adds nothing to the ring and the call is unnecessary. Reviewed-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ad49cecb20b8..0d72d2cbb64b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -145,7 +145,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) count = ring->funcs->align_mask + 1 - (ring->wptr & ring->funcs->align_mask); count %= ring->funcs->align_mask + 1; - ring->funcs->insert_nop(ring, count); + + if (count != 0) + ring->funcs->insert_nop(ring, count); mb(); amdgpu_ring_set_wptr(ring); From b41a382932263b2951bc9e83a22168d579a94865 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 22 Jul 2024 13:26:08 -0400 Subject: [PATCH 217/447] drm/amdkfd: fix debug watchpoints for logical devices The number of watchpoints should be set and constrained per logical partition device, not by the socket device.
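For reference, the per-partition allocation in the diff below is just a bitmask scan over the node's watch ids; a reduced user-space sketch of the same idea (illustrative only; taking MAX_WATCH_ADDRESSES as KFD's value is an assumption of this sketch):

#include <errno.h>
#include <stdint.h>

#define MAX_WATCH_ADDRESSES 4	/* per-partition watchpoints, as in KFD */

/* Sketch: allocate a free watch id from a per-node (per-partition)
 * bitmask instead of a per-socket one. */
static int alloc_watch_id(uint32_t *node_alloc_watch_ids)
{
	int i;

	for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
		if ((*node_alloc_watch_ids >> i) & 0x1)
			continue;	/* id already in use on this partition */
		*node_alloc_watch_ids |= 1u << i;
		return i;		/* caller stores this as *watch_id */
	}
	return -ENOMEM;			/* all watchpoints taken on this node */
}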
Signed-off-by: Jonathan Kim Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 20 ++++++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 ++++---- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 34a282540c7e..312dfa84f29f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -365,47 +365,47 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID; - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); for (i = 0; i < MAX_WATCH_ADDRESSES; i++) { /* device watchpoint in use so skip */ - if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1) + if ((pdd->dev->alloc_watch_ids >> i) & 0x1) continue; pdd->alloc_watch_ids |= 0x1 << i; - pdd->dev->kfd->alloc_watch_ids |= 0x1 << i; + pdd->dev->alloc_watch_ids |= 0x1 << i; *watch_id = i; - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return 0; } - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return -ENOMEM; } static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); /* process owns device watch point so safe to clear */ if ((pdd->alloc_watch_ids >> watch_id) & 0x1) { pdd->alloc_watch_ids &= ~(0x1 << watch_id); - pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id); + pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id); } - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); } static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { bool owns_watch_id = false; - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); owns_watch_id = watch_id < MAX_WATCH_ADDRESSES && ((pdd->alloc_watch_ids >> watch_id) & 0x1); - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return owns_watch_id; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 6619028dd58b..c2d2598f776c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -884,13 +884,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, dev_err(kfd_device, "Error initializing KFD node\n"); goto node_init_error; } + + spin_lock_init(&node->watch_points_lock); + kfd->nodes[i] = node; } svm_range_set_max_pages(kfd->adev); - spin_lock_init(&kfd->watch_points_lock); - kfd->init_complete = true; dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor, kfd->adev->pdev->device); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a5d47048c147..057d20446c31 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -310,6 +310,10 @@ struct kfd_node { struct kfd_local_mem_info local_mem_info; struct kfd_dev *kfd; + + /* Track per device allocated watch points */ + uint32_t alloc_watch_ids; + spinlock_t watch_points_lock; }; struct kfd_dev { @@ -362,10 +366,6 @@ struct kfd_dev { struct kfd_node *nodes[MAX_KFD_NODES]; unsigned int num_nodes; - /* Track per device allocated watch points */ - uint32_t alloc_watch_ids; - 
spinlock_t watch_points_lock; - /* Kernel doorbells for KFD device */ struct amdgpu_bo *doorbells; From 847e387e00547b0cc728a5e61f5beb2ff861ed1d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 13:39:20 +0530 Subject: [PATCH 218/447] drm/amdgpu: optimize the padding for gfx11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one to the ring does not use the CP efficiently. Solution: Use the CP optimization when adding NOP packets, so the PFP can discard them based on the count in the header instead of fetching each NOP packet individually. Reviewed-by: Christian König Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 4a9766635933..e7c160b9d0fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -481,6 +481,24 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, amdgpu_ring_write(ring, inv); /* poll interval */ } +static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -6709,7 +6727,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, @@ -6751,7 +6769,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, From 62eefd10ac1c7e976bda47ff311bd87cee40ab8d Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 30 Jul 2024 10:45:08 +0800 Subject: [PATCH 219/447] drm/amdgpu: use CPU for page table update if SDMA is unavailable Avoid using SDMA if it is unavailable.
Signed-off-by: Yifan Zhang Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a060c28f0877..bcb729094521 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2397,6 +2397,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { + struct amdgpu_ip_block *ip_block; struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; int r, i; @@ -2426,6 +2427,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); + /* use CPU for page table update if SDMA is unavailable */ + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA); + if (!ip_block || ip_block->status.valid == false) + vm->use_cpu_for_update = true; + DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && From 62ed6f0f198da04e884062264df308277628004f Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 31 Jul 2024 13:09:28 +0530 Subject: [PATCH 220/447] drm/amd/display: Add NULL check for function pointer in dcn20_set_output_transfer_func This commit adds a null check for the set_output_gamma function pointer in the dcn20_set_output_transfer_func function. Previously, set_output_gamma was being checked for null at line 1030, but then it was being dereferenced without any null check at line 1048. This could potentially lead to a null pointer dereference error if set_output_gamma is null. To fix this, we now ensure that set_output_gamma is not null before dereferencing it. We do this by adding a null check for set_output_gamma before the call to set_output_gamma at line 1048. Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 5a6064999033..425432ca497f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1045,7 +1045,8 @@ bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, /* * if above if is not executed then 'params' equal to 0 and set in bypass */ - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); return true; } From dd340acd42c24a3f28dd22fae6bf38662334264c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 31 Jul 2024 13:22:06 +0530 Subject: [PATCH 221/447] drm/amd/display: Add NULL check for function pointer in dcn401_set_output_transfer_func This commit adds a null check for the set_output_gamma function pointer in the dcn401_set_output_transfer_func function. Previously, set_output_gamma was being checked for null, but then it was being dereferenced without any null check. This could lead to a null pointer dereference if set_output_gamma is null. 
To fix this, we now ensure that set_output_gamma is not null before dereferencing it. We do this by adding a null check for set_output_gamma before the call to set_output_gamma. Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index ceaaa8df3641..77489bbcda02 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -743,7 +743,9 @@ bool dcn401_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + return ret; } From 28574b08c70e56d34d6f6379326a860b96749051 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 31 Jul 2024 13:15:00 +0530 Subject: [PATCH 222/447] drm/amd/display: Add NULL check for function pointer in dcn32_set_output_transfer_func This commit adds a null check for the set_output_gamma function pointer in the dcn32_set_output_transfer_func function. Previously, set_output_gamma was being checked for null, but then it was being dereferenced without any null check. This could lead to a null pointer dereference if set_output_gamma is null. To fix this, we now ensure that set_output_gamma is not null before dereferencing it. We do this by adding a null check for set_output_gamma before the call to set_output_gamma. Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index fcaabad204a2..c3bbbfd1be94 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -582,7 +582,9 @@ bool dcn32_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + return ret; } From f59902ffcc43ce7e1db5d0c4dfee37ec2a1bae0c Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 18:27:28 +0530 Subject: [PATCH 223/447] drm/amdgpu: optimize the padding for gfx12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one to the ring does not use the CP efficiently. Solution: Use the CP optimization when adding NOP packets, so the PFP can discard them based on the count in the header instead of fetching each NOP packet individually.
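To make the packing concrete, here is a reduced user-space sketch of the emit logic added in the gfx11 diff above and the gfx12 diff below (the PACKET3 encoding follows the usual PM4 convention that the count field announces payload dwords minus one; treat the exact field layout as an assumption of this sketch):

#include <stdint.h>

#define PACKET3(op, n)	((3u << 30) | (((op) & 0xFF) << 8) | (((n) & 0x3FFF) << 16))
#define PACKET3_NOP	0x10

/* Sketch: pad num_nop dwords (num_nop >= 1) with one skippable packet.
 * A header with count = num_nop - 2 announces the num_nop - 1 filler
 * dwords that follow, so the PFP discards the whole run in one step
 * instead of decoding num_nop individual NOP packets. */
static uint32_t emit_packed_nops(uint32_t *ring, uint32_t num_nop)
{
	uint32_t i, count;

	if (num_nop == 1) {
		ring[0] = PACKET3(PACKET3_NOP, 0x3fff);	/* lone NOP header */
		return 1;
	}

	count = num_nop - 2 < 0x3ffe ? num_nop - 2 : 0x3ffe;
	ring[0] = PACKET3(PACKET3_NOP, count);		/* header */
	for (i = 1; i < num_nop; i++)
		ring[i] = PACKET3(PACKET3_NOP, 0x3fff);	/* discarded filler */
	return num_nop;
}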
Reviewed-by: Christian König Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f932c7ff85e3..29b3bf1b29b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5005,6 +5005,24 @@ static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v12_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5186,7 +5204,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, .test_ring = gfx_v12_0_ring_test_ring, .test_ib = gfx_v12_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v12_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl, .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec, @@ -5224,7 +5242,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, .test_ring = gfx_v12_0_ring_test_ring, .test_ib = gfx_v12_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v12_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, From 9192c7613ca53572908ba23a4c3f39c7f8ba8021 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Wed, 31 Jul 2024 11:58:46 +0800 Subject: [PATCH 224/447] drm/amdgpu: force to use legacy inv in mmhub MMHUB v4.1.0 only supports fixed cache mode, so use only legacy invalidation accordingly.
Signed-off-by: Likun Gao Reviewed-by: Frank Min Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 5bbaa2b2caab..0fbc3be81f14 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -80,7 +80,8 @@ static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + /* Only use legacy inv on mmhub side */ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); From dfe9d047b162f3a79ab63046608c693ee14c5b7a Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 1 Aug 2024 13:45:27 +0800 Subject: [PATCH 225/447] drm/amdgpu: Add more types for boot time error reporting Data abort exception and unknown errors are supported. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 0fb2d9285834..9cda368ad794 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -4748,6 +4748,16 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n", socket_id, aid_id, hbm_id, fw_status); + + if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(boot_error)) + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n", + socket_id, aid_id, fw_status); + + if (AMDGPU_RAS_GPU_ERR_UNKNOWN(boot_error)) + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, unknown boot time errors\n", + socket_id, aid_id, fw_status); } static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 7ddd13d5c06b..0d49b74bfe5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -46,6 +46,8 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8) #define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) #define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13) +#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29) +#define AMDGPU_RAS_GPU_ERR_UNKNOWN(x) AMDGPU_GET_REG_FIELD(x, 30, 30) #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100 #define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA From bb670c31e13a402d30afa3d484403196031d0fbd Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 31 Jul 2024 20:19:15 +0530 Subject: [PATCH 226/447] drm/amdgpu: Micro-optimise amdgpu_ring_commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some value of optimisation, we can replace the division with a bitwise AND. And it even shrinks the code.
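To see why this is safe: the fetch size (align_mask + 1) is a power of two, so x % (align_mask + 1) equals x & align_mask for unsigned x. A standalone check of the identity (illustrative only, not driver code); the before/after disassembly from the commit follows:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint32_t align_mask = 0x3f;	/* example: 64-dword fetch size */
	uint32_t wptr;

	for (wptr = 0; wptr < 4096; wptr++) {
		uint32_t count = align_mask + 1 - (wptr & align_mask);

		/* x % 2^n == x & (2^n - 1) for unsigned x */
		assert(count % (align_mask + 1) == (count & align_mask));
	}
	return 0;
}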
Before: 6c9: 53 push %rbx 6ca: 4c 8b 47 08 mov 0x8(%rdi),%r8 6ce: 31 d2 xor %edx,%edx 6d0: 48 89 fb mov %rdi,%rbx 6d3: 8b 87 c8 05 00 00 mov 0x5c8(%rdi),%eax 6d9: 41 8b 48 04 mov 0x4(%r8),%ecx 6dd: f7 d0 not %eax 6df: 21 c8 and %ecx,%eax 6e1: 83 c1 01 add $0x1,%ecx 6e4: 83 c0 01 add $0x1,%eax 6e7: f7 f1 div %ecx 6e9: 89 d6 mov %edx,%esi 6eb: 41 ff 90 88 00 00 00 call *0x88(%r8) After: 6c9: 53 push %rbx 6ca: 48 8b 57 08 mov 0x8(%rdi),%rdx 6ce: 48 89 fb mov %rdi,%rbx 6d1: 8b 87 c8 05 00 00 mov 0x5c8(%rdi),%eax 6d7: 8b 72 04 mov 0x4(%rdx),%esi 6da: f7 d0 not %eax 6dc: 21 f0 and %esi,%eax 6de: 83 c0 01 add $0x1,%eax 6e1: 21 c6 and %eax,%esi 6e3: ff 92 88 00 00 00 call *0x88(%rdx) Reviewed-by: Christian König Reviewed-by: Sunil Khatri Signed-off-by: Tvrtko Ursulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 0d72d2cbb64b..8c39bf7e1fac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -144,7 +144,7 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) /* We pad to match fetch size */ count = ring->funcs->align_mask + 1 - (ring->wptr & ring->funcs->align_mask); - count %= ring->funcs->align_mask + 1; + count &= ring->funcs->align_mask; if (count != 0) ring->funcs->insert_nop(ring, count); From dee44a7cb577f2aba39e1713a51e9faee2f65534 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 18:35:14 +0530 Subject: [PATCH 227/447] drm/amdgpu: optimize the padding for gfx9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one to the ring does not use the CP efficiently. Solution: Use the CP optimization when adding NOP packets, so the PFP can discard them based on the count in the header instead of fetching each NOP packet individually.
Reviewed-by: Christian König Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 675a1a8e2515..991f7c2fc1a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7100,6 +7100,24 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v9_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -7240,7 +7258,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, @@ -7294,7 +7312,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, - .insert_nop = amdgpu_sw_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, @@ -7338,7 +7356,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, From fd69ef05029f9beb7b031ef96e7a36970806a670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 11 Jul 2024 15:25:06 +0200 Subject: [PATCH 228/447] drm/radeon: use GEM references instead of TTMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of a TTM reference grab a GEM reference whenever necessary. 
Signed-off-by: Christian König Reviewed-by: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 210e8d43bb23..9735f4968b86 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -88,7 +88,7 @@ static void radeon_gem_object_free(struct drm_gem_object *gobj) if (robj) { radeon_mn_unregister(robj); - radeon_bo_unref(&robj); + ttm_bo_put(&robj->tbo); } } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 450ff7daa46c..d0e4b43d155c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -256,18 +256,15 @@ struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo) if (bo == NULL) return NULL; - ttm_bo_get(&bo->tbo); + drm_gem_object_get(&bo->tbo.base); return bo; } void radeon_bo_unref(struct radeon_bo **bo) { - struct ttm_buffer_object *tbo; - if ((*bo) == NULL) return; - tbo = &((*bo)->tbo); - ttm_bo_put(tbo); + drm_gem_object_put(&(*bo)->tbo.base); *bo = NULL; } From 62341f7bc2ccb7c024eb023a05714d798ba6437d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 18:37:42 +0530 Subject: [PATCH 229/447] drm/amdgpu: optimize the padding for gfx_v9_4_3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one to the ring does not use the CP efficiently. Solution: Use the CP optimization when adding NOP packets, so the PFP can discard them based on the count in the header instead of fetching each NOP packet individually. Reviewed-by: Christian König Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 98fe6c40da64..c4832a5725c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4178,6 +4178,24 @@ static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev) amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer); } +static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, @@ -4227,7 +4245,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush, .test_ring = gfx_v9_4_3_ring_test_ring, .test_ib = gfx_v9_4_3_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_4_3_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v9_4_3_ring_emit_wreg, .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, From
089525632d40bbfa507f224c20563529b3f8a4b3 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 24 Jul 2024 09:29:13 -0600 Subject: [PATCH 230/447] drm/amd/display: Add missing DCN314 to the DML Makefile Include display_mode_vba_314 and display_rq_dlg_calc_314 to the dml Makefile. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 3c0222aa4df1..46f9c05de16e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -83,6 +83,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) From 0964fbd59e7131c1bad4c8549ec13a27104f0b11 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 24 Jul 2024 15:19:31 -0600 Subject: [PATCH 231/447] drm/amd/display: Cleanup dml2 and dc/resource Makefile Remove some useless lines from DC Makefiles. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 -- drivers/gpu/drm/amd/display/dc/resource/Makefile | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index dcd01fe04296..cf979ab172bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -35,8 +35,6 @@ frame_warn_flag := -Wframe-larger-than=2048 endif endif -# DRIVER_BUILD is mostly used in DML2.1 source -subdir-ccflags-y += -DDRIVER_BUILD=1 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_core subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_mcg/ diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile index 4860bb2531a1..09320344d8e9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/Makefile +++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile @@ -198,8 +198,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN351) ############################################################################### -############################################################################### - RESOURCE_DCN401 = dcn401_resource.o AMD_DAL_RESOURCE_DCN401 = $(addprefix $(AMDDALPATH)/dc/resource/dcn401/,$(RESOURCE_DCN401)) From 0345c8bc22c92f3d5fc0287957a96fd214297aa4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 24 Jul 2024 15:21:40 -0600 Subject: [PATCH 232/447] drm/amd/display: Remove useless defines Remove __cplusplus defines added by accident. 
Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/spl/dc_spl.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h index f1fd3eb92f8a..205e59a2a8ee 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h @@ -9,16 +9,8 @@ #define BLACK_OFFSET_RGB_Y 0x0 #define BLACK_OFFSET_CBCR 0x8000 -#ifdef __cplusplus -extern "C" { -#endif - /* SPL interfaces */ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out); -#ifdef __cplusplus -} -#endif - #endif /* __DC_SPL_H__ */ From e9180253e22596991db426ab5b01111c2a739db4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 24 Jul 2024 15:22:26 -0600 Subject: [PATCH 233/447] drm/amd/display: Remove unused fields from dmub_cmd_update_dirty_rect_data Drop coasting_vtotal_high and pad from dmub_cmd_update_dirty_rect_data, since they are not used. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 7c3838362c49..c5f99cbff0b6 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -3028,14 +3028,6 @@ struct dmub_cmd_update_dirty_rect_data { * Currently the support is only for 0 or 1 */ uint8_t panel_inst; - /** - * 16-bit value dicated by driver that indicates the coasting vtotal high byte part. - */ - uint16_t coasting_vtotal_high; - /** - * Explicit padding to 4 byte boundary. - */ - uint8_t pad[2]; }; /** From 4f842ba7cdd83cba1d5c0bbd15ed9d14d882cf89 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 25 Jul 2024 14:35:09 -0600 Subject: [PATCH 234/447] drm/amd/display: Remove unused fields from dc_caps [Why & How] Identify a few unused fields in dc_caps and remove them. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 250d5d48c2d3..95b0413e9f17 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -261,10 +261,7 @@ struct dc_caps { bool zstate_support; bool ips_support; uint32_t num_of_internal_disp; - uint32_t max_dwb_htap; - uint32_t max_dwb_vtap; enum dp_protocol_version max_dp_protocol_version; - bool spdif_aud; unsigned int mall_size_per_mem_channel; unsigned int mall_size_total; unsigned int cursor_cache_size; @@ -1370,7 +1367,6 @@ struct dc_plane_info { int global_alpha_value; bool input_csc_enabled; int layer_index; - bool front_buffer_rendering_active; enum chroma_cositing cositing; }; From e1dbe625d6ac2821eb29e087db46cb539d8079f0 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 25 Jul 2024 16:41:38 -0600 Subject: [PATCH 235/447] drm/amd/display: Add missing program DET segment call to pipe init Add a callback that programs the DET segment when initializing pipes.
Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e31249d1dd22..3cd584419b88 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1403,6 +1403,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } From db65eb46de135338d6177f8853e0fd208f19d63e Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 2 Aug 2024 11:13:19 +0530 Subject: [PATCH 236/447] drm/buddy: Add start address support to trim function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a new start parameter to the trim function to specify the exact address from which to start trimming. This helps in situations where drivers want to enforce address alignment for specific requirements. - Add a new flag DRM_BUDDY_TRIM_DISABLE. Drivers can use this flag to disable the allocator trimming part. This patch lets drivers control trimming so they can do it themselves based on application requirements. v1:(Matthew) - check new_start alignment with min chunk_size - use range_overflows() Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_buddy.c | 25 +++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 +- include/drm/drm_buddy.h | 2 ++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 6a8e45e9d0ec..103c185bb1c8 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * drm_buddy_block_trim - free unused pages * * @mm: DRM buddy manager + * @start: start address to begin the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. * MUST contain single block as input to be trimmed. @@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * 0 on success, error code on failure.
*/ int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks) { struct drm_buddy_block *parent; struct drm_buddy_block *block; + u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; int err; @@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, struct drm_buddy_block, link); + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + if (WARN_ON(!drm_buddy_block_is_allocated(block))) return -EINVAL; @@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (new_size == drm_buddy_block_size(mm, block)) return 0; + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); @@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm, parent = block->parent; block->parent = NULL; - new_start = drm_buddy_block_offset(block); list_add(&block->tmp_link, &dfs); err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); if (err) { @@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (original_size != size) { + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); u64 trim_size; @@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } drm_buddy_block_trim(mm, + NULL, trim_size, trim_list); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index fe3779fdba2c..423b261ea743 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, } while (remaining_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce5..9689a7c5dd36 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); From 507293b1b207e419fc7ea4ff1d72c2f1db18e33c Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Tue, 30 Jul 2019 16:27:24 -0400 Subject: [PATCH 237/447] drm/amd/display: Fix overlay with pre-blend color processing [WHY] Overlay works similarly to MPO, but uses global alpha on both planes and sets the desktop as the rear plane instead of the front plane [HOW] Ensure that top plane isn't overlay by checking global alpha before applying the previously added MPO fix Reviewed-by: Rodrigo Siqueira Signed-off-by: Michael Strauss Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 
deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 3cd584419b88..a7b5b25e3f34 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -2586,8 +2586,11 @@ static bool dcn10_is_rear_mpo_fix_required(struct pipe_ctx *pipe_ctx, enum dc_co while (top->top_pipe) top = top->top_pipe; // Traverse to top pipe_ctx - if (top->plane_state && top->plane_state->layer_index == 0) - return true; // Front MPO plane not hidden + if (top->plane_state && top->plane_state->layer_index == 0 && !top->plane_state->global_alpha) + // Global alpha used by top plane for PIP overlay + // Pre-multiplied/per-pixel alpha used by MPO + // Check top plane's global alpha to ensure layer_index > 0 not caused by PIP + return true; // MPO in use and front plane not hidden } } return false; From 64a905203fd0da3e8a3f649593cc48c9fbab25d0 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Tue, 23 Jul 2024 00:26:19 -0400 Subject: [PATCH 238/447] drm/amd/display: Add stream and char control callback [why & how] Add new stream and char control functions based on DCCG spec Reviewed-by: Muhammad Ahmed Signed-off-by: Hansen Dsouza Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 132 ++++++++++++++++-- 1 file changed, 122 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index bd3757de51c9..13e3d64ee2f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -896,7 +896,7 @@ static void dccg35_disable_symclk32_le_new( dccg35_set_symclk32_le_rcg(dccg, inst, true); } -static void dccg35_enable_dpp_new( +static void dccg35_enable_dpp_clk_new( struct dccg *dccg, int inst, enum dppclk_clock_source src) @@ -915,7 +915,7 @@ static void dccg35_enable_dpp_new( DPPCLK0_DTO_MODULO, 0xFF); } -static void dccg35_disable_dpp_new( +static void dccg35_disable_dpp_clk_new( struct dccg *dccg, int inst) { @@ -956,27 +956,25 @@ static void dccg35_enable_dtbclk_p_new(struct dccg *dccg, } static void dccg35_disable_dtbclk_p_new(struct dccg *dccg, - enum dtbclk_source src, int inst) { dccg35_set_dtbclk_p_src_new(dccg, DTBCLK_REFCLK, inst); dccg35_set_dtbclk_p_rcg(dccg, inst, true); } -static void dccg35_enable_dpstreamclk_new(struct dccg *dccg, - enum dtbclk_source src, +static void dccg35_disable_dpstreamclk_new(struct dccg *dccg, int inst) { dccg35_set_dpstreamclk_src_new(dccg, DP_STREAM_REFCLK, inst); dccg35_set_dpstreamclk_rcg(dccg, inst, true); } -static void dccg35_disable_dpstreamclk_new(struct dccg *dccg, - enum dtbclk_source src, +static void dccg35_enable_dpstreamclk_new(struct dccg *dccg, + enum dp_stream_clk_source src, int inst) { dccg35_set_dpstreamclk_rcg(dccg, inst, false); - dccg35_set_dtbclk_p_src_new(dccg, src, inst); + dccg35_set_dpstreamclk_src_new(dccg, src, inst); } static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) @@ -1935,6 +1933,114 @@ static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst } } +static void dccg35_set_dpstreamclk_cb( + struct dccg *dccg, + enum streamclk_source src, + int otg_inst, + int dp_hpo_inst) +{ + + enum dtbclk_source dtb_clk_src; + enum dp_stream_clk_source dp_stream_clk_src; + + ASSERT(otg_inst >= DP_STREAM_DTBCLK_P5); + + switch (src) { + case 
REFCLK: + dtb_clk_src = DTBCLK_REFCLK; + dp_stream_clk_src = DP_STREAM_REFCLK; + break; + case DPREFCLK: + dtb_clk_src = DTBCLK_DPREFCLK; + dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; + break; + case DTBCLK0: + dtb_clk_src = DTBCLK_DTBCLK0; + dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + + if (dtb_clk_src == DTBCLK_REFCLK && + dp_stream_clk_src == DP_STREAM_REFCLK) { + dccg35_disable_dtbclk_p_new(dccg, otg_inst); + dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst); + } else { + dccg35_enable_dtbclk_p_new(dccg, dtb_clk_src, otg_inst); + dccg35_enable_dpstreamclk_new(dccg, + dp_stream_clk_src, + dp_hpo_inst); + } +} + +static void dccg35_set_dpstreamclk_root_clock_gating_cb( + struct dccg *dccg, + int dp_hpo_inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Instance 0 is implied here since only one streamclock resource + * Redundant as gating when enabled is acheived through set_dpstreamclk + */ + if (power_on) + dccg35_enable_dpstreamclk_new(dccg, + DP_STREAM_REFCLK, + dp_hpo_inst); + else + dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst); +} + +static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst, + int req_dppclk) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (dccg->ref_dppclk && req_dppclk) { + int ref_dppclk = dccg->ref_dppclk; + int modulo, phase; + + // phase / modulo = dpp pipe clk / dpp global clk + modulo = 0xff; // use FF at the end + phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk; + + if (phase > 0xff) { + ASSERT(false); + phase = 0xff; + } + + /* Enable DPP CLK DTO output */ + dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_DCCG_DTO); + + /* Program DTO */ + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, phase, + DPPCLK0_DTO_MODULO, modulo); + } else + dccg35_disable_dpp_clk_new(dccg, dpp_inst); + + dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; +} + +static void dccg35_dpp_root_clock_control_cb( + struct dccg *dccg, + unsigned int dpp_inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through update_dpp_dto + */ + if (power_on) + dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_REFCLK); + else + dccg35_disable_dpp_clk_new(dccg, dpp_inst); +} + static const struct dccg_funcs dccg35_funcs = { .update_dpp_dto = dccg35_update_dpp_dto, .dpp_root_clock_control = dccg35_dpp_root_clock_control, @@ -2010,14 +2116,20 @@ struct dccg *dccg35_create( (void)&dccg35_disable_symclk32_se_new; (void)&dccg35_enable_symclk32_le_new; (void)&dccg35_disable_symclk32_le_new; - (void)&dccg35_enable_dpp_new; - (void)&dccg35_disable_dpp_new; + (void)&dccg35_enable_dpp_clk_new; + (void)&dccg35_enable_dpp_clk_new; (void)&dccg35_disable_dscclk_new; (void)&dccg35_enable_dscclk_new; (void)&dccg35_enable_dtbclk_p_new; (void)&dccg35_disable_dtbclk_p_new; (void)&dccg35_enable_dpstreamclk_new; (void)&dccg35_disable_dpstreamclk_new; + (void)&dccg35_set_dpstreamclk_cb; + (void)&dccg35_dpp_root_clock_control_cb; + (void)&dccg35_set_dpstreamclk_root_clock_gating_cb; + (void)&dccg35_update_dpp_dto_cb; + (void)&dccg35_dpp_root_clock_control_cb; + base = &dccg_dcn->base; base->ctx = ctx; base->funcs = &dccg35_funcs; 
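A quick numeric check of the DTO math in dccg35_update_dpp_dto_cb above, as a standalone sketch (the clock values are illustrative, not from the patch):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	int ref_dppclk = 600000;	/* kHz, illustrative */
	int req_dppclk = 300000;	/* kHz, illustrative */
	int modulo = 0xff;		/* "use FF at the end", as in the patch */
	int phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk;

	/* DTO output = ref * phase / modulo; rounding the phase up
	 * guarantees the pipe clock never drops below the request. */
	printf("phase=%d -> %d kHz\n", phase, ref_dppclk * phase / modulo);
	assert(ref_dppclk * phase / modulo >= req_dppclk);
	return 0;
}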
From 792be2e23ac69821db7860ba4ba94592101f0b07 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 1 Aug 2024 14:26:19 +0800 Subject: [PATCH 239/447] drm/amdgpu: create function to check RAS RMA status For the convenience of calling it globally. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 ++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c | 2 +- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9cda368ad794..16da939a8406 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2153,7 +2153,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * /* gpu reset is fallback for failed and default cases. * For RMA case, amdgpu_umc_poison_handler will handle gpu reset. */ - if (poison_stat && !con->is_rma) { + if (poison_stat && !amdgpu_ras_is_rma(adev)) { event_id = amdgpu_ras_acquire_event_id(adev, type); RAS_EVENT_LOG(adev, event_id, "GPU reset for %s RAS poison consumption is issued!\n", @@ -2945,7 +2945,7 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) amdgpu_ras_error_data_fini(&err_data); - if (err_cnt && con->is_rma) + if (err_cnt && amdgpu_ras_is_rma(adev)) amdgpu_ras_reset_gpu(adev); amdgpu_ras_schedule_retirement_dwork(con, @@ -3046,7 +3046,7 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, } /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */ - if (reset_flags && !con->is_rma) { + if (reset_flags && !amdgpu_ras_is_rma(adev)) { if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) @@ -3192,7 +3192,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) * This calling fails when is_rma is true or * ret != 0. */ - if (con->is_rma || ret) + if (amdgpu_ras_is_rma(adev) || ret) goto free; if (con->eeprom_control.ras_num_recs) { @@ -3241,7 +3241,7 @@ out: * Except error threshold exceeding case, other failure cases in this * function would not fail amdgpu driver init.
*/ - if (!con->is_rma) + if (!amdgpu_ras_is_rma(adev)) ret = 0; else ret = -EINVAL; @@ -4284,7 +4284,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); /* mode1 is the only selection for RMA status */ - if (ras->is_rma) { + if (amdgpu_ras_is_rma(adev)) { ras->gpu_reset_flags = 0; ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; } @@ -4824,3 +4824,13 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, va_end(args); } + +bool amdgpu_ras_is_rma(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return false; + + return con->is_rma; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 0d49b74bfe5e..9625e5c92413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -974,4 +974,5 @@ __printf(3, 4) void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, const char *fmt, ...); +bool amdgpu_ras_is_rma(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 2ed55f3c5fa2..bb7b9b2eaac1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -196,7 +196,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, amdgpu_umc_handle_bad_pages(adev, ras_error_status); if ((err_data->ue_count || err_data->de_count) && - (reset || (con && con->is_rma))) { + (reset || amdgpu_ras_is_rma(adev))) { con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 9cd221ed240c..999bb3cc88b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -97,7 +97,7 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev, ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; } - if (con && !con->is_rma) + if (con && !amdgpu_ras_is_rma(adev)) amdgpu_ras_reset_gpu(adev); } From eaf3adb8faab611ba57594fa915893fc93a7788c Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 19 Jul 2024 14:10:58 -0400 Subject: [PATCH 240/447] drm/amd/display: fix a UBSAN warning in DML2.1 When programming the phantom pipe, cursor_width is explicitly set to 0, which causes the calculation logic to overflow an unsigned int, triggering the kernel's UBSAN check as below: [ 40.962845] UBSAN: shift-out-of-bounds in /tmp/amd.EfpumTkO/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:3312:34 [ 40.962849] shift exponent 4294967170 is too large for 32-bit type 'unsigned int' [ 40.962852] CPU: 1 PID: 1670 Comm: gnome-shell Tainted: G W OE 6.5.0-41-generic #41~22.04.2-Ubuntu [ 40.962854] Hardware name: Gigabyte Technology Co., Ltd. X670E AORUS PRO X/X670E AORUS PRO X, BIOS F21 01/10/2024 [ 40.962856] Call Trace: [ 40.962857] [ 40.962860] dump_stack_lvl+0x48/0x70 [ 40.962870] dump_stack+0x10/0x20 [ 40.962872] __ubsan_handle_shift_out_of_bounds+0x1ac/0x360 [ 40.962878] calculate_cursor_req_attributes.cold+0x1b/0x28 [amdgpu] [ 40.963099] dml_core_mode_support+0x6b91/0x16bc0 [amdgpu] [ 40.963327] ? srso_alias_return_thunk+0x5/0x7f [ 40.963331] ? CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport+0x18b8/0x2790 [amdgpu] [ 40.963534] ? srso_alias_return_thunk+0x5/0x7f [ 40.963536] ? 
dml_core_mode_support+0xb3db/0x16bc0 [amdgpu] [ 40.963730] dml2_core_calcs_mode_support_ex+0x2c/0x90 [amdgpu] [ 40.963906] ? srso_alias_return_thunk+0x5/0x7f [ 40.963909] ? dml2_core_calcs_mode_support_ex+0x2c/0x90 [amdgpu] [ 40.964078] core_dcn4_mode_support+0x72/0xbf0 [amdgpu] [ 40.964247] dml2_top_optimization_perform_optimization_phase+0x1d3/0x2a0 [amdgpu] [ 40.964420] dml2_build_mode_programming+0x23d/0x750 [amdgpu] [ 40.964587] dml21_validate+0x274/0x770 [amdgpu] [ 40.964761] ? srso_alias_return_thunk+0x5/0x7f [ 40.964763] ? resource_append_dpp_pipes_for_plane_composition+0x27c/0x3b0 [amdgpu] [ 40.964942] dml2_validate+0x504/0x750 [amdgpu] [ 40.965117] ? dml21_copy+0x95/0xb0 [amdgpu] [ 40.965291] ? srso_alias_return_thunk+0x5/0x7f [ 40.965295] dcn401_validate_bandwidth+0x4e/0x70 [amdgpu] [ 40.965491] update_planes_and_stream_state+0x38d/0x5c0 [amdgpu] [ 40.965672] update_planes_and_stream_v3+0x52/0x1e0 [amdgpu] [ 40.965845] ? srso_alias_return_thunk+0x5/0x7f [ 40.965849] dc_update_planes_and_stream+0x71/0xb0 [amdgpu] Fix this by adding a guard for checking cursor width before triggering the size calculation. Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../src/dml2_core/dml2_core_dcn4_calcs.c | 89 ++++++++++--------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c54f1af1845c..cbecdc9f253a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -7247,10 +7247,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out /* Cursor Support Check */ mode_lib->ms.support.CursorSupport = true; for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].cursor.cursor_width > 0.0) { - if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) { + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) mode_lib->ms.support.CursorSupport = false; - } } } @@ -8111,27 +8110,31 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); bool cursor_not_enough_urgent_latency_hiding = 0; - calculate_cursor_req_attributes( - display_cfg->plane_descriptors[k].cursor.cursor_width, - display_cfg->plane_descriptors[k].cursor.cursor_bpp, - // output - &s->cursor_lines_per_chunk[k], - &s->cursor_bytes_per_line[k], - &s->cursor_bytes_per_chunk[k], - &s->cursor_bytes[k]); + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, - calculate_cursor_urgent_burst_factor( - mode_lib->ip.cursor_buffer_size, - display_cfg->plane_descriptors[k].cursor.cursor_width, - s->cursor_bytes_per_chunk[k], - 
s->cursor_lines_per_chunk[k], - line_time_us, - mode_lib->ms.UrgLatency, + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->ms.UrgLatency, + + // output + &mode_lib->ms.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } - // output - &mode_lib->ms.UrgentBurstFactorCursor[k], - &cursor_not_enough_urgent_latency_hiding); mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; #ifdef __DML_VBA_DEBUG__ @@ -10608,31 +10611,33 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { bool cursor_not_enough_urgent_latency_hiding = 0; - double line_time_us; + double line_time_us = 0.0; - calculate_cursor_req_attributes( - display_cfg->plane_descriptors[k].cursor.cursor_width, - display_cfg->plane_descriptors[k].cursor.cursor_bpp, + line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, - // output - &s->cursor_lines_per_chunk[k], - &s->cursor_bytes_per_line[k], - &s->cursor_bytes_per_chunk[k], - &s->cursor_bytes[k]); + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); - line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->mp.UrgentLatency, - calculate_cursor_urgent_burst_factor( - mode_lib->ip.cursor_buffer_size, - display_cfg->plane_descriptors[k].cursor.cursor_width, - s->cursor_bytes_per_chunk[k], - s->cursor_lines_per_chunk[k], - line_time_us, - mode_lib->mp.UrgentLatency, - - // output - &mode_lib->mp.UrgentBurstFactorCursor[k], - &cursor_not_enough_urgent_latency_hiding); + // output + &mode_lib->mp.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k]; CalculateUrgentBurstFactor( From c9875d0a789060facc274dee0d4eb6500d471772 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 1 Aug 2024 10:47:16 +0800 Subject: [PATCH 241/447] drm/amdgpu: add golden setting for gc v12 Adding Manual GDB golden setting for gc v12 revision 0 ASIC. 
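For context on what the golden entries below do: each SOC15_REG_GOLDEN_VALUE() triple is a (register, and_mask, or_value) pair that soc15_program_register_sequence() applies as a masked read-modify-write, so only the bits named in the mask are rewritten. A minimal standalone sketch of that semantic (simplified; the in-tree helper also translates per-IP register offsets and fast-paths all-ones masks, and the register offset used here is a toy value):

#include <stdint.h>
#include <stdio.h>

struct reg_golden {
	uint32_t offset;   /* register to patch */
	uint32_t and_mask; /* bits this entry owns */
	uint32_t or_value; /* value to program into those bits */
};

static uint32_t reg_file[0x10000]; /* toy register file for the sketch */

static void program_golden_sequence(const struct reg_golden *golden, uint32_t count)
{
	uint32_t i, tmp;

	for (i = 0; i < count; i++) {
		tmp = reg_file[golden[i].offset];
		tmp &= ~golden[i].and_mask;                     /* clear the owned bits */
		tmp |= golden[i].or_value & golden[i].and_mask; /* program the new value */
		reg_file[golden[i].offset] = tmp;
	}
}

int main(void)
{
	/* Mirrors the regGL2C_CTRL5 entry below: mask 0x70, value 0x20 */
	const struct reg_golden gl2c = { 0x1234, 0x00000070, 0x00000020 };

	reg_file[gl2c.offset] = 0xdeadbeef;
	program_golden_sequence(&gl2c, 1);
	printf("0x%08x\n", (unsigned)reg_file[gl2c.offset]); /* prints 0xdeadbeaf */
	return 0;
}

With the regGL2C_CTRL5 entry (mask 0x00000070, value 0x00000020), only bits [6:4] change and the rest of the register is left untouched, as the toy run shows.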
Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 29b3bf1b29b3..0a71e216a7f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,6 +202,12 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), + SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3481,6 +3487,24 @@ static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } +static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->rev_id == 0) + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + break; + default: + break; + } +} + static int gfx_v12_0_hw_init(void *handle) { int r; @@ -3521,6 +3545,9 @@ static int gfx_v12_0_hw_init(void *handle) } } + if (!amdgpu_emu_mode) + gfx_v12_0_init_golden_registers(adev); + adev->gfx.is_poweron = true; if (get_gb_addr_config(adev)) From 671af06690e7f79db51b475a35c3b2619f345abc Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 2 Aug 2024 10:11:37 +0800 Subject: [PATCH 242/447] drm/amdgpu: remove unused RAS parameter 'err_addr' - amdgpu_ras_error_statistic_ue_count() - amdgpu_ras_error_statistic_ce_count() - amdgpu_ras_error_statistic_de_count() The parameter 'err_addr' has been unused since the following patch.
Fixes: a7e8467fbeee ("drm/amdgpu: Remove unused code") Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 14 +++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 18 +++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 18 ++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 6 +++--- 9 files changed, 31 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 19158cc30f31..929095a2e088 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -453,13 +453,13 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er switch (type) { case ACA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_DEFERRED: - amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, count); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 2542bd7aa7c7..18ee60378727 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -396,7 +396,6 @@ static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set, struct ras_err_data *err_data) { - struct ras_err_addr err_addr; struct amdgpu_smuio_mcm_config_info mcm_info; struct mca_bank_node *node, *tmp; struct mca_bank_entry *entry; @@ -421,27 +420,20 @@ static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_r continue; memset(&mcm_info, 0, sizeof(mcm_info)); - memset(&err_addr, 0, sizeof(err_addr)); mcm_info.socket_id = entry->info.socket_id; mcm_info.die_id = entry->info.aid; - if (blk == AMDGPU_RAS_BLOCK__UMC) { - err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS]; - err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID]; - err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR]; - } - if (type == AMDGPU_MCA_ERROR_TYPE_UE) { amdgpu_ras_error_statistic_ue_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); } else { if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS])) amdgpu_ras_error_statistic_de_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); else amdgpu_ras_error_statistic_ce_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); } amdgpu_mca_bank_set_remove_node(mca_set, node); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 16da939a8406..61a2f386d9fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1223,11 +1223,11 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager 
*obj, s for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; amdgpu_ras_error_statistic_de_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->de_count); + &err_info->mcm_info, err_info->de_count); amdgpu_ras_error_statistic_ce_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->ce_count); + &err_info->mcm_info, err_info->ce_count); amdgpu_ras_error_statistic_ue_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->ue_count); + &err_info->mcm_info, err_info->ue_count); } } else { /* for legacy asic path which doesn't has error source info */ @@ -4618,8 +4618,8 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d } int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4640,8 +4640,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4662,8 +4662,8 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 9625e5c92413..669720a9c60a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -572,12 +572,6 @@ struct ras_fs_data { char debugfs_name[32]; }; -struct ras_err_addr { - uint64_t err_status; - uint64_t err_ipid; - uint64_t err_addr; -}; - struct ras_err_info { struct amdgpu_smuio_mcm_config_info mcm_info; u64 ce_count; @@ -939,14 +933,14 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, int amdgpu_ras_error_data_init(struct ras_err_data *err_data); void amdgpu_ras_error_data_fini(struct ras_err_data *err_data); int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances); int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, const struct aca_info *aca_info, void *data); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 821ba2309dec..7de449fae1e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1389,10 +1389,10 @@ static void __xgmi_v6_4_0_query_error_count(struct 
amdgpu_device *adev, struct a switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { case ACA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL); + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); break; case ACA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL); + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c4832a5725c3..8455fda750a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4075,8 +4075,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, /* the caller should make sure initialize value of * err_data->ue_count and err_data->ce_count */ - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); } static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 621761a17ac7..915203b91c5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -670,8 +670,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index cb7fedb34fa6..c77889040760 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2243,7 +2243,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 0e6c3ce3ea8f..1a8ea834efa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -157,9 +157,9 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, &de_count, umc_v12_0_is_deferred_error); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); - amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, de_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, de_count); return 0; } From 434967aadbbbe3ad9103cc29e9a327de20fdba01 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 7 Mar 2024 
19:04:31 +0000 Subject: [PATCH 243/447] drm/amdgpu: Forward soft recovery errors to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we discussed before[1], soft recovery should be forwarded to userspace, or we can get into a really bad state where apps will keep submitting hanging command buffers cascading us to a hard reset. 1: https://lore.kernel.org/all/bf23d5ed-9a6b-43e7-84ee-8cbfd0d60f18@froggi.es/ Signed-off-by: Joshua Ashton Reviewed-by: Marek Olšák Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e238f2832f65..908e13455152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -264,9 +264,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; - /* Ignore soft recovered fences here */ r = drm_sched_entity_error(s_entity); - if (r && r != -ENODATA) + if (r) goto error; if (!fence && job->gang_submit) From 65f6e9f7e1319119096a6dc93e62894e4375b578 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Fri, 19 Jul 2024 20:01:01 -0400 Subject: [PATCH 244/447] drm/amd/display: Print Pcon FRL Link BW in Debug Message Under autonomous mode, source reads dpcd DP_PCON_HDMI_POST_FRL_STATUS for the frl link status. Without dsc passthrough, it serves as bw bottleneck on the entire link, compared with the dp link from source to the converter where dsc is available. Reviewed-by: Rodrigo Siqueira Signed-off-by: Fangzhi Zuo Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index f3b6d8936f91..59c9dde10885 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1166,6 +1166,8 @@ static void get_active_converter_info( link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support( link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps, hdmi_encoded_link_bw); + DC_LOG_DC("%s: pcon frl link bw = %u\n", __func__, + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps); } if (link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps > 0) From dd3e296289346109d41c6317124f51aee0269c25 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 1 Aug 2024 16:26:27 +0800 Subject: [PATCH 245/447] drm/amdgpu: update bad state check in GPU recovery Return RMA status without message print. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3a43754e7f10..f595ba6ba7c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5498,7 +5498,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, * bad_page_threshold value to fix this once * probing driver again. 
*/ - if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) { + if (!amdgpu_ras_is_rma(tmp_adev)) { /* must succeed. */ amdgpu_ras_resume(tmp_adev); } else { From b3a3c9a6b27b68310f1d4d486f47556808c7c855 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 31 Jul 2024 15:54:27 +0800 Subject: [PATCH 246/447] drm/amdgpu: report bad status in GPU recovery Report the GPU's bad status instead of printing "GPU reset failed". v2: add check for reset_context->src. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f595ba6ba7c7..29a4adee9286 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5876,8 +5876,14 @@ skip_hw_reset: tmp_adev->asic_reset_res = 0; if (r) { - /* bad news, how to tell it to userspace ? */ - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); + /* bad news, how to tell it to userspace ? + * for ras error, we should report GPU bad status instead of + * reset failure + */ + if (reset_context->src != AMDGPU_RESET_SRC_RAS || + !amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", + atomic_read(&tmp_adev->gpu_reset_counter)); amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); From 5aacf8917fde5bc2a640f3cd49130c0e2e85e726 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Fri, 2 Aug 2024 11:15:11 +0800 Subject: [PATCH 247/447] drm/amdgpu: change non-dcc buffer copy configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without setting the cpv bit and the 7th ib dw, non-dcc buffer copies will see random corruption. So set the cpv bit and clear the 7th ib dw when copying non-dcc buffers. Signed-off-by: Frank Min Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 62ef4a737a56..e2918318600b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1688,8 +1688,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) | - SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags & - (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ @@ -1703,6 +1702,8 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ?
SDMA_DCC_WRITE_CM(1) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); + else + ib->ptr[ib->length_dw++] = 0; } /** From 7b2363e06c0ff4b868e7d768d605a9e656ff61f3 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Tue, 23 Jul 2024 17:26:47 -0400 Subject: [PATCH 248/447] drm/amd/display: Disable SubVP if Hardware Rotation is Used [Why and How] SubVP is not supported when hardware rotation is being used Reviewed-by: Alvin Lee Signed-off-by: Austin Zheng Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 9331a8fe77c9..9c6397aafd38 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1058,7 +1058,8 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, /* check recout height covers entire otg vactive, and single plane */ if (num_planes_per_stream[plane_descriptor->stream_index] > 1 || - !plane_descriptor->composition.rect_out_height_spans_vactive) { + !plane_descriptor->composition.rect_out_height_spans_vactive || + plane_descriptor->composition.rotation_angle != dml2_rotation_0) { return false; } } From 51d334d6a49629ea03a2dde562d46846eb7d07a0 Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Tue, 23 Jul 2024 16:20:15 -0400 Subject: [PATCH 249/447] drm/amd/display: Assume 32 bpp cursor in DML21 [Why] Cursor size can change dynamically at runtime without re-validation, so DML should calculate with the max size cursor to cover all cases. 
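As a rough sanity check of what the fixed assumption budgets (a sketch of the arithmetic only, not DML's actual calculate_cursor_req_attributes(), which additionally handles chunking and alignment):

#include <stdint.h>
#include <stdio.h>

/* Worst-case cursor line footprint that validation now always assumes,
 * regardless of the cursor attributes currently programmed. */
static uint32_t cursor_bytes_per_line(uint32_t width_px, uint32_t bpp)
{
	return width_px * bpp / 8;
}

int main(void)
{
	/* The values hardcoded by this change: 256 pixels wide at 32 bpp. */
	printf("%u bytes per cursor line\n", cursor_bytes_per_line(256, 32)); /* 1024 */
	return 0;
}

Because the cursor term is now a constant, a runtime change of stream->cursor_attributes can no longer change the outcome of a mode that validation already accepted.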
Reviewed-by: Dillon Varone Signed-off-by: Joshua Aberback Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/dml21_translation_helper.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 65776602648d..b0de8920f7e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -725,18 +725,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context); struct dc_stream_state *stream = context->streams[stream_index]; - if (stream->cursor_attributes.color_format == CURSOR_MODE_MONO) - plane->cursor.cursor_bpp = 2; - else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_1BIT_AND - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - plane->cursor.cursor_bpp = 32; - } else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED) { - plane->cursor.cursor_bpp = 64; - } else - plane->cursor.cursor_bpp = 32; - + plane->cursor.cursor_bpp = 32; plane->cursor.cursor_width = 256; plane->cursor.num_cursors = 1; From b8dc6ca028d9a39196a3a066b9ef2d4a5eca475d Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Tue, 23 Jul 2024 15:54:23 -0400 Subject: [PATCH 250/447] drm/amd/display: Force enable 3DLUT DMA check for dcn401 in DML [WHY] Currently TR0 (trip 0) is not properly budgeting for urgent latency in DML2.1. This results in overly aggressive prefetch schedules that are vulnerable to request return jitter, resulting in severe underflow at the start of the frame. [HOW] Forcing 3DLUT DMA check to enable causes urgent latency to be budgeted properly into the prefetch schedule, avoiding the vulnerability. 
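The plumbing itself is small: a per-SoC flag set at resource construction is OR-ed into each plane's TDLUT setup during translation, so validation budgets the 3DLUT DMA even for planes without a VIDMEM-sourced LUT. A simplified sketch of that override pattern (struct and function names abbreviated from the dml2 code touched below):

#include <stdbool.h>

struct dml_cfg { bool force_tdlut_enable; };   /* set once per SoC */
struct plane_tdlut { bool setup_for_tdlut; };  /* per plane, per validation */

static void populate_tdlut(struct plane_tdlut *tdlut, bool lut3d_in_vidmem,
			   const struct dml_cfg *cfg)
{
	/* Real requirement: only planes sourcing a 3DLUT from VIDMEM need
	 * the TDLUT DMA. */
	tdlut->setup_for_tdlut = lut3d_in_vidmem;
	/* Forced worst case: always budget the DMA, so its urgent latency
	 * is folded into the prefetch schedule (TR0) unconditionally. */
	tdlut->setup_for_tdlut |= cfg->force_tdlut_enable;
}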
Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/dml21_translation_helper.c | 6 ++++-- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h | 1 + .../drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index b0de8920f7e7..006667aa961b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -816,6 +816,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { plane->tdlut.setup_for_tdlut = true; + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) { case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: @@ -825,6 +826,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear; break; } + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) { case DC_CM2_GPU_MEM_SIZE_171717: plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; @@ -833,8 +835,8 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined break; } - } else - plane->tdlut.setup_for_tdlut = false; + } + plane->tdlut.setup_for_tdlut |= dml_ctx->config.force_tdlut_enable; plane->dynamic_meta_data.enable = false; plane->dynamic_meta_data.lines_before_active_required = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 023325e8f6e2..0f944fcfd5a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -236,6 +236,7 @@ struct dml2_configuration_options { bool use_clock_dc_limits; bool gpuvm_enable; + bool force_tdlut_enable; struct dml2_soc_bb *bb_from_dmub; }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 3e76732ac0dc..ec676d269d33 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -2099,6 +2099,7 @@ static bool dcn401_resource_construct( dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = true; dc->dml2_options.map_dc_pipes_with_callbacks = true; + dc->dml2_options.force_tdlut_enable = true; resource_init_common_dml2_callbacks(dc, &dc->dml2_options); dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; From be64336307a6c3ee71fe1337c1b9f0495aa83c50 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Wed, 26 Jun 2024 17:02:23 +0800 Subject: [PATCH 251/447] drm/amd/display: Re-enable panel replay feature [Why & How] Fixed the replay issues and now re-enable the panel replay feature. 
Reported-by: Arthur Borsboom Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3344 Reviewed-by: Sun peng Li Signed-off-by: Tom Chung Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b4fbccbf2f29..ec6064d40dbf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4850,18 +4850,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) /* Determine whether to enable Replay support by default. */ if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) { switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { -/* - * Disabled by default due to https://gitlab.freedesktop.org/drm/amd/-/issues/3344 - * case IP_VERSION(3, 1, 4): - * case IP_VERSION(3, 1, 5): - * case IP_VERSION(3, 1, 6): - * case IP_VERSION(3, 2, 0): - * case IP_VERSION(3, 2, 1): - * case IP_VERSION(3, 5, 0): - * case IP_VERSION(3, 5, 1): - * replay_feature_enabled = true; - * break; - */ + case IP_VERSION(3, 1, 4): + case IP_VERSION(3, 2, 0): + case IP_VERSION(3, 2, 1): + case IP_VERSION(3, 5, 0): + case IP_VERSION(3, 5, 1): + replay_feature_enabled = true; + break; + default: replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK; break; From 78c508a1c162c90c48e12faa62bdab8b90e6f17c Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Thu, 25 Jul 2024 11:45:36 -0400 Subject: [PATCH 252/447] drm/amd/display: Add clock control callbacks [why & how] Add clock source selection and control functions based on spec Reviewed-by: Muhammad Ahmed Signed-off-by: Hansen Dsouza Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 281 ++++++++++++++++-- 1 file changed, 252 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 13e3d64ee2f0..b4f441c405bb 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -41,13 +41,22 @@ #define DC_LOGGER \ dccg->ctx->logger -enum physymclk_fe_source { - PHYSYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A - PHYSYMCLK_FE_SYMCLK_B, - PHYSYMCLK_FE_SYMCLK_C, - PHYSYMCLK_FE_SYMCLK_D, - PHYSYMCLK_FE_SYMCLK_E, - PHYSYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +enum symclk_fe_source { + SYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A + SYMCLK_FE_SYMCLK_B, + SYMCLK_FE_SYMCLK_C, + SYMCLK_FE_SYMCLK_D, + SYMCLK_FE_SYMCLK_E, + SYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk_be_source { + SYMCLK_BE_PHYCLK = 0, // Select phy clk when sym_clk_enable = 1 + SYMCLK_BE_DPIACLK_810 = 4, + SYMCLK_BE_DPIACLK_162 = 5, + SYMCLK_BE_DPIACLK_540 = 6, + SYMCLK_BE_DPIACLK_270 = 7, + SYMCLK_BE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software }; enum physymclk_source { @@ -252,7 +261,7 @@ static void dccg35_set_physymclk_rcg( } } -static void dccg35_set_physymclk_fe_rcg( +static void dccg35_set_symclk_fe_rcg( struct dccg *dccg, int inst, bool enable) @@ -289,6 +298,45 @@ static void dccg35_set_physymclk_fe_rcg( } } +static void dccg35_set_symclk_be_rcg( + struct dccg *dccg,
+ int inst, + bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* TBD add symclk_be in rcg control bits */ + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) { @@ -665,6 +713,42 @@ static void dccg35_set_physymclk_src_new( } } +static void dccg35_set_symclk_be_src_new( + struct dccg *dccg, + enum symclk_be_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, + SYMCLKA_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKA_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE, + SYMCLKB_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKB_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE, + SYMCLKC_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKC_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE, + SYMCLKD_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKD_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE, + SYMCLKE_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKE_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + } +} + static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg, int symclk_fe_inst, int symclk_be_inst) @@ -699,35 +783,35 @@ static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg, return 0; } -static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum physymclk_fe_source src, int inst) +static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum symclk_fe_source src, int inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); switch (inst) { case 0: REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, - SYMCLKA_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKA_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKA_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKA_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; case 1: REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE, - SYMCLKB_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKB_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKB_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKB_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; case 2: REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE, - SYMCLKC_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKC_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKC_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKC_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; case 3: REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE, - SYMCLKD_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKD_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKD_FE_EN, (src == SYMCLK_FE_REFCLK) ? 
0 : 1, + SYMCLKD_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; case 4: REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE, - SYMCLKE_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKE_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKE_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKE_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; } } @@ -804,9 +888,9 @@ static uint32_t dccg35_is_symclk32_se_rcg(struct dccg *dccg, int inst) static void dccg35_enable_symclk_fe_new( struct dccg *dccg, int inst, - enum physymclk_fe_source src) + enum symclk_fe_source src) { - dccg35_set_physymclk_fe_rcg(dccg, inst, false); + dccg35_set_symclk_fe_rcg(dccg, inst, false); dccg35_set_symclk_fe_src_new(dccg, src, inst); } @@ -814,17 +898,17 @@ static void dccg35_disable_symclk_fe_new( struct dccg *dccg, int inst) { - dccg35_set_symclk_fe_src_new(dccg, PHYSYMCLK_FE_REFCLK, inst); - dccg35_set_physymclk_fe_rcg(dccg, inst, true); + dccg35_set_symclk_fe_src_new(dccg, SYMCLK_FE_REFCLK, inst); + dccg35_set_symclk_fe_rcg(dccg, inst, true); } static void dccg35_enable_symclk_be_new( struct dccg *dccg, int inst, - enum physymclk_source src) + enum symclk_be_source src) { - dccg35_set_physymclk_rcg(dccg, inst, false); - dccg35_set_physymclk_src_new(dccg, inst, src); + dccg35_set_symclk_be_rcg(dccg, inst, false); + dccg35_set_symclk_be_src_new(dccg, inst, src); } static void dccg35_disable_symclk_be_new( @@ -834,7 +918,7 @@ static void dccg35_disable_symclk_be_new( int i; /* Switch from functional clock to refclock */ - dccg35_set_physymclk_src_new(dccg, inst, PHYSYMCLK_REFCLK); + dccg35_set_symclk_be_src_new(dccg, inst, SYMCLK_BE_REFCLK); /* Check if any other SE connected LE and disable them */ for (i = 0; i < 4; i++) { @@ -845,7 +929,7 @@ static void dccg35_disable_symclk_be_new( } } /* Safe to RCG SYMCLK*/ - dccg35_set_physymclk_rcg(dccg, inst, true); + dccg35_set_symclk_be_rcg(dccg, inst, true); } static void dccg35_enable_symclk32_se_new( @@ -2041,6 +2125,132 @@ static void dccg35_dpp_root_clock_control_cb( dccg35_disable_dpp_clk_new(dccg, dpp_inst); } +static void dccg35_enable_symclk32_se_cb( + struct dccg *dccg, + int inst, + enum phyd32clk_clock_source phyd32clk) +{ + dccg35_enable_symclk32_se_new(dccg, inst, (enum symclk32_se_clk_source)phyd32clk); +} + +static void dccg35_disable_symclk32_se_cb(struct dccg *dccg, int inst) +{ + dccg35_disable_symclk32_se_new(dccg, inst); +} + +static void dccg35_disable_symclk32_le_cb(struct dccg *dccg, int inst) +{ + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_symclk32_le_root_clock_gating( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through disable_symclk32_le + */ + if (power_on) + dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK); + else + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_dtbclk_p_src_cb( + struct dccg *dccg, + enum streamclk_source src, + uint32_t inst) +{ + if (src == DTBCLK0) + dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, inst); + else + dccg35_disable_dtbclk_p_new(dccg, inst); +} + +static void dccg35_set_dtbclk_dto_cb( + struct dccg *dccg, + const struct dtbclk_dto_params *params) +{ + /* set_dtbclk_p_src typ called earlier to switch to DTBCLK + * if params->ref_dtbclk_khz and req_dtbclk_khz are 0 switch to ref-clock + */ + 
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* DTO Output Rate / Pixel Rate = 1/4 */ + int req_dtbclk_khz = params->pixclk_khz / 4; + + if (params->ref_dtbclk_khz && req_dtbclk_khz) { + uint32_t modulo, phase; + + dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, params->otg_inst); + + // phase / modulo = dtbclk / dtbclk ref + modulo = params->ref_dtbclk_khz * 1000; + phase = req_dtbclk_khz * 1000; + + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase); + + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 1); + + REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1, + 1, 100); + + /* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */ + dccg35_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1); + + /* The recommended programming sequence to enable DTBCLK DTO to generate + * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should + * be set only after DTO is enabled + */ + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + PIPE_DTO_SRC_SEL[params->otg_inst], 2); + } else { + dccg35_disable_dtbclk_p_new(dccg, params->otg_inst); + + REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 0, + PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1); + + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); + } +} + +static void dccg35_disable_dscclk_cb(struct dccg *dccg, + int inst) +{ + dccg35_disable_dscclk_new(dccg, inst); +} + +static void dccg35_enable_dscclk_cb(struct dccg *dccg, int inst) +{ + dccg35_enable_dscclk_new(dccg, inst, DSC_DTO_TUNED_CK_GPU_DISCLK_3); +} + +static void dccg35_enable_symclk_se_cb(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) +{ + /* Switch to functional clock if already not selected */ + dccg35_enable_symclk_be_new(dccg, SYMCLK_BE_PHYCLK, link_enc_inst); + + dccg35_enable_symclk_fe_new(dccg, stream_enc_inst, (enum symclk_fe_source) link_enc_inst); + +} + +static void dccg35_disable_symclk_se_cb( + struct dccg *dccg, + uint32_t stream_enc_inst, + uint32_t link_enc_inst) +{ + dccg35_disable_symclk_fe_new(dccg, stream_enc_inst); + + /* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */ +} + static const struct dccg_funcs dccg35_funcs = { .update_dpp_dto = dccg35_update_dpp_dto, .dpp_root_clock_control = dccg35_dpp_root_clock_control, @@ -2070,6 +2280,7 @@ static const struct dccg_funcs dccg35_funcs = { .enable_symclk_se = dccg35_enable_symclk_se, .disable_symclk_se = dccg35_disable_symclk_se, .set_dtbclk_p_src = dccg35_set_dtbclk_p_src, + }; struct dccg *dccg35_create( @@ -2091,7 +2302,7 @@ struct dccg *dccg35_create( (void)&dccg35_set_symclk32_se_rcg; (void)&dccg35_set_symclk32_le_rcg; (void)&dccg35_set_physymclk_rcg; - (void)&dccg35_set_physymclk_fe_rcg; + (void)&dccg35_set_symclk_fe_rcg; (void)&dccg35_set_dtbclk_p_rcg; (void)&dccg35_set_dppclk_rcg; (void)&dccg35_set_dpstreamclk_rcg; @@ -2129,6 +2340,18 @@ struct dccg *dccg35_create( (void)&dccg35_set_dpstreamclk_root_clock_gating_cb; (void)&dccg35_update_dpp_dto_cb; (void)&dccg35_dpp_root_clock_control_cb; + (void)&dccg35_disable_symclk_se_cb; + (void)&dccg35_enable_symclk_se_cb; + (void)&dccg35_enable_dscclk_cb; + (void)&dccg35_disable_dscclk_cb; + (void)&dccg35_set_dtbclk_dto_cb; + (void)&dccg35_set_dtbclk_p_src_cb; + 
(void)&dccg35_set_symclk32_le_root_clock_gating; + (void)&dccg35_disable_symclk32_le_cb; + (void)&dccg35_set_symclk_be_src_new; + (void)&dccg35_set_symclk_be_rcg; + (void)&dccg35_enable_symclk32_se_cb; + (void)&dccg35_disable_symclk32_se_cb; base = &dccg_dcn->base; base->ctx = ctx; From 41cb5a5e8480fa41ee452ee60e620a4c1de3c323 Mon Sep 17 00:00:00 2001 From: Fudong Wang Date: Thu, 25 Jul 2024 16:48:25 +0800 Subject: [PATCH 253/447] drm/amd/display: skip crtc power down when ips switch [Why & How] Add a dc debug option to keep the crtc on during an ips switch. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Fudong Wang Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 95b0413e9f17..b6a5ea93fd45 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -979,6 +979,7 @@ struct dc_debug_options { bool disable_z10; bool enable_z9_disable_interface; bool psr_skip_crtc_disable; + uint32_t ips_skip_crtc_disable_mask; union dpia_debug_options dpia_debug; bool disable_fixed_vs_aux_timeout_wa; uint32_t fixed_vs_aux_delay_config_wa; From 8151a6c13111b465dbabe07c19f572f7cbd16fef Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Fri, 12 Jul 2024 16:30:03 -0400 Subject: [PATCH 254/447] drm/amd/display: Skip Recompute DSC Params if no Stream on Link [why] Encounter NULL pointer dereference under mst + dsc setup. BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2 Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022 RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? plist_add+0xbe/0x100 ? exc_page_fault+0x7c/0x180 ? asm_exc_page_fault+0x26/0x30 ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] drm_atomic_check_only+0x5c5/0xa40 drm_mode_atomic_ioctl+0x76e/0xbc0 [how] dsc recompute should be skipped if no mode change is detected on the new request.
If detected, keep checking whether the stream is already in the current state or not. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Siqueira Signed-off-by: Fangzhi Zuo Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5442da90f508..915eb2c08ece 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1270,6 +1270,9 @@ static bool is_dsc_need_re_compute( } } + if (new_stream_on_link_num == 0) + return false; + /* check current_state if there stream on link but it is not in * new request state */ From 329ee7087bc9862977fb6e431b8b0ea5e7e261b0 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Thu, 25 Jul 2024 16:09:35 -0400 Subject: [PATCH 255/447] drm/amd/display: Address coverity change [Why] Coverity picks up a defect with regard to array underflow. [How] Address coverity issue as recommended. Reviewed-by: Leo Ma Signed-off-by: Chris Park Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index cce425dd62d2..01ea3a31e54d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -517,10 +517,12 @@ static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr if (!use_hpo_encoder) continue; - otg_master->clock_source->funcs->program_pix_clk( + if (otg_master->stream_res.pix_clk_params.controller_id > CONTROLLER_ID_UNDEFINED) + otg_master->clock_source->funcs->program_pix_clk( otg_master->clock_source, &otg_master->stream_res.pix_clk_params, - dccg->ctx->dc->link_srv->dp_get_encoding_format(&otg_master->link_config.dp_link_settings), + dccg->ctx->dc->link_srv->dp_get_encoding_format( + &otg_master->link_config.dp_link_settings), &otg_master->pll_settings); } } From 00f06855f6e68954f67616cd5560fca25c755eba Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Fri, 26 Jul 2024 00:24:55 -0400 Subject: [PATCH 256/447] drm/amd/display: Add clock control callbacks [why & how] Add clock source selection control functions based on spec Reviewed-by: Muhammad Ahmed Signed-off-by: Hansen Dsouza Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 153 +++++++++++------- 1 file changed, 99 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index b4f441c405bb..7f91e48902e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -980,6 +980,21 @@ static void dccg35_disable_symclk32_le_new( dccg35_set_symclk32_le_rcg(dccg, inst, true); } +static void dccg35_enable_physymclk_new(struct dccg *dccg, + int inst, + enum physymclk_source src) +{ + dccg35_set_physymclk_rcg(dccg, inst, false); + dccg35_set_physymclk_src_new(dccg, src, inst); +} + +static void
dccg35_disable_physymclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_physymclk_src_new(dccg, PHYSYMCLK_REFCLK, inst); + dccg35_set_physymclk_rcg(dccg, inst, true); +} + static void dccg35_enable_dpp_clk_new( struct dccg *dccg, int inst, @@ -2138,11 +2153,62 @@ static void dccg35_disable_symclk32_se_cb(struct dccg *dccg, int inst) dccg35_disable_symclk32_se_new(dccg, inst); } +static void dccg35_enable_symclk32_le_cb( + struct dccg *dccg, + int inst, + enum phyd32clk_clock_source src) +{ + dccg35_enable_symclk32_le_new(dccg, inst, (enum symclk32_le_clk_source) src); +} + static void dccg35_disable_symclk32_le_cb(struct dccg *dccg, int inst) { dccg35_disable_symclk32_le_new(dccg, inst); } +static void dccg35_set_symclk32_le_root_clock_gating_cb( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through disable_symclk32_le + */ + if (power_on) + dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK); + else + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_physymclk_cb( + struct dccg *dccg, + int inst, + enum physymclk_clock_source clk_src, + bool force_enable) +{ + /* force_enable = 0 indicates we can switch to ref clock */ + if (force_enable) + dccg35_enable_physymclk_new(dccg, inst, (enum physymclk_source)clk_src); + else + dccg35_disable_physymclk_new(dccg, inst); +} + +static void dccg35_set_physymclk_root_clock_gating_cb( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* Redundant RCG already done in disable_physymclk + * power_on = 1 indicates we need to ungate + */ + if (power_on) + dccg35_enable_physymclk_new(dccg, inst, PHYSYMCLK_REFCLK); + else + dccg35_disable_physymclk_new(dccg, inst); +} + static void dccg35_set_symclk32_le_root_clock_gating( struct dccg *dccg, int inst, @@ -2251,6 +2317,37 @@ static void dccg35_disable_symclk_se_cb( /* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */ } +static const struct dccg_funcs dccg35_funcs_new = { + .update_dpp_dto = dccg35_update_dpp_dto_cb, + .dpp_root_clock_control = dccg35_dpp_root_clock_control_cb, + .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, + .dccg_init = dccg35_init, + .set_dpstreamclk = dccg35_set_dpstreamclk_cb, + .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating_cb, + .enable_symclk32_se = dccg35_enable_symclk32_se_cb, + .disable_symclk32_se = dccg35_disable_symclk32_se_cb, + .enable_symclk32_le = dccg35_enable_symclk32_le_cb, + .disable_symclk32_le = dccg35_disable_symclk32_le_cb, + .set_symclk32_le_root_clock_gating = dccg35_set_symclk32_le_root_clock_gating_cb, + .set_physymclk = dccg35_set_physymclk_cb, + .set_physymclk_root_clock_gating = dccg35_set_physymclk_root_clock_gating_cb, + .set_dtbclk_dto = dccg35_set_dtbclk_dto_cb, + .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto, + .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, + .otg_add_pixel = dccg31_otg_add_pixel, + .otg_drop_pixel = dccg31_otg_drop_pixel, + .set_dispclk_change_mode = dccg31_set_dispclk_change_mode, + .disable_dsc = dccg35_disable_dscclk_cb, + .enable_dsc = dccg35_enable_dscclk_cb, + .set_pixel_rate_div = dccg35_set_pixel_rate_div, + .get_pixel_rate_div = dccg35_get_pixel_rate_div, + .trigger_dio_fifo_resync = dccg35_trigger_dio_fifo_resync, + .set_valid_pixel_rate = 
dccg35_set_valid_pixel_rate, + .enable_symclk_se = dccg35_enable_symclk_se_cb, + .disable_symclk_se = dccg35_disable_symclk_se_cb, + .set_dtbclk_p_src = dccg35_set_dtbclk_p_src_cb, +}; + static const struct dccg_funcs dccg35_funcs = { .update_dpp_dto = dccg35_update_dpp_dto, .dpp_root_clock_control = dccg35_dpp_root_clock_control, @@ -2296,62 +2393,10 @@ struct dccg *dccg35_create( BREAK_TO_DEBUGGER(); return NULL; } - - /* Temporary declaration to handle unused static functions */ - (void)&dccg35_set_dsc_clk_rcg; - (void)&dccg35_set_symclk32_se_rcg; - (void)&dccg35_set_symclk32_le_rcg; - (void)&dccg35_set_physymclk_rcg; - (void)&dccg35_set_symclk_fe_rcg; - (void)&dccg35_set_dtbclk_p_rcg; - (void)&dccg35_set_dppclk_rcg; - (void)&dccg35_set_dpstreamclk_rcg; - (void)&dccg35_set_smclk32_se_rcg; - (void)&dccg35_set_dsc_clk_src_new; - (void)&dccg35_set_symclk32_se_src_new; - (void)&dccg35_is_symclk32_se_src_functional_le_new; - (void)&dccg35_set_symclk32_le_src_new; - (void)&dcn35_set_dppclk_src_new; - (void)&dccg35_set_dtbclk_p_src_new; - (void)&dccg35_set_dpstreamclk_src_new; - (void)&dccg35_set_physymclk_src_new; - (void)&dccg35_is_symclk_fe_src_functional_be; - (void)&dccg35_set_symclk_fe_src_new; - (void)&dccg35_is_fe_rcg; - (void)&dccg35_is_symclk32_se_rcg; - (void)&dccg35_enable_symclk_fe_new; - (void)&dccg35_disable_symclk_fe_new; - (void)&dccg35_enable_symclk_be_new; (void)&dccg35_disable_symclk_be_new; - (void)&dccg35_enable_symclk32_se_new; - (void)&dccg35_disable_symclk32_se_new; - (void)&dccg35_enable_symclk32_le_new; - (void)&dccg35_disable_symclk32_le_new; - (void)&dccg35_enable_dpp_clk_new; - (void)&dccg35_enable_dpp_clk_new; - (void)&dccg35_disable_dscclk_new; - (void)&dccg35_enable_dscclk_new; - (void)&dccg35_enable_dtbclk_p_new; - (void)&dccg35_disable_dtbclk_p_new; - (void)&dccg35_enable_dpstreamclk_new; - (void)&dccg35_disable_dpstreamclk_new; - (void)&dccg35_set_dpstreamclk_cb; - (void)&dccg35_dpp_root_clock_control_cb; - (void)&dccg35_set_dpstreamclk_root_clock_gating_cb; - (void)&dccg35_update_dpp_dto_cb; - (void)&dccg35_dpp_root_clock_control_cb; - (void)&dccg35_disable_symclk_se_cb; - (void)&dccg35_enable_symclk_se_cb; - (void)&dccg35_enable_dscclk_cb; - (void)&dccg35_disable_dscclk_cb; - (void)&dccg35_set_dtbclk_dto_cb; - (void)&dccg35_set_dtbclk_p_src_cb; (void)&dccg35_set_symclk32_le_root_clock_gating; - (void)&dccg35_disable_symclk32_le_cb; - (void)&dccg35_set_symclk_be_src_new; - (void)&dccg35_set_symclk_be_rcg; - (void)&dccg35_enable_symclk32_se_cb; - (void)&dccg35_disable_symclk32_se_cb; + (void)&dccg35_set_smclk32_se_rcg; + (void)&dccg35_funcs_new; base = &dccg_dcn->base; base->ctx = ctx; From e80f8f491df873ea2e07c941c747831234814612 Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Thu, 25 Jul 2024 18:42:21 -0400 Subject: [PATCH 257/447] drm/amd/display: Revert Avoid overflow assignment This reverts commit a15268787b79 ("drm/amd/display: Avoid overflow assignment in link_dp_cts") Due to regression causing DPMS hang. 
Reviewed-by: Alex Hung Signed-off-by: Gabe Teeger Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 2 +- drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c | 3 +-- drivers/gpu/drm/amd/display/include/dpcd_defs.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 95c275bf649b..519c3df78ee5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -727,7 +727,7 @@ struct dp_audio_test_data_flags { struct dp_audio_test_data { struct dp_audio_test_data_flags flags; - uint32_t sampling_rate; + uint8_t sampling_rate; uint8_t channel_count; uint8_t pattern_type; uint8_t pattern_period[8]; diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 32d5a4b14333..df3781081da7 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -775,8 +775,7 @@ bool dp_set_test_pattern( core_link_read_dpcd(link, DP_TRAINING_PATTERN_SET, &training_pattern.raw, sizeof(training_pattern)); - if (pattern <= PHY_TEST_PATTERN_END_DP11) - training_pattern.v1_3.LINK_QUAL_PATTERN_SET = pattern; + training_pattern.v1_3.LINK_QUAL_PATTERN_SET = pattern; core_link_write_dpcd(link, DP_TRAINING_PATTERN_SET, &training_pattern.raw, sizeof(training_pattern)); diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h index c246235e4afe..aee5170f5fb2 100644 --- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h +++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h @@ -76,7 +76,6 @@ enum dpcd_phy_test_patterns { PHY_TEST_PATTERN_D10_2, PHY_TEST_PATTERN_SYMBOL_ERROR, PHY_TEST_PATTERN_PRBS7, - PHY_TEST_PATTERN_END_DP11 = PHY_TEST_PATTERN_PRBS7, PHY_TEST_PATTERN_80BIT_CUSTOM,/* For DP1.2 only */ PHY_TEST_PATTERN_CP2520_1, PHY_TEST_PATTERN_CP2520_2, From a0fcd3df4591043d447bb08919eed2ce68fbdb5b Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 25 Jul 2024 14:18:10 -0400 Subject: [PATCH 258/447] drm/amd/display: Add DML2.1 option to disable DRR clamped P-State Strategies [WHY & HOW] When DRR is active with variable refresh rate, add the ability to block DRR clamped P-State strategies (such as SubVP). 
Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 4 +++- .../gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h | 1 + .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 7 +++++-- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b6a5ea93fd45..49725f06a2d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -511,6 +511,7 @@ enum in_game_fams_config { INGAME_FAMS_SINGLE_DISP_ENABLE, // enable in-game fams INGAME_FAMS_DISABLE, // disable in-game fams INGAME_FAMS_MULTI_DISP_ENABLE, //enable in-game fams for multi-display + INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY, //enable in-game fams for multi-display only for clamped RR strategies }; /** diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 41ecf00ed196..d35dd507cb9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -66,7 +66,9 @@ static void dml21_apply_debug_options(const struct dc *in_dc, struct dml2_contex disable_fams2; pmo_options->disable_fams2 = disable_fams2; - pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming; + pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE || + in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY; + pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE; } static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index a824ce56c54e..1c773bbb9992 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -74,6 +74,7 @@ struct dml2_pmo_options { bool disable_drr_var; bool disable_drr_clamped; bool disable_drr_var_when_var_active; + bool disable_drr_clamped_when_var_active; bool disable_fams2; bool disable_vactive_det_fill_bw_pad; /* dml2_project_dcn4x_stage2_auto_drr_svp and above only */ bool disable_dyn_odm; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 9c6397aafd38..06e786995390 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1441,8 +1441,11 @@ static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, strategy_matches_drr_requirements = false; } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && (pmo->options->disable_drr_clamped || - !stream_descriptor->timing.drr_config.enabled || - (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable))) { + (!stream_descriptor->timing.drr_config.enabled || + (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || + 
(pmo->options->disable_drr_clamped_when_var_active && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable))) { /* DRR fixed strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && From 130376ab8300ef06231ebac4db147f06d601d53c Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 28 Jul 2024 14:55:56 -0400 Subject: [PATCH 259/447] drm/amd/display: 3.2.295 This version brings along following fixes: - Clean up some files style problems - Program the DET segment when initializing pipes in dcn10_hwseq - Fix overlay with pre-blend color processing - Disable SubVP if Hardware Rotation is Used - Fix few things in DML - Re-enable panel replay feature - Fix null pointer dereference under mst+dsc setup Acked-by: Tom Chung Signed-off-by: Aric Cyr Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 49725f06a2d5..7873daf72608 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.294" +#define DC_VER "3.2.295" #define MAX_SURFACES 3 #define MAX_PLANES 6 From aa94b623cb9233b91ed342dd87ecd62e56ff4938 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Sat, 3 Aug 2024 21:30:18 +0530 Subject: [PATCH 260/447] drm/amdgpu: Add address alignment support to DCC buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add address alignment support to the DCC VRAM buffers. v2: - adjust size based on the max_texture_channel_caches values only for GFX12 DCC buffers. - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only for DCC buffers. - roundup non power of two DCC buffer adjusted size to nearest power of two number as the buddy allocator does not support non power of two alignments. This applies only to the contiguous DCC buffers. v3:(Alex) - rewrite the max texture channel caches comparison code in an algorithmic way to determine the alignment size. v4:(Alex) - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c and add a new gmc func callback for dcc alignment. If the callback is non-NULL, call it to get the alignment, otherwise, use the default. v5:(Alex) - Set the Alignment to a default value if the callback doesn't exist. - Add the callback to amdgpu_gmc_funcs. v6: - Fix checkpatch warning reported by Intel CI. v7:(Christian) - remove the AMDGPU_GEM_CREATE_GFX12_DCC flag and keep a flag that checks the BO pinning and for a specific hw generation. v8:(Christian) - move this check into gmc_v12_0_get_dcc_alignment. 
v9: - Fix 32bit build errors Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Reviewed-by: Frank Min Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 35 ++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 18 ++++++++++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497..4d951a1baefa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(adev) ({ \ + typeof(adev) _adev = (adev); \ + _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c..b2c94f12da9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; + unsigned int adjust_dcc_size = 0; struct drm_buddy *mm = &mgr->mm; struct drm_buddy_block *block; unsigned long pages_per_block; @@ -511,7 +512,18 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + if (adev->gmc.gmc_funcs->get_dcc_alignment) + adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + unsigned int dcc_size; + + dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size); + remaining_size = (u64)dcc_size; + + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +533,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +568,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + struct drm_buddy_block *dcc_block; + unsigned long dcc_start; + u64 trim_start; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); 
+ /* Adjust the start address for DCC buffers only */ + dcc_start = + roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block), + adjust_dcc_size); + trim_start = (u64)dcc_start; + drm_buddy_block_trim(mm, &trim_start, + (u64)vres->base.size, + &vres->blocks); + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e..26efce9aa410 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + unsigned int max_tex_channel_caches, alignment; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1)) + return 0; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (unsigned int)(max_tex_channel_caches / SZ_4); + else + alignment = roundup_pow_of_two(max_tex_channel_caches); + + return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K); +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) From 86598c3819fdc70e59d28221bfa7bc36e9f5777e Mon Sep 17 00:00:00 2001 From: Frank Min Date: Thu, 1 Aug 2024 12:20:18 +0800 Subject: [PATCH 261/447] drm/amdgpu: correct sdma7 max dw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit correct sdma7 max dw into 8 Signed-off-by: Frank Min Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index e2918318600b..cfd8e183ad50 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1730,7 +1730,7 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { .copy_max_bytes = 0x400000, - .copy_num_dw = 7, + .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, .fill_max_bytes = 0x400000, .fill_num_dw = 5, From 57b09a168ffe88a4c088e8f7ca5de2ffbb8fefa3 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 1 Aug 2024 11:12:24 +0800 Subject: [PATCH 262/447] drm/amd/pm: fix unchecked return value warning for vega10_hwmgr This resolves the unchecked return value warning reported by Coverity. 
Signed-off-by: Tim Huang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 6e717ddbb029..9ace863792d4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -2934,9 +2934,7 @@ static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap) } } - vega10_enable_smc_features(hwmgr, false, feature_mask); - - return 0; + return vega10_enable_smc_features(hwmgr, false, feature_mask); } /** From c0277b9d7c2ee9ee5dbc948548984f0fbb861301 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 1 Aug 2024 10:38:37 +0800 Subject: [PATCH 263/447] drm/amdgpu: fix unchecked return value warning for amdgpu_gfx This resolves the unchecded return value warning reported by Coverity. Signed-off-by: Tim Huang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 82452606ae6c..5c9f36f01db0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -860,8 +860,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r int r; if (amdgpu_ras_is_supported(adev, ras_block->block)) { - if (!amdgpu_persistent_edc_harvesting_supported(adev)) - amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + if (!amdgpu_persistent_edc_harvesting_supported(adev)) { + r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + if (r) + return r; + } r = amdgpu_ras_block_late_init(adev, ras_block); if (r) @@ -1005,7 +1008,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ pr_err("critical bug! too many kiq readers\n"); goto failed_unlock; } - amdgpu_ring_alloc(ring, 32); + r = amdgpu_ring_alloc(ring, 32); + if (r) + goto failed_unlock; + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) @@ -1071,7 +1077,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 } spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); + r = amdgpu_ring_alloc(ring, 32); + if (r) + goto failed_unlock; + amdgpu_ring_emit_wreg(ring, reg, v); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) @@ -1107,6 +1116,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 failed_undo: amdgpu_ring_undo(ring); +failed_unlock: spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_write: dev_err(adev->dev, "failed to write reg:%x\n", reg); From 92549780e32718d64a6d08bbbb3c6fffecb541c7 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 1 Aug 2024 13:47:55 +0800 Subject: [PATCH 264/447] drm/amdgpu: fix unchecked return value warning for amdgpu_atombios This resolves the unchecded return value warning reported by Coverity. 
Signed-off-by: Tim Huang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 35 ++++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 7dc102f0bc1d..0c8975ac5af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1018,8 +1018,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (clock_type == COMPUTE_ENGINE_PLL_PARAM) { args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_div = args.v3.ucPostDiv; dividers->enable_post_div = (args.v3.ucCntlFlag & @@ -1039,8 +1040,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (strobe_mode) args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_div = args.v5.ucPostDiv; dividers->enable_post_div = (args.v5.ucCntlFlag & @@ -1058,8 +1060,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, /* fusion */ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_divider = dividers->post_div = args.v4.ucPostDiv; dividers->real_clock = le32_to_cpu(args.v4.ulClock); @@ -1070,8 +1073,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, args.v6_in.ulClock.ulComputeClockFlag = clock_type; args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv); dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac); @@ -1113,8 +1117,9 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, if (strobe_mode) args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac); mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv); @@ -1211,8 +1216,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v2.ucVoltageMode = 0; args.v2.usVoltageLevel = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; *voltage = le16_to_cpu(args.v2.usVoltageLevel); break; @@ -1221,8 +1227,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, 
args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL; args.v3.usVoltageLevel = cpu_to_le16(voltage_id); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; *voltage = le16_to_cpu(args.v3.usVoltageLevel); break; From 46142cc1b9272d664e0258e105b537735bfeeccc Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 5 Aug 2024 19:17:04 +0530 Subject: [PATCH 265/447] drm/amdgpu: Add DCC GFX12 flag to enable address alignment We require this flag AMDGPU_GEM_CREATE_GFX12_DCC or any other kernel level GFX12 DCC flag to differentiate the DCC buffers and other pinned display buffers(which has TTM_PL_FLAG_CONTIGUOUS enabled). If we use the TTM_PL_FLAG_CONTIGUOUS flag for DCC buffers, we may over allocate for all the pinned display buffers unnecessarily that leads to memory allocation failure. Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index b2c94f12da9e..7d26a962f811 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -512,7 +512,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - if (adev->gmc.gmc_funcs->get_dcc_alignment) + if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC && + adev->gmc.gmc_funcs->get_dcc_alignment) adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); remaining_size = (u64)vres->base.size; From 34e087e8920e635c62e2ed6a758b0cd27f836d13 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Thu, 18 Jul 2024 16:38:50 +0800 Subject: [PATCH 266/447] drm/amdgpu/mes: fix mes ring buffer overflow wait memory room until enough before writing mes packets to avoid ring buffer overflow. 
v2: squash in sched_hw_submission fix Fixes: de3246254156 ("drm/amdgpu: cleanup MES11 command submission") Fixes: fffe347e1478 ("drm/amdgpu: cleanup MES12 command submission") Signed-off-by: Jack Xiao Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 18 ++++++++++++++---- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 18 ++++++++++++++---- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 8c39bf7e1fac..690976665cf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -214,6 +214,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + if (ring->funcs->type == AMDGPU_RING_TYPE_MES) + sched_hw_submission = 8; else if (ring == &adev->sdma.instance[0].page) sched_hw_submission = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f9343642ae7e..1a5ad5be33bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -168,7 +168,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -196,6 +196,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -208,8 +215,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -252,6 +258,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: spin_unlock_irqrestore(&mes->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 0713bc3eb263..249e5a66205c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -154,7 +154,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; 
u64 *status_ptr; signed long r; int ret; @@ -182,6 +182,13 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -194,8 +201,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); @@ -215,7 +221,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -238,6 +244,10 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: spin_unlock_irqrestore(&mes->ring_lock, flags); From 237193e21b29d4aa0617ffeea3d6f49e72999708 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 6 Aug 2024 09:55:55 -0400 Subject: [PATCH 267/447] drm/amd/display: fix s2idle entry for DCN3.5+ To be able to get to the lowest power state when suspending systems with DCN3.5+, we must be in IPS before the display hardware is put into D3cold. So, to ensure that the system always reaches the lowest power state while suspending, force systems that support IPS to enter idle optimizations before entering D3cold. Reviewed-by: Roman Li Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ec6064d40dbf..fe8a88a7fe59 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2904,6 +2904,9 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); + if (adev->dm.dc->caps.ips_support) + dc_allow_idle_optimizations(adev->dm.dc, true); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); From 35c628774e50b3784c59e8ca7973f03bcb067132 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 10:00:33 -0400 Subject: [PATCH 268/447] drm/amdgpu/jpeg2: properly set atomics vmid field This needs to be set as well if the IB uses atomics. 
Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657..98aa3ccd0d20 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); From c6c2e8b6a427d4fecc7c36cffccb908185afcab2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 10:06:05 -0400 Subject: [PATCH 269/447] drm/amdgpu/jpeg4: properly set atomics vmid field This needs to be set as well if the IB uses atomics. Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ad524ddc9760..f4662920c653 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -782,11 +782,11 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); From 0cee47cde41e22712c034ae961076067d4ac13a0 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Wed, 31 Jul 2024 12:10:40 +0800 Subject: [PATCH 270/447] drm/amd/amdgpu: Properly tune the size of struct The struct assertion is failed because sparse cannot parse `#pragma pack(push, 1)` and `#pragma pack(pop)` correctly. GCC's output is still 1-byte-aligned. No harm to memory layout. The error can be filtered out by sparse-diff, but sometimes multiple lines queezed into one, making the sparse-diff thinks its a new error. I'm trying to aviod this by fixing errors. 
Link: https://lore.kernel.org/all/20230620045919.492128-1-suhui@nfschina.com/ Link: https://lore.kernel.org/all/93d10611-9fbb-4242-87b8-5860b2606042@suswa.mountain/ Fixes: 1721bc1b2afa ("drm/amdgpu: Update VF2PF interface") Cc: Dan Carpenter Cc: wenlunpeng Reported-by: Su Hui Signed-off-by: WangYuli Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index fb2b394bb9c5..6e9eeaeb3de1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -213,7 +213,7 @@ struct amd_sriov_msg_pf2vf_info { uint32_t gpu_capacity; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; -}; +} __packed; struct amd_sriov_msg_vf2pf_info_header { /* the total structure size in byte */ @@ -273,7 +273,7 @@ struct amd_sriov_msg_vf2pf_info { uint32_t mes_info_size; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; -}; +} __packed; /* mailbox message send from guest to host */ enum amd_sriov_mailbox_request_message { From 7b3a4e1d559e892ea563f46f45c504a60c82d70f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Aug 2024 12:42:52 +0100 Subject: [PATCH 271/447] drm/amd/display: remove extraneous ; after statements There are a several statements with two following semicolons, replace these with just one semicolon. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 2 +- .../dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 2 +- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 006667aa961b..710a25dcfef0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1031,7 +1031,7 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) { struct dml2_core_internal_display_mode_lib *mode_lib = &in_ctx->v21.dml_init.dml2_instance->core_instance.clean_me_up.mode_lib; - double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;; + double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; if (reg_set_idx >= DML2_DCHUB_WATERMARK_SET_NUM) { /* invalid register set index */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index cbecdc9f253a..c3c4d8d9525c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -7218,7 +7218,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #if defined(DV_BUILD) // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming. if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { - lb_buffer_size_bits_luma = 34620 * 57;; + lb_buffer_size_bits_luma = 34620 * 57; lb_buffer_size_bits_chroma = 13560 * 57; } #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index c54c29711a65..8f3c1c0b1cc1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -6464,8 +6464,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = l->MaximumSwathHeightC[k] / 2; l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; } if (p->SwathHeightC[k] == 0) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 7655501e75d4..9e8ff3a9718e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -421,7 +421,7 @@ unsigned int dml2_calc_max_scaled_time( void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx) { - int i, j = 0;; + int i, j = 0; struct mcif_arb_params *wb_arb_params = NULL; struct dcn_bw_writeback *bw_writeback = NULL; enum mmhubbub_wbif_mode wbif_mode = PACKED_444_FP16; /*for now*/ From 3834ce360067b4ee98fdef14571923500a0499a4 Mon Sep 17 00:00:00 2001 From: Remington Brasga Date: Wed, 31 Jul 2024 05:54:51 +0000 Subject: [PATCH 272/447] drm/amdgpu/uvd4: fix mask and shift definitions A few define's are listed twice with different, incorrect values. This fix sets them appropriately. 
Signed-off-by: Remington Brasga Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h index 8ee3149df5b7..2ef1273e65ab 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h @@ -340,8 +340,6 @@ #define UVD_LMI_CTRL__REQ_MODE_MASK 0x00000200L #define UVD_LMI_CTRL__REQ_MODE__SHIFT 0x00000009 #define UVD_LMI_CTRL__RFU_MASK 0xf8000000L -#define UVD_LMI_CTRL__RFU_MASK 0xfc000000L -#define UVD_LMI_CTRL__RFU__SHIFT 0x0000001a #define UVD_LMI_CTRL__RFU__SHIFT 0x0000001b #define UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK 0x00200000L #define UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN__SHIFT 0x00000015 From 9a12b1c7a0595736d398b24712dc1ce79072662e Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Sun, 4 Aug 2024 15:56:27 +0200 Subject: [PATCH 273/447] drm/amd: Make amd_ip_funcs static for SDMA v5.0 The struct can be static, as it is only used in this translation unit. Signed-off-by: Tobias Jakobi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index d5f0dc132a47..3e48ea38385d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1820,7 +1820,7 @@ static void sdma_v5_0_dump_ip_state(void *handle) amdgpu_gfx_off_ctrl(adev, true); } -const struct amd_ip_funcs sdma_v5_0_ip_funcs = { +static const struct amd_ip_funcs sdma_v5_0_ip_funcs = { .name = "sdma_v5_0", .early_init = sdma_v5_0_early_init, .late_init = NULL, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h index d4e3c2e696f6..2ab71f21755a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h @@ -24,7 +24,6 @@ #ifndef __SDMA_V5_0_H__ #define __SDMA_V5_0_H__ -extern const struct amd_ip_funcs sdma_v5_0_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block; #endif /* __SDMA_V5_0_H__ */ From 8641b817392bfb12fb1e71ebb68c31783297bfbd Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Sun, 4 Aug 2024 15:56:28 +0200 Subject: [PATCH 274/447] drm/amd: Make amd_ip_funcs static for SDMA v5.2 The struct can be static, as it is only used in this translation unit. 
Signed-off-by: Tobias Jakobi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 93890f83e270..d740255edf5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1776,7 +1776,7 @@ static void sdma_v5_2_dump_ip_state(void *handle) amdgpu_gfx_off_ctrl(adev, true); } -const struct amd_ip_funcs sdma_v5_2_ip_funcs = { +static const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, .late_init = NULL, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h index b70414fef2a1..863145b3a77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h @@ -24,7 +24,6 @@ #ifndef __SDMA_V5_2_H__ #define __SDMA_V5_2_H__ -extern const struct amd_ip_funcs sdma_v5_2_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block; #endif /* __SDMA_V5_2_H__ */ From 17d30ed33c8a9e7b866dd7c2ceb4a6858cfcaa81 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Sun, 4 Aug 2024 15:56:29 +0200 Subject: [PATCH 275/447] drm/amdgpu/swsmu: fix SMU11 typos (memlk -> memclk) No functional changes. Signed-off-by: Tobias Jakobi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 076620fa3ef5..16af1a329621 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1989,7 +1989,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor.Mem_FPS, activity_monitor.Mem_MinFreqStep, activity_monitor.Mem_MinActiveFreqType, @@ -2051,7 +2051,7 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u activity_monitor.Soc_PD_Data_error_coeff = input[8]; activity_monitor.Soc_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor.Mem_FPS = input[1]; activity_monitor.Mem_MinFreqStep = input[2]; activity_monitor.Mem_MinActiveFreqType = input[3]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 0d3e1a121b67..9c3c48297cba 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1691,7 +1691,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor->Mem_FPS, activity_monitor->Mem_MinFreqStep, activity_monitor->Mem_MinActiveFreqType, @@ -1756,7 +1756,7 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * activity_monitor->Fclk_PD_Data_error_coeff = input[8]; activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor->Mem_FPS = input[1]; activity_monitor->Mem_MinFreqStep = input[2]; 
activity_monitor->Mem_MinActiveFreqType = input[3]; From 020620424b27bababf7f53d00692ab919c357a3f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jul 2024 12:08:28 +0200 Subject: [PATCH 276/447] drm/amd: Use a constant format string for amdgpu_ucode_request Multiple files in amdgpu call amdgpu_ucode_request() with a fw_name variable that the compiler cannot check for being a valid format string, as seen by enabling the (default-disabled) -Wformat-security option: drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c: In function 'amdgpu_mes_init_microcode': drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1517:61: error: format not a string literal and no format arguments [-Werror=format-security] 1517 | r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); | ^~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c: In function 'amdgpu_uvd_sw_init': drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c:263:9: error: format not a string literal and no format arguments [-Werror=format-security] 263 | r = amdgpu_ucode_request(adev, &adev->uvd.fw, fw_name); | ^ drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c: In function 'amdgpu_vce_sw_init': drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c:161:9: error: format not a string literal and no format arguments [-Werror=format-security] 161 | r = amdgpu_ucode_request(adev, &adev->vce.fw, fw_name); | ^ drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c: In function 'amdgpu_umsch_mm_init_microcode': drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c:590:9: error: format not a string literal and no format arguments [-Werror=format-security] 590 | r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, fw_name); | ^ drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c: In function 'amdgpu_cgs_get_firmware_info': drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:417:72: error: format not a string literal and no format arguments [-Werror=format-security] 417 | err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name); | ^~~~~~~ drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c: In function 'load_dmcu_fw': drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:2221:9: error: format not a string literal and no format arguments [-Werror=format-security] 2221 | r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, fw_name_dmcu); | ^ drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c: In function 'dm_init_microcode': drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:5147:9: error: format not a string literal and no format arguments [-Werror=format-security] 5147 | r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, fw_name_dmub); | ^ Change these all to use a "%s" format with the actual name as an argument, to let the compiler prove this to be correct. 
Fixes: e5a7d047f41b ("drm/amd: Use `amdgpu_ucode_*` helpers for CGS") Fixes: 52215e2a5d4a ("drm/amd: Use `amdgpu_ucode_*` helpers for VCE") Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index c3d89088123d..16153d275d7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -414,7 +414,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name); if (err) { DRM_ERROR("Failed to load firmware \"%s\"", fw_name); amdgpu_ucode_release(&adev->pm.fw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index dac88d2dd70d..1b1e94b5b977 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1514,7 +1514,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); } - r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], "%s", fw_name); if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index fbc2852278e1..6162582d0aa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -587,7 +587,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) break; } - r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name); if (r) { release_firmware(adev->umsch_mm.fw); adev->umsch_mm.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 07d930339b07..775c09d57222 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->uvd.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->uvd.fw, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n", fw_name); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 968ca2c84ef7..51b045de409d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->vce.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->vce.fw, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", fw_name); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index fe8a88a7fe59..454d205e4501 100644 
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2220,7 +2220,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, fw_name_dmcu); + r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, "%s", fw_name_dmcu); if (r == -ENODEV) { /* DMCU firmware is not necessary, so don't raise a fuss if it's missing */ DRM_DEBUG_KMS("dm: DMCU firmware not found\n"); @@ -5157,7 +5157,7 @@ static int dm_init_microcode(struct amdgpu_device *adev) /* ASIC doesn't support DMUB. */ return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, fw_name_dmub); + r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, "%s", fw_name_dmub); return r; } From c6dbab46324b1742b50dc2fb5c1fee2c28129439 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 30 Jul 2024 17:58:12 +0200 Subject: [PATCH 277/447] drm/radeon/r100: Handle unknown family in r100_cp_init_microcode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With -Werror: In function ‘r100_cp_init_microcode’, inlined from ‘r100_cp_init’ at drivers/gpu/drm/radeon/r100.c:1136:7: include/linux/printk.h:465:44: error: ‘%s’ directive argument is null [-Werror=format-overflow=] 465 | #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) | ^ include/linux/printk.h:437:17: note: in definition of macro ‘printk_index_wrap’ 437 | _p_func(_fmt, ##__VA_ARGS__); \ | ^~~~~~~ include/linux/printk.h:508:9: note: in expansion of macro ‘printk’ 508 | printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) | ^~~~~~ drivers/gpu/drm/radeon/r100.c:1062:17: note: in expansion of macro ‘pr_err’ 1062 | pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name); | ^~~~~~ Fix this by converting the if/else if/... construct into a proper switch() statement with a default to handle the error case. As a bonus, the generated code is ca. 100 bytes smaller (with gcc 11.4.0 targeting arm32). 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r100.c | 70 ++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index d7d7d23bf9a1..80703417d8a1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1016,45 +1016,65 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) DRM_DEBUG_KMS("\n"); - if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) || - (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) || - (rdev->family == CHIP_RS200)) { + switch (rdev->family) { + case CHIP_R100: + case CHIP_RV100: + case CHIP_RV200: + case CHIP_RS100: + case CHIP_RS200: DRM_INFO("Loading R100 Microcode\n"); fw_name = FIRMWARE_R100; - } else if ((rdev->family == CHIP_R200) || - (rdev->family == CHIP_RV250) || - (rdev->family == CHIP_RV280) || - (rdev->family == CHIP_RS300)) { + break; + + case CHIP_R200: + case CHIP_RV250: + case CHIP_RV280: + case CHIP_RS300: DRM_INFO("Loading R200 Microcode\n"); fw_name = FIRMWARE_R200; - } else if ((rdev->family == CHIP_R300) || - (rdev->family == CHIP_R350) || - (rdev->family == CHIP_RV350) || - (rdev->family == CHIP_RV380) || - (rdev->family == CHIP_RS400) || - (rdev->family == CHIP_RS480)) { + break; + + case CHIP_R300: + case CHIP_R350: + case CHIP_RV350: + case CHIP_RV380: + case CHIP_RS400: + case CHIP_RS480: DRM_INFO("Loading R300 Microcode\n"); fw_name = FIRMWARE_R300; - } else if ((rdev->family == CHIP_R420) || - (rdev->family == CHIP_R423) || - (rdev->family == CHIP_RV410)) { + break; + + case CHIP_R420: + case CHIP_R423: + case CHIP_RV410: DRM_INFO("Loading R400 Microcode\n"); fw_name = FIRMWARE_R420; - } else if ((rdev->family == CHIP_RS690) || - (rdev->family == CHIP_RS740)) { + break; + + case CHIP_RS690: + case CHIP_RS740: DRM_INFO("Loading RS690/RS740 Microcode\n"); fw_name = FIRMWARE_RS690; - } else if (rdev->family == CHIP_RS600) { + break; + + case CHIP_RS600: DRM_INFO("Loading RS600 Microcode\n"); fw_name = FIRMWARE_RS600; - } else if ((rdev->family == CHIP_RV515) || - (rdev->family == CHIP_R520) || - (rdev->family == CHIP_RV530) || - (rdev->family == CHIP_R580) || - (rdev->family == CHIP_RV560) || - (rdev->family == CHIP_RV570)) { + break; + + case CHIP_RV515: + case CHIP_R520: + case CHIP_RV530: + case CHIP_R580: + case CHIP_RV560: + case CHIP_RV570: DRM_INFO("Loading R500 Microcode\n"); fw_name = FIRMWARE_R520; + break; + + default: + DRM_ERROR("Unsupported Radeon family %u\n", rdev->family); + return -EINVAL; } err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); From c6b86421f1f9ddf9d706f2453159813ee39d0cf9 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Tue, 6 Aug 2024 22:27:32 +0200 Subject: [PATCH 278/447] drm/amdgpu: Actually check flags for all context ops. Missing validation ... Checked libdrm and it clears all the structs, so we should be safe to just check everything. 
Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5cb33ac99f70..c43d1b6e5d66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_QUERY_STATE2: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_GET_STABLE_PSTATE: From c30fb344a2f7fb5d553e98577185d9f8147598e2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 24 May 2024 10:49:33 -0400 Subject: [PATCH 279/447] drm/amdgpu/mes: add API for legacy queue reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add API for resetting kernel queues. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 16 ++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 1b1e94b5b977..b2a9df202913 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -819,6 +819,30 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, return r; } +int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct mes_reset_legacy_queue_input queue_input; + int r; + + memset(&queue_input, 0, sizeof(queue_input)); + + queue_input.queue_type = ring->funcs->type; + queue_input.doorbell_offset = ring->doorbell_index; + queue_input.pipe_id = ring->pipe; + queue_input.queue_id = ring->queue; + queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + queue_input.wptr_addr = ring->wptr_gpu_addr; + queue_input.vmid = vmid; + + r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset legacy queue\n"); + + return r; +} + uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) { struct mes_misc_op_input op_input; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2d659c612f03..174283a0fc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -279,6 +279,16 @@ struct mes_resume_gang_input { uint64_t gang_context_addr; }; +struct mes_reset_legacy_queue_input { + uint32_t queue_type; + uint32_t doorbell_offset; + uint32_t pipe_id; + uint32_t queue_id; + uint64_t mqd_addr; + uint64_t wptr_addr; + uint32_t vmid; +}; + enum mes_misc_opcode { MES_MISC_OP_WRITE_REG, MES_MISC_OP_READ_REG, @@ -347,6 +357,9 @@ struct amdgpu_mes_funcs { int (*misc_op)(struct amdgpu_mes *mes, struct mes_misc_op_input *input); + + int (*reset_legacy_queue)(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) 
(adev)->mes.kiq_hw_init((adev)) @@ -381,6 +394,9 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, u64 gpu_addr, u64 seq); +int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int vmid); uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); int amdgpu_mes_wreg(struct amdgpu_device *adev, From 45a2a4514320f9c835eccb661601357cb1fefd82 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 24 May 2024 11:07:57 -0400 Subject: [PATCH 280/447] drm/amdgpu/mes11: add API for legacy queue reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add API for resetting kernel queues. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 1a5ad5be33bf..61b8cb39826d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -595,6 +595,38 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } +static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -603,6 +635,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, + .reset_legacy_queue = mes_v11_0_reset_legacy_queue, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, From 947c0808693e267185c5471f87f83146e4300561 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 24 May 2024 11:44:31 -0400 Subject: [PATCH 281/447] drm/amdgpu/mes12: add API for legacy queue reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add API for resetting kernel queues. 
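A caller-side sketch of how the new helper might be driven (illustrative only; the wrapper below is hypothetical and the per-ring wiring lands separately): because amdgpu_mes_reset_legacy_queue() derives the queue type, doorbell, pipe/queue ids, MQD and wptr addresses from the ring itself, a caller only has to supply the VMID.

/* Hypothetical caller; assumes the MES funcs table of the running IP
 * version implements reset_legacy_queue (mes11/mes12 in this series). */
static int example_reset_ring(struct amdgpu_ring *ring, unsigned int vmid)
{
	struct amdgpu_device *adev = ring->adev;

	if (!adev->mes.funcs->reset_legacy_queue)
		return -EOPNOTSUPP;	/* older MES without reset support */

	return amdgpu_mes_reset_legacy_queue(adev, ring, vmid);
}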
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 249e5a66205c..5e06a982eb54 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -646,6 +646,38 @@ static void mes_v12_0_enable_unmapped_doorbell_handling( WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data); } +static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .add_hw_queue = mes_v12_0_add_hw_queue, .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -654,6 +686,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .suspend_gang = mes_v12_0_suspend_gang, .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, + .reset_legacy_queue = mes_v12_0_reset_legacy_queue, }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, From a46a7bef7d41ee7787c246f47a656fbafe02f122 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 1 Aug 2024 19:17:11 +0530 Subject: [PATCH 282/447] drm/amdgpu: add vcn_v5_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v5_0. 
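The dump buffer used below is a flat, instance-major array: slot (i * reg_count + j) holds register j of VCN instance i, and UVD_POWER_STATUS always sits at j == 0 so a consumer can tell powered-off instances apart. A hedged reader sketch (the function itself is illustrative; the mask and layout come from the patch):

/* Illustrative consumer of the layout; assumes ip_dump was filled by the
 * dump_ip_state callback with reg_count entries per instance. */
static void example_print_vcn_dump(const uint32_t *ip_dump,
				   unsigned int num_inst, unsigned int reg_count)
{
	unsigned int i, j;

	for (i = 0; i < num_inst; i++) {
		const uint32_t *inst = &ip_dump[i * reg_count];

		/* slot 0 is UVD_POWER_STATUS by construction */
		if ((inst[0] & UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) == 1)
			continue;	/* powered off: only slot 0 is meaningful */

		for (j = 0; j < reg_count; j++)
			pr_info("vcn%u reg[%u] = 0x%08x\n", i, j, inst[j]);
	}
}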
Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 170 +++++++++++++++++++++++- 1 file changed, 169 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 68c97fcd539b..6e6eaf2358d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -37,6 +37,134 @@ #include +static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_VMIDS_MULTI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC_VMIDS_MULTI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_GATE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_CTRL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2), + SOC15_REG_ENTRY_STR(VCN, 0, 
regUVD_VCPU_CACHE_OFFSET2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_GPGPU_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_GPGPU_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD4_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD4_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE4_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE4_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE5_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE5_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE6_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE6_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE7_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE7_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_LOW), + 
SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_IPX_DLDO_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_IPX_DLDO_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_VCPU_CACHE_OFFSET0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_VMID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_CLK_EN_VCPU_REPORT), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SCRATCH1) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -83,6 +211,8 @@ static int vcn_v5_0_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -137,6 +267,14 @@ static int vcn_v5_0_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -173,6 +311,8 @@ static int vcn_v5_0_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1297,6 +1437,34 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v5_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1315,7 +1483,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v5_0_dump_ip_state, .print_ip_state = NULL, }; From 3df34334147e73b05480db6cf8353a405597d04a Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 23:29:09 +0530 Subject: [PATCH 283/447] Revert "drm/amdgpu: add vcn_v5_0 ip dump support" 
This reverts commit a46a7bef7d41ee7787c246f47a656fbafe02f122. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 170 +----------------------- 1 file changed, 1 insertion(+), 169 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 6e6eaf2358d2..68c97fcd539b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -37,134 +37,6 @@ #include -static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET0), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET1), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_VMIDS_MULTI), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC_VMIDS_MULTI), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_GATE), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_CTRL), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS2), - 
SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_GPGPU_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_GPGPU_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD4_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD4_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE4_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE4_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE5_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE5_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE6_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE6_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE7_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE7_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, 
regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_IPX_DLDO_CONFIG), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_IPX_DLDO_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_VCPU_CACHE_OFFSET0), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_VMID), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_CLK_EN_VCPU_REPORT), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SCRATCH1) -}; - static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -211,8 +83,6 @@ static int vcn_v5_0_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -267,14 +137,6 @@ static int vcn_v5_0_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } return 0; } @@ -311,8 +173,6 @@ static int vcn_v5_0_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); - kfree(adev->vcn.ip_dump); - return r; } @@ -1437,34 +1297,6 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v5_0_dump_ip_state(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1483,7 +1315,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = vcn_v5_0_dump_ip_state, + .dump_ip_state = NULL, .print_ip_state = NULL, }; From 2ab5dc59177419d8a49e89585e82ff41524270fc Mon Sep 17 00:00:00 2001 From: 
Jack Xiao Date: Wed, 7 Aug 2024 11:43:45 +0800 Subject: [PATCH 284/447] drm/amdgpu/mes12: update mes_v12_api_def.h Update mes12 api definition. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/mes_v12_api_def.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index 4cf2c9f30b3d..101e2fe962c6 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -97,6 +97,7 @@ enum MES_QUEUE_TYPE { MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_MAX, + MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX, }; struct MES_API_STATUS { @@ -242,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { uint32_t send_write_data : 1; uint32_t os_tdr_timeout_override : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1; + uint32_t halt_on_misaligned_access : 1; + uint32_t use_add_queue_unmap_flag_addr : 1; + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; - uint32_t reserved : 15; + uint32_t reserved : 11; }; uint32_t uint32_all; }; From c7d4355648ffa02a1551495b05c71ea6c884d29c Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 11:53:35 +0800 Subject: [PATCH 285/447] drm/amdgpu/mes: add multiple mes ring instances support Add multiple mes ring instances in mes structure to support multiple mes pipes. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 34 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 34 ++++++++++++------------ 9 files changed, 47 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 5c9f36f01db0..28bd2098a65e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -998,7 +998,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ if (amdgpu_device_skip_hw_access(adev)) return 0; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) return amdgpu_mes_rreg(adev, reg); BUG_ON(!ring->funcs->emit_rreg); @@ -1071,7 +1071,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 if (amdgpu_device_skip_hw_access(adev)) return; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_wreg(adev, reg, v); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c02659025656..b49b3650fd62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring || + if (ring == &adev->mes.ring[0] || + ring == &adev->mes.ring[1] || ring == &adev->umsch_mm.ring) continue; @@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if 
(adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, ref, mask); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index b2a9df202913..be2156bf0252 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + spin_lock_init(&adev->mes.ring_lock[i]); + adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 174283a0fc07..d87d068952e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -82,8 +82,8 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring; - spinlock_t ring_lock; + struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 111c380f929b..b287a82e6177 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) adev->gfx.is_poweron = false; } - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b88a6fa173b3..2797fd84432b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid, GET_INST(GC, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 26efce9aa410..edcb5351f8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 61b8cb39826d..4c7899e527fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -162,7 +162,7 @@ static int 
mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; @@ -191,7 +191,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; @@ -221,7 +221,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v11_0_get_op_string(x_pkt); misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); @@ -263,7 +263,7 @@ error_undo: amdgpu_ring_undo(ring); error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -1058,7 +1058,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } @@ -1072,7 +1072,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1114,7 +1114,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -1167,7 +1167,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1259,12 +1259,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1375,9 +1375,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1395,7 +1395,7 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq) { @@ -1440,7 +1440,7 @@ out: * with MES enabled. 
*/ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 5e06a982eb54..ac6209a0029c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -148,7 +148,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; @@ -177,7 +177,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; @@ -207,7 +207,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); @@ -249,7 +249,7 @@ error_undo: amdgpu_ring_undo(ring); error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -1128,7 +1128,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); r = amdgpu_ring_test_ring(kiq_ring); if (r) { @@ -1147,7 +1147,7 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1193,7 +1193,7 @@ static int mes_v12_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v12_0_ring_funcs; @@ -1246,7 +1246,7 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1335,12 +1335,12 @@ static int mes_v12_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1384,7 +1384,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } static void mes_v12_0_kiq_setting(struct 
amdgpu_ring *ring) @@ -1448,9 +1448,9 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { mes_v12_0_kiq_dequeue_sched(adev); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } mes_v12_0_enable(adev, false); @@ -1463,7 +1463,7 @@ static int mes_v12_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq || adev->enable_uni_mes) { @@ -1515,7 +1515,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; From 2f93ec07ab54cae66155d0a09182843f358da178 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 23:33:05 +0530 Subject: [PATCH 286/447] Revert "drm/amdgpu: add print support for vcn_v3_0 ip dump" This reverts commit cd162ae9bc3ba91eb630a1321afd3d1dde5f2000. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 34 +-------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index c2278cc49dd5..0d871859690a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2324,38 +2324,6 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - static void vcn_v3_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2403,7 +2371,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, .dump_ip_state = vcn_v3_0_dump_ip_state, - .print_ip_state = vcn_v3_0_print_ip_state, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { From 434b3554d6435dc4e19083a2214dee40a88e09e1 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 23:34:58 +0530 Subject: [PATCH 287/447] Revert "drm/amdgpu: add vcn_v3_0 ip dump support" This reverts commit 58d283801d06d4434df6625ed6e6b8d2ba47fe65. 
Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 151 +------------------------- 1 file changed, 1 insertion(+), 150 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 0d871859690a..24f947751c46 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,115 +60,6 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 -static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = { - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RBC_RB_RPTR), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RBC_RB_WPTR), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_RBC_IB_VMID), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE_VMIDS_MULTI), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC_VMIDS_MULTI), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SOFT_RESET), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SOFT_RESET2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_GATE), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_CTRL), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_GATE), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_CTRL), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_STATUS2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_GATE2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_GPGPU_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_GPGPU_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_64BIT_BAR_LOW), - 
SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_DBW_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_DBW_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CM_COLOC_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CM_COLOC_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD4_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD4_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE4_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE4_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE5_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE5_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE6_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE6_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE7_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE7_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SCRATCH1) -}; - static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -235,8 +126,6 @@ 
static int vcn_v3_0_sw_init(void *handle) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; r = amdgpu_vcn_sw_init(adev); @@ -357,15 +246,6 @@ static int vcn_v3_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } - return 0; } @@ -404,7 +284,6 @@ static int vcn_v3_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); - kfree(adev->vcn.ip_dump); return r; } @@ -2324,34 +2203,6 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v3_0_dump_ip_state(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2370,7 +2221,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, - .dump_ip_state = vcn_v3_0_dump_ip_state, + .dump_ip_state = NULL, .print_ip_state = NULL, }; From 311f2b587461f86e3c30e7ac28df38be4a862ac7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 23:36:26 +0530 Subject: [PATCH 288/447] Revert "drm/amdgpu: add vcn ip dump ptr in vcn global struct" This reverts commit f3392e662efdc095f10109f588aa4f3be86f7eb5. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index f127eccf59d7..1a5439abd1a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,9 +330,6 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; bool using_unified_queue; - - /* IP reg dump */ - uint32_t *ip_dump; }; struct amdgpu_fw_shared_rb_ptrs_struct { From 0fe20258b4989b9112b5e9470df33a0939403fd4 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 2 Aug 2024 12:20:36 +0530 Subject: [PATCH 289/447] drm/amd/display: Add null check for 'afb' in amdgpu_dm_update_cursor (v2) This commit adds a null check for the 'afb' variable in the amdgpu_dm_update_cursor function. Previously, 'afb' was assumed to be null at line 8388, but was used later in the code without a null check. This could potentially lead to a null pointer dereference. 
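The defensive shape of the fix, as a minimal sketch with simplified stand-in types (not the DC structures): the pitch is computed only after the framebuffer pointer has been checked, and the previously set attributes are left untouched when it is NULL.

#include <stdint.h>
#include <stddef.h>

struct example_fb { uint32_t pitch_bytes; uint32_t cpp; };
struct example_cursor_attrs { uint32_t pitch; };

static void example_update_pitch(struct example_cursor_attrs *attrs,
				 const struct example_fb *afb)
{
	/* afb may legitimately be NULL when no framebuffer is bound;
	 * guard the dereference instead of assuming it is set */
	if (afb)
		attrs->pitch = afb->pitch_bytes / afb->cpp;
}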
Changes since v1: - Moved the null check for 'afb' to the line where 'afb' is used. (Alex) Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:8433 amdgpu_dm_update_cursor() error: we previously assumed 'afb' could be null (see line 8388) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Co-developed-by: Alex Hung Signed-off-by: Alex Hung Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 454d205e4501..7d999e352df3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8696,7 +8696,8 @@ static void amdgpu_dm_update_cursor(struct drm_plane *plane, adev->dm.dc->caps.color.dpp.gamma_corr) attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1; - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + if (afb) + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; if (crtc_state->stream) { if (!dc_stream_set_cursor_attributes(crtc_state->stream, From 596a4ec72876f1061f0ef4be47076b093f03b4f9 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Mon, 27 May 2024 10:30:45 -0400 Subject: [PATCH 290/447] drm/amd/display: fix minor coding errors where dml21 phase 5 uses wrong variables [why & how] There is a coding error which causes incorrect variables to be assigned in DML21 phase 5. Reviewed-by: Austin Zheng Signed-off-by: Wenjing Liu Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index e9b40a45ffdd..a9c3ed6d50ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -274,7 +274,7 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o /* * Phase 5: Optimize for Stutter */ - memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); + memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params)); l->stutter_phase.dml = dml; l->stutter_phase.display_config = &l->base_display_config_with_meta; l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter; @@ -287,7 +287,7 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o if (stutter_success) { memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = true; + l->base_display_config_with_meta.stage5.success = true; } /* From 782cef7fc367542ccc851d93edbed166defdfc27 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 31 May 2024 11:37:15 -0400 Subject: [PATCH 291/447] drm/amd/display: apply vmin optimization even if it doesn't reach vmin level [why] Based on power measurement result, in most cases when display clock is higher than Vmin display clock, lowering display clock using dynamic ODM will improve overall power consumption by 0 to 4 watts even if we can't reach Vmin. 
[how] Allow vmin optimization applied even if dispclk can't reach Vmin. Reviewed-by: Austin Zheng Signed-off-by: Wenjing Liu Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 14 +++++++++----- .../display/dc/dml2/dml21/src/dml2_top/dml_top.c | 13 +++++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 06e786995390..68b333b68933 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -717,6 +717,8 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) &in_out->base_display_config->display_config; const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; + struct dml2_optimization_stage4_state *state = + &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -737,28 +739,30 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) */ if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; for (i = 0; i < display_config->num_streams; i++) { if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * ODM Combine requires horizontal timing divisible by 2 so each * ODM segment has the same size. */ else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * Our hardware support seamless ODM transitions for DP encoders * only. 
*/ else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; } + state->performed = true; + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index a9c3ed6d50ef..f9f8869cd8b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -266,9 +266,18 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); - if (vmin_success) { + if (l->optimized_display_config_with_meta.stage4.performed) { + /* + * when performed is true, optimization has applied to + * optimized_display_config_with_meta and it has passed mode + * support. However it may or may not pass the test function to + * reach actual Vmin. As long as voltage is optimized even if it + * doesn't reach Vmin level, there is still power benefit so in + * this case we will still copy this optimization into base + * display config. + */ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = true; + l->base_display_config_with_meta.stage4.success = vmin_success; } /* From c20da89edb69f05ab78fe804899d50be2596b0f0 Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Mon, 29 Jul 2024 14:35:45 -0400 Subject: [PATCH 292/447] drm/amd/display: 3DLUT non-DMA refactor [Why] Currently the handling for 3DLUT is found in multiple different places, which causes issues when the different functions are not in sync with each other. Frequently bugs occur because the LUT handling is broken up, and what has already been handled isn't kept track of well, which can cause earlier changes to the LUT params to be overridden. [How] Remove DMA LUT handling from DCN401 and refactor legacy LUT handling in one place to make it easier to keep track of what has and needs to be done. 
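One detail worth noting in the dcn401_set_mcm_luts() hunk below: the per-stage return values are now accumulated with &=, so a failure in any LUT stage is reflected in the function's final result instead of being overwritten by the last stage programmed. A minimal sketch of the idiom (simplified; the names follow the hunk, but this is not the driver's exact code):

	bool result;

	result  = program_1dlut(mpc, lut_params, mpcc_id);	/* first stage seeds the result */
	result &= program_shaper(mpc, lut_params, mpcc_id);	/* later stages must all succeed */
	if (mpc->funcs->program_3dlut)				/* optional hook, checked before use */
		result &= program_3dlut(mpc, lut3d, mpcc_id);
	return result;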
Reviewed-by: Ilya Bakoulin Signed-off-by: Relja Vojvodic Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn32/dcn32_init.c | 1 - .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 46 ++++++++----------- .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 968b010971ea..58bed01fc20e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -162,7 +162,6 @@ static const struct hwseq_private_funcs dcn32_private_funcs = { .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, - .populate_mcm_luts = dcn401_populate_mcm_luts, }; void dcn32_hw_sequencer_init_functions(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 77489bbcda02..44c1184868e0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -670,46 +670,40 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, struct dpp *dpp_base = pipe_ctx->plane_res.dpp; int mpcc_id = pipe_ctx->plane_res.hubp->inst; struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; - bool result = true; + bool result; const struct pwl_params *lut_params = NULL; bool rval; mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE; // 1D LUT - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { - if (plane_state->blend_tf.type == TF_TYPE_HWPWL) - lut_params = &plane_state->blend_tf.pwl; - else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { - rval = cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf, - &dpp_base->regamma_params, false); - lut_params = rval ? &dpp_base->regamma_params : NULL; - } - result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); - lut_params = NULL; + if (plane_state->blend_tf.type == TF_TYPE_HWPWL) + lut_params = &plane_state->blend_tf.pwl; + else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf, + &dpp_base->regamma_params, false); + lut_params = rval ? &dpp_base->regamma_params : NULL; } + result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); + lut_params = NULL; // Shaper - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { - if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL) - lut_params = &plane_state->in_shaper_func.pwl; - else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { - // TODO: dpp_base replace - ASSERT(false); - rval = cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func, - &dpp_base->shaper_params, true); - lut_params = rval ? 
&dpp_base->shaper_params : NULL; - } - - result = mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); + if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL) + lut_params = &plane_state->in_shaper_func.pwl; + else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { + // TODO: dpp_base replace + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func, + &dpp_base->shaper_params, true); + lut_params = rval ? &dpp_base->shaper_params : NULL; } + result &= mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); // 3D - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { + if (mpc->funcs->program_3dlut) { if (plane_state->lut3d_func.state.bits.initialized == 1) - result = mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id); + result &= mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id); else - result = mpc->funcs->program_3dlut(mpc, NULL, mpcc_id); + result &= mpc->funcs->program_3dlut(mpc, NULL, mpcc_id); } return result; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 457f4167e848..f4eda4a55ea7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -136,7 +136,7 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .calculate_dccg_k1_k2_values = NULL, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, - .populate_mcm_luts = dcn401_populate_mcm_luts, + .populate_mcm_luts = NULL, }; void dcn401_hw_sequencer_init_functions(struct dc *dc) From 95d9e0803e51d5a24276b7643b244c7477daf463 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 29 Jul 2024 15:29:09 -0600 Subject: [PATCH 293/447] drm/amd/display: Check null pointers before using dc->clk_mgr [WHY & HOW] dc->clk_mgr is null checked previously in the same function, indicating it might be null. Passing "dc" to "dc->hwss.apply_idle_power_optimizations", which dereferences null "dc->clk_mgr". (The function pointer resolves to "dcn35_apply_idle_power_optimizations".) This fixes 1 FORWARD_NULL issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b1253e4c81a8..3ba2acfdae2a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5425,7 +5425,8 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const if (allow == dc->idle_optimizations_allowed) return; - if (dc->hwss.apply_idle_power_optimizations && dc->hwss.apply_idle_power_optimizations(dc, allow)) + if (dc->hwss.apply_idle_power_optimizations && dc->clk_mgr != NULL && + dc->hwss.apply_idle_power_optimizations(dc, allow)) dc->idle_optimizations_allowed = allow; } From 4af0d8ebf74ccbb60d33fdd410891283dd6cb109 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Tue, 30 Jul 2024 11:55:23 -0400 Subject: [PATCH 294/447] drm/amd/display: Unlock Pipes Based On DET Allocation [Why] DML21 does not allocate DET evenly between pipes. May result in underflow when unlocking the pipes as DET could be overallocated. [How] 1. 
Unlock pipes that have a decreased amount of DET allocation 2. Wait for the double buffer to be updated. 3. Unlock the remaining pipes. Reviewed-by: Alvin Lee Signed-off-by: Austin Zheng Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 28 ++++++ .../display/dc/hubbub/dcn401/dcn401_hubbub.c | 23 +++++ .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 91 +++++++++++++++++++ .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 2 + .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 1 + drivers/gpu/drm/amd/display/dc/inc/resource.h | 5 + 7 files changed, 151 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1c379a6b1b4c..b38340c690c6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -5275,3 +5275,31 @@ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuratio dml2_options->svp_pstate.callbacks.remove_phantom_streams_and_planes = &dc_state_remove_phantom_streams_and_planes; dml2_options->svp_pstate.callbacks.release_phantom_streams_and_planes = &dc_state_release_phantom_streams_and_planes; } + +/* Returns number of DET segments allocated for a given OTG_MASTER pipe */ +int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master) +{ + struct pipe_ctx *opp_heads[MAX_PIPES]; + struct pipe_ctx *dpp_pipes[MAX_PIPES]; + + int dpp_count = 0; + int det_segments = 0; + + if (!otg_master->stream) + return 0; + + int slice_count = resource_get_opp_heads_for_otg_master(otg_master, + &state->res_ctx, opp_heads); + + for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) { + if (opp_heads[slice_idx]->plane_state) { + dpp_count = resource_get_dpp_pipes_for_opp_head( + opp_heads[slice_idx], + &state->res_ctx, + dpp_pipes); + for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) + det_segments += dpp_pipes[dpp_idx]->hubp_regs.det_size; + } + } + return det_segments; +} diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index d36f758971a8..37d26fa0b6fb 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -1170,6 +1170,28 @@ static void dcn401_program_compbuf_segments(struct hubbub *hubbub, unsigned comp } } +static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + switch (hubp_inst) { + case 0: + REG_WAIT(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, hubbub2->det0_size, 1, 100000); /* 1 vupdate at 10hz */ + break; + case 1: + REG_WAIT(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, hubbub2->det1_size, 1, 100000); + break; + case 2: + REG_WAIT(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, hubbub2->det2_size, 1, 100000); + break; + case 3: + REG_WAIT(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, hubbub2->det3_size, 1, 100000); + break; + default: + break; + } +} + static const struct hubbub_funcs hubbub4_01_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, @@ -1192,6 +1214,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .set_request_limit = hubbub32_set_request_limit, .program_det_segments = dcn401_program_det_segments, .program_compbuf_segments = dcn401_program_compbuf_segments, + 
.wait_for_det_update = dcn401_wait_for_det_update, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 44c1184868e0..22c7afbcfc4e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1663,3 +1663,94 @@ void dcn401_hardware_release(struct dc *dc) } } +void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master) +{ + struct pipe_ctx *opp_heads[MAX_PIPES]; + struct pipe_ctx *dpp_pipes[MAX_PIPES]; + struct hubbub *hubbub = dc->res_pool->hubbub; + int dpp_count = 0; + + if (!otg_master->stream) + return; + + int slice_count = resource_get_opp_heads_for_otg_master(otg_master, + &context->res_ctx, opp_heads); + + for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) { + if (opp_heads[slice_idx]->plane_state) { + dpp_count = resource_get_dpp_pipes_for_opp_head( + opp_heads[slice_idx], + &context->res_ctx, + dpp_pipes); + for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) { + struct pipe_ctx *dpp_pipe = dpp_pipes[dpp_idx]; + if (dpp_pipe && hubbub && + dpp_pipe->plane_res.hubp && + hubbub->funcs->wait_for_det_update) + hubbub->funcs->wait_for_det_update(hubbub, dpp_pipe->plane_res.hubp->inst); + } + } + } +} + +void dcn401_interdependent_update_lock(struct dc *dc, + struct dc_state *context, bool lock) +{ + unsigned int i = 0; + struct pipe_ctx *pipe = NULL; + struct timing_generator *tg = NULL; + bool pipe_unlocked[MAX_PIPES] = {0}; + + if (lock) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) + continue; + dc->hwss.pipe_control_lock(dc, pipe, true); + } + } else { + /* Unlock pipes based on the change in DET allocation instead of pipe index + * Prevents over allocation of DET during unlock process + * e.g. 
2 pipe config with different streams with a max of 20 DET segments + * Before: After: + * - Pipe0: 10 DET segments - Pipe0: 12 DET segments + * - Pipe1: 10 DET segments - Pipe1: 8 DET segments + * If Pipe0 gets updated first, 22 DET segments will be allocated + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + int current_pipe_idx = i; + + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + pipe_unlocked[i] = true; + continue; + } + + // If the same stream exists in old context, ensure the OTG_MASTER pipes for the same stream get compared + struct pipe_ctx *old_otg_master = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, pipe->stream); + + if (old_otg_master) + current_pipe_idx = old_otg_master->pipe_idx; + if (resource_calculate_det_for_stream(context, pipe) < + resource_calculate_det_for_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[current_pipe_idx])) { + dc->hwss.pipe_control_lock(dc, pipe, false); + pipe_unlocked[i] = true; + dcn401_wait_for_det_buffer_update(dc, context, pipe); + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (pipe_unlocked[i]) + continue; + pipe = &context->res_ctx.pipe_ctx[i]; + dc->hwss.pipe_control_lock(dc, pipe, false); + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 8e9c1c17aa66..3ecb1ebffcee 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -81,4 +81,6 @@ void dcn401_hardware_release(struct dc *dc); void dcn401_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy); +void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); +void dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index f4eda4a55ea7..b5f63675afcb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -38,7 +38,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .disable_audio_stream = dce110_disable_audio_stream, .disable_plane = dcn20_disable_plane, .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn32_interdependent_update_lock, + .interdependent_update_lock = dcn401_interdependent_update_lock, .cursor_lock = dcn10_cursor_lock, .prepare_bandwidth = dcn401_prepare_bandwidth, .optimize_bandwidth = dcn401_optimize_bandwidth, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index dd2b2864876c..67c32401893e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -227,6 +227,7 @@ struct hubbub_funcs { void (*get_mall_en)(struct hubbub *hubbub, unsigned int *mall_in_use); void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg); void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); 
+ void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 96d40d33a1f9..9cd80d3864c7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -639,4 +639,9 @@ struct dscl_prog_data *resource_get_dscl_prog_data(struct pipe_ctx *pipe_ctx); * @dml2_options: struct to hold callbacks */ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuration_options *dml2_options); + +/* + * Calculate total DET allocated for all pipes for a given OTG_MASTER pipe + */ +int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ From 680458d41aa46a009909482f58358205b5c4b438 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Tue, 30 Jul 2024 16:32:44 -0400 Subject: [PATCH 295/447] drm/amd/display: Update to use new DCCG callbacks [Why and how] Install the new DCCG callback table (dccg35_funcs_new) in dccg35_create() instead of the legacy dccg35_funcs table; the two assignments were previously swapped. Reviewed-by: Chris Park Signed-off-by: Hansen Dsouza Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 7f91e48902e2..004c4fe3ddfc 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -2396,11 +2396,11 @@ struct dccg *dccg35_create( (void)&dccg35_disable_symclk_be_new; (void)&dccg35_set_symclk32_le_root_clock_gating; (void)&dccg35_set_smclk32_se_rcg; - (void)&dccg35_funcs_new; + (void)&dccg35_funcs; base = &dccg_dcn->base; base->ctx = ctx; - base->funcs = &dccg35_funcs; + base->funcs = &dccg35_funcs_new; dccg_dcn->regs = regs; dccg_dcn->dccg_shift = dccg_shift; From 8c4f9e466169b3e411947347fef09382c14e5733 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 30 Jul 2024 14:57:48 -0400 Subject: [PATCH 296/447] drm/amd/display: Add more logging for MALL static screen [why & how] Print additional info for MALL-related calculations and DMCUB messaging to aid debugging.
Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 10 ++++++++-- drivers/gpu/drm/amd/display/include/logger_types.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 22c7afbcfc4e..b604c8886ef4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1309,8 +1309,10 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) for (i = 0; i < dc->current_state->stream_count; i++) { /* MALL SS messaging is not supported with PSR at this time */ if (dc->current_state->streams[i] != NULL && - dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) + dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) { + DC_LOG_MALL("MALL SS not supported with PSR at this time\n"); return false; + } } memset(&cmd, 0, sizeof(cmd)); @@ -1320,8 +1322,9 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) if (enable) { if (dcn401_check_no_memory_request_for_cab(dc)) { /* 1. Check no memory request case for CAB. - * If no memory request case, send CAB_ACTION NO_DF_REQ DMUB message + * If no memory request case, send CAB_ACTION NO_DCN_REQ DMUB message */ + DC_LOG_MALL("sending CAB action NO_DCN_REQ\n"); cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ; } else { /* 2. Check if all surfaces can fit in CAB. @@ -1349,13 +1352,16 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) if (ways <= dc->caps.cache_num_ways && !mall_ss_unsupported) { cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB; cmd.cab.cab_alloc_ways = ways; + DC_LOG_MALL("cab allocation: %d ways. CAB action: DCN_SS_FIT_IN_CAB\n", ways); } else { cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB; + DC_LOG_MALL("frame does not fit in CAB: %d ways required. CAB action: DCN_SS_NOT_FIT_IN_CAB\n", ways); } } } else { /* Disable CAB */ cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_IDLE_OPTIMIZATION; + DC_LOG_MALL("idle optimization disabled\n"); } dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 83479951732a..a48d564d1660 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -61,6 +61,7 @@ #define DC_LOG_ALL_TF_CHANNELS(...) pr_debug("[GAMMA]:"__VA_ARGS__) #define DC_LOG_DSC(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__) +#define DC_LOG_MALL(...) pr_debug("[MALL]:"__VA_ARGS__) #define DC_LOG_DWB(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_DP2(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_AUTO_DPM_TEST(...) pr_debug("[AutoDPMTest]: "__VA_ARGS__) From ce4f9f79ff8cfc78a064c533f0aab563a5613d81 Mon Sep 17 00:00:00 2001 From: Muhammad Ahmed Date: Wed, 31 Jul 2024 18:55:57 -0400 Subject: [PATCH 297/447] drm/amd/display: guard otg disable w/a for test [why & how] HW no longer requires this workaround, but keep it to avoid regressions; instead, return early when running in the diagnostic test environment.
Reviewed-by: Charlene Liu Signed-off-by: Muhammad Ahmed Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index e075b2720f96..e2d906327e2e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -126,6 +126,9 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * struct dc *dc = clk_mgr_base->ctx->dc; int i; + if (dc->ctx->dce_environment == DCE_ENV_DIAG) + return; + for (i = 0; i < dc->res_pool->pipe_count; ++i) { struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; From 1b686053c06ffb9f4524b288110cf2a831ff7a25 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 30 Jul 2024 20:02:45 -0600 Subject: [PATCH 298/447] drm/amd/display: Check null pointer before trying to access it [why & how] Change the order of the pipe_ctx->plane_state check to ensure that plane_state is not null before accessing it. Reviewed-by: Alex Hung Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 425432ca497f..a68da1a7092d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1932,6 +1932,11 @@ static void dcn20_program_pipe( (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult) || + pipe_ctx->update_flags.bits.enable) + hws->funcs.set_hdr_multiplier(pipe_ctx); + + if (hws->funcs.populate_mcm_luts) { if (pipe_ctx->plane_state) { hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, @@ -1939,13 +1944,13 @@ static void dcn20_program_pipe( pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; } } - if (pipe_ctx->update_flags.bits.enable || - (pipe_ctx->plane_state && + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change) || (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.gamma_change) || (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.lut_3d)) + pipe_ctx->plane_state->update_flags.bits.lut_3d) || + pipe_ctx->update_flags.bits.enable) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish From 66e2d2d9a59f896def82a1c8684368be45cf4c06 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Wed, 31 Jul 2024 17:04:44 +0800 Subject: [PATCH 299/447] drm/amd/display: Check null pointer before trying to access it [why & how] Make sure plane_state is not null before calling a function that dereferences it. Also remove redundant code.
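The common pattern in these two fixes is factoring the null check so that && short-circuiting dominates every dereference, including clauses (such as the 'enable' flag) that never touched plane_state. A schematic sketch, using 'ps' as shorthand (illustrative only, not the exact driver code):

	/* Before: each clause re-checks ps, but the 'enable' clause is unguarded */
	if ((ps && ps->update_flags.bits.gamma_change) ||
	    (ps && ps->update_flags.bits.lut_3d) ||
	    pipe_ctx->update_flags.bits.enable)
		set_input_transfer_func(dc, pipe_ctx, ps);	/* may see ps == NULL */

	/* After: one dominating null check guards all later uses of ps */
	if (ps && (ps->update_flags.bits.gamma_change ||
		   ps->update_flags.bits.lut_3d ||
		   pipe_ctx->update_flags.bits.enable))
		set_input_transfer_func(dc, pipe_ctx, ps);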
Reviewed-by: Alex Hung Signed-off-by: Wayne Lin Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index a68da1a7092d..a80c08582932 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1928,15 +1928,10 @@ static void dcn20_program_pipe( pipe_ctx->stream->update_flags.raw) dcn20_update_dchubp_dpp(dc, pipe_ctx, context); - if (pipe_ctx->update_flags.bits.enable || - (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult)) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); - if ((pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult) || - pipe_ctx->update_flags.bits.enable) - hws->funcs.set_hdr_multiplier(pipe_ctx); - - if (hws->funcs.populate_mcm_luts) { if (pipe_ctx->plane_state) { hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, @@ -1944,13 +1939,12 @@ static void dcn20_program_pipe( pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; } } - if ((pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change) || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.gamma_change) || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.lut_3d) || - pipe_ctx->update_flags.bits.enable) + + if (pipe_ctx->plane_state && + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change || + pipe_ctx->plane_state->update_flags.bits.lut_3d || + pipe_ctx->update_flags.bits.enable)) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish From 07f4f9c00ec545dfa6251a44a09d2c48a76e7ee5 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 7 Aug 2024 17:21:53 +0530 Subject: [PATCH 300/447] drm/amdgpu: fix ptr check warning in gfx9 ip_dump Change if (ptr == NULL) to if (!ptr) for a better format and fix the warning. 
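For context, this and the following three patches are purely stylistic: if (!ptr) is the null test preferred by the kernel coding style (and flagged by checkpatch.pl); behavior is unchanged. The shape of each hunk:

	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
	if (!ptr)	/* preferred over 'if (ptr == NULL)' */
		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");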
Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 991f7c2fc1a2..ab10a05c7885 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2129,7 +2129,7 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -2142,7 +2142,7 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { From 98df5a7732e3b78bf8824d2938a8865a45cfc113 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 7 Aug 2024 17:25:24 +0530 Subject: [PATCH 301/447] drm/amdgpu: fix ptr check warning in gfx10 ip_dump Change condition, if (ptr == NULL) to if (!ptr) for a better format and fix the warning. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 1b88528b512b..75a6ca645964 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4648,7 +4648,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -4661,7 +4661,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -4674,7 +4674,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { From bd15f805cdc503ac229a14f5fe21db12e6e7f84a Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 7 Aug 2024 17:27:10 +0530 Subject: [PATCH 302/447] drm/amdgpu: fix ptr check warning in gfx11 ip_dump Change condition, if (ptr == NULL) to if (!ptr) for a better format and fix the warning. 
Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index e7c160b9d0fe..22bb35278691 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1502,7 +1502,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -1515,7 +1515,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -1528,7 +1528,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { From 9b7e697839c2745c10f63fe5fd54c9e328fa2e3b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 7 Aug 2024 17:28:02 +0530 Subject: [PATCH 303/447] drm/amdgpu: fix ptr check warning in gfx12 ip_dump Change condition, if (ptr == NULL) to if (!ptr) for a better format and fix the warning. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 0a71e216a7f5..df72fa125fd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1281,7 +1281,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -1294,7 +1294,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -1307,7 +1307,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { From 70f83e7706e57200edb8ffa36883b2f43d214142 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 7 Aug 2024 15:33:41 -0400 Subject: [PATCH 304/447] drm/amdkfd: fix partition query when setting up recommended sdma engines When users dynamically set the partition mode through sysfs writes, this can lead to a double lock situation where the KFD is trying to take the partition lock when updating the recommended SDMA engines. Have the KFD reference its saved socket device number count instead. 
Also ensure we have enough SDMA xGMI engines to report the recommended engines in the first place. Fixes: e06b71b2313a ("drm/amdkfd: allow users to target recommended SDMA engines") Signed-off-by: Jonathan Kim Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 40771f8752cb..27d452e50ca9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1286,9 +1286,8 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, struct amdgpu_device *adev = gpu->adev; int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && - adev->aid_mask && num_xgmi_nodes && - (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == - AMDGPU_SPX_PARTITION_MODE) && + adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && + kfd_get_num_xgmi_sdma_engines(gpu) >= 14 && (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); if (support_rec_eng) { From a1fc9f584c4aaf8bc1ebfa459fc57a3f26a290d8 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 2 Aug 2024 11:28:45 -0400 Subject: [PATCH 305/447] drm/amdkfd: Handle queue destroy buffer access race Add a helper function, kfd_queue_unreference_buffers, that only drops the queue buffer refcounts, separating that step from releasing the queue buffers. Holding dqm_lock while taking the vm lock would create a circular locking dependency, so kfd_ioctl_destroy_queue should take the vm lock and unreference the queue buffers first, without releasing them, so that a failure to take the vm lock can still be handled as an error. It then holds dqm_lock to remove the queue from the queue list and release the queue buffers. The process restore worker holds dqm_lock while restoring queues, so it will always find the queue with valid queue buffers.
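The resulting ordering in the destroy path (see the pqm_destroy_queue() hunk below) can be sketched roughly as follows; error handling and the dqm internals are elided, and the names are simplified rather than exact:

	/* 1. VM lock taken inside the helper (amdgpu_bo_reserve on the root BO);
	 *    a reservation failure is handled before dqm_lock is ever taken
	 */
	retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
	if (retval)
		goto err_destroy_queue;

	/* 2. Queue removal takes dqm_lock internally; the VM lock is not held here */
	retval = destroy_queue(pqn);

	/* 3. Buffers released last: plain amdgpu_bo_unref, no VM lock required */
	kfd_queue_release_buffers(pdd, &pqn->q->properties);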
v2 (Felix): - renamed kfd_queue_unreference_buffer(s) to kfd_queue_unref_bo_va(s) - added two FIXME comments for follow up Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +- .../amd/amdkfd/kfd_process_queue_manager.c | 8 ++- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 66 ++++++++++++------- 4 files changed, 53 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 0622ebd7e8ef..00350eccd571 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -400,6 +400,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, return 0; err_create_queue: + kfd_queue_unref_bo_vas(pdd, &q_properties); kfd_queue_release_buffers(pdd, &q_properties); err_acquire_queue_buf: err_sdma_engine_id: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 057d20446c31..f7c12d4f0abb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1298,9 +1298,12 @@ void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, u64 expected_size); -void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo); +void kfd_queue_buffer_put(struct amdgpu_bo **bo); int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo); +int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, + struct queue_properties *properties); void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index f732ee35b531..20ea745729ee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -217,6 +217,7 @@ void pqm_uninit(struct process_queue_manager *pqm) list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { if (pqn->q) { pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); } @@ -512,7 +513,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q) { - retval = kfd_queue_release_buffers(pdd, &pqn->q->properties); + retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); if (retval) goto err_destroy_queue; @@ -526,7 +527,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } - + kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); } @@ -579,7 +580,8 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, return -EFAULT; } - kfd_queue_buffer_put(vm, &pqn->q->properties.ring_bo); + kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo); + kfd_queue_buffer_put(&pqn->q->properties.ring_bo); 
amdgpu_bo_unreserve(vm->root.bo); pqn->q->properties.ring_bo = p->ring_bo; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index e0a073ae4a49..ad29634f8b44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -224,16 +224,9 @@ out_err: return -EINVAL; } -void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo) +/* FIXME: remove this function, just call amdgpu_bo_unref directly */ +void kfd_queue_buffer_put(struct amdgpu_bo **bo) { - if (*bo) { - struct amdgpu_bo_va *bo_va; - - bo_va = amdgpu_vm_bo_find(vm, *bo); - if (bo_va) - bo_va->queue_refcount--; - } - amdgpu_bo_unref(bo); } @@ -327,6 +320,10 @@ out_unreserve: out_err_unreserve: amdgpu_bo_unreserve(vm->root.bo); out_err_release: + /* FIXME: make a _locked version of this that can be called before + * dropping the VM reservation. + */ + kfd_queue_unref_bo_vas(pdd, properties); kfd_queue_release_buffers(pdd, properties); return err; } @@ -334,22 +331,13 @@ out_err_release: int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { struct kfd_topology_device *topo_dev; - struct amdgpu_vm *vm; u32 total_cwsr_size; - int err; - vm = drm_priv_to_vm(pdd->drm_priv); - err = amdgpu_bo_reserve(vm->root.bo, false); - if (err) - return err; - - kfd_queue_buffer_put(vm, &properties->wptr_bo); - kfd_queue_buffer_put(vm, &properties->rptr_bo); - kfd_queue_buffer_put(vm, &properties->ring_bo); - kfd_queue_buffer_put(vm, &properties->eop_buf_bo); - kfd_queue_buffer_put(vm, &properties->cwsr_bo); - - amdgpu_bo_unreserve(vm->root.bo); + kfd_queue_buffer_put(&properties->wptr_bo); + kfd_queue_buffer_put(&properties->rptr_bo); + kfd_queue_buffer_put(&properties->ring_bo); + kfd_queue_buffer_put(&properties->eop_buf_bo); + kfd_queue_buffer_put(&properties->cwsr_bo); topo_dev = kfd_topology_device_by_id(pdd->dev->id); if (!topo_dev) @@ -362,6 +350,38 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope return 0; } +void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo) +{ + if (*bo) { + struct amdgpu_bo_va *bo_va; + + bo_va = amdgpu_vm_bo_find(vm, *bo); + if (bo_va && bo_va->queue_refcount) + bo_va->queue_refcount--; + } +} + +int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, + struct queue_properties *properties) +{ + struct amdgpu_vm *vm; + int err; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + kfd_queue_unref_bo_va(vm, &properties->wptr_bo); + kfd_queue_unref_bo_va(vm, &properties->rptr_bo); + kfd_queue_unref_bo_va(vm, &properties->ring_bo); + kfd_queue_unref_bo_va(vm, &properties->eop_buf_bo); + kfd_queue_unref_bo_va(vm, &properties->cwsr_bo); + + amdgpu_bo_unreserve(vm->root.bo); + return 0; +} + #define SGPR_SIZE_PER_CU 0x4000 #define LDS_SIZE_PER_CU 0x10000 #define HWREG_SIZE_PER_CU 0x1000 From 3bb41f2e9134906e0814766c07dfbdd9de0bfaf5 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 30 Jul 2024 17:32:35 -0400 Subject: [PATCH 306/447] drm/amd/display: Fix print format specifiers in DC_LOG_IPS [Why] %d specifier is used for printing unsigned values. It can result in negative values in logs for unsigned variables. [How] Replace %d with %u for unsigned. 
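For illustration (this snippet is not part of the patch), the failure mode being fixed is signed wraparound in the log output:

	unsigned int commit = 0xffffffff;

	pr_debug("ips2_commit=%d\n", commit);	/* logs -1 */
	pr_debug("ips2_commit=%u\n", commit);	/* logs 4294967295 */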
Reviewed-by: Nicholas Kazlauskas Signed-off-by: Roman Li Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 41270fade5f2..b1265124608b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1285,7 +1285,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) union dmub_shared_state_ips_driver_signals new_signals; DC_LOG_IPS( - "%s wait idle (ips1_commit=%d ips2_commit=%d)", + "%s wait idle (ips1_commit=%u ips2_commit=%u)", __func__, ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1331,7 +1331,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) } DC_LOG_IPS( - "%s send allow_idle=%d (ips1_commit=%d ips2_commit=%d)", + "%s send allow_idle=%d (ips1_commit=%u ips2_commit=%u)", __func__, allow_idle, ips_fw->signals.bits.ips1_commit, @@ -1374,7 +1374,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv->driver_signals = ips_driver->signals; DC_LOG_IPS( - "%s (allow ips1=%d ips2=%d) (commit ips1=%d ips2=%d) (count rcg=%d ips1=%d ips2=%d)", + "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u) (count rcg=%u ips1=%u ips2=%u)", __func__, ips_driver->signals.bits.allow_ips1, ips_driver->signals.bits.allow_ips2, @@ -1393,7 +1393,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) (!dc->debug.optimize_ips_handshake || ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle)) { DC_LOG_IPS( - "wait IPS2 eval (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 eval (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1402,7 +1402,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (ips_fw->signals.bits.ips2_commit) { DC_LOG_IPS( - "exit IPS2 #1 (ips1_commit=%d ips2_commit=%d)", + "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1410,7 +1410,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); DC_LOG_IPS( - "wait IPS2 entry delay (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 entry delay (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1418,14 +1418,14 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(dc->debug.ips2_entry_delay_us); DC_LOG_IPS( - "exit IPS2 #2 (ips1_commit=%d ips2_commit=%d)", + "exit IPS2 #2 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); DC_LOG_IPS( - "wait IPS2 commit clear (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 commit clear (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1433,7 +1433,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(1); DC_LOG_IPS( - "wait hw_pwr_up (ips1_commit=%d ips2_commit=%d)", + "wait hw_pwr_up (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1441,7 +1441,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ASSERT(0); DC_LOG_IPS( - "resync 
inbox1 (ips1_commit=%d ips2_commit=%d)", + "resync inbox1 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1452,7 +1452,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv_notify_idle(dc, false); if (prev_driver_signals.bits.allow_ips1) { DC_LOG_IPS( - "wait for IPS1 commit clear (ips1_commit=%d ips2_commit=%d)", + "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1460,7 +1460,7 @@ udelay(1); DC_LOG_IPS( - "wait for IPS1 commit clear done (ips1_commit=%d ips2_commit=%d)", + "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); } @@ -1469,7 +1469,7 @@ if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true)) ASSERT(0); - DC_LOG_IPS("%s exit (count rcg=%d ips1=%d ips2=%d)", + DC_LOG_IPS("%s exit (count rcg=%u ips1=%u ips2=%u)", __func__, rcg_exit_count, ips1_exit_count, From 027347d17a16562f3be272833243b835c21aa2a5 Mon Sep 17 00:00:00 2001 From: Robin Chen Date: Thu, 18 Jul 2024 16:48:26 +0800 Subject: [PATCH 307/447] drm/amd/display: Optimize vstartup position for AS-SDP [Why] In the current design, the vstartup position is adjusted to the vblank start position when AS-SDP is enabled. However, when the vblank length is too long, it may overrun the vstartup boundary. [How] Adjust the vstartup position to 1 line before the vsync position. Reviewed-by: Anthony Koo Signed-off-by: Robin Chen Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index efe337ebf7c8..e9fea9c2162e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -1132,7 +1132,8 @@ static void dcn20_adjust_freesync_v_startup( patched_crtc_timing.v_addressable - patched_crtc_timing.v_border_top; - newVstartup = asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); + /* The newVStartUp is 1 line before vsync point */ + newVstartup = asic_blank_end + 1; *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); } From cd9e9e0852d501f169aa3bb34e4b413d2eb48c37 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 2 Aug 2024 12:35:13 +0530 Subject: [PATCH 308/447] drm/amd/display: Add null check for 'afb' in amdgpu_dm_plane_handle_cursor_update (v2) This commit adds a null check for the 'afb' variable in the amdgpu_dm_plane_handle_cursor_update function. Previously, 'afb' was assumed to be potentially null, but was used later in the code without a null check. This could potentially lead to a null pointer dereference. Changes since v1: - Moved the null check for 'afb' to the line where 'afb' is used.
(Alex) Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:1298 amdgpu_dm_plane_handle_cursor_update() error: we previously assumed 'afb' could be null (see line 1252) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Co-developed-by: Alex Hung Signed-off-by: Alex Hung Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index a83bd0331c3b..1ff469ef51af 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1372,7 +1372,8 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane, adev->dm.dc->caps.color.dpp.gamma_corr) attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1; - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + if (afb) + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; if (crtc_state->stream) { mutex_lock(&adev->dm.dc_lock); From cc2991203c9d4e23051dbe5bcb1fc700fea26992 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Mon, 29 Jul 2024 18:17:55 -0400 Subject: [PATCH 309/447] drm/amd/display: Reduce redundant minimal transitions due to SubVP [WHY] Stream IDs associated with phantom pipes can change often as they are reconstructed on full updates; however, they can remain identical depending on the required update. [HOW] In the case where phantom streams and pipe topologies remain the same between updates, mark the transition as seamless. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index c3bbbfd1be94..d75a811c90d9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1719,6 +1719,28 @@ void dcn32_blank_phantom(struct dc *dc, hws->funcs.wait_for_blank_complete(opp); } +/* phantom stream id's can change often, but can be identical between contexts. +* This function checks for the condition the streams are identical to avoid +* redundant pipe transitions.
+*/ +static bool is_subvp_phantom_topology_transition_seamless( + const struct dc_state *cur_ctx, + const struct dc_state *new_ctx, + const struct pipe_ctx *cur_pipe, + const struct pipe_ctx *new_pipe) +{ + enum mall_stream_type cur_pipe_type = dc_state_get_pipe_subvp_type(cur_ctx, cur_pipe); + enum mall_stream_type new_pipe_type = dc_state_get_pipe_subvp_type(new_ctx, new_pipe); + + const struct dc_stream_state *cur_paired_stream = dc_state_get_paired_subvp_stream(cur_ctx, cur_pipe->stream); + const struct dc_stream_state *new_paired_stream = dc_state_get_paired_subvp_stream(new_ctx, new_pipe->stream); + + return cur_pipe_type == SUBVP_PHANTOM && + cur_pipe_type == new_pipe_type && + cur_paired_stream && new_paired_stream && + cur_paired_stream->stream_id == new_paired_stream->stream_id; +} + bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, const struct dc_state *cur_ctx, const struct dc_state *new_ctx) @@ -1737,7 +1759,8 @@ bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, continue; else if (resource_is_pipe_type(cur_pipe, OTG_MASTER)) { if (resource_is_pipe_type(new_pipe, OTG_MASTER)) - if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id) + if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id || + is_subvp_phantom_topology_transition_seamless(cur_ctx, new_ctx, cur_pipe, new_pipe)) /* OTG master with the same stream is seamless */ continue; } else if (resource_is_pipe_type(cur_pipe, OPP_HEAD)) { From 67ea53a4bd9d03a85eecd99875a2a794c886f788 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 1 Aug 2024 15:35:51 -0400 Subject: [PATCH 310/447] drm/amd/display: Disable DCN401 UCLK P-State support on full updates [WHY&HOW] It is not guaranteed even for HW exclusive P-State methods (like VActive) that P-state will be supported properly until optimize bandwidth is called, so unconditionally disable it on full updates. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index b604c8886ef4..ac0a21ac318f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1401,8 +1401,8 @@ void dcn401_prepare_bandwidth(struct dc *dc, bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; unsigned int compbuf_size_kb = 0; - /* Any transition into or out of a FAMS config should disable MCLK switching first to avoid hangs */ - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + /* Any transition into P-State support should disable MCLK switching first to avoid hangs */ + if (p_state_change_support) { dc->optimized_required = true; context->bw_ctx.bw.dcn.clk.p_state_change_support = false; } @@ -1441,7 +1441,7 @@ void dcn401_prepare_bandwidth(struct dc *dc, dcn401_fams2_global_control_lock(dc, context, false); } - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + if (p_state_change_support != context->bw_ctx.bw.dcn.clk.p_state_change_support) { /* After disabling P-State, restore the original value to ensure we get the correct P-State * on the next optimize. 
*/ context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support; From 7a1eb66809390d06b744aa13123b925b64b54c4c Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 1 Aug 2024 15:38:34 -0400 Subject: [PATCH 311/447] drm/amd/display: Perform outstanding programming on full updates [WHY] In certain scenarios DC can internally trigger back to back full updates which will miss some required programming that is normally deferred until post update via optimize_bandwidth. [HOW] In back to back update scenarios, wait for pending updates to complete and perform any strictly required outstanding programming. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 121 +---------------- .../drm/amd/display/dc/core/dc_hw_sequencer.c | 123 ++++++++++++++++++ .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 10 ++ .../amd/display/dc/hwss/dcn32/dcn32_hwseq.h | 4 + .../amd/display/dc/hwss/dcn32/dcn32_init.c | 1 + .../amd/display/dc/hwss/dcn35/dcn35_init.c | 1 + .../amd/display/dc/hwss/dcn351/dcn351_init.c | 1 + .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 18 ++- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 1 + .../amd/display/dc/hwss/dcn401/dcn401_init.c | 1 + .../drm/amd/display/dc/hwss/hw_sequencer.h | 17 +++ 11 files changed, 176 insertions(+), 122 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3ba2acfdae2a..c8dabb081b3d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1352,80 +1352,6 @@ static void disable_vbios_mode_if_required( } } -/** - * wait_for_blank_complete - wait for all active OPPs to finish pending blank - * pattern updates - * - * @dc: [in] dc reference - * @context: [in] hardware context in use - */ -static void wait_for_blank_complete(struct dc *dc, - struct dc_state *context) -{ - struct pipe_ctx *opp_head; - struct dce_hwseq *hws = dc->hwseq; - int i; - - if (!hws->funcs.wait_for_blank_complete) - return; - - for (i = 0; i < MAX_PIPES; i++) { - opp_head = &context->res_ctx.pipe_ctx[i]; - - if (!resource_is_pipe_type(opp_head, OPP_HEAD) || - dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) - continue; - - hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); - } -} - -static void wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *otg_master; - struct timing_generator *tg; - int i; - - for (i = 0; i < MAX_PIPES; i++) { - otg_master = &context->res_ctx.pipe_ctx[i]; - if (!resource_is_pipe_type(otg_master, OTG_MASTER) || - dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) - continue; - tg = otg_master->stream_res.tg; - if (tg->funcs->wait_odm_doublebuffer_pending_clear) - tg->funcs->wait_odm_doublebuffer_pending_clear(tg); - } - - /* ODM update may require to reprogram blank pattern for each OPP */ - wait_for_blank_complete(dc, context); -} - -static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) -{ - int i; - PERF_TRACE(); - for (i = 0; i < MAX_PIPES; i++) { - int count = 0; - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) - continue; - - /* Timeout 100 ms */ - while (count < 100000) { - /* Must set to false to start with, due to OR in update function */ - pipe->plane_state->status.is_flip_pending = false; - 
dc->hwss.update_pending_status(pipe); - if (!pipe->plane_state->status.is_flip_pending) - break; - udelay(1); - count++; - } - ASSERT(!pipe->plane_state->status.is_flip_pending); - } - PERF_TRACE(); -} - /* Public functions */ struct dc *dc_create(const struct dc_init_data *init_params) @@ -2109,12 +2035,12 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c if (context->stream_count > get_seamless_boot_stream_count(context) || context->stream_count == 0) { /* Must wait for no flips to be pending before doing optimize bw */ - wait_for_no_pipes_pending(dc, context); + hwss_wait_for_no_pipes_pending(dc, context); /* * optimized dispclk depends on ODM setup. Need to wait for ODM * update pending complete before optimizing bandwidth. */ - wait_for_odm_update_pending_complete(dc, context); + hwss_wait_for_odm_update_pending_complete(dc, context); /* pplib is notified if disp_num changed */ dc->hwss.optimize_bandwidth(dc, context); /* Need to do otg sync again as otg could be out of sync due to otg @@ -3786,47 +3712,6 @@ static void commit_planes_for_stream_fast(struct dc *dc, top_pipe_to_program->stream->update_flags.raw = 0; } -static void wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) -{ -/* - * This function calls HWSS to wait for any potentially double buffered - * operations to complete. It should be invoked as a pre-amble prior - * to full update programming before asserting any HW locks. - */ - int pipe_idx; - int opp_inst; - int opp_count = dc->res_pool->res_cap->num_opp; - struct hubp *hubp; - int mpcc_inst; - const struct pipe_ctx *pipe_ctx; - - for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { - pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; - - if (!pipe_ctx->stream) - continue; - - if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear) - pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg); - - hubp = pipe_ctx->plane_res.hubp; - if (!hubp) - continue; - - mpcc_inst = hubp->inst; - // MPCC inst is equal to pipe index in practice - for (opp_inst = 0; opp_inst < opp_count; opp_inst++) { - if ((dc->res_pool->opps[opp_inst] != NULL) && - (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) { - dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); - dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false; - break; - } - } - } - wait_for_odm_update_pending_complete(dc, dc_context); -} - static void commit_planes_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, @@ -3850,7 +3735,7 @@ static void commit_planes_for_stream(struct dc *dc, dc_z10_restore(dc); if (update_type == UPDATE_TYPE_FULL) - wait_for_outstanding_hw_updates(dc, context); + hwss_process_outstanding_hw_updates(dc, dc->current_state); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 5f9b6e8ef428..9a569aac3c00 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -978,3 +978,126 @@ void get_surface_tile_visual_confirm_color( break; } } + +/** + * hwss_wait_for_blank_complete - wait for all active OPPs to finish pending blank + * pattern updates + * + * @dc: [in] dc reference + * @context: [in] hardware context in use + */ +void 
hwss_wait_for_all_blank_complete(struct dc *dc, + struct dc_state *context) +{ + struct pipe_ctx *opp_head; + struct dce_hwseq *hws = dc->hwseq; + int i; + + if (!hws->funcs.wait_for_blank_complete) + return; + + for (i = 0; i < MAX_PIPES; i++) { + opp_head = &context->res_ctx.pipe_ctx[i]; + + if (!resource_is_pipe_type(opp_head, OPP_HEAD) || + dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) + continue; + + hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); + } +} + +void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *otg_master; + struct timing_generator *tg; + int i; + + for (i = 0; i < MAX_PIPES; i++) { + otg_master = &context->res_ctx.pipe_ctx[i]; + if (!resource_is_pipe_type(otg_master, OTG_MASTER) || + dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) + continue; + tg = otg_master->stream_res.tg; + if (tg->funcs->wait_odm_doublebuffer_pending_clear) + tg->funcs->wait_odm_doublebuffer_pending_clear(tg); + } + + /* ODM update may require to reprogram blank pattern for each OPP */ + hwss_wait_for_all_blank_complete(dc, context); +} + +void hwss_wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) +{ + int i; + for (i = 0; i < MAX_PIPES; i++) { + int count = 0; + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) + continue; + + /* Timeout 100 ms */ + while (count < 100000) { + /* Must set to false to start with, due to OR in update function */ + pipe->plane_state->status.is_flip_pending = false; + dc->hwss.update_pending_status(pipe); + if (!pipe->plane_state->status.is_flip_pending) + break; + udelay(1); + count++; + } + ASSERT(!pipe->plane_state->status.is_flip_pending); + } +} + +void hwss_wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +{ +/* + * This function calls HWSS to wait for any potentially double buffered + * operations to complete. It should be invoked as a pre-amble prior + * to full update programming before asserting any HW locks. 
+ */ + int pipe_idx; + int opp_inst; + int opp_count = dc->res_pool->res_cap->num_opp; + struct hubp *hubp; + int mpcc_inst; + const struct pipe_ctx *pipe_ctx; + + for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { + pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; + + if (!pipe_ctx->stream) + continue; + + if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear) + pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg); + + hubp = pipe_ctx->plane_res.hubp; + if (!hubp) + continue; + + mpcc_inst = hubp->inst; + // MPCC inst is equal to pipe index in practice + for (opp_inst = 0; opp_inst < opp_count; opp_inst++) { + if ((dc->res_pool->opps[opp_inst] != NULL) && + (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) { + dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); + dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false; + break; + } + } + } + hwss_wait_for_odm_update_pending_complete(dc, dc_context); +} + +void hwss_process_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +{ + /* wait for outstanding updates */ + hwss_wait_for_outstanding_hw_updates(dc, dc_context); + + /* perform outstanding post update programming */ + if (dc->hwss.program_outstanding_updates) + dc->hwss.program_outstanding_updates(dc, dc_context); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index d75a811c90d9..6a40b2e2beb5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1846,3 +1846,13 @@ void dcn32_interdependent_update_lock(struct dc *dc, dc->hwss.pipe_control_lock(dc, pipe, false); } } + +void dcn32_program_outstanding_updates(struct dc *dc, + struct dc_state *context) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* update compbuf if required */ + if (hubbub->funcs->program_compbuf_size) + hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h index b1563e2c0491..cac4a08b92a4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h @@ -133,4 +133,8 @@ void dcn32_prepare_bandwidth(struct dc *dc, void dcn32_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); + +void dcn32_program_outstanding_updates(struct dc *dc, + struct dc_state *context); + #endif /* __DC_HWSS_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 58bed01fc20e..3422b564ae98 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -120,6 +120,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .blank_phantom = dcn32_blank_phantom, .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn32_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 55dc5799e725..2bbf1fef94fd 100644 --- 
a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -123,6 +123,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn35_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index b1b2a58684e7..5da3069fc1ab 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -122,6 +122,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn351_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index ac0a21ac318f..0b743669f23b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1399,7 +1399,7 @@ void dcn401_prepare_bandwidth(struct dc *dc, { struct hubbub *hubbub = dc->res_pool->hubbub; bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; - unsigned int compbuf_size_kb = 0; + unsigned int compbuf_size = 0; /* Any transition into P-State support should disable MCLK switching first to avoid hangs */ if (p_state_change_support) { @@ -1429,10 +1429,10 @@ void dcn401_prepare_bandwidth(struct dc *dc, /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { - compbuf_size_kb = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); + compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; + dc->wm_optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); - hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size_kb, false); + hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size, false); } if (dc->debug.fams2_config.bits.enable) { @@ -1760,3 +1760,13 @@ void dcn401_interdependent_update_lock(struct dc *dc, } } } + +void dcn401_program_outstanding_updates(struct dc *dc, + struct dc_state *context) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* update compbuf if required */ + if (hubbub->funcs->program_compbuf_segments) + hubbub->funcs->program_compbuf_segments(hubbub, context->bw_ctx.bw.dcn.arb_regs.compbuf_size, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 3ecb1ebffcee..a27e62081685 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -83,4 +83,5 @@ void dcn401_update_odm(struct dc *dc, struct dc_state *context, void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy); void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void 
dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); +void dcn401_program_outstanding_updates(struct dc *dc, struct dc_state *context); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index b5f63675afcb..a2ca07235c83 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -99,6 +99,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .fams2_global_control_lock = dcn401_fams2_global_control_lock, .fams2_update_config = dcn401_fams2_update_config, .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, + .program_outstanding_updates = dcn401_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn401_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index f50b2955ce8c..326854489802 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -459,6 +459,8 @@ struct hw_sequencer_funcs { bool enable); void (*fams2_global_control_lock_fast)(union block_sequence_params *params); void (*set_long_vtotal)(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); + void (*program_outstanding_updates)(struct dc *dc, + struct dc_state *context); }; void color_space_to_black_color( @@ -519,6 +521,21 @@ void hwss_build_fast_sequence(struct dc *dc, struct dc_stream_status *stream_status, struct dc_state *context); +void hwss_wait_for_all_blank_complete(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_odm_update_pending_complete(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_no_pipes_pending(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_outstanding_hw_updates(struct dc *dc, + struct dc_state *dc_context); + +void hwss_process_outstanding_hw_updates(struct dc *dc, + struct dc_state *dc_context); + void hwss_send_dmcub_cmd(union block_sequence_params *params); void hwss_program_manual_trigger(union block_sequence_params *params); From 24f483ba49c9acc1139a0b4cbfd1b122fbe1ed7f Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 2 Aug 2024 13:50:10 -0400 Subject: [PATCH 312/447] drm/amd/display: Set max VTotal cap for dcn401 [WHY&HOW] Set max VTotal cap for dcn401 because VTotal register is only 16 bits wide on dcn401. 
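For illustration, (1 << 15) - 1 evaluates to 32767 (0x7FFF). A minimal sketch of how a consumer might validate a timing against this cap follows; timing_v_total_fits() is a hypothetical helper for this example, not a function from the patch:

	/* Hypothetical check against dc->caps.max_v_total; a cap of 0 is
	 * assumed to mean "no limit", by analogy with other caps fields.
	 */
	static bool timing_v_total_fits(const struct dc *dc, uint32_t v_total)
	{
		return !dc->caps.max_v_total || v_total <= dc->caps.max_v_total;
	}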
Reviewed-by: Chris Park Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index ec676d269d33..02e63b95c36d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1822,6 +1822,7 @@ static bool dcn401_resource_construct( dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; + dc->caps.max_v_total = (1 << 15) - 1; if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) dc->caps.dcc_plane_width_limit = 7680; From 5b7813bc6218100d7bbc6d3a1b582bfd64034bf4 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 1 Aug 2024 18:18:20 -0400 Subject: [PATCH 313/447] drm/amd/display: remove redundant msg to pmfw at boot/resume [why & how] Remove the redundant message to the PMFW at boot/resume, since the BIOS has already powered up DCN. Reviewed-by: Chris Park Signed-off-by: Charlene Liu Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index e2f5c4d34a55..217344ccf644 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1845,6 +1845,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) bool can_apply_edp_fast_boot = false; bool can_apply_seamless_boot = false; bool keep_edp_vdd_on = false; + struct dc_bios *dcb = dc->ctx->dc_bios; DC_LOGGER_INIT(); @@ -1921,13 +1922,15 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) hws->funcs.edp_backlight_control(edp_link_with_sink, false); } /*resume from S3, no vbios posting, no need to power down again*/ - clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); + if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) + clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); power_down_all_hw_blocks(dc); disable_vga_and_power_gate_all_controllers(dc); if (edp_link_with_sink && !keep_edp_vdd_on) dc->hwss.edp_power_control(edp_link_with_sink, false); - clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); + if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) + clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); } bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1); } From d6ed53712f583423db61fbb802606759e023bf7b Mon Sep 17 00:00:00 2001 From: Loan Chen Date: Fri, 2 Aug 2024 13:57:40 +0800 Subject: [PATCH 314/447] drm/amd/display: Enable otg synchronization logic for DCN321 [Why] Tiled display cannot synchronize properly after S3. The fix from commit 5f0c74915815 ("drm/amd/display: Fix for otg synchronization logic") is not enabled in DCN321, which causes the OTG to be excluded from synchronization. [How] Enable otg synchronization logic in dcn321.
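For context, a rough sketch of what the flag gates, based on how the pipe-context sync path introduced by commit 5f0c74915815 is consumed elsewhere in DC; illustrative only, not part of this patch:

	/* With use_pipe_ctx_sync_logic set, OTG sync state is validated per
	 * pipe context instead of via the legacy grouping logic (sketch).
	 */
	if (dc->config.use_pipe_ctx_sync_logic)
		check_syncd_pipes_for_disabled_master_pipe(dc, context, i);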
Fixes: 5f0c74915815 ("drm/amd/display: Fix for otg synchronization logic") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Alvin Lee Signed-off-by: Loan Chen Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index a414ed60a724..827a94f84f10 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; dc->config.disable_hbr_audio_dp2 = true; From 12dbb3ed212fc7655fce421542a5add637f8af7a Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 29 Jul 2024 10:23:03 -0400 Subject: [PATCH 315/447] drm/amd/display: Fix MST BW calculation Regression [Why & How] Revert commit 8b2cb32cf0c6 ("drm/amd/display: FEC overhead should be checked once for mst slot nums") because it causes a BW calculation regression. Cc: mario.limonciello@amd.com Cc: alexander.deucher@amd.com Reported-by: jirislaby@kernel.org Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3495 Closes: https://bugzilla.suse.com/show_bug.cgi?id=1228093 Reviewed-by: Wayne Lin Signed-off-by: Fangzhi Zuo Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 33 ++++++++++++++----- .../display/amdgpu_dm/amdgpu_dm_mst_types.h | 3 ++ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 915eb2c08ece..2e9f6da1acdc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { }; #if defined(CONFIG_DRM_AMD_DC_FP) -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn;
bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn( - params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index fa84d34b7373..600d6e221011 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 
1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + enum mst_msg_ready_type { NONE_MSG_RDY_EVENT = 0, DOWN_REP_MSG_RDY_EVENT = 1, From 5ac2557d75f94777076885b5119d3b6755e6761d Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 2 Aug 2024 12:31:42 -0600 Subject: [PATCH 316/447] drm/amd/display: Remove unused field [why & how] Remove force_backlight_start_level since it is never used. Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7873daf72608..91a351f8711e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -306,8 +306,6 @@ struct dc_bug_wa { uint8_t dcfclk_ds: 1; } clock_update_disable_mask; bool skip_psr_ips_crtc_disable; - //Customer Specific WAs - uint32_t force_backlight_start_level; }; struct dc_dcc_surface_param { struct dc_size surface_size; From 17b6527dcfb3249401e037734ed3fd0f4752572f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 29 Jul 2024 11:59:25 -0600 Subject: [PATCH 317/447] drm/amd/display: Improve FAM control for DCN401 [why & how] When the commit 5324e2b205a2 ("drm/amd/display: Add driver support for future FAMS versions") was introduced, it missed some of the FAMS2 code. This commit introduces the code that controls FAMS enable and disable. Fixes: 5324e2b205a2 ("drm/amd/display: Add driver support for future FAMS versions") Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 6a40b2e2beb5..a36e11606f90 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -985,8 +985,19 @@ void dcn32_init_hw(struct dc *dc) dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; - if (dc->ctx->dmub_srv->dmub->fw_version < + /* for DCN401 testing only */ + dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; + if (dc->caps.dmub_caps.fams_ver == 2) { + /* FAMS2 is enabled */ + dc->debug.fams2_config.bits.enable &= true; + } else if (dc->ctx->dmub_srv->dmub->fw_version < DMUB_FW_VERSION(7, 0, 35)) { + /* FAMS2 is disabled */ + dc->debug.fams2_config.bits.enable = false; + if (dc->debug.using_dml2 && dc->res_pool->funcs->update_bw_bounding_box) { + /* update bounding box if FAMS2 disabled */ + dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + } dc->debug.force_disable_subvp = true; dc->debug.disable_fpo_optimizations = true; } From 1fd2cf090096af8a25bf85564341cfc21cec659d Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Tue, 31 Jan 2023 15:05:46 -0100 Subject: [PATCH 318/447] drm/amd/display: fix cursor offset on rotation 180 [why & how] Cursor gets clipped off in the middle of the screen with hw rotation 180. Fix a miscalculation of cursor offset when it's placed near the edges in the pipe split case.
Cursor bugs with hw rotation were reported on AMD issue tracker: https://gitlab.freedesktop.org/drm/amd/-/issues/2247 The issue on rotation 270 was fixed by: https://lore.kernel.org/amd-gfx/20221118125935.4013669-22-Brian.Chang@amd.com/ which partially addressed rotation 180 too. So, this patch adds the final bits for rotation 180. Reported-by: Xaver Hugl Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2247 Reviewed-by: Harry Wentland Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") Signed-off-by: Melissa Wen Signed-off-by: Hamza Mahfooz Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index a7b5b25e3f34..802902f54d09 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3594,7 +3594,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { From 8f9b23abbae5ffcd64856facd26a86b67195bc2f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 1 Aug 2024 16:16:35 -0600 Subject: [PATCH 319/447] drm/amd/display: Adjust cursor position [why & how] When the commit 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") was introduced, it used the wrong calculation for the position copy for X. This commit uses the correct calculation for that based on the original patch. Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") Cc: Mario Limonciello Cc: Alex Deucher Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 802902f54d09..01dffed4d30b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3687,7 +3687,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; + pos_cpy.x = temp_x + viewport_width; } } } else { From 267c5876c977d5f2d5a89f377d74adf42b5c38a5 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 2 Aug 2024 12:33:20 -0600 Subject: [PATCH 320/447] drm/amd/display: Remove unnecessary call to REG_SEQ_SUBMIT|WAIT_DONE [why & how] Remove the unnecessary calls to REG_SEQ_SUBMIT and REG_SEQ_WAIT_DONE, since those macros are no longer needed in the dpp1 set degamma path. Those are part of an old implementation.
Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c index f2a2d53e9689..f8f6019d8304 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c @@ -684,9 +684,6 @@ void dpp1_set_degamma( BREAK_TO_DEBUGGER(); break; } - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); } void dpp1_degamma_ram_select( From bbec7cea6fa4a0463d4766ed0e6bb347773d3949 Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Mon, 5 Aug 2024 11:00:14 -0400 Subject: [PATCH 321/447] drm/amd/display: Promote DAL to 3.2.296 This version brings along the following fixes: - Fix some cursor issues - Fix print format specifiers in DC_LOG_IPS - Fix minor coding errors in dml21 phase 5 - Fix MST BW calculation Regression - Improve FAM control for DCN401 - Add null pointer checks for some code - Refactor 3DLUT for non-DMA - Optimize vstartup position for AS-SDP - Update to using new dccg callbacks - Enable otg synchronization logic for DCN321 - Disable DCN401 UCLK P-State support on full updates Acked-by: Wayne Lin Signed-off-by: Martin Leung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 91a351f8711e..6b036417a73a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.295" +#define DC_VER "3.2.296" #define MAX_SURFACES 3 #define MAX_PLANES 6 From ef6c2cb349c708676b7820c36a5beb75868ad544 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Wed, 7 Aug 2024 17:32:27 +0800 Subject: [PATCH 322/447] drm/amd/sriov: extend NV_MAILBOX_POLL_MSG_TIMEDOUT on MI300/MI308 On UBB products, when doing mode1 reset, one GPU needs to wait for all 8 GPUs to finish mode1 reset before doing re-init. As observed, the GPU which triggered the reset sometimes needs to wait 15s for all GPUs to finish. If the message poll times out, the guest driver will send the reset message again, which may mess up the following reinit sequence on the other GPUs. So extend the timeout to cover the maximum time needed to recover.
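For reference, a simplified sketch of the poll pattern this constant bounds; the receive helper and the 10 ms step are assumptions for illustration, not the driver's exact code:

	/* Poll until the expected mailbox event arrives or the budget of
	 * NV_MAILBOX_POLL_MSG_TIMEDOUT (now 15000 ms) is exhausted. On an
	 * 8-GPU UBB the last peer can take ~15 s, so 6000 ms was too short.
	 */
	static int xgpu_nv_poll_msg_sketch(struct amdgpu_device *adev,
					   enum idh_event event)
	{
		int timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT; /* in ms */

		while (timeout > 0) {
			if (xgpu_nv_mailbox_rcv_msg(adev, event) == 0) /* assumed helper */
				return 0;
			msleep(10);
			timeout -= 10;
		}
		return -ETIME;
	}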
Signed-off-by: Victor Zhao Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index caf616a2c8a6..1d099ffb3a5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -25,7 +25,7 @@ #define __MXGPU_NV_H__ #define NV_MAILBOX_POLL_ACK_TIMEDOUT 500 -#define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000 +#define NV_MAILBOX_POLL_MSG_TIMEDOUT 15000 #define NV_MAILBOX_POLL_FLR_TIMEDOUT 10000 #define NV_MAILBOX_POLL_MSG_REP_MAX 11 From 61cffacb3a1c590b15c0e9ff987de02d293e0dd8 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 8 Aug 2024 12:19:22 +0800 Subject: [PATCH 323/447] drm/amd/amdgpu: add HDP_SD support on gc 12.0.0/1 add HDP_SD support on gc 12.0.0/1 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc24.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index 7d641d0dadba..b0c3678cfb31 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | @@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | From 42b3a6f12aa56dcf789464d29fdf11f33bf0e793 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 7 Aug 2024 21:41:59 +0530 Subject: [PATCH 324/447] drm/amdkfd: Add node_id to location_id generically If there are multiple nodes per KFD device, add the node_id to location_id to differentiate them.
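To make the encoding concrete, a sketch of the resulting value, assuming the standard PCI_DEVID layout returned by pci_dev_id() (bus number in bits 15:8, devfn in bits 7:0); the bit-layout comment is illustrative:

	/* location_id after this change (illustrative):
	 *   bits 15:8  PCI bus number
	 *   bits  7:0  PCI devfn, with node_id OR'ed into the low bits
	 *              when the KFD device exposes more than one node
	 */
	dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
	if (gpu->kfd->num_nodes > 1)
		dev->node_props.location_id |= dev->gpu->node_id;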
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 27d452e50ca9..3871591c9aec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2078,7 +2078,7 @@ int kfd_topology_add_device(struct kfd_node *gpu) HSA_CAP_ASIC_REVISION_MASK); dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); - if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3)) + if (gpu->kfd->num_nodes > 1) dev->node_props.location_id |= dev->gpu->node_id; dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); From aa02486fb18cecbaca0c4fd393d1a03f1d4c3f9a Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Fri, 9 Aug 2024 17:20:26 -0400 Subject: [PATCH 325/447] drm/amdgpu: Update kmd_fw_shared for VCN5 The kmd_fw_shared struct layout changed in VCN5. Signed-off-by: Yinjie Yao Reviewed-by: Ruijing Dong Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1a5439abd1a0..c87d68d4be53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -461,8 +461,11 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; - uint8_t pad2[4]; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 From 35c7152202e111968b10140383f49da9159d2704 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 25 Jul 2024 09:51:56 -0400 Subject: [PATCH 326/447] Revert "drm/amdgpu: Extend KIQ reg polling wait for VF" KIQ timeouts are no longer seen. This reverts commit 3a19a8af64eaff8a8b230796741a1a8277205344. Signed-off-by: Victor Skvortsov Reviewed-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c54ddd3e68aa..f3980b40f2ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -347,9 +347,9 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; -#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ -#define MAX_KIQ_REG_WAIT (amdgpu_sriov_vf(adev) ? 50000 : 5000) /* in usecs, extend for VF */ -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 int amdgpu_device_ip_set_clockgating_state(void *dev, From f83cec3b3a7c968bbceb810b7acd1baf3fe8cd87 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 8 Aug 2024 13:22:34 -0400 Subject: [PATCH 327/447] drm/amdgpu: Disable dpm_enabled flag while VF is in reset VFs do not perform HW fini/suspend in FLR, so the dpm_enabled flag is incorrectly kept enabled. Add an interface to disable it in the virt_pre_reset call.
v2: Made implementation generic for all asics v3: Re-order conditionals so PP_MP1_STATE_FLR is only evaluated on VF Signed-off-by: Victor Skvortsov Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 6 +++++- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 29a4adee9286..a6b8d0ba4758 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5289,10 +5289,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (reset_context->reset_req_dev == adev) job = reset_context->job; - if (amdgpu_sriov_vf(adev)) { - /* stop the data exchange thread */ - amdgpu_virt_fini_data_exchange(adev); - } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_pre_reset(adev); amdgpu_fence_driver_isr_toggle(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index b287a82e6177..b6397d3229e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -33,6 +33,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_reset.h" +#include "amdgpu_dpm.h" #include "vi.h" #include "soc15.h" #include "nv.h" @@ -849,6 +850,13 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad return mode; } +void amdgpu_virt_pre_reset(struct amdgpu_device *adev) +{ + /* stop the data exchange thread */ + amdgpu_virt_fini_data_exchange(adev); + amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR); +} + void amdgpu_virt_post_reset(struct amdgpu_device *adev) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b42a8854dca0..b650a2032c42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -376,6 +376,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id); bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); +void amdgpu_virt_pre_reset(struct amdgpu_device *adev); void amdgpu_virt_post_reset(struct amdgpu_device *adev); bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 4b20e2274313..19a48d98830a 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -218,6 +218,7 @@ enum pp_mp1_state { PP_MP1_STATE_SHUTDOWN, PP_MP1_STATE_UNLOAD, PP_MP1_STATE_RESET, + PP_MP1_STATE_FLR, }; enum pp_df_cstate { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 8b7d6ed7e2ed..9dc82f4d7c93 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -168,7 +168,11 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, int ret = 0; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - if (pp_funcs && pp_funcs->set_mp1_state) { + if (mp1_state == PP_MP1_STATE_FLR) { + /* VF lost access to SMU */ + if (amdgpu_sriov_vf(adev)) + 
adev->pm.dpm_enabled = false; + } else if (pp_funcs && pp_funcs->set_mp1_state) { mutex_lock(&adev->pm.mutex); ret = pp_funcs->set_mp1_state( From e69c2dd7534f3fcabf7bb801db2a7ac71e7e5da6 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 13:19:59 +0800 Subject: [PATCH 328/447] drm/amdgpu/mes12: load unified mes fw on pipe0 and pipe1 Enable unified mes firmware to load on pipe0 and pipe1. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 27 +++---------------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index be2156bf0252..8f6feb887a56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1525,7 +1525,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_uni_mes.bin", ucode_prefix); } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index ac6209a0029c..1b6c8d9b6891 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -777,16 +777,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) if (enable) { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - soc21_grbm_select(adev, 3, pipe, 0, 0); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; @@ -800,8 +795,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) /* unhalt MES and activate pipe0 */ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); if (amdgpu_emu_mode) @@ -817,8 +811,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) data = REG_SET_FIELD(data, CP_MES_CNTL, MES_INVALIDATE_ICACHE, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 
1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); } @@ -833,10 +826,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - /* me=3, queue=0 */ soc21_grbm_select(adev, 3, pipe, 0, 0); @@ -1558,17 +1547,7 @@ static int mes_v12_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe, r; - if (adev->enable_uni_mes) { - r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (!r) - return 0; - - adev->enable_uni_mes = false; - } - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; r = amdgpu_mes_init_microcode(adev, pipe); if (r) return r; From 9c081c11c62112f1c30ff2426f755279a43fa1a1 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 12 Aug 2024 09:02:57 +0530 Subject: [PATCH 329/447] drm/amdgpu: Reorder to read EFI exported ROM first On EFI BIOSes, PCI ROM may be exported through EFI_PCI_IO_PROTOCOL and expansion ROM BARs may not be enabled. Choose to read from EFI exported ROM data before reading PCI Expansion ROM BAR. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 618e469e3622..42e64bce661e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -431,6 +431,11 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + if (amdgpu_read_bios(adev)) { dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); goto success; @@ -446,11 +451,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } - if (amdgpu_read_platform_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from platform\n"); - goto success; - } - dev_err(adev->dev, "Unable to locate a BIOS ROM\n"); return false; From a85c3db6b3b088f63b5b8c4fd4352f56f0e4ce3d Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 30 Jul 2024 12:52:20 -0400 Subject: [PATCH 330/447] drm/amdkfd: fallback to pipe reset on queue reset fail for gfx9 If queue reset fails, tell the CP to reset the pipe. Since queues multiplex context per pipe and we've issued a device wide preemption prior to the hang, we can assume the hung pipe only has one queue to reset on pipe reset. 
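Condensed, the new flow looks like the sketch below; it follows the shape of the diff that follows, with issue_queue_reset() standing in for the SPI_COMPUTE_QUEUE_RESET write:

	/* Try a per-queue reset first; escalate to a pipe reset only if
	 * the HQD is still active after the dequeue wait times out.
	 */
	issue_queue_reset(); /* hypothetical helper */
	if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
		goto unlock_out; /* queue reset worked */

	pipe_reset_data = REG_SET_FIELD(0, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1) << pipe_id;
	WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data);
	WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0); /* pulse reset */
	if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
		queue_addr = 0; /* both attempts failed */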
Signed-off-by: Jonathan Kim Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 32f28c12077b..c63528a4e894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1173,12 +1173,30 @@ unlock_out: return queue_addr; } +/* assume queue acquired */ +static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev, uint32_t inst, + unsigned int utimeout) +{ + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + while (true) { + uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); + + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + return 0; + + if (time_after(jiffies, end_jiffies)) + return -ETIME; + + usleep_range(500, 1000); + } +} + uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t inst, unsigned int utimeout) { - uint32_t low, high, temp; - unsigned long end_jiffies; + uint32_t low, high, pipe_reset_data = 0; uint64_t queue_addr = 0; kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); @@ -1202,25 +1220,23 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, /* assume previous dequeue request issued will take affect after reset */ WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1); - end_jiffies = (utimeout * HZ / 1000) + jiffies; - while (true) { - temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); + if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout)) + goto unlock_out; - if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) - break; + pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, pipe_id); - if (time_after(jiffies, end_jiffies)) { - queue_addr = 0; - break; - } + pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1); + pipe_reset_data = pipe_reset_data << pipe_id; - usleep_range(500, 1000); - } + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data); + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0); - pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n", - inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!"); + if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout)) + queue_addr = 0; unlock_out: + pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n", + inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!"); amdgpu_gfx_rlc_exit_safe_mode(adev, inst); kgd_gfx_v9_release_queue(adev, inst); From 9e823f307074c0f82b5f6044943b0086e3079bed Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 8 Aug 2024 13:40:23 -0400 Subject: [PATCH 331/447] drm/amdgpu: Block MMR_READ IOCTL in reset Register access from userspace should be blocked until reset is complete. 
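The guard pattern is worth spelling out: take the reset-domain semaphore for reading with a trylock, refuse the IOCTL with -ENOENT if a reset holds it, and funnel every validation failure through one exit that drops it. A minimal userspace analogue follows, using a pthread rwlock in place of adev->reset_domain->sem and a stubbed register read; it is a sketch of the locking shape, not the driver code.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

/* Analogue of adev->reset_domain->sem: a reset would hold this for
 * writing, so readers (the IOCTL path) must trylock it for reading. */
static pthread_rwlock_t reset_sem = PTHREAD_RWLOCK_INITIALIZER;

static int read_mmr_regs(unsigned int count)
{
	int ret = 0;

	if (pthread_rwlock_tryrdlock(&reset_sem))
		return -ENOENT;            /* reset in progress */

	if (count > 128) {                 /* validate only under the lock */
		ret = -EINVAL;
		goto out;
	}
	/* ... read the registers here ... */
out:
	pthread_rwlock_unlock(&reset_sem); /* single exit drops the sem */
	return ret;
}

int main(void)
{
	printf("read: %d\n", read_mmr_regs(4));   /* 0 */
	printf("read: %d\n", read_mmr_regs(200)); /* -EINVAL, lock released */
	return 0;
}

This is why the patch converts the early returns into goto out: every path acquired the semaphore, so every path must release it.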
Signed-off-by: Victor Skvortsov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 45 ++++++++++++++++++------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 66782be5917b..96af9ff1acb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,6 +43,7 @@ #include "amdgpu_gem.h" #include "amdgpu_display.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amd_pcie.h" void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) @@ -778,6 +779,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ? -EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { + int ret = 0; unsigned int n, alloc_size; uint32_t *regs; unsigned int se_num = (info->read_mmr_reg.instance >> @@ -787,24 +789,37 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) AMDGPU_INFO_MMR_SH_INDEX_SHIFT) & AMDGPU_INFO_MMR_SH_INDEX_MASK; + if (!down_read_trylock(&adev->reset_domain->sem)) + return -ENOENT; + /* set full masks if the userspace set all bits * in the bitfields */ - if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) + if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) { se_num = 0xffffffff; - else if (se_num >= AMDGPU_GFX_MAX_SE) - return -EINVAL; - if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) - sh_num = 0xffffffff; - else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) - return -EINVAL; + } else if (se_num >= AMDGPU_GFX_MAX_SE) { + ret = -EINVAL; + goto out; + } - if (info->read_mmr_reg.count > 128) - return -EINVAL; + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) { + sh_num = 0xffffffff; + } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) { + ret = -EINVAL; + goto out; + } + + if (info->read_mmr_reg.count > 128) { + ret = -EINVAL; + goto out; + } regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); - if (!regs) - return -ENOMEM; + if (!regs) { + ret = -ENOMEM; + goto out; + } + alloc_size = info->read_mmr_reg.count * sizeof(*regs); amdgpu_gfx_off_ctrl(adev, false); @@ -816,13 +831,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) info->read_mmr_reg.dword_offset + i); kfree(regs); amdgpu_gfx_off_ctrl(adev, true); - return -EFAULT; + ret = -EFAULT; + goto out; } } amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); - return n ? -EFAULT : 0; + ret = (n ? -EFAULT : 0); +out: + up_read(&adev->reset_domain->sem); + return ret; } case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device *dev_info; From b2dee0837a4be63e8d3e00550a9f057644f962c4 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 14:15:48 +0800 Subject: [PATCH 332/447] drm/amdgpu/mes12: add mes pipe switch support Add mes pipe switch to let caller choose pipe to submit packet. 
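The shape of the change is a classic "thread an index through": the submit helper takes a pipe number and resolves the per-pipe ring and lock once, so callers decide which pipe but not how it is serialized. A small self-contained sketch of that idea (the struct layout and names here are illustrative, not the driver's):

#include <pthread.h>
#include <stdio.h>

#define MAX_PIPES  2
#define SCHED_PIPE 0
#define KIQ_PIPE   1

struct mes {
	int             ring[MAX_PIPES];      /* stand-in for a ring buffer */
	pthread_mutex_t ring_lock[MAX_PIPES]; /* one lock per pipe */
};

/* Per-pipe state is selected once from the pipe index at the top. */
static int submit_pkt(struct mes *mes, int pipe, const void *pkt, int size)
{
	(void)pkt;
	pthread_mutex_lock(&mes->ring_lock[pipe]);
	mes->ring[pipe] += size;   /* "write the packet" */
	pthread_mutex_unlock(&mes->ring_lock[pipe]);
	printf("MES(%d) emitted %d dwords\n", pipe, size);
	return 0;
}

int main(void)
{
	struct mes mes = { .ring_lock = { PTHREAD_MUTEX_INITIALIZER,
					  PTHREAD_MUTEX_INITIALIZER } };
	int pkt = 0;

	submit_pkt(&mes, SCHED_PIPE, &pkt, 8);
	submit_pkt(&mes, KIQ_PIPE, &pkt, 8);
	return 0;
}

Tagging the debug and error messages with the pipe, as the patch does, matters once two pipes can emit concurrently.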
Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 60 +++++++++++++++----------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 1b6c8d9b6891..449f7cd8f490 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -142,13 +142,14 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) } static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) + int pipe, void *pkt, int size, + int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring[0]; + struct amdgpu_ring *ring = &mes->ring[pipe]; + spinlock_t *ring_lock = &mes->ring_lock[pipe]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; @@ -177,7 +178,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock[0], flags); + spin_lock_irqsave(ring_lock, flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; @@ -207,32 +208,33 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock[0], flags); + spin_unlock_irqrestore(ring_lock, flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, - misc_op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n", + pipe, op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", - x_pkt->header.opcode); + dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n", + pipe, x_pkt->header.opcode); r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", - op_str, misc_op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s\n", - op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n", + pipe, op_str); else - dev_err(adev->dev, "MES failed to respond to msg=%d\n", - x_pkt->header.opcode); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n", + pipe, x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -249,7 +251,7 @@ error_undo: amdgpu_ring_undo(ring); error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock[0], flags); + spin_unlock_irqrestore(ring_lock, flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -321,6 +323,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gds_size = input->queue_size; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -340,6 +343,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes 
*mes, mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -365,6 +369,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.map_legacy_kq = 1; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -398,6 +403,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, } return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -414,7 +420,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, return 0; } -static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; @@ -424,7 +430,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_status_pkt, sizeof(mes_status_pkt), offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } @@ -486,11 +492,12 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, } return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } -static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; @@ -501,12 +508,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; struct amdgpu_device *adev = mes->adev; @@ -566,7 +573,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } @@ -674,6 +681,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, } return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), offsetof(union MESAPI__RESET, api_status)); } @@ -1479,19 +1487,19 @@ static int mes_v12_0_hw_init(void *handle) if (r) goto failure; - r = mes_v12_0_set_hw_resources(&adev->mes); + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes); + mes_v12_0_set_hw_resources_1(&adev->mes, 
AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); /* Enable the MES to handle doorbell ring on unmapped queue */ mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - r = mes_v12_0_query_sched_status(&adev->mes); + r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); goto failure; From aa539da8aff07ab08def6490e8c9b441439e70ba Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 14:44:07 +0800 Subject: [PATCH 333/447] drm/amdgpu/mes12: adjust mes12 sw/hw init for multiple pipes Adjust mes12 sw/hw initialization for both pipe0 and pipe1 enablement. The two pipes are almost identical. Pipe0 behaves like schq and pipe1 like kiq; pipe0 is mapped by pipe1.
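A recurring idiom in this patch is the pipe choice for legacy-queue management. A tiny sketch naming that rule (legacy_queue_pipe() is a hypothetical helper; the driver open-codes the same conditional at each call site):

#include <stdbool.h>
#include <stdio.h>

#define SCHED_PIPE 0 /* pipe0: scheduler-like */
#define KIQ_PIPE   1 /* pipe1: KIQ-like, maps pipe0 */

/* With unified MES, map/unmap/misc/reset of legacy queues go through
 * the KIQ-like pipe1; otherwise they stay on the scheduler pipe. */
static int legacy_queue_pipe(bool enable_uni_mes)
{
	return enable_uni_mes ? KIQ_PIPE : SCHED_PIPE;
}

int main(void)
{
	printf("uni-MES: pipe %d\n", legacy_queue_pipe(true));  /* 1 */
	printf("legacy : pipe %d\n", legacy_queue_pipe(false)); /* 0 */
	return 0;
}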
MESAPI__MISC, api_status)); } @@ -657,6 +674,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, struct mes_reset_legacy_queue_input *input) { union MESAPI__RESET mes_reset_queue_pkt; + int pipe; memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); @@ -680,8 +698,12 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, - AMDGPU_MES_SCHED_PIPE, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), offsetof(union MESAPI__RESET, api_status)); } @@ -1141,14 +1163,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; int r; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring[0]; else - BUG(); + ring = &adev->mes.ring[pipe]; - if ((pipe == AMDGPU_MES_SCHED_PIPE) && + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && (amdgpu_in_reset(adev) || adev->in_suspend)) { *(ring->wptr_cpu_addr) = 0; *(ring->rptr_cpu_addr) = 0; @@ -1160,13 +1180,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return r; if (pipe == AMDGPU_MES_SCHED_PIPE) { - if (adev->enable_uni_mes) { - mes_v12_0_queue_init_register(ring); - } else { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring); + else r = mes_v12_0_kiq_enable_queue(adev); - if (r) - return r; - } + if (r) + return r; } else { mes_v12_0_queue_init_register(ring); } @@ -1186,25 +1205,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_ring_init(struct amdgpu_device *adev) +static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe) { struct amdgpu_ring *ring; - ring = &adev->mes.ring[0]; + ring = &adev->mes.ring[pipe]; ring->funcs = &mes_v12_0_ring_funcs; ring->me = 3; - ring->pipe = 0; + ring->pipe = pipe; ring->queue = 0; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe]; ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } @@ -1218,7 +1241,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) ring = &adev->gfx.kiq[0].ring; ring->me = 3; - ring->pipe = adev->enable_uni_mes ? 
0 : 1; + ring->pipe = 1; ring->queue = 0; ring->adev = NULL; @@ -1240,12 +1263,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring[0]; else - BUG(); + ring = &adev->mes.ring[pipe]; if (ring->mqd_obj) return 0; @@ -1286,9 +1307,6 @@ static int mes_v12_0_sw_init(void *handle) return r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = mes_v12_0_allocate_eop_buf(adev, pipe); if (r) return r; @@ -1296,18 +1314,15 @@ static int mes_v12_0_sw_init(void *handle) r = mes_v12_0_mqd_sw_init(adev, pipe); if (r) return r; - } - if (adev->enable_mes_kiq) { - r = mes_v12_0_kiq_ring_init(adev); + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + r = mes_v12_0_kiq_ring_init(adev); + else + r = mes_v12_0_ring_init(adev, pipe); if (r) return r; } - r = mes_v12_0_ring_init(adev); - if (r) - return r; - return 0; } @@ -1402,10 +1417,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; - mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); - if (adev->enable_uni_mes) - return mes_v12_0_hw_init(adev); + mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); + else + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1432,6 +1447,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; + if (adev->enable_uni_mes) { + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); + } + r = mes_v12_0_hw_init(adev); if (r) goto failure; @@ -1463,7 +1486,7 @@ static int mes_v12_0_hw_init(void *handle) if (adev->mes.ring[0].sched.ready) goto out; - if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, true); @@ -1483,6 +1506,9 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_enable(adev, true); } + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; @@ -1496,9 +1522,6 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_init_aggregated_doorbell(&adev->mes); - /* Enable the MES to handle doorbell ring on unmapped queue */ - mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); From ea5d6db17a8e3635ad91e8c53faa1fdc9570fbbb Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 14:49:30 +0800 Subject: [PATCH 334/447] drm/amdgpu/mes12: configure two pipes hardware resources Configure two pipes with different hardware resources. 
Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 77 +++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 12 ++-- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 7 +-- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 49 ++++++++-------- 4 files changed, 81 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 8f6feb887a56..c598c3edff7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -165,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); - if (r) { - dev_err(adev->dev, - "(%d) ring trail_fence_offs wb alloc failed\n", r); - goto error_ids; - } - adev->mes.sch_ctx_gpu_addr = - adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); - adev->mes.sch_ctx_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.sch_ctx_gpu_addr[i] = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); + adev->mes.sch_ctx_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; - r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); - if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - dev_err(adev->dev, - "(%d) query_status_fence_offs wb alloc failed\n", r); - goto error_ids; + r = amdgpu_device_wb_get(adev, + &adev->mes.query_status_fence_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + + (adev->mes.query_status_fence_offs[i] * 4); + adev->mes.query_status_fence_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - adev->mes.query_status_fence_gpu_addr = - adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); - adev->mes.query_status_fence_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); dev_err(adev->dev, "(%d) read_val_offs alloc failed\n", r); - goto error_ids; + goto error; } adev->mes.read_val_gpu_addr = adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); @@ -214,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); -error_ids: + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); @@ -228,13 +236,22 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + int i; + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, 
&adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index d87d068952e0..548e724e3a75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -112,12 +112,12 @@ struct amdgpu_mes { uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs; - uint64_t sch_ctx_gpu_addr; - uint64_t *sch_ctx_ptr; - uint32_t query_status_fence_offs; - uint64_t query_status_fence_gpu_addr; - uint64_t *query_status_fence_ptr; + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; uint32_t read_val_offs; uint64_t read_val_gpu_addr; uint32_t *read_val_ptr; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 4c7899e527fe..c0340ee3dec0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -522,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = @@ -1243,9 +1243,6 @@ static int mes_v11_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 6e50a6233db5..f5d681f69d06 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -542,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + if (pipe == 
AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } + + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; - - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; + mes->query_status_fence_gpu_addr[pipe]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -1331,9 +1337,6 @@ static int mes_v12_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); From 98cae695a8ae0e4291b1fa7feef9b54fabefe885 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 15:23:16 +0800 Subject: [PATCH 335/447] drm/amdgpu/mes12: sw/hw fini for unified mes Free memory for two pipes and unmap pipe0 via pipe1. 
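The fini side mirrors that ownership rule: with unified MES both pipes own a MES ring and its MQD, otherwise pipe1's state belongs to the separate KIQ ring and is freed on that path instead. A compact sketch of ownership-conditional teardown (struct ring and mes_fini() are illustrative stand-ins, not the driver's types):

#include <stdbool.h>
#include <stdlib.h>

#define MAX_PIPES  2
#define SCHED_PIPE 0

struct ring { void *mqd; };

/* Free only the rings this object owns; in the non-unified case pipe1
 * is the KIQ's, torn down elsewhere. */
static void mes_fini(struct ring rings[MAX_PIPES], bool enable_uni_mes)
{
	for (int pipe = 0; pipe < MAX_PIPES; pipe++) {
		if (!enable_uni_mes && pipe != SCHED_PIPE)
			continue;
		free(rings[pipe].mqd);
		rings[pipe].mqd = NULL;
	}
}

int main(void)
{
	struct ring r[MAX_PIPES] = { { malloc(8) }, { malloc(8) } };

	mes_fini(r, true);
	free(r[1].mqd); /* no-op here; covers the non-unified case */
	return 0;
}

The "unmap pipe0 via pipe1" half of the patch is the runtime counterpart: pipe0 was mapped through pipe1, so it must be unmapped the same way before teardown.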
Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 31 +++++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index f5d681f69d06..35cd6ad73912 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1344,18 +1344,21 @@ static int mes_v12_0_sw_fini(void *handle) &adev->mes.eop_gpu_addr[pipe], NULL); amdgpu_ucode_release(&adev->mes.fw[pipe]); + + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj, + &adev->mes.ring[pipe].mqd_gpu_addr, + &adev->mes.ring[pipe].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[pipe]); + } } - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - - amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, - &adev->mes.ring[0].mqd_gpu_addr, - &adev->mes.ring[0].mqd_ptr); - - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring[0]); + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1472,7 +1475,13 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { if (adev->mes.ring[0].sched.ready) { - mes_v12_0_kiq_dequeue_sched(adev); + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], + RESET_QUEUES, 0, 0); + else + mes_v12_0_kiq_dequeue_sched(adev); + adev->mes.ring[0].sched.ready = false; } From f7fb9d677faf0460131bc2af15afd766d48a1f47 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 12:03:11 +0800 Subject: [PATCH 336/447] drm/amdgpu/mes12: fix suspend issue Use mes pipe to unmap kcq and kgq. 
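The fix is an early-return dispatch: when MES owns the queues, unmap each ring through MES and never reach the KIQ path at all. Sketched standalone below; both unmap routines are hypothetical stand-ins for amdgpu_mes_unmap_legacy_queue() and the KIQ unmap_queues packet.

#include <stdbool.h>
#include <stdio.h>

#define NUM_RINGS 4

static void mes_unmap(int ring) { printf("MES unmaps ring %d\n", ring); }
static int  kiq_unmap_all(void) { printf("KIQ unmaps all rings\n"); return 0; }

/* With MES enabled the KIQ is not in charge of these queues, so the
 * MES branch returns before the KIQ code can run. */
static int disable_kcq(bool enable_mes)
{
	if (enable_mes) {
		for (int i = 0; i < NUM_RINGS; i++)
			mes_unmap(i);
		return 0;
	}
	return kiq_unmap_all();
}

int main(void)
{
	return disable_kcq(true);
}

Putting the branch in the common amdgpu_gfx_disable_kcq()/_kgq() helpers is what lets the gfx_v12_0_kiq_disable_kgq() copy be deleted below.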
Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 +------------------------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 28bd2098a65e..9be8cafdcecc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.compute_ring[j], + RESET_QUEUES, 0, 0); + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index df72fa125fd2..f14e27f86e0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3595,33 +3595,9 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq[0].ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} - static int gfx_v12_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -3630,8 +3606,7 @@ static int gfx_v12_0_hw_fini(void *handle) if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { - r = gfx_v12_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } From a7f670d5d8e77b092404ca8a35bb0f8f89ed3117 Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Thu, 8 Aug 2024 12:19:50 -0400 Subject: [PATCH 337/447] drm/amd/amdgpu: command submission parser for JPEG Add JPEG IB command parser to ensure registers in the command are within the JPEG IP block. 
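The parser's accept/reject rule is simple enough to run standalone. The sketch below reuses the field-extraction macros and the register window that this patch adds to soc15d.h and jpeg_v4_0_3.h; for brevity it drops the PACKETJ condition-code checks that the kernel version also enforces.

#include <stdint.h>
#include <stdio.h>

/* From soc15d.h in this patch. */
#define CP_PACKETJ_NOP          0x60000000
#define CP_PACKETJ_GET_REG(x)   ((x) & 0x3FFFF)
#define CP_PACKETJ_GET_RES(x)   (((x) >> 18) & 0x3F)
#define CP_PACKETJ_GET_TYPE(x)  (((x) >> 28) & 0xF)

/* From jpeg_v4_0_3.h in this patch. */
#define JPEG_REG_RANGE_START 0x4000
#define JPEG_REG_RANGE_END   0x41c2

/* Every packet must have res == 0 and a known type; TYPE0/TYPE3 must
 * target a register inside the JPEG window; TYPE6 passes only as NOP. */
static int parse_jpeg_ib(const uint32_t *ib, uint32_t len_dw)
{
	for (uint32_t i = 0; i < len_dw; i += 2) {
		uint32_t reg  = CP_PACKETJ_GET_REG(ib[i]);
		uint32_t type = CP_PACKETJ_GET_TYPE(ib[i]);

		if (CP_PACKETJ_GET_RES(ib[i]))
			return -1;
		switch (type) {
		case 0: /* PACKETJ_TYPE0 */
		case 3: /* PACKETJ_TYPE3 */
			if (reg < JPEG_REG_RANGE_START ||
			    reg > JPEG_REG_RANGE_END)
				return -1;
			break;
		case 6: /* PACKETJ_TYPE6 */
			if (ib[i] != CP_PACKETJ_NOP)
				return -1;
			break;
		default:
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	uint32_t ok[]  = { 0x00004000, 0x0 }; /* TYPE0 inside the window */
	uint32_t bad[] = { 0x00000000, 0x0 }; /* reg 0x0: outside it */

	printf("%d %d\n", parse_jpeg_ib(ok, 2), parse_jpeg_ib(bad, 2)); /* 0 -1 */
	return 0;
}

Because the parser vets every register offset, the ring code above can stop trusting the VMID field and write 0 instead when parse_cs is in use.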
Reviewed-by: Alex Deucher Signed-off-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++ drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 61 +++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h | 7 ++- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15d.h | 6 +++ 5 files changed, 76 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1e167d925b64..78b3c067fea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1061,6 +1061,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, r = amdgpu_ring_parse_cs(ring, p, job, ib); if (r) return r; + + if (ib->sa_bo) + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } else { ib->ptr = (uint32_t *)kptr; r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index f4662920c653..6ae5a784e187 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" @@ -782,7 +783,11 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1084,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1248,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } + +/** + * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, 
"Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 747a3e5f6856..71c54b294e15 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,6 +46,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); - +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d694a276498a..f4daff90c770 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 2357ff39323f..e74e1983da53 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -76,6 +76,12 @@ ((cond & 0xF) << 24) | \ ((type & 0xF) << 28)) +#define CP_PACKETJ_NOP 0x60000000 +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 From 6a28a072d90e4543b5e07a904e3a6afad0117e26 Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Fri, 2 Aug 2024 14:29:41 -0400 Subject: [PATCH 338/447] drm/amd/amdgpu: cleanup parse_cs callbacks Because gpu_addr is updated in the calling routine (amdgpu_cs_patch_ibs()),it is removed in the callback. Use .patch_cs_in_place instead of .parse_cs for amdgpu_vce_ring_parse_cs_vm() as there is no need for keeping a temporary IB, therefore ib->sa_bo is NULL and amdgpu_ib_free() is removed. 
Reviewed-by: Alex Deucher Signed-off-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 -- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 775c09d57222..31fd30dcd593 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1088,7 +1088,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, int r; job->vm = NULL; - ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); if (ib->length_dw % 16) { DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 51b045de409d..74fdbf71d95b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -749,7 +749,6 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, int i, r = 0; job->vm = NULL; - ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); for (idx = 0; idx < ib->length_dw;) { uint32_t len = amdgpu_ib_get_value(ib, idx); @@ -1044,7 +1043,6 @@ out: if (!r) { /* No error, free all destroyed handle slots */ tmp = destroyed; - amdgpu_ib_free(p->adev, ib, NULL); } else { /* Error during parsing, free all allocated handle slots */ tmp = allocated; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 32517c364cf7..4bfba2931b08 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -950,7 +950,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, - .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = 6 + /* vce_v3_0_emit_vm_flush */ 4 + /* vce_v3_0_emit_pipeline_sync */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 06d787385ad4..0748bf44c880 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1102,7 +1102,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, - .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + From 1091796fb1d9d6888656f2416ad5c99cfc62a4bf Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 12:10:18 +0530 Subject: [PATCH 339/447] drm/amdgpu: add gfx9_4_3 register support in ipdump Add general registers of gfx9_4_3 in ipdump for devcoredump support. 
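All of these ip-dump patches share one layout: a single flat buffer indexed instance-major, slot = instance * reg_count + reg, which the dump and print callbacks must agree on. A runnable sketch of the indexing follows; read_reg() is a mock, and the compute-queue dumps later in the series extend the same scheme to xcc_offset + inst_offset + reg.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define REG_COUNT 4 /* stands in for ARRAY_SIZE(gc_reg_list_9_4_3) */
#define NUM_XCC   2

static uint32_t read_reg(int xcc, int reg)
{
	return (uint32_t)(xcc << 16 | reg); /* mock register value */
}

int main(void)
{
	uint32_t *dump = calloc((size_t)NUM_XCC * REG_COUNT, sizeof(*dump));

	if (!dump)
		return 1;
	for (int xcc = 0; xcc < NUM_XCC; xcc++) {
		int xcc_offset = xcc * REG_COUNT;

		for (int reg = 0; reg < REG_COUNT; reg++)
			dump[xcc_offset + reg] = read_reg(xcc, reg);
	}
	printf("xcc1 reg2 = 0x%08x\n", dump[1 * REG_COUNT + 2]); /* 0x00010002 */
	free(dump);
	return 0;
}

Allocating with one kcalloc(num_xcc * reg_count, ...) and failing soft (a NULL buffer just skips the dump) keeps the feature from ever blocking device init.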
Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 133 +++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 8455fda750a6..3bd84acba643 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -63,6 +63,94 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); #define NORMALIZE_XCC_REG_OFFSET(offset) \ (offset & 0xFFFF) +static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_ICACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQ_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regTCP_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), + 
SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_MESSAGE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_1), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_2), + SOC15_REG_ENTRY_STR(GC, 0, regSMU_RLC_RESPONSE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3) +}; + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -885,6 +973,22 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } +static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + uint32_t *ptr, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + ptr = kcalloc(reg_count * num_xcc, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } +} + static int gfx_v9_4_3_sw_init(void *handle) { int i, j, k, r, ring_id, xcc_id, num_xcc; @@ -986,6 +1090,8 @@ static int gfx_v9_4_3_sw_init(void *handle) if (!amdgpu_sriov_vf(adev)) r = amdgpu_gfx_sysfs_init(adev); + gfx_v9_4_3_alloc_ip_dump(adev); + return r; } @@ -1010,6 +1116,8 @@ static int gfx_v9_4_3_sw_fini(void *handle) if (!amdgpu_sriov_vf(adev)) amdgpu_gfx_sysfs_fini(adev); + kfree(adev->gfx.ip_dump_core); + return 0; } @@ -4196,6 +4304,29 @@ static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_no amdgpu_ring_write(ring, ring->funcs->nop); } +static void gfx_v9_4_3_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i; + uint32_t xcc_id, xcc_offset, num_xcc; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + + if (!adev->gfx.ip_dump_core) + return; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + amdgpu_gfx_off_ctrl(adev, false); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count; + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump_core[xcc_offset + i] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i], + GET_INST(GC, xcc_id))); + } + amdgpu_gfx_off_ctrl(adev, true); +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, @@ -4212,7 +4343,7 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = 
gfx_v9_4_3_get_clockgating_state, - .dump_ip_state = NULL, + .dump_ip_state = gfx_v9_4_3_ip_dump, .print_ip_state = NULL, }; From b232c4a63a176ed837e3c6bb4a3ac79a1ca5ef1d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 12:28:59 +0530 Subject: [PATCH 340/447] drm/amdgpu: add print support for gfx9_4_3 ipdump Add support of gfx9_4_3 ipdump print so devcoredump could trigger it to dump the captured registers in devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 3bd84acba643..59417feac9a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4304,6 +4304,28 @@ static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_no amdgpu_ring_write(ring, ring->funcs->nop); } +static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i; + uint32_t xcc_id, xcc_offset, num_xcc; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + + if (!adev->gfx.ip_dump_core) + return; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + drm_printf(p, "Number of Instances:%d\n", num_xcc); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count; + drm_printf(p, "\nInstance id:%d\n", xcc_id); + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_9_4_3[i].reg_name, + adev->gfx.ip_dump_core[xcc_offset + i]); + } +} + static void gfx_v9_4_3_ip_dump(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -4344,7 +4366,7 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, .dump_ip_state = gfx_v9_4_3_ip_dump, - .print_ip_state = NULL, + .print_ip_state = gfx_v9_4_3_ip_print, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { From 98aded657f506cd5d6d459d68ab4996d9dc0938c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 12 Aug 2024 15:53:36 +0530 Subject: [PATCH 341/447] drm/amd/display: Align hwss_wait_for_all_blank_complete descriptor with implementation The descriptor for `hwss_wait_for_all_blank_complete` was previously misaligned with the actual implementation. This commit refines the descriptor to reflect the implementation of `hwss_wait_for_all_blank_complete` Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_hw_sequencer.c:991: warning: expecting prototype for hwss_wait_for_blank_complete(). 
Prototype was for hwss_wait_for_all_blank_complete() instead Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 9a569aac3c00..7ee2be8f82c4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -980,7 +980,7 @@ void get_surface_tile_visual_confirm_color( } /** - * hwss_wait_for_blank_complete - wait for all active OPPs to finish pending blank + * hwss_wait_for_all_blank_complete - wait for all active OPPs to finish pending blank * pattern updates * * @dc: [in] dc reference From f9e491c8633277d5398f384cf1fd1a477e04363f Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 14:09:06 +0530 Subject: [PATCH 342/447] drm/amdgpu: add cp queue registers for gfx9_4_3 ipdump Add gfx9 support of CP queue registers for all queues to be used by devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 98 ++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 59417feac9a5..5af4abca759d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -151,6 +151,47 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3) }; +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = { + /* compute queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), + 
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS), +}; + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -976,7 +1017,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev) { uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); - uint32_t *ptr, num_xcc; + uint32_t *ptr, num_xcc, inst; num_xcc = NUM_XCC(adev->gfx.xcc_mask); @@ -987,6 +1028,19 @@ static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev) } else { adev->gfx.ip_dump_core = ptr; } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst * num_xcc, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } } static int gfx_v9_4_3_sw_init(void *handle) @@ -1117,6 +1171,7 @@ static int gfx_v9_4_3_sw_fini(void *handle) amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); return 0; } @@ -4329,8 +4384,9 @@ static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) static void gfx_v9_4_3_ip_dump(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - uint32_t i; - uint32_t xcc_id, xcc_offset, num_xcc; + uint32_t i, j, k; + uint32_t num_xcc, reg, num_inst; + uint32_t xcc_id, xcc_offset, inst_offset; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); if (!adev->gfx.ip_dump_core) @@ -4347,6 +4403,42 @@ static void gfx_v9_4_3_ip_dump(void *handle) GET_INST(GC, xcc_id))); } amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + soc15_grbm_select(adev, 1 + i, j, k, 0, + GET_INST(GC, xcc_id)); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST( + gc_cp_reg_list_9_4_3[reg], + GET_INST(GC, xcc_id))); + } + inst_offset += reg_count; + } + } + } + } + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); } static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { From 37ee1456239cc5680f672f37417e52db2349965b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 14:23:07 +0530 Subject: [PATCH 343/447] drm/amdgpu: add cp queue registers print for 
gfx9_4_3 Add gfx9_4_3 print support for CP queue registers for all queues to be used by devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 42 +++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 5af4abca759d..7b4ae197eb49 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4362,8 +4362,9 @@ static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_no static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - uint32_t i; - uint32_t xcc_id, xcc_offset, num_xcc; + uint32_t i, j, k; + uint32_t xcc_id, xcc_offset, inst_offset; + uint32_t num_xcc, reg, num_inst; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); if (!adev->gfx.ip_dump_core) @@ -4379,6 +4380,43 @@ static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) gc_reg_list_9_4_3[i].reg_name, adev->gfx.ip_dump_core[xcc_offset + i]); } + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + drm_printf(p, "\nnum_xcc: %d num_mec: %d num_pipe: %d num_queue: %d\n", + num_xcc, + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, + "\nxcc:%d mec:%d, pipe:%d, queue:%d\n", + xcc_id, i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, + "%-50s \t 0x%08x\n", + gc_cp_reg_list_9_4_3[reg].reg_name, + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); + } + inst_offset += reg_count; + } + } + } + } } static void gfx_v9_4_3_ip_dump(void *handle) From 406792dc2a5c82e2f312e10c3c2c887de6ef80a4 Mon Sep 17 00:00:00 2001 From: Soham Dandapat Date: Mon, 29 Jul 2024 11:59:11 +0530 Subject: [PATCH 344/447] drm/amdgpu: Return earlier in amdgpu_sw_ring_ib_end if mcbp is off Since we don't trigger preemption in the sw ring muxer when mcbp is disabled, return earlier from amdgpu_sw_ring_ib_end when mcbp is disabled; there is no need to call amdgpu_ring_mux_end_ib. Signed-off-by: Soham Dandapat Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index d234b7ccfaaf..1c66da1c3fb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -410,7 +410,7 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) struct amdgpu_ring_mux *mux = &adev->gfx.muxer; WARN_ON(!ring->is_sw_ring); - if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) + if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) return; amdgpu_ring_mux_end_ib(mux, ring); } From 57a372f67688dc7aee23a2a00bcaf6188f592934 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Apr 2024 13:11:53 -0400 Subject: [PATCH
345/447] drm/amdgpu: add new ring reset callback Use this to reset just a single ring. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 582053f1cd56..c7f15edeb367 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -235,6 +235,7 @@ struct amdgpu_ring_funcs { void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset); void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); + int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); }; struct amdgpu_ring { @@ -334,6 +335,7 @@ struct amdgpu_ring { #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) #define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) +#define amdgpu_ring_reset(r, v) (r)->funcs->reset((r), (v)) unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); From 15789fa0f0e29cf802f30d0e308da9c6b18c116a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 14:38:20 -0400 Subject: [PATCH 346/447] drm/amdgpu: add per ring reset support (v5) If a specific job is hung, try and reset just the ring associated with the job. v2: move to amdgpu_job.c v3: fix drm_sched_stop() handling when ring reset fails v4: drop unnecessary amdgpu_fence_driver_clear_job_fences() and drm_sched_increase_karma() v5: rework sched_stop handling Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 908e13455152..c2de3fd17245 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -72,6 +72,25 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) dma_fence_set_error(&s_job->s_fence->finished, -ETIME); + /* attempt a per ring reset */ + if (amdgpu_gpu_recovery && + ring->funcs->reset) { + /* stop the scheduler, but don't mess with the + * bad job yet because if ring reset fails + * we'll fall back to full GPU reset. 
+ */ + drm_sched_wqueue_stop(&ring->sched); + r = amdgpu_ring_reset(ring, job->vmid); + if (!r) { + if (amdgpu_ring_sched_ready(ring)) + drm_sched_stop(&ring->sched, s_job); + amdgpu_fence_driver_force_completion(ring); + if (amdgpu_ring_sched_ready(ring)) + drm_sched_start(&ring->sched, true); + goto exit; + } + } + if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; memset(&reset_context, 0, sizeof(reset_context)); From fb0a5834a338329bc665c7ce2b89f3e376557565 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 12 Jun 2024 15:49:38 +0800 Subject: [PATCH 347/447] drm/amdgpu: increase the reset counter for the queue reset Update the reset counter for the amdgpu_cs_query_reset_state() Acked-by: Vitaly Prosyak Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index c2de3fd17245..c6a1783fc9ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -84,6 +84,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) if (!r) { if (amdgpu_ring_sched_ready(ring)) drm_sched_stop(&ring->sched, s_job); + atomic_inc(&ring->adev->gpu_reset_counter); amdgpu_fence_driver_force_completion(ring); if (amdgpu_ring_sched_ready(ring)) drm_sched_start(&ring->sched, true); From 5fb4d2a77113d3ebaa5c9dcdbef8b7bdfdeeffb2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 17:23:14 -0400 Subject: [PATCH 348/447] drm/amdgpu/gfx9: add ring reset callback Add ring reset callback for compute. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 38 +++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ab10a05c7885..b70cdb59c384 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7118,6 +7118,43 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_write(ring, ring->funcs->nop); } +static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int r; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* reset the ring */ + ring->wptr = 0; + *ring->wptr_cpu_addr = 0; + amdgpu_ring_clear_ring(ring); + + return amdgpu_ring_test_ring(ring); +} + static void gfx_v9_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -7364,6 +7401,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, + .reset = gfx_v9_0_reset_kcq, }; static const struct amdgpu_ring_funcs 
gfx_v9_0_ring_funcs_kiq = { From b5e1a3874fe3cde73a4b02870bf3e8fa43777c5c Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Tue, 11 Jun 2024 18:06:44 +0800 Subject: [PATCH 349/447] drm/amdgpu/gfx9: remap queue after reset successfully Kiq command unmap_queues only does the dequeueing action. We have to map the queue back with clean mqd. Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 36 ++++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b70cdb59c384..cd18c10a290d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3742,7 +3742,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -3754,8 +3754,8 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -3819,7 +3819,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_0_kcq_init_queue(ring); + r = gfx_v9_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -7147,11 +7147,29 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, if (r) return r; - /* reset the ring */ - ring->wptr = 0; - *ring->wptr_cpu_addr = 0; - amdgpu_ring_clear_ring(ring); - + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)){ + DRM_ERROR("fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r){ + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + kiq->pmf->kiq_map_queues(kiq_ring, ring); + r = amdgpu_ring_test_ring(kiq_ring); + if (r){ + DRM_ERROR("fail to remap queue\n"); + return r; + } return amdgpu_ring_test_ring(ring); } From fdbd69486b468e4963b4ef9f76901d3788252dd5 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Tue, 2 Jul 2024 09:03:49 +0800 Subject: [PATCH 350/447] drm/amdgpu/gfx9: wait for reset done before remap There is a racing condition that cp firmware modifies MQD in reset sequence after driver updates it for remapping. We have to wait till CP_HQD_ACTIVE becoming false then remap the queue. 
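The heart of the fix is a bounded poll on CP_HQD_ACTIVE while the hung queue is selected through GRBM; the snippet below is condensed from the diff later in this patch, with nothing added beyond comments:

    /* Condensed from the diff below: wait for the dequeue to complete
     * before touching the MQD for remap; give up with -ETIMEDOUT if
     * CP_HQD_ACTIVE never clears within adev->usec_timeout.
     */
    gfx_v9_0_set_safe_mode(adev, 0);
    mutex_lock(&adev->srbm_mutex);
    soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
    for (i = 0; i < adev->usec_timeout; i++) {
            if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
                    break;
            udelay(1);
    }
    if (i >= adev->usec_timeout)
            r = -ETIMEDOUT;
    soc15_grbm_select(adev, 0, 0, 0, 0, 0);
    mutex_unlock(&adev->srbm_mutex);
    gfx_v9_0_unset_safe_mode(adev, 0);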
v2: fix KIQ locking (Alex) v3: fix KIQ locking harder Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 36 +++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index cd18c10a290d..f87e6e9c7d6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7125,7 +7125,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; - int r; + int i, r; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -7147,9 +7147,28 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, if (r) return r; + /* make sure dequeue is complete*/ + gfx_v9_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v9_0_unset_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + return r; + } + r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)){ - DRM_ERROR("fail to resv mqd_obj\n"); + dev_err(adev->dev, "fail to resv mqd_obj\n"); return r; } r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); @@ -7159,14 +7178,21 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, ring->mqd_ptr = NULL; } amdgpu_bo_unreserve(ring->mqd_obj); - if (r){ - DRM_ERROR("fail to unresv mqd_obj\n"); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); return r; } + spin_lock_irqsave(&kiq->ring_lock, flags); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); - if (r){ + if (r) { DRM_ERROR("fail to remap queue\n"); return r; } From 5d0112f77793c0351faee6c723a6fb9191c12be6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 17:24:03 -0400 Subject: [PATCH 351/447] drm/amdgpu/gfx9.4.3: add ring reset callback Add ring reset callback for compute. 
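As with the gfx9 variant in patch 348, the callback asks the KIQ to unmap the hung queue under the KIQ ring lock and then checks that the KIQ itself is still alive. A condensed view of the diff that follows:

    /* Condensed from the diff below: emit an unmap with RESET_QUEUES
     * through the KIQ, then test the KIQ ring.
     */
    spin_lock_irqsave(&kiq->ring_lock, flags);
    if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
            spin_unlock_irqrestore(&kiq->ring_lock, flags);
            return -ENOMEM;
    }
    kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);
    amdgpu_ring_commit(kiq_ring);
    spin_unlock_irqrestore(&kiq->ring_lock, flags);
    r = amdgpu_ring_test_ring(kiq_ring);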
Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 7b4ae197eb49..f1c73bc1bd95 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3426,6 +3426,43 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int r; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* reset the ring */ + ring->wptr = 0; + *ring->wptr_cpu_addr = 0; + amdgpu_ring_clear_ring(ring); + + return amdgpu_ring_test_ring(ring); +} + enum amdgpu_gfx_cp_ras_mem_id { AMDGPU_GFX_CP_MEM1 = 1, AMDGPU_GFX_CP_MEM2, @@ -4536,6 +4573,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .soft_recovery = gfx_v9_4_3_ring_soft_recovery, .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, + .reset = gfx_v9_4_3_reset_kcq, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { From 6f38589e170f7649bb4b5efbff6b681c31433440 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Fri, 14 Jun 2024 13:05:32 +0800 Subject: [PATCH 352/447] drm/amdgpu/gfx9.4.3: remap queue after reset successfully Kiq command unmap_queues only does the dequeueing action. We have to map the queue back with clean mqd. 
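The remap path rebuilds the MQD in place (via the new restore flag) and then maps the queue back through the KIQ. A condensed sketch of the diff that follows, with the error handling trimmed for brevity:

    /* Condensed from the diff below (error handling trimmed):
     * rebuild the MQD from the backup, then remap via the KIQ.
     */
    amdgpu_bo_reserve(ring->mqd_obj, false);
    amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
    gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); /* restore */
    amdgpu_bo_kunmap(ring->mqd_obj);
    ring->mqd_ptr = NULL;
    amdgpu_bo_unreserve(ring->mqd_obj);

    amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
    kiq->pmf->kiq_map_queues(kiq_ring, ring);
    r = amdgpu_ring_test_ring(kiq_ring);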
Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 36 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index f1c73bc1bd95..44c6e2d44722 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2080,7 +2080,7 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) +static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -2092,8 +2092,8 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -2178,7 +2178,7 @@ static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id); + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -3455,11 +3455,29 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, if (r) return r; - /* reset the ring */ - ring->wptr = 0; - *ring->wptr_cpu_addr = 0; - amdgpu_ring_clear_ring(ring); - + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)){ + DRM_ERROR("fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r){ + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + kiq->pmf->kiq_map_queues(kiq_ring, ring); + r = amdgpu_ring_test_ring(kiq_ring); + if (r){ + DRM_ERROR("fail to remap queue\n"); + return r; + } return amdgpu_ring_test_ring(ring); } From 4c953e53cc34f8601b7b7c6286c65322452d35fe Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Fri, 28 Jun 2024 11:48:22 +0800 Subject: [PATCH 353/447] drm/amdgpu/gfx_9.4.3: wait for reset done before remap There is a racing condition that cp firmware modifies MQD in reset sequence after driver updates it for remapping. We have to wait till CP_HQD_ACTIVE becoming false then remap the queue. 
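The wait is the same bounded CP_HQD_ACTIVE poll as on gfx9 (patch 350), except that the GRBM select goes through the per-XCC instance; condensed from the diff that follows:

    /* Condensed from the diff below: per-XCC variant of the
     * dequeue-completion wait.
     */
    soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0,
                      GET_INST(GC, ring->xcc_id));
    for (i = 0; i < adev->usec_timeout; i++) {
            if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
                    break;
            udelay(1);
    }
    if (i >= adev->usec_timeout)
            r = -ETIMEDOUT;
    soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, ring->xcc_id));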
v2: fix KIQ locking (Alex) v3: fix KIQ locking harder Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 39 +++++++++++++++++++++---- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 44c6e2d44722..9a740020243d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3433,7 +3433,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; - int r; + int r, i; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -3455,9 +3455,28 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, if (r) return r; + /* make sure dequeue is complete*/ + gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, ring->xcc_id)); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, ring->xcc_id)); + mutex_unlock(&adev->srbm_mutex); + gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + return r; + } + r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)){ - DRM_ERROR("fail to resv mqd_obj\n"); + dev_err(adev->dev, "fail to resv mqd_obj\n"); return r; } r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); @@ -3467,15 +3486,23 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, ring->mqd_ptr = NULL; } amdgpu_bo_unreserve(ring->mqd_obj); - if (r){ - DRM_ERROR("fail to unresv mqd_obj\n"); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); return r; } + spin_lock_irqsave(&kiq->ring_lock, flags); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + r = amdgpu_ring_test_ring(kiq_ring); - if (r){ - DRM_ERROR("fail to remap queue\n"); + if (r) { + dev_err(adev->dev, "fail to remap queue\n"); return r; } return amdgpu_ring_test_ring(ring); From 186020c16650d6f7a05774ef318ae9056aae4f21 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 4 Jul 2024 12:12:42 +0800 Subject: [PATCH 354/447] drm/amdgpu/gfx: add a new kiq_pm4_funcs callback for reset_hw_queue Add reset_hw_queue in kiq_pm4_funcs callbacks. 
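A hypothetical caller sketch, not part of this series: only the callback signature comes from the declaration added in the diff below; the surrounding call site is illustrative of how an IP block that populates the hook might invoke it.

    /* Hypothetical usage sketch; the argument order matches the
     * kiq_pm4_funcs declaration added below.
     */
    if (kiq->pmf && kiq->pmf->kiq_reset_hw_queue)
            kiq->pmf->kiq_reset_hw_queue(kiq_ring, AMDGPU_RING_TYPE_COMPUTE,
                                         ring->me, ring->pipe, ring->queue,
                                         ring->xcc_id, vmid);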
Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 86d3fa7eef90..6fe77e483bb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -138,6 +138,10 @@ struct kiq_pm4_funcs { void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, uint16_t pasid, uint32_t flush_type, bool all_hub); + void (*kiq_reset_hw_queue)(struct amdgpu_ring *kiq_ring, + uint32_t queue_type, uint32_t me_id, + uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid); /* Packet sizes */ int set_resources_size; int map_queues_size; From 2e9bbdd7b7cb5f364a917e7c6eff2a2c36f11895 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 4 Jul 2024 12:24:31 +0800 Subject: [PATCH 355/447] drm/amdgpu/gfx9: implement reset_hw_queue for gfx9 Using mmio to do queue reset. Enter safe mode when writing registers. Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f87e6e9c7d6b..02ff70f4b416 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -893,6 +893,8 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); +static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) @@ -1004,12 +1006,47 @@ static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); } + +static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid) +{ + struct amdgpu_device *adev = kiq_ring->adev; + unsigned i; + + /* enter save mode */ + gfx_v9_0_set_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0); + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + } else { + dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type); + } + + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + /* exit safe mode */ + gfx_v9_0_unset_safe_mode(adev, xcc_id); +} + static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_0_kiq_set_resources, .kiq_map_queues = gfx_v9_0_kiq_map_queues, .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, .kiq_query_status = gfx_v9_0_kiq_query_status, .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = 
gfx_v9_0_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, From 4dc4422f11b041a9cb7a86f2f9bc310e48620fa2 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 4 Jul 2024 14:51:58 +0800 Subject: [PATCH 356/447] drm/amdgpu/gfx9.4.3: implement reset_hw_queue for gfx9.4.3 Using mmio to do queue reset. Enter safe mode before writing mmio registers. v2: set register instance offset according to xcc id. Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 9a740020243d..18cb6d45d54f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -200,6 +200,8 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev); static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); +static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) @@ -311,12 +313,46 @@ static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); } +static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid) +{ + struct amdgpu_device *adev = kiq_ring->adev; + unsigned i; + + /* enter save mode */ + gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id); + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_COMPUTE_QUEUE_RESET, 0x1); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + } else { + dev_err(adev->dev, "reset queue_type(%d) not supported\n\n", queue_type); + } + + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + /* exit safe mode */ + gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id); +} + static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_4_3_kiq_set_resources, .kiq_map_queues = gfx_v9_4_3_kiq_map_queues, .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues, .kiq_query_status = gfx_v9_4_3_kiq_query_status, .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = gfx_v9_4_3_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, From 31ef969301e58daaaed1728690e16192b77b9028 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2024 10:20:56 -0400 Subject: [PATCH 357/447] drm/amdgpu/gfx9: per queue reset only on bare metal It's not supported under SR-IOV at the moment. 
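The guard itself is a single early-out at the top of both reset handlers, as the diff below shows:

    /* From the diff below: per-queue reset is rejected when running
     * as an SR-IOV virtual function.
     */
    if (amdgpu_sriov_vf(adev))
            return -EINVAL;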
Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 02ff70f4b416..cad13e01dd3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7164,6 +7164,9 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 18cb6d45d54f..092e229f4097 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3471,6 +3471,9 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int r, i; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; From c4f503551fd65e50e657219c9bfc2987c51805ca Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 17 Jul 2024 19:02:50 -0400 Subject: [PATCH 358/447] drm/amdgpu/gfx9: add ring reset callback for gfx Add ring reset callback for gfx. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index cad13e01dd3a..6373c8caaabc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7155,6 +7155,51 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_write(ring, ring->funcs->nop); } +static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v9_0_ring_emit_wreg(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) + return -ENOMEM; + gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v9_0_ring_emit_reg_wait(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); + gfx_v9_0_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); + + return amdgpu_ring_test_ring(ring); +} + static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { @@ -7391,6 +7436,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, + .reset = gfx_v9_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { From 27ef61f9617478c432bc477d4eed8963deb89f24 Mon Sep 17 00:00:00 2001 
From: Alex Deucher Date: Wed, 24 Jul 2024 17:59:47 -0400 Subject: [PATCH 359/447] drm/amdgpu/gfx9: use proper rlc safe mode helpers Rather than open coding it for the queue reset. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6373c8caaabc..f8f5bb96d486 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1015,7 +1015,7 @@ static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t q unsigned i; /* enter save mode */ - gfx_v9_0_set_safe_mode(adev, xcc_id); + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0); @@ -1037,7 +1037,7 @@ static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t q soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); /* exit safe mode */ - gfx_v9_0_unset_safe_mode(adev, xcc_id); + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { @@ -7233,7 +7233,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, return r; /* make sure dequeue is complete*/ - gfx_v9_0_set_safe_mode(adev, 0); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); for (i = 0; i < adev->usec_timeout; i++) { @@ -7245,7 +7245,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, r = -ETIMEDOUT; soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - gfx_v9_0_unset_safe_mode(adev, 0); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); if (r) { dev_err(adev->dev, "fail to wait on hqd deactive\n"); return r; From a48f31fb78265d992c75d45bea215998367d4956 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Jul 2024 18:04:44 -0400 Subject: [PATCH 360/447] drm/amdgpu/gfx9.4.3: use proper rlc safe mode helpers Rather than open coding it for the queue reset. 
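The change reduces each call site to the generic RLC safe-mode bracket around the existing MMIO sequence; the resulting pattern, condensed from the diff that follows:

    /* Condensed from the diff below: the generic safe-mode helpers
     * replace the IP-private ones around the HQD wait.
     */
    amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id);
    /* GRBM select and CP_HQD_ACTIVE poll unchanged */
    amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id);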
Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 092e229f4097..9215666a6318 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -321,7 +321,7 @@ static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t unsigned i; /* enter save mode */ - gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id); + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id); @@ -343,7 +343,7 @@ static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); /* exit safe mode */ - gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id); + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { @@ -3495,7 +3495,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, return r; /* make sure dequeue is complete*/ - gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id); + amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, ring->xcc_id)); for (i = 0; i < adev->usec_timeout; i++) { @@ -3507,7 +3507,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, r = -ETIMEDOUT; soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, ring->xcc_id)); mutex_unlock(&adev->srbm_mutex); - gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id); + amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); if (r) { dev_err(adev->dev, "fail to wait on hqd deactive\n"); return r; From d082e5cde489caf7b29e966a9dbbc3fb43fb9164 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Jul 2024 18:20:44 -0400 Subject: [PATCH 361/447] drm/amdgpu/gfx9.4.3: use rlc safe mode for soft recovery Protect the MMIO access with safe mode. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 9215666a6318..dd146322f209 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3054,7 +3054,9 @@ static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring, value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); } static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( From 3ec2ad7c34c412bd9264cd1ff235d0812be90e82 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Jul 2024 18:20:57 -0400 Subject: [PATCH 362/447] drm/amdgpu/gfx9: use rlc safe mode for soft recovery Protect the MMIO access with safe mode. 
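As in patch 361 for gfx9.4.3, the SQ_CMD write is bracketed by the RLC safe-mode helpers; condensed from the diff that follows:

    /* Condensed from the diff below: protect the SQ_CMD MMIO write. */
    amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
    WREG32_SOC15(GC, 0, mmSQ_CMD, value);
    amdgpu_gfx_rlc_exit_safe_mode(adev, 0);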
Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f8f5bb96d486..db21fb951e0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5907,7 +5907,9 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, mmSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, From 0f2c243dbfa008cec2dad03ea074156b6b176a03 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 13 Aug 2024 22:34:26 +0530 Subject: [PATCH 363/447] drm/amdgpu: remove ME0 registers from mi300 dump Remove ME0 registers from MI300 gfx_9_4_3 ipdump MI300 does not have gfx ME and hence those register are just empty one and could be dropped. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 37 ------------------------- 1 file changed, 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index dd146322f209..619ff3ec2c86 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -75,42 +75,11 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), - SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), - SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_CNTL), - SOC15_REG_ENTRY_STR(GC, 0, 
regPA_CL_CNTL_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS), @@ -122,11 +91,8 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND), @@ -139,11 +105,8 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), From 89ec85d16eb8110d88c273d1d34f1fe5a70ba8cc Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 13 Aug 2024 13:51:48 +0800 Subject: [PATCH 364/447] drm/amdgpu: fixing rlc firmware loading failure issue Skip rlc firmware validation to ignore firmware header size mismatch issues. This restores the workaround added in commit 849e133c973c ("drm/amdgpu: Fix the null pointer when load rlc firmware") Fixes: 3af2c80ae2f5 ("drm/amdgpu: refine gfx10 firmware loading") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3551 Signed-off-by: Yang Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 75a6ca645964..ca983a014ba0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - "amdgpu/%s_rlc.bin", ucode_prefix); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; From 3fbaf475a5b8361ebee7da18964db809e37518b7 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Tue, 6 Aug 2024 10:19:04 -0700 Subject: [PATCH 365/447] drm/radeon/evergreen_cs: fix int overflow errors in cs track offsets Several cs track offsets (such as 'track->db_s_read_offset') either are initialized with or plainly take big enough values that, once shifted 8 bits left, may be hit with integer overflow if the resulting values end up going over u32 limit. 
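A minimal standalone illustration of the first failure mode, using made-up numbers rather than driver code: an offset of 0x01000000 in 256-byte units (a 4 GiB byte offset) wraps to zero when shifted in 32 bits, while widening to u64 before the shift preserves it.

    /* Standalone illustration; the values are hypothetical. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t bo_offset = 0x01000000;             /* 2^24 units */
            uint32_t wrapped = bo_offset << 8;           /* 2^32 wraps to 0 */
            uint64_t widened = (uint64_t)bo_offset << 8; /* 0x100000000 */

            printf("wrapped=0x%08x widened=0x%llx\n",
                   wrapped, (unsigned long long)widened);
            return 0;
    }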
Same goes for a few instances of 'surf.layer_size * mslice' multiplications that are added to 'offset' variable - they may potentially overflow as well and need to be validated properly. While some debug prints in this code section take possible overflow issues into account, simply casting to (unsigned long) may be erroneous in its own way, as depending on CPU architecture one is liable to get different results. Fix said problems by: - casting 'offset' to fixed u64 data type instead of ambiguous unsigned long. - casting one of the operands in vulnerable to integer overflow cases to u64. - adjust format specifiers in debug prints to properly represent 'offset' values. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 285484e2d55e ("drm/radeon: add support for evergreen/ni tiling informations v11") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 62 +++++++++++++-------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 1fe6e0d883c7..675a649fa7ab 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -395,7 +395,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1; @@ -433,14 +433,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i return r; } - offset = track->cb_color_bo_offset[id] << 8; + offset = (u64)track->cb_color_bo_offset[id] << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d cb[%d] bo base %llu not aligned with %ld\n", __func__, __LINE__, id, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->cb_color_bo[id])) { /* old ddx are broken they allocate bo with w*h*bpp but * program slice with ALIGN(h, 8), catch this and patch @@ -448,14 +448,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i */ if (!surf.mode) { uint32_t *ib = p->ib.ptr; - unsigned long tmp, nby, bsize, size, min = 0; + u64 tmp, nby, bsize, size, min = 0; /* find the height the ddx wants */ if (surf.nby > 8) { min = surf.nby - 8; } bsize = radeon_bo_size(track->cb_color_bo[id]); - tmp = track->cb_color_bo_offset[id] << 8; + tmp = (u64)track->cb_color_bo_offset[id] << 8; for (nby = surf.nby; nby > min; nby--) { size = nby * surf.nbx * surf.bpe * surf.nsamples; if ((tmp + size * mslice) <= bsize) { @@ -467,7 +467,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i slice = ((nby * surf.nbx) / 64) - 1; if (!evergreen_surface_check(p, &surf, "cb")) { /* check if this one works */ - tmp += surf.layer_size * mslice; + tmp += (u64)surf.layer_size * mslice; if (tmp <= bsize) { ib[track->cb_color_slice_idx[id]] = slice; goto old_ddx_ok; @@ -476,9 +476,9 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i } } dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " - "offset %d, max layer %d, bo size %ld, slice %d)\n", + "offset %llu, max layer %d, bo size %ld, slice %d)\n", __func__, __LINE__, id, surf.layer_size, - 
track->cb_color_bo_offset[id] << 8, mslice, + (u64)track->cb_color_bo_offset[id] << 8, mslice, radeon_bo_size(track->cb_color_bo[id]), slice); dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", __func__, __LINE__, surf.nbx, surf.nby, @@ -562,7 +562,7 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -608,18 +608,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return r; } - offset = track->db_s_read_offset << 8; + offset = (u64)track->db_s_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_read_bo)) { dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_read_offset << 8, mslice, + (u64)track->db_s_read_offset << 8, mslice, radeon_bo_size(track->db_s_read_bo)); dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", __func__, __LINE__, track->db_depth_size, @@ -627,18 +627,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return -EINVAL; } - offset = track->db_s_write_offset << 8; + offset = (u64)track->db_s_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_write_bo)) { dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_write_offset << 8, mslice, + (u64)track->db_s_write_offset << 8, mslice, radeon_bo_size(track->db_s_write_bo)); return -EINVAL; } @@ -659,7 +659,7 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -706,34 +706,34 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) return r; } - offset = track->db_z_read_offset << 8; + offset = (u64)track->db_z_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_read_bo)) { dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max 
layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_read_offset << 8, mslice, + (u64)track->db_z_read_offset << 8, mslice, radeon_bo_size(track->db_z_read_bo)); return -EINVAL; } - offset = track->db_z_write_offset << 8; + offset = (u64)track->db_z_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_write_bo)) { dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_write_offset << 8, mslice, + (u64)track->db_z_write_offset << 8, mslice, radeon_bo_size(track->db_z_write_bo)); return -EINVAL; } From 20588d5afce3992ff4fc9b61085e3e1affbac620 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Mon, 12 Aug 2024 20:24:15 +0800 Subject: [PATCH 366/447] drm/amd: Remove unused declarations amdgpu_gart_table_vram_pin() and amdgpu_gart_table_vram_unpin() has been removed since commit 575e55ee4fbc ("drm/amdgpu: recover gart table at resume") remain the declarations untouched in the header files. Besides, amdgpu_dm_display_resume() has also beed removed since commit a80aa93de1a0 ("drm/amd/display: Unify dm resume sequence into a single call"). So, let's remove this unused declarations. Signed-off-by: Zhang Zekun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ------- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 2 -- 2 files changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f3980b40f2ce..937de21a7142 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1575,13 +1575,6 @@ static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif -#if defined(CONFIG_DRM_AMD_DC) -int amdgpu_dm_display_resume(struct amdgpu_device *adev ); -#else -static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; } -#endif - - void amdgpu_register_gpu_instance(struct amdgpu_device *adev); void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 8283d682f543..7cc980bf4725 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -55,8 +55,6 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev); void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, From 27a74c125d029d0606b81ef865bb68dd975ca2f7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 23 Jul 2024 13:08:55 +0530 Subject: [PATCH 367/447] drm/amdgpu: add vcn ip dump ptr in vcn 
global struct Add a pointer to the vcn ip dump in the vcn global structure so it is accessible for all vcn versions via the global adev. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index c87d68d4be53..2a1f3dbb14d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,6 +330,9 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; bool using_unified_queue; + + /* IP reg dump */ + uint32_t *ip_dump; }; struct amdgpu_fw_shared_rb_ptrs_struct { From ab10f7748789fa9247949b530d8ee7d56eafe9a3 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 16:35:41 +0530 Subject: [PATCH 368/447] drm/amdgpu: add vcn_v3_0 ip dump support Add support for vcn ip dump in the devcoredump for vcn_v3_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 78 ++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 24f947751c46..693eb676c01d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,6 +60,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -126,6 +162,8 @@ static int vcn_v3_0_sw_init(void *handle) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; r = amdgpu_vcn_sw_init(adev); @@ 
-246,6 +284,15 @@ static int vcn_v3_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -284,6 +331,7 @@ static int vcn_v3_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); return r; } @@ -2203,6 +2251,34 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2221,7 +2297,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v3_0_dump_ip_state, .print_ip_state = NULL, }; From 6d88c0f94ac07ffc9f08e459cca036f4af08617d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 16:48:28 +0530 Subject: [PATCH 369/447] drm/amdgpu: add print support for vcn_v3_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v3_0. 
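The print callback walks the same flat buffer that the dump callback fills in: sw_init allocates num_vcn_inst * reg_count uint32_t slots with kcalloc(), and each instance's registers are laid out back to back. A minimal sketch of the indexing (illustrative helper, not part of the driver):

	/* sketch: instance-major layout of the flat ip_dump buffer */
	static inline uint32_t ip_dump_index(uint32_t inst, uint32_t reg_count,
					     uint32_t reg)
	{
		/* registers of instance 'inst' start at inst * reg_count */
		return inst * reg_count + reg;
	}

Register j of instance i therefore lives at ip_dump[ip_dump_index(i, reg_count, j)], which is the inst_off = i * reg_count computation repeated throughout this series.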
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 35 ++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 693eb676c01d..65dd68b32280 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2251,6 +2251,39 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t inst_off; + bool is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v3_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2298,7 +2331,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, .dump_ip_state = vcn_v3_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v3_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { From f685b38455b0c990a0f018a17b238d8ffb5acccc Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 1 Aug 2024 19:17:11 +0530 Subject: [PATCH 370/447] drm/amdgpu: add vcn_v5_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v5_0. 
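As with vcn_v3_0, the dump reads regUVD_POWER_STATUS first and unconditionally, because it is the one register in the list that is always readable; the remaining entries are sampled only when the instance is not power-gated, since reading registers of a gated VCN instance can misbehave. A self-contained sketch of that guard (the mask value is an illustrative stand-in for UVD_POWER_STATUS__UVD_POWER_STATUS_MASK):

	#include <stdint.h>
	#include <stddef.h>

	#define POWER_STATUS_MASK 0x3	/* illustrative stand-in */

	/* sketch: copy one instance's register list, gated on power status */
	static void dump_instance(uint32_t (*read_reg)(size_t idx), uint32_t *dump,
				  size_t inst_off, size_t reg_count)
	{
		size_t j;

		/* entry 0 (POWER_STATUS) is always safe to read */
		dump[inst_off] = read_reg(0);
		if ((dump[inst_off] & POWER_STATUS_MASK) == 1)
			return;	/* instance is power-gated: skip the rest */
		for (j = 1; j < reg_count; j++)
			dump[inst_off + j] = read_reg(j);
	}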
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 76 ++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 68c97fcd539b..a30a42b1ba03 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -37,6 +37,40 @@ #include +static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -83,6 +117,8 @@ static int vcn_v5_0_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -137,6 +173,14 @@ static int vcn_v5_0_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -173,6 +217,8 @@ static int vcn_v5_0_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1297,6 +1343,34 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v5_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first 
element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1315,7 +1389,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v5_0_dump_ip_state, .print_ip_state = NULL, }; From 4af8071b654dbd9b8bd003ab8e49eb16b4cb9fcd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2024 15:50:23 -0400 Subject: [PATCH 371/447] drm/amdgpu/gfx8: add ring reset callback for gfx Add ring reset callback for gfx. v2: fix operator precedence (kernel test robot) Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 75 ++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/vid.h | 1 + 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a1963e6c5cab..bc8295812cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6149,6 +6149,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. @@ -6172,7 +6173,8 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, EOP_TC_ACTION_EN | EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); @@ -6380,6 +6382,34 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, amdgpu_ring_write(ring, val); } +static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) { struct amdgpu_device *adev = ring->adev; @@ -6856,6 +6886,48 @@ static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } +static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + return -ENOMEM; + gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6923,6 +6995,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, + .reset = gfx_v8_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 80ce42aacc0c..b61f6b838ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -246,6 +246,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data From d479158f6502a3698b91829fa03bd3f2ea38efe7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2024 15:59:20 -0400 Subject: [PATCH 372/447] drm/amdgpu/gfx7: add ring reset callback for gfx Add ring reset callback for gfx. 
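The sequence is the same one the gfx8 variant above uses: the KIQ ring writes the VMID bit into mmCP_VMID_RESET, and the hung gfx ring then emits a trailing fence EOP (using the new EOP_EXEC bit), a WAIT_REG_MEM until CP_VMID_RESET reads back zero, and a final write clearing the register. As a commented outline of the steps (an annotation for this series with illustrative helper names, not driver code):

	/* 1. via the KIQ ring: request the per-VMID reset */
	emit_wreg(kiq_ring, mmCP_VMID_RESET, 1u << vmid);
	/* 2. on the hung ring: trailing fence with the EXEC bit set */
	emit_fence(ring, fence_addr, sync_seq, FENCE_FLAG_EXEC);
	/* 3. wait for the reset request field to read back as zero */
	emit_reg_wait(ring, mmCP_VMID_RESET, 0 /* ref */, 0xffff /* mask */);
	/* 4. explicitly clear the reset request */
	emit_wreg(ring, mmCP_VMID_RESET, 0);

Both the gfx7 and gfx8 callbacks finish with amdgpu_ring_test_ring() to confirm the ring came back.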
v2: fix operator precedence (kernel test robot) Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cikd.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 76 ++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 55982c0064b5..06088d52d81c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -364,6 +364,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5fbdef04c9aa..f146806c4633 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2114,6 +2114,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; + /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. */ @@ -2133,7 +2135,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -4921,6 +4924,76 @@ static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } +static void gfx_v7_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v7_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static int gfx_v7_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v7_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + 
return -ENOMEM; + gfx_v7_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v7_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v7_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -4972,6 +5045,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync, + .reset = gfx_v7_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { From b5be054c585110b2c5c1b180136800e8c41c7bb4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 15:36:19 -0400 Subject: [PATCH 373/447] drm/amdgpu/gfx11: enter safe mode before touching CP_INT_CNTL Need to enter safe mode before touching GC MMIO. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 22bb35278691..98261000e022 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4781,6 +4781,8 @@ static int gfx_v11_0_soft_reset(void *handle) int r, i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gfx_v11_0_set_safe_mode(adev, 0); + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); @@ -4788,8 +4790,6 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_set_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { From 76acba7b7f12517990f326fabfecb6f55e334233 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 16:37:33 -0400 Subject: [PATCH 374/447] drm/amdgpu/gfx11: add a mutex for the gfx semaphore This will be used in more places in the future so add a mutex. 
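Note the asymmetry in the helper this mutex protects: the lock is taken on the request path and released on the release path of the same function, so the host-side mutex brackets the whole period during which the hardware index mutex is held. The shape of the pattern, reduced to its essentials (illustrative names; the real gfx_v11_0_request_gfx_index_mutex(), shown in the diff below, also polls CP_GFX_INDEX_MUTEX with a timeout):

	/* sketch: a host mutex bracketing a hw mutex acquire/release */
	static int request_hw_index_mutex(struct my_ctx *ctx, bool req)
	{
		if (req)
			mutex_lock(&ctx->reset_sem_mutex);	/* serialize requesters */

		/* ... program the acquire (req) or release (!req) and poll for ack ... */

		if (!req)
			mutex_unlock(&ctx->reset_sem_mutex);
		return 0;
	}

Here struct my_ctx stands in for amdgpu_device.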
Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 +++++++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a6b8d0ba4758..482db4ebcc4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4059,6 +4059,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->notifier_lock); mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); + mutex_init(&adev->gfx.reset_sem_mutex); amdgpu_device_init_apu_flags(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 6fe77e483bb7..17b945b545b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -444,6 +444,8 @@ struct amdgpu_gfx { uint32_t *ip_dump_core; uint32_t *ip_dump_compute_queues; uint32_t *ip_dump_gfx_queues; + + struct mutex reset_sem_mutex; }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 98261000e022..01f220ee4561 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4743,10 +4743,12 @@ static int gfx_v11_0_wait_for_idle(void *handle) } static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, - int req) + bool req) { u32 i, tmp, val; + if (req) + mutex_lock(&adev->gfx.reset_sem_mutex); for (i = 0; i < adev->usec_timeout; i++) { /* Request with MeId=2, PipeId=0 */ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); @@ -4767,6 +4769,8 @@ static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, } udelay(1); } + if (!req) + mutex_unlock(&adev->gfx.reset_sem_mutex); if (i >= adev->usec_timeout) return -EINVAL; @@ -4814,7 +4818,7 @@ static int gfx_v11_0_soft_reset(void *handle) mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 1); + r = gfx_v11_0_request_gfx_index_mutex(adev, true); if (r) { DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); return r; @@ -4829,7 +4833,7 @@ static int gfx_v11_0_soft_reset(void *handle) RREG32_SOC15(GC, 0, regCP_VMID_RESET); /* release the gfx mutex */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 0); + r = gfx_v11_0_request_gfx_index_mutex(adev, false); if (r) { DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); return r; From 478efcb90b074f0fdd18e62b30ce09140bd69022 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 16:39:30 -0400 Subject: [PATCH 375/447] drm/amdgpu/gfx11: export gfx_v11_0_request_gfx_index_mutex() It will be used by the queue reset code. 
Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 01f220ee4561..5685aee479df 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4742,8 +4742,8 @@ static int gfx_v11_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, - bool req) +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req) { u32 i, tmp, val; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h index 10cfc29c27c9..157a5c812259 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h @@ -26,4 +26,7 @@ extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block; +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req); + #endif From 5b7a59de4845460a313d93d4839258bfb982357c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 13:35:05 -0400 Subject: [PATCH 376/447] drm/amdgpu/mes: add API for user queue reset Add API for resetting user queues. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 43 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 9 ++++++ 2 files changed, 52 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index c598c3edff7e..04a4f0dfec15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -793,6 +793,49 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) return 0; } +int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) +{ + unsigned long flags; + struct amdgpu_mes_queue *queue; + struct amdgpu_mes_gang *gang; + struct mes_reset_queue_input queue_input; + int r; + + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. 
+ */ + amdgpu_mes_lock(&adev->mes); + + /* remove the mes gang from idr list */ + spin_lock_irqsave(&adev->mes.queue_id_lock, flags); + + queue = idr_find(&adev->mes.queue_id_idr, queue_id); + if (!queue) { + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + amdgpu_mes_unlock(&adev->mes); + DRM_ERROR("queue id %d doesn't exist\n", queue_id); + return -EINVAL; + } + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + + DRM_DEBUG("try to reset queue, doorbell off = 0x%llx\n", + queue->doorbell_off); + + gang = queue->gang; + queue_input.doorbell_offset = queue->doorbell_off; + queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; + + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset hardware queue, queue id = %d\n", + queue_id); + + amdgpu_mes_unlock(&adev->mes); + + return 0; +} + int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 548e724e3a75..5c8867d2380a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -248,6 +248,11 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; +struct mes_reset_queue_input { + uint32_t doorbell_offset; + uint64_t gang_context_addr; +}; + struct mes_map_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; @@ -360,6 +365,9 @@ struct amdgpu_mes_funcs { int (*reset_legacy_queue)(struct amdgpu_mes *mes, struct mes_reset_legacy_queue_input *input); + + int (*reset_hw_queue)(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -387,6 +395,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, struct amdgpu_mes_queue_properties *qprops, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); From d4f1fde734eb73767015272dd2e8af1440b30a9b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 13:48:07 -0400 Subject: [PATCH 377/447] drm/amdgpu/mes11: add API for user queue reset Add API for resetting user queues. 
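The per-ASIC backends below only format and submit the firmware packet; the lookup and locking live in the core helper added in the previous patch. A hypothetical caller, to show the intended shape of the API (queue_id is the id returned by amdgpu_mes_add_hw_queue(); error handling elided):

	/* hypothetical: reset a hung user queue by its MES queue id */
	r = amdgpu_mes_reset_hw_queue(adev, queue_id);
	if (r)
		dev_err(adev->dev, "reset of MES queue %d failed (%d)\n",
			queue_id, r);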
Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index c0340ee3dec0..6f5a80519af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -360,6 +360,26 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -636,6 +656,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, .reset_legacy_queue = mes_v11_0_reset_legacy_queue, + .reset_hw_queue = mes_v11_0_reset_hw_queue, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, From 32aada4d0a35ee613ffed23090bc23e1b40da419 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 13:48:40 -0400 Subject: [PATCH 378/447] drm/amdgpu/mes12: add API for user queue reset Add API for resetting user queues. 
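The mes12 variant mirrors the mes11 one; the additional wrinkle, visible in the diff below, is pipe selection: with unified MES the packet is submitted through the KIQ pipe, otherwise through the scheduler pipe.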
Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 35cd6ad73912..47a73f6ae4da 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -350,6 +350,32 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -723,6 +749,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, .reset_legacy_queue = mes_v12_0_reset_legacy_queue, + .reset_hw_queue = mes_v12_0_reset_hw_queue, }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, From f3c958ab857927e1ef2cc6806fcb0eb3f36c923a Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 1 Aug 2024 19:19:27 +0530 Subject: [PATCH 379/447] drm/amdgpu: add print support for vcn_v5_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v5_0. 
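For context, these print callbacks are what user space ultimately sees: the devcoredump framework exposes the rendered dump as a file (typically /sys/class/devcoredump/devcd<N>/data), which can be read to retrieve the dump and written to in order to dismiss it.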
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 34 ++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index a30a42b1ba03..c305386358b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1343,6 +1343,38 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v5_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_5_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v5_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1390,7 +1422,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, .dump_ip_state = vcn_v5_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v5_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { From 8962915044364bb7c36b3018f74371a798aee46d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:49:20 +0530 Subject: [PATCH 380/447] drm/amdgpu: add vcn_v4_0_3 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v4_0_3. 
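One detail sets vcn_v4_0_3 apart from the other variants in this series: register reads go through GET_INST(), which translates a logical VCN instance into the physical instance used for MMIO, while the dump buffer stays logically indexed. Schematically, mirroring the diff below:

	inst_id  = GET_INST(VCN, i);	/* physical instance, used for register reads */
	inst_off = i * reg_count;	/* logical slot in adev->vcn.ip_dump */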
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 81 ++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 9bae95538b62..77cc6807d119 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,42 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + #define NORMALIZE_VCN_REG_OFFSET(offset) \ (offset & 0x1FFFF) @@ -92,6 +128,8 @@ static int vcn_v4_0_3_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; int i, r, vcn_inst; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -159,6 +197,15 @@ static int vcn_v4_0_3_sw_init(void *handle) } } + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -194,6 +241,8 @@ static int vcn_v4_0_3_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1684,6 +1733,36 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; } +static void vcn_v4_0_3_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off, inst_id; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if 
(adev->vcn.harvest_config & (1 << i)) + continue; + + inst_id = GET_INST(VCN, i); + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, inst_id, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_3[j], + inst_id)); + } +} + static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .name = "vcn_v4_0_3", .early_init = vcn_v4_0_3_early_init, @@ -1702,7 +1781,7 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v4_0_3_dump_ip_state, .print_ip_state = NULL, }; From 9d87dac3f9adbe30d545c577aab483dfce71143d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:31:21 +0530 Subject: [PATCH 381/447] drm/amdgpu: add vcn_v4_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v4_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 80 ++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 776c539bfdda..abd5a0793e58 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ 
-137,6 +173,8 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -200,6 +238,15 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -239,6 +286,8 @@ static int vcn_v4_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -2109,6 +2158,35 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .name = "vcn_v4_0", .early_init = vcn_v4_0_early_init, @@ -2127,7 +2205,7 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v4_0_dump_ip_state, .print_ip_state = NULL, }; From 46553db49cf7b7dce95879ee0725f7d95de3c184 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:57:09 +0530 Subject: [PATCH 382/447] drm/amdgpu: add vcn_v4_0_5 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v4_0_5. 
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 79 ++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 8d75061f9f38..b05bfe1dad75 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -97,6 +133,8 @@ static int vcn_v4_0_5_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -168,6 +206,14 @@ static int vcn_v4_0_5_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -207,6 +253,8 @@ static int vcn_v4_0_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1733,6 +1781,35 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_5_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + + if (!adev->vcn.ip_dump) + return; + + for 
(i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_5[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .name = "vcn_v4_0_5", .early_init = vcn_v4_0_5_early_init, @@ -1751,7 +1828,7 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v4_0_5_dump_ip_state, .print_ip_state = NULL, }; From dc57edda816df4fb43bfc2809675e91d15994195 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:52:27 +0530 Subject: [PATCH 383/447] drm/amdgpu: add print support for vcn_v4_0_3 ip dump Add support for logging the registers in devcoredump buffer for vcn_v4_0_3. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 34 ++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 77cc6807d119..0fda70336300 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1733,6 +1733,38 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; } +static void vcn_v4_0_3_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_3[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v4_0_3_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1782,7 +1814,7 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, .dump_ip_state = vcn_v4_0_3_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v4_0_3_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { From 3a50a51d04d2ca5066949073274e70191104f8e5 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:43:07 +0530 Subject: [PATCH 384/447] drm/amdgpu: add print support for vcn_v4_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v4_0. 
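A drm_printer is just an output sink, so the same callback can be pointed at the kernel log for ad-hoc debugging as well as at the devcoredump buffer. A minimal sketch of driving it by hand (hypothetical debug path; ip_funcs stands for a struct amd_ip_funcs pointer, and the handle argument is the amdgpu_device, as in the callbacks above):

	struct drm_printer p = drm_info_printer(adev->dev);

	if (ip_funcs->print_ip_state)
		ip_funcs->print_ip_state((void *)adev, &p);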
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index abd5a0793e58..26c6f10a8c8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -2158,6 +2158,38 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v4_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2206,7 +2238,7 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, .dump_ip_state = vcn_v4_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v4_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { From 439c3b124e9ee704766040d5182ccdaeb4d45499 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:58:46 +0530 Subject: [PATCH 385/447] drm/amdgpu: add print support for vcn_v4_0_5 ip dump Add support for logging the registers in devcoredump buffer for vcn_v4_0_5. 
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 34 ++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index b05bfe1dad75..b1fd226b7efb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1781,6 +1781,38 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_5_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v4_0_5_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1829,7 +1861,7 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, .dump_ip_state = vcn_v4_0_5_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v4_0_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { From 837cc7f1bf2bbebb05781efb6bc1b10d5c2e9308 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:06:24 +0530 Subject: [PATCH 386/447] drm/amdgpu: add vcn_v1_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v1_0. 
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 78 ++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index a280b9fecb77..f0c4b705c4e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -45,6 +45,42 @@ #define mmUVD_REG_XX_MASK_1_0 0x05ac #define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -90,6 +126,8 @@ static int vcn_v1_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCN DEC TRAP */ @@ -161,6 +199,14 @@ static int vcn_v1_0_sw_init(void *handle) r = jpeg_v1_0_sw_init(handle); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return r; } @@ -184,6 +230,8 @@ static int vcn_v1_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1877,6 +1925,34 @@ void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring) mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround); } +static void vcn_v1_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + + if (!adev->vcn.ip_dump) + return; + + 
for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_1_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", .early_init = vcn_v1_0_early_init, @@ -1895,7 +1971,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v1_0_dump_ip_state, .print_ip_state = NULL, }; From ef9f3b5fd9d2594766c60b1e12b0e72e4918512c Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:08:22 +0530 Subject: [PATCH 387/447] drm/amdgpu: add print support for vcn_v1_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v1_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index f0c4b705c4e7..ecdfbfefd66a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1925,6 +1925,38 @@ void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring) mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround); } +static void vcn_v1_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_1_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v1_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1972,7 +2004,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, .dump_ip_state = vcn_v1_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v1_0_print_ip_state, }; /* From 2239aaa204f1c5002018a02903df7e45a0e0e503 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:18:09 +0530 Subject: [PATCH 388/447] drm/amdgpu: add vcn_v2_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v2_0. 
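Once the dump and print hooks are wired up, the captured registers reach userspace through the devcoredump interface. A typical retrieval (the devcd instance number varies per dump) might look like:

    $ cat /sys/class/devcoredump/devcd1/data > amdgpu_coredump.txt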
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 79 ++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index d3d096909a7f..710d054e96f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -53,6 +53,42 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); @@ -96,6 +132,8 @@ static int vcn_v2_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; volatile struct amdgpu_fw_shared *fw_shared; @@ -184,6 +222,15 @@ static int vcn_v2_0_sw_init(void *handle) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(adev->vcn.inst); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -213,6 +260,8 @@ static int vcn_v2_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1985,6 +2034,34 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table); } +static void vcn_v2_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + 
uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .name = "vcn_v2_0", .early_init = vcn_v2_0_early_init, @@ -2003,7 +2080,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v2_0_dump_ip_state, .print_ip_state = NULL, }; From b910cacb4e70066238feafaf3f2430ef2c8c3b12 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:19:42 +0530 Subject: [PATCH 389/447] drm/amdgpu: add print support for vcn_v2_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v2_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 710d054e96f8..bfd067e2d2f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -2034,6 +2034,38 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table); } +static void vcn_v2_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v2_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2081,7 +2113,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, .dump_ip_state = vcn_v2_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v2_0_print_ip_state, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { From 0eea81ee2e12900bd5276558434b675b52ab2d5d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:23:55 +0530 Subject: [PATCH 390/447] drm/amdgpu: add vcn_v2_5 ip dump support Add support of vcn ip dump in the 
devcoredump for vcn_v2_5. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 80 ++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 96f60c303161..343a9667e03a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -55,6 +55,43 @@ #define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); @@ -122,6 +159,8 @@ static int vcn_v2_5_sw_init(void *handle) { struct amdgpu_ring *ring; int i, j, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { @@ -241,6 +280,15 @@ static int vcn_v2_5_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -277,6 +325,8 @@ static int vcn_v2_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1876,6 +1926,34 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v2_5_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + + if 
(!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_5[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .name = "vcn_v2_5", .early_init = vcn_v2_5_early_init, @@ -1894,7 +1972,7 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, .print_ip_state = NULL, }; From bc62abe1b92db4e027a92a6799f2193bb93970ea Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:26:31 +0530 Subject: [PATCH 391/447] drm/amdgpu: add print support for vcn_v2_5 ip dump Add support for logging the registers in devcoredump buffer for vcn_v2_5. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 343a9667e03a..661eef38aec9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1926,6 +1926,38 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v2_5_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v2_5_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1973,7 +2005,7 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, .dump_ip_state = vcn_v2_5_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v2_5_print_ip_state, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { From 1a2103d68547407a098c237d7cade79b4ad2b88f Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:27:58 +0530 Subject: [PATCH 392/447] drm/amdgpu: add vcn ip dump support for vcn_v2_6 Add support for logging the registers in devcoredump buffer for vcn_v2_6. 
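Note that the register lists in this series store instance-0 offsets; the dump loops resolve the per-instance register via SOC15_REG_ENTRY_OFFSET_INST. Paraphrased (not the literal macro), the lookup adds the instance's IP base to the stored offset:

    /* Paraphrase of the per-instance lookup, not the literal macro:
     * hwip/seg select the IP register base, inst selects the instance.
     */
    reg = adev->reg_offset[entry.hwip][inst][entry.seg] + entry.reg_offset;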
Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 661eef38aec9..04e9e806e318 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -2026,8 +2026,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, + .print_ip_state = vcn_v2_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = From 2dc3851ef7d9c5439ea8e9623fc36878f3b40649 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 14 Aug 2024 10:28:24 -0400 Subject: [PATCH 393/447] drm/amdgpu/sdma5.2: limit wptr workaround to sdma 5.2.1 The workaround seems to cause stability issues on other SDMA 5.2.x IPs. Fixes: a03ebf116303 ("drm/amdgpu/sdma5.2: Update wptr registers as well as doorbell") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3556 Acked-by: Ruijing Dong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index d740255edf5a..bc9b240a3488 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -225,14 +225,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - /* SDMA seems to miss doorbells sometimes when powergating kicks in. - * Updating the wptr directly will wake it. This is only safe because - * we disallow gfxoff in begin_use() and then allow it again in end_use(). - */ - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) { + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " From f49280ffd254e718ee01ef515fe91854fdf005cf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 14 Aug 2024 19:06:36 -0400 Subject: [PATCH 394/447] drm/amdgpu: handle enforce isolation on non-0 gfxhub Some chips have more than one gfxhub so check if we are a gfxhub rather than just gfxhub 0. 
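AMDGPU_IS_GFXHUB() matches any gfxhub index instead of only AMDGPU_GFXHUB(0); conceptually it is a range test of roughly this shape (a sketch with hypothetical bound names, not the driver's literal definition):

    /* Sketch: gfxhub indices occupy a contiguous range, so any index in
     * that range qualifies, not just gfxhub 0.
     */
    #define IS_GFXHUB_SKETCH(vmhub) \
            ((vmhub) >= GFXHUB_RANGE_FIRST && (vmhub) <= GFXHUB_RANGE_LAST)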
Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b6a8bddada4c..6608eeb61e5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -484,7 +484,7 @@ error: bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) { return vm->reserved_vmid[vmhub] || - (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0))); + (enforce_isolation && AMDGPU_IS_GFXHUB(vmhub)); } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, From aec773a1fb0253494b85b073f46a0ba1d798b726 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 6 Jun 2024 13:12:40 +0530 Subject: [PATCH 395/447] drm/amdgpu: Add infrastructure for Cleaner Shader feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cleaner shader is used by the CP firmware to clean LDS and GPRs between processes on the CUs. This adds an internal API for GFX IP code to allocate and initialize the cleaner shader. Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 35 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 14 ++++++++++ 2 files changed, 49 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9be8cafdcecc..4ed69fcfe9c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1416,3 +1416,38 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_current_compute_partition); device_remove_file(adev->dev, &dev_attr_available_compute_partition); } + +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size) +{ + if (!adev->gfx.enable_cleaner_shader) + return -EOPNOTSUPP; + + return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr) + memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr, + cleaner_shader_size); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 17b945b545b4..09379ef7388f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -446,6 +446,14 @@ struct amdgpu_gfx { uint32_t *ip_dump_gfx_queues; struct mutex reset_sem_mutex; + + /* cleaner shader */ + struct amdgpu_bo *cleaner_shader_obj; + unsigned int cleaner_shader_size; + u64 cleaner_shader_gpu_addr; + void *cleaner_shader_cpu_ptr; + const void *cleaner_shader_ptr; + bool enable_cleaner_shader; }; struct amdgpu_gfx_ras_reg_entry { @@ -547,6 
+555,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, void *ras_error_status, void (*func)(struct amdgpu_device *adev, void *ras_error_status, int xcc_id)); +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size); +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { From ee7a846ea27bcbef5182d15923339a7bf182ec65 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 12 Mar 2024 14:22:26 -0400 Subject: [PATCH 396/447] drm/amdgpu: Emit cleaner shader at end of IB submission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces the emission of a cleaner shader at the end of the IB submission process. This is achieved by adding a new function pointer, `emit_cleaner_shader`, to the `amdgpu_ring_funcs` structure. If the `emit_cleaner_shader` function is set in the ring functions, it is called during the VM flush process. The cleaner shader is only emitted if the `enable_cleaner_shader` flag is set in the `amdgpu_device` structure. This allows the cleaner shader emission to be controlled on a per-device basis. By emitting a cleaner shader at the end of the IB submission, we can ensure that the VM state is properly cleaned up after each submission. Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index c7f15edeb367..f93f51002201 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -236,6 +236,7 @@ struct amdgpu_ring_funcs { void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); + void (*emit_cleaner_shader)(struct amdgpu_ring *ring); }; struct amdgpu_ring { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bcb729094521..71ef3308be92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -681,6 +681,10 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; + if (adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader) + ring->funcs->emit_cleaner_shader(ring); + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; @@ -742,6 +746,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); return 0; } From 96595204195d7e13736a84295e217316610d4cdb Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 21:35:26 +0530 Subject: [PATCH 397/447] drm/amdgpu: Make enforce_isolation setting per GPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit makes enforce_isolation setting to be per GPU and per partition by adding the enforce_isolation array to the adev structure. 
The adev variable is set based on the global enforce_isolation module parameter during device initialization. In amdgpu_ids.c, the adev->enforce_isolation value for the current GPU and partition is used to determine whether to enforce isolation between graphics and compute processes on that GPU and partition. This allows the enforce_isolation setting to be controlled individually for each GPU and each partition, which is useful in systems with multiple GPUs and partitions where different isolation settings might be desired. v2: fix loop in amdgpu_vmid_mgr_init() (Alex) Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 17 +++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 3 ++- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 937de21a7142..0dceeea235cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1162,6 +1162,8 @@ struct amdgpu_device { bool debug_disable_soft_recovery; bool debug_use_vram_fw_buf; bool debug_enable_ras_aca; + + bool enforce_isolation[MAX_XCP]; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 78b3c067fea7..5d5ba1e3d90f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1110,7 +1110,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) struct drm_gpu_scheduler *sched = entity->rq->sched; struct amdgpu_ring *ring = to_amdgpu_ring(sched); - if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) + if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) return -EINVAL; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 482db4ebcc4b..e623af740aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1916,6 +1916,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) */ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int i; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -1970,6 +1972,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + for (i = 0; i < MAX_XCP; i++) + adev->enforce_isolation[i] = !!enforce_isolation; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 6608eeb61e5a..92d27d32de41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (amdgpu_vmid_uses_reserved(vm, vmhub)) { + if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -476,15 
+476,19 @@ error: /* * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID + * @adev: amdgpu_device pointer * @vm: the VM to check * @vmhub: the VMHUB which will be used * * Returns: True if the VM will use a reserved VMID. */ -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub) { return vm->reserved_vmid[vmhub] || - (enforce_isolation && AMDGPU_IS_GFXHUB(vmhub)); + (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ? + vm->root.bo->xcp_id : 0] && + AMDGPU_IS_GFXHUB(vmhub)); } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, @@ -600,9 +604,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } } /* alloc a default reserved vmid to enforce isolation */ - if (enforce_isolation) - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); - + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + if (adev->enforce_isolation[i]) + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 240fa6751260..4012fb2dd08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,7 +78,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, From 19cff16559a4f2d763faf4f8392bf86d3a21b93c Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 2 Aug 2024 14:22:26 -0400 Subject: [PATCH 398/447] drm/amdgpu: abort KIQ waits when there is a pending reset Stop waiting for the KIQ to respond when there is a reset pending. It is quite likely that the KIQ will never respond. 
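The new helper, added below, treats contention on the reset domain's rwsem as the signal that a reset is pending: normal work holds the read side, and a queued reset worker is blocked on the write side, which rwsem_is_contended() detects. Condensed from the diff that follows:

    /* Caller already holds the read side (asserted via lockdep), so a
     * contended rwsem means a reset worker is waiting for the write side.
     */
    static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain)
    {
            lockdep_assert_held(&domain->sem);
            return rwsem_is_contended(&domain->sem);
    }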
Signed-off-by: Christian König Suggested-by: Lijo Lazar Tested-by: Victor Skvortsov Signed-off-by: Victor Skvortsov Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b49b3650fd62..17a19d49d30a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -786,7 +786,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, goto failed_kiq; might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 4ae581f3fcb5..1cb920abc2fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -136,6 +136,12 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma return queue_work(domain->wq, work); } +static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) +{ + lockdep_assert_held(&domain->sem); + return rwsem_is_contended(&domain->sem); +} + void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); From dba1a6cfc311833e10df978f07147ea93b7045fa Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 20 Mar 2024 06:42:38 +0530 Subject: [PATCH 399/447] drm/amdgpu: Enforce isolation as part of the job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new parameter 'enforce_isolation' to the amdgpu_job structure. This parameter is used to determine whether shader isolation should be enforced for a job. The enforce_isolation parameter is then stored in the amdgpu_job structure and used when flushing the VM. The enforce_isolation field of the amdgpu_job structure is set directly after the job is allocated. This change allows more fine-grained control over shader isolation, making it possible to enforce isolation on a per-job basis rather than globally. This can be useful in scenarios where only certain jobs require isolation. 
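Condensed, the flow this patch adds (both fragments appear in the diffs below): the CS path stamps each job from the per-partition device setting, and the VM flush emits the cleaner shader only for stamped jobs:

    /* amdgpu_cs_pass1(): inherit the device/partition setting */
    p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id];

    /* amdgpu_vm_flush(): gate the cleaner shader on the job flag */
    if (adev->gfx.enable_cleaner_shader &&
        ring->funcs->emit_cleaner_shader &&
        job->enforce_isolation)
            ring->funcs->emit_cleaner_shader(ring);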
Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5d5ba1e3d90f..1e475eb01417 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -296,6 +296,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; + p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; } p->gang_leader = p->jobs[p->gang_leader_idx]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index a963a25ddd62..ce6b9ba967ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -76,6 +76,9 @@ struct amdgpu_job { /* job_run_counter >= 1 means a resubmit job */ uint32_t job_run_counter; + /* enforce isolation */ + bool enforce_isolation; + uint32_t num_ibs; struct amdgpu_ib ibs[]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 71ef3308be92..1468222ea0cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -682,7 +682,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, ring->funcs->emit_wreg; if (adev->gfx.enable_cleaner_shader && - ring->funcs->emit_cleaner_shader) + ring->funcs->emit_cleaner_shader && + job->enforce_isolation) ring->funcs->emit_cleaner_shader(ring); if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) From e189be9b2e3820c88164d95090f1fd6343cd77fc Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 27 May 2024 07:30:47 +0530 Subject: [PATCH 400/447] drm/amdgpu: Add enforce_isolation sysfs attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a new sysfs attribute 'enforce_isolation' to control the 'enforce_isolation' setting per GPU. The attribute can be read and written, and accepts values 0 (disabled) and 1 (enabled). When 'enforce_isolation' is enabled, reserved VMIDs are allocated for each ring. When it's disabled, the reserved VMIDs are freed. The set function locks a mutex before changing the 'enforce_isolation' flag and the VMIDs, and unlocks it afterwards. This ensures that these operations are atomic and prevents race conditions and other concurrency issues. 
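Example interaction with the new attribute (hypothetical card index; on partitioned GPUs the file carries one space-separated value per partition):

    $ cat /sys/class/drm/card0/device/enforce_isolation
    0
    $ echo 1 | sudo tee /sys/class/drm/card0/device/enforce_isolation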
Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 101 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 + 4 files changed, 107 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0dceeea235cf..aa97bbefe934 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1164,6 +1164,8 @@ struct amdgpu_device { bool debug_enable_ras_aca; bool enforce_isolation[MAX_XCP]; + /* Added this mutex for cleaner shader isolation between GFX and compute processes */ + struct mutex enforce_isolation_mutex; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e623af740aa3..2f1bc02309fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4065,6 +4065,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); mutex_init(&adev->gfx.reset_sem_mutex); + /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ + mutex_init(&adev->enforce_isolation_mutex); amdgpu_device_init_apu_flags(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 4ed69fcfe9c1..2e35fc2577f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1391,6 +1391,88 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", supported_partition); } +static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int i; + ssize_t size = 0; + + if (adev->xcp_mgr) { + for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { + size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]); + if (i < (adev->xcp_mgr->num_xcps - 1)) + size += sysfs_emit_at(buf, size, " "); + } + buf[size++] = '\n'; + } else { + size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]); + } + + return size; +} + +static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + long partition_values[MAX_XCP] = {0}; + int ret, i, num_partitions; + const char *input_buf = buf; + + for (i = 0; i < (adev->xcp_mgr ? 
adev->xcp_mgr->num_xcps : 1); i++) { + ret = sscanf(input_buf, "%ld", &partition_values[i]); + if (ret <= 0) + break; + + /* Move the pointer to the next value in the string */ + input_buf = strchr(input_buf, ' '); + if (input_buf) { + input_buf++; + } else { + i++; + break; + } + } + num_partitions = i; + + if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps) + return -EINVAL; + + if (!adev->xcp_mgr && num_partitions != 1) + return -EINVAL; + + for (i = 0; i < num_partitions; i++) { + if (partition_values[i] != 0 && partition_values[i] != 1) + return -EINVAL; + } + + mutex_lock(&adev->enforce_isolation_mutex); + + for (i = 0; i < num_partitions; i++) { + if (adev->enforce_isolation[i] && !partition_values[i]) { + /* Going from enabled to disabled */ + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); + } else if (!adev->enforce_isolation[i] && partition_values[i]) { + /* Going from disabled to enabled */ + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } + adev->enforce_isolation[i] = partition_values[i]; + } + + mutex_unlock(&adev->enforce_isolation_mutex); + + return count; +} + +static DEVICE_ATTR(enforce_isolation, 0644, + amdgpu_gfx_get_enforce_isolation, + amdgpu_gfx_set_enforce_isolation); + static DEVICE_ATTR(current_compute_partition, 0644, amdgpu_gfx_get_current_compute_partition, amdgpu_gfx_set_compute_partition); @@ -1417,6 +1499,25 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_available_compute_partition); } +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_sriov_vf(adev)) { + r = device_create_file(adev->dev, &dev_attr_enforce_isolation); + if (r) + return r; + } + + return 0; +} + +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev)) + device_remove_file(adev->dev, &dev_attr_enforce_isolation); +} + int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, unsigned int cleaner_shader_size) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 09379ef7388f..f7b37c340e36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -561,6 +561,8 @@ void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, unsigned int cleaner_shader_size, const void *cleaner_shader_ptr); +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { From d361ad5d2fc0e4d59d5d538092c9b37889756642 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 27 May 2024 07:38:21 +0530 Subject: [PATCH 401/447] drm/amdgpu: Add sysfs interface for running cleaner shader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new sysfs interface for running the cleaner shader on AMD GPUs. The cleaner shader is used to clear GPU memory before it's reused, which can help prevent data leakage between different processes. The new sysfs file is write-only and is named `run_cleaner_shader`. Write the number of the partition to this file to trigger the cleaner shader on that partition. There is only one partition on GPUs which do not support partitioning. 
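For example, to trigger the cleaner shader on partition 0, the only valid value on GPUs without partitioning (hypothetical card index; the file is write-only):

    $ echo 0 | sudo tee /sys/class/drm/card0/device/run_cleaner_shader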
Changes made in this patch: - Added `amdgpu_set_run_cleaner_shader` function to handle writes to the `run_cleaner_shader` sysfs file. - Added `run_cleaner_shader` to the list of device attributes in `amdgpu_device_attrs`. - Updated `default_attr_update` to handle `run_cleaner_shader`. - Added `AMDGPU_DEVICE_ATTR_WO` macro to create write-only device attributes. v2: fix error handling (Alex) Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 134 ++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 2e35fc2577f9..76f77cf562af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -24,10 +24,13 @@ */ #include +#include + #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amdgpu_xcp.h" #include "amdgpu_xgmi.h" @@ -1391,6 +1394,129 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", supported_partition); } +static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + long timeout = msecs_to_jiffies(1000); + struct dma_fence *f = NULL; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + int i, r; + + r = amdgpu_job_alloc_with_ib(adev, NULL, NULL, + 64, AMDGPU_IB_POOL_DIRECT, + &job); + if (r) + goto err; + + job->enforce_isolation = true; + + ib = &job->ibs[0]; + for (i = 0; i <= ring->funcs->align_mask; ++i) + ib->ptr[i] = ring->funcs->nop; + ib->length_dw = ring->funcs->align_mask + 1; + + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err_free; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) + r = -ETIMEDOUT; + else if (r > 0) + r = 0; + + amdgpu_ib_free(adev, ib, f); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + amdgpu_ib_free(adev, ib, f); +err: + return r; +} + +static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) +{ + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); + struct amdgpu_ring *ring; + int num_xcc_to_clear; + int i, r, xcc_id; + + if (adev->gfx.num_xcc_per_xcp) + num_xcc_to_clear = adev->gfx.num_xcc_per_xcp; + else + num_xcc_to_clear = 1; + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; + if ((ring->xcp_id == xcp_id) && ring->sched.ready) { + r = amdgpu_gfx_run_cleaner_shader_job(ring); + if (r) + return r; + num_xcc_to_clear--; + break; + } + } + } + + if (num_xcc_to_clear) + return -ENOENT; + + return 0; +} + +static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int ret; + long value; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + ret = kstrtol(buf, 0, &value); + + if (ret) + return -EINVAL; + + if (value < 0) + return -EINVAL; + + if (adev->xcp_mgr) { + if (value >= adev->xcp_mgr->num_xcps) + return -EINVAL; + } else { + if (value > 1) + return -EINVAL; + } + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + ret = 
amdgpu_gfx_run_cleaner_shader(adev, value); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + if (ret) + return ret; + + return count; +} + static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, struct device_attribute *attr, char *buf) @@ -1469,6 +1595,9 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, return count; } +static DEVICE_ATTR(run_cleaner_shader, 0200, + NULL, amdgpu_gfx_set_run_cleaner_shader); + static DEVICE_ATTR(enforce_isolation, 0644, amdgpu_gfx_get_enforce_isolation, amdgpu_gfx_set_enforce_isolation); @@ -1509,6 +1638,10 @@ int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) return r; } + r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); + if (r) + return r; + return 0; } @@ -1516,6 +1649,7 @@ void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) { if (!amdgpu_sriov_vf(adev)) device_remove_file(adev->dev, &dev_attr_enforce_isolation); + device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); } int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, From 22ff907d4f0457b2800f4c6d4f40d4d4d31f7de1 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 7 Jul 2024 08:54:04 +0530 Subject: [PATCH 402/447] drm/amdgpu: Add PACKET3_RUN_CLEANER_SHADER for cleaner shader execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the PACKET3_RUN_CLEANER_SHADER definition. This packet is a command packet used to instruct the GPU to execute the cleaner shader. The cleaner shader is a piece of GPU code that is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Clearing these resources is important for ensuring data isolation between different workloads running on the GPU. The PACKET3_RUN_CLEANER_SHADER packet is used to trigger the execution of the cleaner shader on the GPU. The packet consists of a header followed by a RESERVED field, which is programmed to zero. When the GPU receives this packet, it fetches and executes the cleaner shader instructions from the location specified in the packet. The cleaner shader feature helps to enhance security and reliability by preventing data leaks between workloads. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15d.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index e74e1983da53..b9cbeb389edc 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -413,6 +413,10 @@ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_CLEANER_SHADER 0xD2 +/* 1. header + * 2. RESERVED [31:0] + */ #define VCE_CMD_NO_OP 0x00000000 #define VCE_CMD_END 0x00000001 From c2e70d307f4491ff970208a41cce84c95771f340 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 21:56:57 +0530 Subject: [PATCH 403/447] drm/amdgpu/gfx9: Implement cleaner shader support for GFX9 hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch modifies the gfx_v9_0_kiq_set_resources function to write the cleaner shader's memory controller address to the ring buffer. 
It also adds a new function, gfx_v9_0_ring_emit_cleaner_shader, which emits the PACKET3_RUN_CLEANER_SHADER packet to the ring buffer. This patch adds support for the PACKET3_RUN_CLEANER_SHADER packet in the gfx_v9_0 module. This packet is used to emit the cleaner shader, which is used to clear GPU memory before it's reused, helping to prevent data leakage between different processes. Finally, the patch updates the ring function structures to include the new gfx_v9_0_ring_emit_cleaner_shader function. This allows the cleaner shader to be emitted as part of the ring's operations. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 45 ++++++++++++++++--- .../drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h | 26 +++++++++++ 2 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index db21fb951e0e..3045b8b0796d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -50,6 +50,7 @@ #include "amdgpu_ring_mux.h" #include "gfx_v9_4.h" #include "gfx_v9_0.h" +#include "gfx_v9_0_cleaner_shader.h" #include "gfx_v9_4_2.h" #include "asic_reg/pwr/pwr_10_0_offset.h" @@ -899,6 +900,12 @@ static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | @@ -908,8 +915,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ amdgpu_ring_write(kiq_ring, 0); /* oac mask */ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ } @@ -2211,6 +2218,12 @@ static int gfx_v9_0_sw_init(void *handle) break; } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -2373,6 +2386,10 @@ static int gfx_v9_0_sw_init(void *handle) gfx_v9_0_alloc_ip_dump(adev); + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + return 0; } @@ -2408,6 +2425,8 @@ static int gfx_v9_0_sw_fini(void *handle) } gfx_v9_0_free_microcode(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); + kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); @@ -3952,6 +3971,9 @@ static int gfx_v9_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); + if (!amdgpu_sriov_vf(adev)) gfx_v9_0_init_golden_registers(adev); @@ -7368,6 +7390,13 @@ static void gfx_v9_ip_dump(void *handle) } +static void 
gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -7417,7 +7446,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2 + /* SWITCH_BUFFER */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7439,6 +7469,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .reset = gfx_v9_0_reset_kgq, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, }; static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { @@ -7471,7 +7502,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2 + /* SWITCH_BUFFER */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7495,6 +7527,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .patch_cntl = gfx_v9_0_ring_patch_cntl, .patch_de = gfx_v9_0_ring_patch_de_meta, .patch_ce = gfx_v9_0_ring_patch_ce_meta, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -7515,7 +7548,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7534,6 +7568,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, .reset = gfx_v9_0_reset_kcq, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h new file mode 100644 index 000000000000..36c0292b5110 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +/* Define the cleaner shader gfx_9_0 */ +static const u32 __maybe_unused gfx_9_0_cleaner_shader_hex[] = { + /* Add the cleaner shader code here */ +}; From d4c38154951b2bff6bfa4d5eb56df0bd08703cf9 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 22:12:02 +0530 Subject: [PATCH 404/447] drm/amdgpu/gfx9: Implement cleaner shader support for GFX9.4.3 hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch modifies the gfx_v9_4_3_kiq_set_resources function to write the cleaner shader's memory controller address to the ring buffer. It also adds a new function, gfx_v9_4_3_ring_emit_cleaner_shader, which emits the PACKET3_RUN_CLEANER_SHADER packet to the ring buffer. This patch adds support for the PACKET3_RUN_CLEANER_SHADER packet in the gfx_v9_4_3 module. This packet is used to emit the cleaner shader, which is used to clear GPU memory before it's reused, helping to prevent data leakage between different processes. Finally, the patch updates the ring function structures to include the new gfx_v9_4_3_ring_emit_cleaner_shader function. This allows the cleaner shader to be emitted as part of the ring's operations. 
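For illustration, the exact two dwords this emit helper produces can be re-derived outside the driver. The following is a minimal userspace sketch patterned after the PACKET3() macro in soc15d.h, not driver code:

#include <stdint.h>
#include <stdio.h>

/* PM4 type-3 header model: bits 31:30 = packet type (3),
 * bits 29:16 = count (one less than the number of payload dwords),
 * bits 15:8 = opcode.
 */
#define PACKET3(op, n)  (((uint32_t)3u << 30) | \
                         (((uint32_t)(n) & 0x3FFF) << 16) | \
                         (((uint32_t)(op) & 0xFF) << 8))
#define PACKET3_RUN_CLEANER_SHADER 0xD2

int main(void)
{
        /* Header plus one RESERVED dword programmed to zero -- the same
         * two dwords accounted as "+ 2" in emit_frame_size below.
         */
        uint32_t pkt[2] = { PACKET3(PACKET3_RUN_CLEANER_SHADER, 0), 0 };

        printf("0x%08x 0x%08x\n", pkt[0], pkt[1]); /* 0xc000d200 0x00000000 */
        return 0;
}
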
Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 57 +++++++++++++++++-- .../amd/amdgpu/gfx_v9_4_3_cleaner_shader.h | 26 +++++++++ 2 files changed, 78 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 619ff3ec2c86..28f4212a8db2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -37,6 +37,7 @@ #include "gc/gc_9_4_3_sh_mask.h" #include "gfx_v9_4_3.h" +#include "gfx_v9_4_3_cleaner_shader.h" #include "amdgpu_xcp.h" #include "amdgpu_aca.h" @@ -169,6 +170,12 @@ static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_i static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | @@ -178,8 +185,8 @@ static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ amdgpu_ring_write(kiq_ring, 0); /* oac mask */ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ } @@ -1047,6 +1054,24 @@ static int gfx_v9_4_3_sw_init(void *handle) int i, j, k, r, ring_id, xcc_id, num_xcc; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 3): + adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex); + if (adev->gfx.mec_fw_version >= 153) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -1140,12 +1165,19 @@ static int gfx_v9_4_3_sw_init(void *handle) return r; - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { r = amdgpu_gfx_sysfs_init(adev); + if (r) + return r; + } gfx_v9_4_3_alloc_ip_dump(adev); - return r; + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + + return 0; } static int gfx_v9_4_3_sw_fini(void *handle) @@ -1163,11 +1195,14 @@ static int gfx_v9_4_3_sw_fini(void *handle) amdgpu_gfx_kiq_fini(adev, i); } + amdgpu_gfx_cleaner_shader_sw_fini(adev); + gfx_v9_4_3_mec_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); if (!amdgpu_sriov_vf(adev)) amdgpu_gfx_sysfs_fini(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); @@ -2308,6 +2343,9 
@@ static int gfx_v9_4_3_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); + if (!amdgpu_sriov_vf(adev)) gfx_v9_4_3_init_golden_registers(adev); @@ -4565,6 +4603,13 @@ static void gfx_v9_4_3_ip_dump(void *handle) amdgpu_gfx_off_ctrl(adev, true); } +static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, @@ -4604,7 +4649,8 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_4_3_emit_mem_sync */ 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_4_3_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ .emit_ib = gfx_v9_4_3_ring_emit_ib_compute, .emit_fence = gfx_v9_4_3_ring_emit_fence, @@ -4623,6 +4669,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, .reset = gfx_v9_4_3_reset_kcq, + .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h new file mode 100644 index 000000000000..042944ac75df --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* Define the cleaner shader gfx_9_4_3 */ +static const u32 gfx_9_4_3_cleaner_shader_hex[] = { +}; From 335288315af18c0def7f47a37fe7eaa782c98f6d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 22:14:41 +0530 Subject: [PATCH 405/447] drm/amdgpu/gfx9: Add cleaner shader for GFX9.4.3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the cleaner shader microcode for GFX9.4.3 GPUs. The cleaner shader is a piece of GPU code that is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Clearing these resources is important for ensuring data isolation between different workloads running on the GPU. Without the cleaner shader, residual data from a previous workload could potentially be accessed by a subsequent workload, leading to data leaks and incorrect computation results. The cleaner shader microcode is represented as an array of 32-bit words (`gfx_9_4_3_cleaner_shader_hex`). This array is the binary representation of the cleaner shader code, which is written in a low-level GPU instruction set. When the cleaner shader feature is enabled, the AMDGPU driver loads this array into a specific location in the GPU memory. The GPU then reads this memory location to fetch and execute the cleaner shader instructions. The cleaner shader is executed automatically by the GPU at the end of each workload, before the next workload starts. This ensures that all GPU resources are in a clean state before the start of each workload. This addition is part of the cleaner shader feature implementation. The cleaner shader feature helps improve GPU performance and resource utilization by cleaning up GPU resources after they are used. It also enhances security and reliability by preventing data leaks between workloads. v2: fix copyright date (Alex) Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- .../amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm | 153 ++++++++++++++++++ .../amd/amdgpu/gfx_v9_4_3_cleaner_shader.h | 38 +++++ 2 files changed, 191 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm new file mode 100644 index 000000000000..d5325ef80ab0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. +//To turn this shader program on for complitaion change this to main and lower shader main to main_1 + +// MI300 : Clear SGPRs, VGPRs and LDS +// Uses two kernels launched separately: +// 1. Clean VGPRs, LDS, and lower SGPRs +// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU +// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD +// Waves in the workgroup share the 64KB of LDS +// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383 +// Each wave clears 128 VGPRs, so all 512 in the SIMD +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. +// 2. Clean remaining SGPRs +// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU +// Waves are allocating 96 SGPRs +// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223. +// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799 +// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER +// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command +// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799 + +shader main + asic(MI300) + type(CS) + wave_size(64) +// Note: original source code from SQ team + +// (theorhetical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks) + + s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3 + s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s3 == 1) + S_BARRIER + + s_movk_i32 m0, 0x0000 + s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance) + // + // CLEAR VGPRs + // + s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing +label_0005: + v_mov_b32 v0, 0 + v_mov_b32 v1, 0 + v_mov_b32 v2, 0 + v_mov_b32 v3, 0 + v_mov_b32 v4, 0 + v_mov_b32 v5, 0 + v_mov_b32 v6, 0 + v_mov_b32 v7, 0 + s_sub_u32 s2, s2, 8 + s_set_gpr_idx_idx s2 + s_cbranch_scc0 label_0005 + s_set_gpr_idx_off + + // + // + + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iteraions + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + 
v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + // + // CLEAR SGPRs + // +label_clean_sgpr_1: + s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance) + s_nop 0 +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc, flat scratch + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0 //clear vcc + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 +s_endpgm + +label_0023: + + s_sethalt 1 + + s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance) + s_nop 0 +label_sgpr_loop1: + + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop1 + + //clear vcc, flat scratch + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0xee //clear vcc + +s_endpgm +end + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h index 042944ac75df..69aa567c6c1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h @@ -23,4 +23,42 @@ /* Define the cleaner shader gfx_9_4_3 */ static const u32 gfx_9_4_3_cleaner_shader_hex[] = { + 0xbf068100, 0xbf84003b, + 0xbf8a0000, 0xb07c0000, + 0xbe8200ff, 0x00000078, + 0xbf110802, 0x7e000280, + 0x7e020280, 0x7e040280, + 0x7e060280, 0x7e080280, + 0x7e0a0280, 0x7e0c0280, + 0x7e0e0280, 0x80828802, + 0xbe803202, 0xbf84fff5, + 0xbf9c0000, 0xbe8200ff, + 0x80000000, 0x86020102, + 0xbf840011, 0xbefe00c1, + 0xbeff00c1, 0xd28c0001, + 0x0001007f, 0xd28d0001, + 0x0002027e, 0x10020288, + 0xbe8200bf, 0xbefc00c1, + 0xd89c2000, 0x00020201, + 0xd89c6040, 0x00040401, + 0x320202ff, 0x00000400, + 0x80828102, 0xbf84fff8, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea0180, + 0xbeec0180, 0xbeee0180, + 0xbef00180, 0xbef20180, + 0xbef40180, 0xbef60180, + 0xbef80180, 0xbefa0180, + 0xbf810000, 0xbf8d0001, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea01ff, + 0x000000ee, 0xbf810000, }; From b1f49ff9cbe14264c7eb33462fb700c49c7d91a8 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 22:18:45 +0530 Subject: [PATCH 406/447] drm/amdgpu/gfx9: Add cleaner shader support for GFX9.4.4 hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit extends the cleaner shader feature to support GFX9.4.4 hardware. The cleaner shader feature is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). This operation needs to be performed in isolation, while no other tasks should be running on the GPU at the same time. 
Previously, the cleaner shader feature was implemented for GFX9.4.3 hardware. This commit adds support for GFX9.4.4 hardware by allowing the cleaner shader to be used with this hardware version. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 28f4212a8db2..fa6752585a72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1056,6 +1056,7 @@ static int gfx_v9_4_3_sw_init(void *handle) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex); if (adev->gfx.mec_fw_version >= 153) { From 234eebe16138f94de3046f60c52763dc17fe5fed Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Mon, 29 Jul 2024 14:22:30 -0400 Subject: [PATCH 407/447] drm/amdkfd: APIs to stop/start KFD scheduling Provide amdgpu_amdkfd_stop_sched() for amdgpu to stop KFD scheduling compute work on HIQ. amdgpu_amdkfd_start_sched() resumes the scheduling. When amdgpu_amdkfd_stop_sched is called, KFD will unmap queues from runlist. If users send ioctls to KFD to create queues, they'll be added but those queues won't be mapped to runlist (so not scheduled) until amdgpu_amdkfd_start_sched is called. v2: fix build (Alex) Signed-off-by: Amber Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 18 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 14 +++++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 39 +++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 58 ++++++++++++++++++- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 9 +++ 5 files changed, 137 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index c272461d70a9..64a989cbc301 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -887,3 +887,21 @@ free_ring_funcs: return r; } + +/* Stop scheduling on KFD */ +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_stop_sched(adev->kfd.dev, node_id); +} + +/* Start scheduling on KFD */ +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_start_sched(adev->kfd.dev, node_id); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4ed49265c764..825c7ffe4bc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -264,6 +264,8 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, uint32_t *payload); int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, u32 inst); +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id); +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id); /* Read user wptr from a specified user address space with page fault * disabled. 
The memory must be pinned and mapped to the hardware when @@ -426,6 +428,8 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); int kgd2kfd_check_and_lock_kfd(void); void kgd2kfd_unlock_kfd(void); +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); #else static inline int kgd2kfd_init(void) { @@ -496,5 +500,15 @@ static inline int kgd2kfd_check_and_lock_kfd(void) static inline void kgd2kfd_unlock_kfd(void) { } + +static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} + +static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index c2d2598f776c..fad1c8f2bc83 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1446,6 +1446,45 @@ void kgd2kfd_unlock_kfd(void) mutex_unlock(&kfd_processes_mutex); } +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + int ret; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + node = kfd->nodes[node_id]; + + ret = node->dqm->ops.unhalt(node->dqm); + if (ret) + dev_err(kfd_device, "Error in starting scheduler\n"); + + return ret; +} + +int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + + node = kfd->nodes[node_id]; + return node->dqm->ops.halt(node->dqm); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f6e211070299..d23388ea8181 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1679,6 +1679,60 @@ static int initialize_cpsch(struct device_queue_manager *dqm) return 0; } +/* halt_cpsch: + * Unmap queues so the schedule doesn't continue remaining jobs in the queue. + * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch + * is called. 
+ */ +static int halt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running) { + dqm_unlock(dqm); + return 0; + } + + WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); + + if (!dqm->is_hws_hang) { + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = unmap_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD, false); + else + ret = remove_all_queues_mes(dqm); + } + dqm->sched_halt = true; + dqm_unlock(dqm); + + return ret; +} + +/* unhalt_cpsch + * Unset dqm->sched_halt and map queues back to runlist + */ +static int unhalt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running || !dqm->sched_halt) { + WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n"); + dqm_unlock(dqm); + return 0; + } + dqm->sched_halt = false; + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, + 0, USE_DEFAULT_GRACE_PERIOD); + dqm_unlock(dqm); + + return ret; +} + static int start_cpsch(struct device_queue_manager *dqm) { struct device *dev = dqm->dev->adev->dev; @@ -1984,7 +2038,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) struct device *dev = dqm->dev->adev->dev; int retval; - if (!dqm->sched_running) + if (!dqm->sched_running || dqm->sched_halt) return 0; if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) return 0; @@ -2727,6 +2781,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) dqm->ops.initialize = initialize_cpsch; dqm->ops.start = start_cpsch; dqm->ops.stop = stop_cpsch; + dqm->ops.halt = halt_cpsch; + dqm->ops.unhalt = unhalt_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; dqm->ops.register_process = register_process; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index dfb36a246637..08b40826ad1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -106,6 +106,12 @@ union GRBM_GFX_INDEX_BITS { * @uninitialize: Destroys all the device queue manager resources allocated in * initialize routine. * + * @halt: This routine unmaps queues from runlist and set halt status to true + * so no more queues will be mapped to runlist until unhalt. + * + * @unhalt: This routine unset halt status to flase and maps queues back to + * runlist. + * * @create_kernel_queue: Creates kernel queue. Used for debug queue. * * @destroy_kernel_queue: Destroys kernel queue. Used for debug queue. 
@@ -153,6 +159,8 @@ struct device_queue_manager_ops { int (*start)(struct device_queue_manager *dqm); int (*stop)(struct device_queue_manager *dqm); void (*uninitialize)(struct device_queue_manager *dqm); + int (*halt)(struct device_queue_manager *dqm); + int (*unhalt)(struct device_queue_manager *dqm); int (*create_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd); @@ -264,6 +272,7 @@ struct device_queue_manager { struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; bool sched_running; + bool sched_halt; /* used for GFX 9.4.3 only */ uint32_t current_logical_xcc_start; From afefd6f245024684fff75100052065d6a9e8f75f Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 6 Jun 2024 13:28:02 +0530 Subject: [PATCH 408/447] drm/amdgpu: Implement Enforce Isolation Handler for KGD/KFD serialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces the Enforce Isolation Handler designed to enforce shader isolation on AMD GPUs, which helps to prevent data leakage between different processes. The handler counts the number of emitted fences for each GFX and compute ring. If there are any fences, it schedules the `enforce_isolation_work` to be run after a delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion Driver (KFD) to resume the runqueue. The function is synchronized using the `enforce_isolation_mutex`. This commit also introduces a reference count mechanism (kfd_sch_req_count) to keep track of the number of requests to enable the KFD scheduler. When a request to enable the KFD scheduler is made, the reference count is decremented. When the reference count reaches zero, a delayed work is scheduled to enforce isolation after a delay of GFX_SLICE_PERIOD. When a request to disable the KFD scheduler is made, the function first checks if the reference count is zero. If it is, it cancels the delayed work for enforcing isolation and checks if the KFD scheduler is active. If the KFD scheduler is active, it sends a request to stop the KFD scheduler and sets the KFD scheduler state to inactive. Then, it increments the reference count. The function is synchronized using the kfd_sch_mutex to ensure that the KFD scheduler state and reference count are updated atomically. 
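Before the diff, a compact restatement may help: the request counter behaves like a refcount on "KFD must stay stopped". Below is a small userspace model of that bookkeeping (illustrative names, a pthread mutex standing in for kfd_sch_mutex; a sketch of the semantics described above, not the driver implementation):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t sch_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int sch_req_count;      /* outstanding stop requests */
static bool sch_inactive;               /* KFD scheduling stopped? */

static void kfd_sch_ctrl_model(bool enable)
{
        pthread_mutex_lock(&sch_mutex);
        if (enable) {
                if (sch_req_count == 0)                 /* imbalance bug */
                        fprintf(stderr, "unbalanced enable request\n");
                else if (--sch_req_count == 0 && sch_inactive)
                        printf("re-arm delayed isolation work\n");
        } else {
                if (sch_req_count == 0) {               /* 0 -> 1 transition */
                        printf("cancel work; stop KFD scheduling\n");
                        sch_inactive = true;
                }
                sch_req_count++;
        }
        pthread_mutex_unlock(&sch_mutex);
}

int main(void)
{
        kfd_sch_ctrl_model(false);  /* first KGD ring begins use: KFD stops */
        kfd_sch_ctrl_model(false);  /* second ring: only the count grows */
        kfd_sch_ctrl_model(true);   /* one ring done */
        kfd_sch_ctrl_model(true);   /* last ring done: delayed work re-armed */
        return 0;
}

Note that only the delayed work handler, not this control path, flips the inactive flag back, mirroring how amdgpu_gfx_enforce_isolation_handler calls amdgpu_amdkfd_start_sched once the rings go idle.
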
Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Christian König Suggested-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 167 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 15 ++ 4 files changed, 200 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index aa97bbefe934..e8c284aea1f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,6 +118,8 @@ #define MAX_GPU_INSTANCE 64 +#define GFX_SLICE_PERIOD msecs_to_jiffies(250) + struct amdgpu_gpu_instance { struct amdgpu_device *adev; int mgpu_fan_enabled; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2f1bc02309fe..ad97f03f1358 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4067,6 +4067,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->gfx.reset_sem_mutex); /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ mutex_init(&adev->enforce_isolation_mutex); + mutex_init(&adev->gfx.kfd_sch_mutex); amdgpu_device_init_apu_flags(adev); @@ -4098,6 +4099,21 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + /* + * Initialize the enforce_isolation work structures for each XCP + * partition. This work handler is responsible for enforcing shader + * isolation on AMD GPUs. It counts the number of emitted fences for + * each GFX and compute ring. If there are any fences, it schedules + * the `enforce_isolation_work` to be run after a delay. If there are + * no fences, it signals the Kernel Fusion Driver (KFD) to resume the + * runqueue. + */ + for (i = 0; i < MAX_XCP; i++) { + INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work, + amdgpu_gfx_enforce_isolation_handler); + adev->gfx.enforce_isolation[i].adev = adev; + adev->gfx.enforce_isolation[i].xcp_id = i; + } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 76f77cf562af..b4efeef848de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1686,3 +1686,170 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr, cleaner_shader_size); } + +/** + * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver) + * @adev: amdgpu_device pointer + * @idx: Index of the scheduler to control + * @enable: Whether to enable or disable the KFD scheduler + * + * This function is used to control the KFD (Kernel Fusion Driver) scheduler + * from the KGD. It is part of the cleaner shader feature. This function plays + * a key role in enforcing process isolation on the GPU. + * + * The function uses a reference count mechanism (kfd_sch_req_count) to keep + * track of the number of requests to enable the KFD scheduler. When a request + * to enable the KFD scheduler is made, the reference count is decremented. + * When the reference count reaches zero, a delayed work is scheduled to + * enforce isolation after a delay of GFX_SLICE_PERIOD. 
+ * + * When a request to disable the KFD scheduler is made, the function first + * checks if the reference count is zero. If it is, it cancels the delayed work + * for enforcing isolation and checks if the KFD scheduler is active. If the + * KFD scheduler is active, it sends a request to stop the KFD scheduler and + * sets the KFD scheduler state to inactive. Then, it increments the reference + * count. + * + * The function is synchronized using the kfd_sch_mutex to ensure that the KFD + * scheduler state and reference count are updated atomically. + * + * Note: If the reference count is already zero when a request to enable the + * KFD scheduler is made, it means there's an imbalance bug somewhere. The + * function triggers a warning in this case. + */ +static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, + bool enable) +{ + mutex_lock(&adev->gfx.kfd_sch_mutex); + + if (enable) { + /* If the count is already 0, it means there's an imbalance bug somewhere. + * Note that the bug may be in a different caller than the one which triggers the + * WARN_ON_ONCE. + */ + if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { + dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); + goto unlock; + } + + adev->gfx.kfd_sch_req_count[idx]--; + + if (adev->gfx.kfd_sch_req_count[idx] == 0 && + adev->gfx.kfd_sch_inactive[idx]) { + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + GFX_SLICE_PERIOD); + } + } else { + if (adev->gfx.kfd_sch_req_count[idx] == 0) { + cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); + if (!adev->gfx.kfd_sch_inactive[idx]) { + amdgpu_amdkfd_stop_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = true; + } + } + + adev->gfx.kfd_sch_req_count[idx]++; + } + +unlock: + mutex_unlock(&adev->gfx.kfd_sch_mutex); +} + +/** + * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation + * + * @work: work_struct. + * + * This function is the work handler for enforcing shader isolation on AMD GPUs. + * It counts the number of emitted fences for each GFX and compute ring. If there + * are any fences, it schedules the `enforce_isolation_work` to be run after a + * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion + * Driver (KFD) to resume the runqueue. The function is synchronized using the + * `enforce_isolation_mutex`. 
+ */ +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) +{ + struct amdgpu_isolation_work *isolation_work = + container_of(work, struct amdgpu_isolation_work, work.work); + struct amdgpu_device *adev = isolation_work->adev; + u32 i, idx, fences = 0; + + if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = isolation_work->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) { + if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]); + } + for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) { + if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); + } + if (fences) { + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + GFX_SLICE_PERIOD); + } else { + /* Tell KFD to resume the runqueue */ + if (adev->kfd.init_complete) { + WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); + WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); + amdgpu_amdkfd_start_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = false; + } + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); + } + mutex_unlock(&adev->enforce_isolation_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f7b37c340e36..e28c1ebfa98f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -34,6 +34,7 @@ #include "soc15.h" #include "amdgpu_ras.h" #include "amdgpu_ring_mux.h" +#include "amdgpu_xcp.h" /* GFX current status */ #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -343,6 +344,12 @@ struct amdgpu_me { DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); }; +struct amdgpu_isolation_work { + struct amdgpu_device *adev; + u32 xcp_id; + struct delayed_work work; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; @@ -454,6 +461,11 @@ struct amdgpu_gfx { void *cleaner_shader_cpu_ptr; const void *cleaner_shader_ptr; bool enable_cleaner_shader; + struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; + /* Mutex for synchronizing KFD scheduler operations */ + struct mutex kfd_sch_mutex; + u64 kfd_sch_req_count[MAX_XCP]; + bool kfd_sch_inactive[MAX_XCP]; }; struct amdgpu_gfx_ras_reg_entry { @@ -563,6 +575,9 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, const void *cleaner_shader_ptr); int 
amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { From b710dbe55dee946d82bc4815c40373cf8a391581 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 18 Jul 2024 18:22:35 +0530 Subject: [PATCH 409/447] drm/amdgpu/gfx9: Apply Isolation Enforcement to GFX & Compute rings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit applies isolation enforcement to the GFX and Compute rings in the gfx_v9_0 module. The commit sets `amdgpu_gfx_enforce_isolation_ring_begin_use` and `amdgpu_gfx_enforce_isolation_ring_end_use` as the functions to be called when a ring begins and ends its use, respectively. `amdgpu_gfx_enforce_isolation_ring_begin_use` is called when a ring begins its use. This function cancels any scheduled `enforce_isolation_work` and, if necessary, signals the Kernel Fusion Driver (KFD) to stop the runqueue. `amdgpu_gfx_enforce_isolation_ring_end_use` is called when a ring ends its use. This function schedules `enforce_isolation_work` to be run after a delay. These functions are part of the Enforce Isolation Handler, which enforces shader isolation on AMD GPUs to prevent data leakage between different processes. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3045b8b0796d..21089aadbb7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7470,6 +7470,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_mem_sync = gfx_v9_0_emit_mem_sync, .reset = gfx_v9_0_reset_kgq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { @@ -7528,6 +7530,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .patch_de = gfx_v9_0_ring_patch_de_meta, .patch_ce = gfx_v9_0_ring_patch_ce_meta, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -7569,6 +7573,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wave_limit = gfx_v9_0_emit_wave_limit, .reset = gfx_v9_0_reset_kcq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { From f846250b8a20e6c1225c64ce87a90d4f29cbf351 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 14 May 2024 23:55:20 +0530 Subject: [PATCH 410/447] drm/amdgpu/gfx_v9_4_3: Apply Isolation Enforcement to GFX & Compute rings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit This commit applies isolation enforcement to the GFX and Compute rings in the gfx_v9_4_3 module. The commit sets `amdgpu_gfx_enforce_isolation_ring_begin_use` and `amdgpu_gfx_enforce_isolation_ring_end_use` as the functions to be called when a ring begins and ends its use, respectively. `amdgpu_gfx_enforce_isolation_ring_begin_use` is called when a ring begins its use. This function cancels any scheduled `enforce_isolation_work` and, if necessary, signals the Kernel Fusion Driver (KFD) to stop the runqueue. `amdgpu_gfx_enforce_isolation_ring_end_use` is called when a ring ends its use. This function schedules `enforce_isolation_work` to be run after a delay. These functions are part of the Enforce Isolation Handler, which enforces shader isolation on AMD GPUs to prevent data leakage between different processes. The commit also includes a check for the type of the ring. If the type of the ring is `AMDGPU_RING_TYPE_COMPUTE`, the `xcp_id` of the `enforce_isolation` structure in the `gfx` structure of the `amdgpu_device` is set to the `xcp_id` of the ring. This ensures that the correct `xcp_id` is used when enforcing isolation on compute rings. The `xcp_id` is an identifier for an XCP partition, and different rings can be associated with different XCP partitions. Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 228fd4dd32f1..26e2188101e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -75,6 +75,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_mask; ring->xcp_id = AMDGPU_XCP_NO_PARTITION; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) return; @@ -103,6 +105,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { ring->xcp_id = xcp_id; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index fa6752585a72..2067f26d3a9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4671,6 +4671,8 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, .reset = gfx_v9_4_3_reset_kcq, .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { From 87758a0ef12cfebb9fab8ef1d0e234dd7b3f4579 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Mon, 29 Apr 2024 16:40:44 -0400 Subject: [PATCH 411/447] drm/amdkfd: Enable processes isolation on gfx9 When amdgpu enable enforce_isolation, KFD enables single-process mode in HWS and sets exec_cleaner_shader bit in MAP_PROCESS. 
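The header changes below repurpose a previously reserved bit (bit 17 of the second MAP_PROCESS dword) as exec_cleaner_shader. A quick standalone layout check follows; it is a sketch assuming a little-endian build with low-to-high bitfield allocation, trimmed to the fields relevant here:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Trimmed model of the pm4_mes_map_process bitfields2 layout; the
 * real packet carries further fields after these.
 */
struct map_process_bits2 {
        uint32_t pasid:16;              /* 0 - 15 */
        uint32_t reserved1:1;           /* 16 */
        uint32_t exec_cleaner_shader:1; /* 17 */
        uint32_t rest:14;               /* 18 - 31, stand-in */
};

int main(void)
{
        struct map_process_bits2 b2 = {0};
        uint32_t raw;

        b2.pasid = 0x1234;              /* example PASID */
        b2.exec_cleaner_shader = 1;     /* set when enforce_isolation is on */

        memcpy(&raw, &b2, sizeof(raw));
        printf("bitfields2 = 0x%08x\n", raw); /* 0x00021234 on such builds */
        return 0;
}
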
Signed-off-by: Amber Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 14 +++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 5 +++-- .../gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 00776f08351c..1f9f5bfeaf86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -37,11 +37,14 @@ static int pm_map_process_v9(struct packet_manager *pm, struct kfd_node *kfd = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); + if (adev->enforce_isolation[kfd->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -89,14 +92,18 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, struct pm4_mes_map_process_aldebaran *packet; uint64_t vm_page_table_base_addr = qpd->page_table_base; struct kfd_dev *kfd = pm->dqm->dev->kfd; + struct kfd_node *knode = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); int i; + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process_aldebaran *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process_aldebaran)); + if (adev->enforce_isolation[knode->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -144,17 +151,22 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, int concurrent_proc_cnt = 0; struct kfd_node *kfd = pm->dqm->dev; + struct amdgpu_device *adev = kfd->adev; /* Determine the number of processes to map together to HW: * it can not exceed the number of VMIDs available to the * scheduler, and it is determined by the smaller of the number * of processes in the runlist and kfd module parameter * hws_max_conc_proc. + * However, if enforce_isolation is set (toggle LDS/VGPRs/SGPRs + * cleaner between process switch), enable single-process mode + * in HWS. * Note: the arbitration between the number of VMIDs and * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - concurrent_proc_cnt = min(pm->dqm->processes_count, + concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? 
+ 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); packet = (struct pm4_mes_runlist *)buffer; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index 8b6b2bd5c148..cd8611401a66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -145,8 +145,9 @@ struct pm4_mes_map_process { union { struct { - uint32_t pasid:16; - uint32_t reserved1:2; + uint32_t pasid:16; /* 0 - 15 */ + uint32_t reserved1:1; /* 16 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; uint32_t new_debug:1; uint32_t reserved2:1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h index 38f5cb6a222a..e0ed62c4ade0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h @@ -37,7 +37,7 @@ struct pm4_mes_map_process_aldebaran { struct { uint32_t pasid:16; /* 0 - 15 */ uint32_t single_memops:1; /* 16 */ - uint32_t reserved1:1; /* 17 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; /* 18 - 21 */ uint32_t new_debug:1; /* 22 */ uint32_t tmz:1; /* 23 */ From ccf8ef6b7506cc43e7fd504a85465c1c0786a107 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 3 Jun 2024 11:48:23 -0400 Subject: [PATCH 412/447] drm/amdgpu: Implement MES Suspend and Resume APIs for GFX11 Add implementation for MES Suspend and Resume APIs to unmap/map all queues for GFX11. Support for GFX12 will be added when the corresponding firmware support is in place. Signed-off-by: Mukul Joshi Reviewed-by: Alex Deucher Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 71 +++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 2 + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 32 ++++++++++- 3 files changed, 69 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 04a4f0dfec15..44c74a08987d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -501,60 +501,50 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) int amdgpu_mes_suspend(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_suspend_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); + input.suspend_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. 
*/ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->suspend_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to suspend pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->suspend_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to suspend all gangs"); + + return r; } int amdgpu_mes_resume(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_resume_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); + input.resume_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. */ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->resume_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to resume pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->resume_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to resume all gangs"); + + return r; } static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, @@ -1651,6 +1641,19 @@ out: return r; } +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) +{ + uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + bool is_supported = false; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && + mes_rev >= 0x63) + is_supported = true; + + return is_supported; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 5c8867d2380a..a5b1ea60cac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -503,4 +503,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) memalloc_noreclaim_restore(mes->saved_flags); mutex_unlock(&mes->mutex_hidden); } + +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 6f5a80519af9..8edcd85a1261 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -441,13 +441,41 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input) { - return 0; + union MESAPI__SUSPEND mes_suspend_gang_pkt; + + memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); + + mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; + mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; + mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; + mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + + return 
mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), + offsetof(union MESAPI__SUSPEND, api_status)); } static int mes_v11_0_resume_gang(struct amdgpu_mes *mes, struct mes_resume_gang_input *input) { - return 0; + union MESAPI__RESUME mes_resume_gang_pkt; + + memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); + + mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; + mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; + mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), + offsetof(union MESAPI__RESUME, api_status)); } static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) From 9a16042f02cd08bbd0a5a2d8e9c95347717165a3 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 3 Jun 2024 11:57:50 -0400 Subject: [PATCH 413/447] drm/amdkfd: Update queue unmap after VM fault with MES MEC FW expects MES to unmap all queues when a VM fault is observed on a queue and then resume them once the affected process is terminated. Use the MES Suspend and Resume APIs to achieve this. Signed-off-by: Mukul Joshi Acked-by: Alex Deucher Reviewed-by: Harish Kasiviswanathan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 87 ++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d23388ea8181..5825e805da50 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -319,6 +319,46 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm) return retval; } +static int suspend_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_suspend(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to suspend gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + +static int resume_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_resume(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to resume gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + static void increment_queue_count(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -2891,6 +2931,44 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } +static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct device *dev = dqm->dev->adev->dev; + int ret = 0; + + /* Check if process is already evicted */ + dqm_lock(dqm); + if (qpd->evicted) { + /* Increment the evicted count to make sure the + * process stays evicted before its terminated. 
+ */ + qpd->evicted++; + dqm_unlock(dqm); + goto out; + } + dqm_unlock(dqm); + + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + + ret = dqm->ops.evict_process_queues(dqm, qpd); + if (ret) { + dev_err(dev, "Evicting process queues failed"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed"); + +out: + return ret; +} + int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) { struct kfd_process_device *pdd; @@ -2901,8 +2979,13 @@ int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) return -EINVAL; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); pdd = kfd_get_process_device_data(dqm->dev, p); - if (pdd) - ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + if (pdd) { + if (dqm->dev->kfd->shared_resources.enable_mes) + ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); + else + ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + } + kfd_unref_process(p); return ret; From eb067d65c33eecd4b81771384183ad42eec259bf Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 12 Aug 2024 11:11:28 -0400 Subject: [PATCH 414/447] drm/amdkfd: Update BadOpcode Interrupt handling with MES Based on the recommendation of MEC FW, update BadOpcode interrupt handling by unmapping all queues, removing the queue that got the interrupt from scheduling, and remapping the rest of the queues back when using the MES scheduler. This is done to prevent the case where unmapping of the bad queue can fail, thereby causing a GPU reset. Signed-off-by: Mukul Joshi Acked-by: Harish Kasiviswanathan Acked-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 51 +++++++++++++++++++ .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 9 ++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 5825e805da50..577d121cc6d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2931,6 +2931,57 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) +{ + struct kfd_process_device *pdd; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct device_queue_manager *dqm = knode->dqm; + struct device *dev = dqm->dev->adev->dev; + struct qcm_process_device *qpd; + struct queue *q = NULL; + int ret = 0; + + if (!p) + return -EINVAL; + + dqm_lock(dqm); + + pdd = kfd_get_process_device_data(dqm->dev, p); + if (pdd) { + qpd = &pdd->qpd; + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->doorbell_id == doorbell_id && q->properties.is_active) { + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + + q->properties.is_evicted = true; + q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + + ret = remove_queue_mes(dqm, q, qpd); + if (ret) { + dev_err(dev, "Removing bad queue failed"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed"); + + break; + } + } + } + +out: + dqm_unlock(dqm); + return ret; +} + static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index f524a55eee11..b3f988b275a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -330,11 +330,14 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && - KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) - kfd_set_dbg_ev_from_interrupt(dev, pasid, - KFD_CTXID0_DOORBELL_ID(context_id0), + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { + u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); + + kfd_set_dbg_ev_from_interrupt(dev, pasid, doorbell_id, KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), NULL, 0); + kfd_dqm_suspend_bad_queue_mes(dev, pasid, doorbell_id); + } /* SDMA */ else if (source_id == SOC21_INTSRC_SDMA_TRAP) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f7c12d4f0abb..7bba6bed2f48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1324,6 +1324,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev, enum kfd_queue_type type); void kernel_queue_uninit(struct kernel_queue *kq); int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid); +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id); /* Process Queue Manager */ struct process_queue_node { From c0a04e3570d72aaf090962156ad085e37c62e442 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 15 Aug 2024 11:37:28 +0800 Subject: [PATCH 415/447] drm/amdgpu: Validate TA binary size Add TA binary size validation to avoid OOB write. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0c856005df6b..38face981c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t if (ret) return -EFAULT; + if (ta_bin_len > PSP_1_MEG) + return -EINVAL; + copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); From 18ac82c26da45d033df7eb993139da83dd53ee68 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Thu, 15 Aug 2024 18:45:13 -0400 Subject: [PATCH 416/447] Revert "drm/amd/display: Update to using new dccg callbacks" [Why] Revert updated DCCG wrappers due to regression [How] This reverts commit 680458d41aa46a009909482f58358205b5c4b438. 
Reviewed-by: Chris Park Signed-off-by: Hansen Dsouza Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 004c4fe3ddfc..7f91e48902e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -2396,11 +2396,11 @@ struct dccg *dccg35_create( (void)&dccg35_disable_symclk_be_new; (void)&dccg35_set_symclk32_le_root_clock_gating; (void)&dccg35_set_smclk32_se_rcg; - (void)&dccg35_funcs; + (void)&dccg35_funcs_new; base = &dccg_dcn->base; base->ctx = ctx; - base->funcs = &dccg35_funcs_new; + base->funcs = &dccg35_funcs; dccg_dcn->regs = regs; dccg_dcn->dccg_shift = dccg_shift; From 9de60462cdba60f575f97ca2655533b35273c715 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 15 Aug 2024 18:45:14 -0400 Subject: [PATCH 417/447] drm/amd/display: Update HPO I/O When Handling Link Retrain Automation Request [WHY] Previous multi-display HPO fix moved where HPO I/O enable/disable is performed. The codepath now taken to enable/disable HPO I/O is not used for compliance test automation, meaning that if a compliance box being driven at a DP1 rate requests retrain at UHBR, HPO I/O will remain off if it was previously off. [HOW] Explicitly update HPO I/O after allocating encoders for test request. Reviewed-by: Charlene Liu Reviewed-by: Wenjing Liu Signed-off-by: Michael Strauss Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 13 ++++++++++++ .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 21 ++++--------------- .../amd/display/dc/hwss/dcn31/dcn31_init.c | 1 + .../amd/display/dc/hwss/dcn314/dcn314_init.c | 1 + .../amd/display/dc/hwss/dcn351/dcn351_init.c | 1 + .../drm/amd/display/dc/hwss/hw_sequencer.h | 1 + drivers/gpu/drm/amd/display/dc/inc/resource.h | 2 ++ .../display/dc/link/accessories/link_dp_cts.c | 8 +++++++ 8 files changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index b38340c690c6..b6377efc6253 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -5303,3 +5303,16 @@ int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *o } return det_segments; } + +bool resource_is_hpo_acquired(struct dc_state *context) +{ + int i; + + for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { + if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { + return true; + } + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 217344ccf644..246fa300ee95 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2350,19 +2350,6 @@ static void dce110_setup_audio_dto( } } -static bool dce110_is_hpo_enabled(struct dc_state *context) -{ - int i; - - for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { - if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { - return true; - } - } - - return false; -} - enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context) @@ -2371,8 +2358,8 @@ enum dc_status 
dce110_apply_ctx_to_hw( struct dc_bios *dcb = dc->ctx->dc_bios; enum dc_status status; int i; - bool was_hpo_enabled = dce110_is_hpo_enabled(dc->current_state); - bool is_hpo_enabled = dce110_is_hpo_enabled(context); + bool was_hpo_acquired = resource_is_hpo_acquired(dc->current_state); + bool is_hpo_acquired = resource_is_hpo_acquired(context); /* reset syncd pipes from disabled pipes */ if (dc->config.use_pipe_ctx_sync_logic) @@ -2415,8 +2402,8 @@ enum dc_status dce110_apply_ctx_to_hw( dce110_setup_audio_dto(dc, context); - if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_enabled != is_hpo_enabled) { - dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_enabled); + if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_acquired != is_hpo_acquired) { + dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_acquired); } for (i = 0; i < dc->res_pool->pipe_count; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index b57dd45611f2..56f3c70d4b55 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -111,6 +111,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn31_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index fe5495a8e7a2..68e6de6b5758 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -114,6 +114,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .calculate_pix_rate_divider = dcn314_calculate_pix_rate_divider, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn314_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 5da3069fc1ab..d00822e8daa5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -123,6 +123,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, .program_outstanding_updates = dcn32_program_outstanding_updates, + .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn351_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 326854489802..ac9205625623 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -461,6 +461,7 @@ struct hw_sequencer_funcs { void (*set_long_vtotal)(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); void (*program_outstanding_updates)(struct dc *dc, struct dc_state *context); + void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable); }; void color_space_to_black_color( diff --git 
a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 9cd80d3864c7..cd1157d225ab 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -644,4 +644,6 @@ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuratio *Calculate total DET allocated for all pipes for a given OTG_MASTER pipe */ int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master); + +bool resource_is_hpo_acquired(struct dc_state *context); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index df3781081da7..ff8fe1a94965 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -67,6 +67,8 @@ static void dp_retrain_link_dp_test(struct dc_link *link, { struct pipe_ctx *pipes[MAX_PIPES]; struct dc_state *state = link->dc->current_state; + bool was_hpo_acquired = resource_is_hpo_acquired(link->dc->current_state); + bool is_hpo_acquired; uint8_t count; int i; @@ -83,6 +85,12 @@ static void dp_retrain_link_dp_test(struct dc_link *link, pipes[i]); } + if (link->dc->hwss.setup_hpo_hw_control) { + is_hpo_acquired = resource_is_hpo_acquired(state); + if (was_hpo_acquired != is_hpo_acquired) + link->dc->hwss.setup_hpo_hw_control(link->dc->hwseq, is_hpo_acquired); + } + for (i = count-1; i >= 0; i--) link_set_dpms_on(state, pipes[i]); } From 8783a18409b48455b3a63f0cd930c7c88beee93d Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 15 Aug 2024 18:45:15 -0400 Subject: [PATCH 418/447] drm/amd/display: remove an extraneous call for checking dchub clock when removing the amdgpu module and reinserting it, a call trace is triggered: [ 334.230602] RIP: 0010:hubbub2_get_dchub_ref_freq+0xbb/0xe0 [amdgpu] [ 334.230807] Code: 25 28 00 00 00 75 3c 48 8d 65 f0 5b 41 5c 5d 31 c0 31 d2 31 c9 31 f6 31 ff 45 31 c0 45 31 c9 45 31 d2 45 31 db e9 55 a1 ca de <0f> 0b eb c6 0f 0b eb c2 d1 eb 8d 83 c0 63 ff ff 3d 20 4e 00 00 76 [ 334.230809] RSP: 0018:ffffbc8b823fb540 EFLAGS: 00010246 [ 334.230811] RAX: 0000000000001000 RBX: 00000000000186a0 RCX: 0000000000000000 [ 334.230812] RDX: ffffbc8b823fb544 RSI: 0000000000000000 RDI: 0000000000000000 [ 334.230813] RBP: ffffbc8b823fb560 R08: 0000000000000000 R09: 0000000000000000 [ 334.230814] R10: 0000000000000000 R11: 000000000000000f R12: ffff9e644f1f2bb0 [ 334.230815] R13: ffff9e6451361300 R14: 0000000000000000 R15: ffff9e6452c00000 [ 334.230816] FS: 00007af7c8519000(0000) GS:ffff9e737dd00000(0000) knlGS:0000000000000000 [ 334.230817] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 334.230818] CR2: 0000703576b9cbd0 CR3: 00000001095a2000 CR4: 0000000000750ee0 [ 334.230819] PKRU: 55555554 [ 334.230820] Call Trace: [ 334.230822] [ 334.230824] ? show_regs+0x6d/0x80 [ 334.230828] ? __warn+0x89/0x160 [ 334.230832] ? hubbub2_get_dchub_ref_freq+0xbb/0xe0 [amdgpu] [ 334.231024] ? report_bug+0x17e/0x1b0 [ 334.231028] ? handle_bug+0x46/0x90 [ 334.231030] ? exc_invalid_op+0x18/0x80 [ 334.231032] ? asm_exc_invalid_op+0x1b/0x20 [ 334.231036] ? hubbub2_get_dchub_ref_freq+0xbb/0xe0 [amdgpu] [ 334.231217] dc_create_resource_pool+0xfd/0x320 [amdgpu] [ 334.231408] dc_create+0x256/0x700 [amdgpu] [ 334.231588] ? srso_alias_return_thunk+0x5/0x7f [ 334.231590] ? 
dmi_matches+0xa0/0x230 [ 334.231594] amdgpu_dm_init+0x28c/0x25f0 [amdgpu] [ 334.231791] ? prb_read_valid+0x1c/0x30 [ 334.231795] ? __irq_work_queue_local+0x43/0xf0 [ 334.231798] ? srso_alias_return_thunk+0x5/0x7f [ 334.231800] ? irq_work_queue+0x2f/0x70 [ 334.231802] ? srso_alias_return_thunk+0x5/0x7f [ 334.231803] ? __wake_up_klogd.part.0+0x40/0x70 [ 334.231805] ? srso_alias_return_thunk+0x5/0x7f [ 334.231807] ? vprintk_emit+0xd9/0x210 [ 334.231809] ? set_dev_info+0x130/0x1c0 [ 334.231812] ? srso_alias_return_thunk+0x5/0x7f [ 334.231813] ? dev_printk_emit+0xa1/0xe0 [ 334.231819] dm_hw_init+0x14/0x30 [amdgpu] [ 334.231993] amdgpu_device_init+0x23c7/0x2fc0 [amdgpu] [ 334.232134] ? pci_read_config_word+0x25/0x50 [ 334.232139] amdgpu_driver_load_kms+0x1a/0xd0 [amdgpu] [ 334.232284] amdgpu_pci_probe+0x1f9/0x620 [amdgpu] On DCN401, get_dchub_ref_freq() hook is called before init_hw() hook. Hence, it is expected to trigger an assert. Remove the extraneous call to get_dchub_ref_freq() to suppress the call trace Reviewed-by: Alvin Lee Signed-off-by: Aurabindo Pillai Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index b6377efc6253..ef585a89847b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -342,11 +342,6 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc, res_pool->ref_clocks.xtalin_clock_inKhz; res_pool->ref_clocks.dchub_ref_clock_inKhz = res_pool->ref_clocks.xtalin_clock_inKhz; - if (dc->debug.using_dml2) - if (res_pool->hubbub && res_pool->hubbub->funcs->get_dchub_ref_freq) - res_pool->hubbub->funcs->get_dchub_ref_freq(res_pool->hubbub, - res_pool->ref_clocks.dccg_ref_clock_inKhz, - &res_pool->ref_clocks.dchub_ref_clock_inKhz); } else ASSERT_CRITICAL(false); } From 7c9cb6d1bf122fdac6a7d51f7dd8cc2d8c94b452 Mon Sep 17 00:00:00 2001 From: Nicholas Susanto Date: Thu, 15 Aug 2024 18:45:16 -0400 Subject: [PATCH 419/447] drm/amd/display: Remove redundant check in DCN35 hwseq Removing redundant condition. Reviewed-by: Hansen Dsouza Signed-off-by: Nicholas Susanto Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 899e239352aa..fbbb20b9dbee 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1024,9 +1024,6 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (!hpo_frl_stream_enc_acquired && !hpo_dp_stream_enc_acquired) update_state->pg_res_update[PG_HPO] = true; - if (hpo_frl_stream_enc_acquired) - update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; - update_state->pg_res_update[PG_DWB] = true; for (i = 0; i < dc->res_pool->pipe_count; i++) { From 4e9e50b6aeda3e3ce727453c5455cf08c68dac8b Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 15 Aug 2024 18:45:17 -0400 Subject: [PATCH 420/447] drm/amd/display: Allow UHBR Interop With eDP Supported Link Rates Table [WHY] eDP 2.0 is introducing support for UHBR link rates, however current eDP ILR link optimization does not account for UHBR capabilities. 
Either UHBR capabilities will be provided via the same 128b/132b rate DPCD caps that are currently used on DP2.1, or Table 4-13 may be updated to include UHBR rates. [HOW] Add extra Supported Link Rates table translations for UHBR10/13.5/20. Update eDP link setting optimization search to be aware of 128b/132b DPCD rate caps in order to unblock UHBR on panels with Supported Link Rates table. Reviewed-by: Wenjing Liu Signed-off-by: Michael Strauss Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/link_detection.c | 3 +- .../dc/link/protocols/link_dp_capability.c | 59 ++++++++++--------- .../link/protocols/link_edp_panel_control.c | 11 ++-- .../link/protocols/link_edp_panel_control.h | 2 +- 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 391dbe81534d..d21ee9d12d26 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -1189,8 +1189,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, //sink only can use supported link rate table, we are foreced to enable it if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN) link->panel_config.ilr.optimize_edp_link_rate = true; - if (edp_is_ilr_optimization_enabled(link)) - link->reported_link_cap.link_rate = get_max_link_rate_from_ilr_table(link); + link->reported_link_cap.link_rate = get_max_edp_link_rate(link); } } else { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 59c9dde10885..34a618a7278b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -212,6 +212,13 @@ static enum dc_link_rate linkRateInKHzToLinkRateMultiplier(uint32_t link_rate_in case 10000000: link_rate = LINK_RATE_UHBR10; // UHBR10 - 10.0 Gbps/Lane break; + case 13500000: + link_rate = LINK_RATE_UHBR13_5; // UHBR13.5 - 13.5 Gbps/Lane + break; + case 20000000: + link_rate = LINK_RATE_UHBR20; // UHBR20 - 20.0 Gbps/Lane + break; + default: link_rate = LINK_RATE_UNKNOWN; break; @@ -541,6 +548,23 @@ static enum dc_link_rate increase_link_rate(struct dc_link *link, } } +static void increase_edp_link_rate(struct dc_link *link, + struct dc_link_settings *current_link_setting) +{ + if (current_link_setting->use_link_rate_set) { + if (current_link_setting->link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + current_link_setting->link_rate_set++; + current_link_setting->link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting->link_rate_set]; + } else { + current_link_setting->use_link_rate_set = false; + current_link_setting->link_rate = LINK_RATE_UHBR10; + } + } else { + current_link_setting->link_rate = increase_link_rate(link, current_link_setting->link_rate); + } +} + static bool decide_fallback_link_setting_max_bw_policy( struct dc_link *link, const struct dc_link_settings *max, @@ -759,14 +783,7 @@ bool edp_decide_link_settings(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - 
current_link_setting.lane_count = - initial_link_setting.lane_count; - } else - break; + increase_edp_link_rate(link, ¤t_link_setting); } } return false; @@ -818,9 +835,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, if (policy) { /* minimize lane */ if (current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { @@ -839,9 +854,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); current_link_setting.lane_count = initial_link_setting.lane_count; } @@ -874,18 +887,15 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, } if (policy) { /* minimize lane */ - if (current_link_setting.link_rate_set < - link->dpcd_caps.edp_supported_link_rates_count - && current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + if (current_link_setting.link_rate < max_link_rate) { + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { current_link_setting.lane_count = increase_lane_count( current_link_setting.lane_count); current_link_setting.link_rate_set = initial_link_setting.link_rate_set; + current_link_setting.use_link_rate_set = initial_link_setting.use_link_rate_set; current_link_setting.link_rate = link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; } else @@ -899,13 +909,8 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - current_link_setting.lane_count = - initial_link_setting.lane_count; - } else + increase_edp_link_rate(link, ¤t_link_setting); + if (current_link_setting.link_rate == LINK_RATE_UNKNOWN) break; } } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index bf820d2b4dc4..070b6c8c1aef 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -305,16 +305,17 @@ bool edp_is_ilr_optimization_enabled(struct dc_link *link) return true; } -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link) +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link) { - enum dc_link_rate link_rate = link->reported_link_cap.link_rate; + enum dc_link_rate max_ilr_rate = LINK_RATE_UNKNOWN; + enum dc_link_rate max_non_ilr_rate = dp_get_max_link_cap(link).link_rate; for (int i = 0; i < link->dpcd_caps.edp_supported_link_rates_count; i++) { - if (link_rate < link->dpcd_caps.edp_supported_link_rates[i]) - link_rate = link->dpcd_caps.edp_supported_link_rates[i]; + if (max_ilr_rate < link->dpcd_caps.edp_supported_link_rates[i]) + max_ilr_rate = 
link->dpcd_caps.edp_supported_link_rates[i]; } - return link_rate; + return (max_ilr_rate > max_non_ilr_rate ? max_ilr_rate : max_non_ilr_rate); } bool edp_is_ilr_optimization_required(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 8df8ac5bde5b..30dc8c24c008 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -69,7 +69,7 @@ bool edp_wait_for_t12(struct dc_link *link); bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); bool edp_is_ilr_optimization_enabled(struct dc_link *link); -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link); +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link); bool edp_backlight_enable_aux(struct dc_link *link, bool enable); void edp_add_delay_for_T9(struct dc_link *link); bool edp_receiver_ready_T9(struct dc_link *link); From 272e6aab14bbf98d7a06b2b1cd6308a02d4a10a1 Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Thu, 15 Aug 2024 18:45:18 -0400 Subject: [PATCH 421/447] drm/amd/display: Hardware cursor changes color when switched to software cursor [Why & How] DCN4 Cursor has a separate degamma block and should always do Cursor degamma for Cursor color modes. Reviewed-by: Chris Park Signed-off-by: Nevenko Stupar Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 92b34fe47f74..3b6ca7974e18 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,11 +120,10 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; + // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { - cur_rom_en = 1; - } + cur_rom_en = 1; } REG_UPDATE_3(CURSOR0_CONTROL, From f327189389785b26e49904a7d51f7dd8cc2d8c94b452 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Thu, 15 Aug 2024 18:45:19 -0400 Subject: [PATCH 422/447] drm/amd/display: Support UHBR10 link rate on eDP [why] Supporting UHBR10 link rate on eDP leverages the existing DP2.0 code but needs some small adjustments in code. [how] Acknowledge the given DPCD caps for UHBR10 link rate support and allow the DP2.0 programming sequence and link training for eDP. 
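For illustration only (this sketch is not part of the patch; the helper name is invented, and the DPCD cap field names are assumptions based on the DC caps structs used elsewhere in the tree), the eDP gating described above roughly reduces to:

	/* Hedged sketch: may this eDP sink take the 128b/132b (DP2.0) path? */
	static bool edp_sink_supports_uhbr10_sketch(const struct dc_link *link)
	{
		/* 128b/132b channel coding support is a prerequisite for any UHBR rate */
		if (!link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)
			return false;

		/* UHBR10 bit from the 128b/132b supported link rates DPCD caps */
		return link->dpcd_caps.dp_128b_132b_supported_link_rates.bits.UHBR10 != 0;
	}

When the sink reports both bits, the existing DP2.0 128b/132b training sequence can be reused for the eDP link.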
Reviewed-by: Wenjing Liu Signed-off-by: Sung Joon Kim Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 23 ++++++++++--------- .../gpu/drm/amd/display/dc/link/link_dpms.c | 4 ++-- .../link/protocols/link_edp_panel_control.c | 3 +++ 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6b036417a73a..3de311533571 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1760,6 +1760,7 @@ struct dc_link { bool dongle_mode_timing_override; bool blank_stream_on_ocs_change; bool read_dpcd204h_on_irq_hpd; + bool disable_assr_for_uhbr; } wa_flags; struct link_mst_stream_allocation_table mst_stream_alloc_table; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 246fa300ee95..d52ce58c6a98 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1232,20 +1232,21 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) * has changed or they enter protection state and hang. */ msleep(60); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) { - if (!link->dc->config.edp_no_power_sequencing) { - /* - * Sometimes, DP receiver chip power-controlled externally by an - * Embedded Controller could be treated and used as eDP, - * if it drives mobile display. In this case, - * we shouldn't be doing power-sequencing, hence we can skip - * waiting for T9-ready. - */ - link->dc->link_srv->edp_receiver_ready_T9(link); - } } } + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + !link->dc->config.edp_no_power_sequencing) { + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T9-ready. 
+ */ + link->dc->link_srv->edp_receiver_ready_T9(link); + } + } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index d6550b904b16..c4e03482ba9a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2358,7 +2358,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, false); @@ -2591,7 +2591,7 @@ void link_set_dpms_on( if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) allocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, true); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 070b6c8c1aef..3aa05a2be6c0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -1168,6 +1168,9 @@ static void edp_set_assr_enable(const struct dc *pDC, struct dc_link *link, link_enc_index = link->link_enc->transmitter - TRANSMITTER_UNIPHY_A; if (link_res->hpo_dp_link_enc) { + if (link->wa_flags.disable_assr_for_uhbr) + return; + link_enc_index = link_res->hpo_dp_link_enc->inst; use_hpo_dp_link_enc = true; } From ec9e2e7acc6dabb8f00c2c60785931310caaa883 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Thu, 15 Aug 2024 18:45:20 -0400 Subject: [PATCH 423/447] drm/amd/display: Fix construct_phy with MXM connector [Why/How] The call to construct_phy will fail in cases where connector type is MXM, and the dc_link won't be properly created/initialized. 
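For context, a simplified sketch of the connector-to-signal mapping that the fix below extends (not the driver code verbatim; the default branch shown here is an assumption about construct_phy()'s error path):

	switch (link->link_id.id) {
	case CONNECTOR_ID_DISPLAY_PORT:
	case CONNECTOR_ID_MXM:	/* treat MXM like a regular DP connector */
	case CONNECTOR_ID_USBC:
		link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT;
		break;
	default:
		/* unrecognized connector IDs abort link construction */
		goto create_fail;
	}

Without the CONNECTOR_ID_MXM case, an MXM connector lands in the default branch and the dc_link is torn down.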
Reviewed-by: Wenjing Liu Signed-off-by: Ilya Bakoulin Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_factory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 8246006857b3..85fd6e422238 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -524,6 +524,7 @@ static bool construct_phy(struct dc_link *link, link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK; break; case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_MXM: case CONNECTOR_ID_USBC: link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT; From 2344413205521775d3b1d418e5659e3ae3bc263f Mon Sep 17 00:00:00 2001 From: Nicholas Susanto Date: Thu, 15 Aug 2024 18:45:21 -0400 Subject: [PATCH 424/447] drm/amd/display: DCN35 set min dispclk to 50MHz [Why] Hard hangs occur when resuming after display off in extended/duplicate modes. [How] Set the min dispclk to 50MHz for DCN35. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Nicholas Susanto Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 3 +++ drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index e2d906327e2e..0ce9b40dfc68 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -305,6 +305,9 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) new_clocks->ref_dtbclk_khz = 600000; + if (dc->debug.min_disp_clk_khz > 0 && new_clocks->dispclk_khz < dc->debug.min_disp_clk_khz) + new_clocks->dispclk_khz = dc->debug.min_disp_clk_khz; + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 5f3705f97bd7..46ad684fe192 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -786,6 +786,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmub_reallow_idle = false, .static_screen_wait_frames = 2, .disable_timeout = true, + .min_disp_clk_khz = 50000, }; static const struct dc_panel_config panel_config_defaults = { From 20b5a8f9f4670a8503aa9fa95ca632e77c6bf55d Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 15 Aug 2024 18:45:22 -0400 Subject: [PATCH 425/447] drm/amd/display: fix double free issue during amdgpu module unload Flexible endpoints use DIGs from available inflexible endpoints, so only the encoders of inflexible links need to be freed. Otherwise, a double free issue may occur when unloading the amdgpu module. [ 279.190523] RIP: 0010:__slab_free+0x152/0x2f0 [ 279.190577] Call Trace: [ 279.190580] [ 279.190582] ? show_regs+0x69/0x80 [ 279.190590] ? die+0x3b/0x90 [ 279.190595] ? do_trap+0xc8/0xe0 [ 279.190601] ? do_error_trap+0x73/0xa0 [ 279.190605] ? __slab_free+0x152/0x2f0 [ 279.190609] ? exc_invalid_op+0x56/0x70 [ 279.190616] ? 
__slab_free+0x152/0x2f0 [ 279.190642] ? asm_exc_invalid_op+0x1f/0x30 [ 279.190648] ? dcn10_link_encoder_destroy+0x19/0x30 [amdgpu] [ 279.191096] ? __slab_free+0x152/0x2f0 [ 279.191102] ? dcn10_link_encoder_destroy+0x19/0x30 [amdgpu] [ 279.191469] kfree+0x260/0x2b0 [ 279.191474] dcn10_link_encoder_destroy+0x19/0x30 [amdgpu] [ 279.191821] link_destroy+0xd7/0x130 [amdgpu] [ 279.192248] dc_destruct+0x90/0x270 [amdgpu] [ 279.192666] dc_destroy+0x19/0x40 [amdgpu] [ 279.193020] amdgpu_dm_fini+0x16e/0x200 [amdgpu] [ 279.193432] dm_hw_fini+0x26/0x40 [amdgpu] [ 279.193795] amdgpu_device_fini_hw+0x24c/0x400 [amdgpu] [ 279.194108] amdgpu_driver_unload_kms+0x4f/0x70 [amdgpu] [ 279.194436] amdgpu_pci_remove+0x40/0x80 [amdgpu] [ 279.194632] pci_device_remove+0x3a/0xa0 [ 279.194638] device_remove+0x40/0x70 [ 279.194642] device_release_driver_internal+0x1ad/0x210 [ 279.194647] driver_detach+0x4e/0xa0 [ 279.194650] bus_remove_driver+0x6f/0xf0 [ 279.194653] driver_unregister+0x33/0x60 [ 279.194657] pci_unregister_driver+0x44/0x90 [ 279.194662] amdgpu_exit+0x19/0x1f0 [amdgpu] [ 279.194939] __do_sys_delete_module.isra.0+0x198/0x2f0 [ 279.194946] __x64_sys_delete_module+0x16/0x20 [ 279.194950] do_syscall_64+0x58/0x120 [ 279.194954] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 279.194980] Reviewed-by: Rodrigo Siqueira Signed-off-by: Tim Huang Reviewed-by: Roman Li Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_factory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 85fd6e422238..5e1b5ab9fbc6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -385,7 +385,7 @@ static void link_destruct(struct dc_link *link) if (link->panel_cntl) link->panel_cntl->funcs->destroy(&link->panel_cntl); - if (link->link_enc) { + if (link->link_enc && !link->is_dig_mapping_flexible) { /* Update link encoder resource tracking variables. These are used for * the dynamic assignment of link encoders to streams. Virtual links * are not assigned encoder resources on creation. From d07722e1fc749fbd78992650b6d00c9a2619be70 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Thu, 15 Aug 2024 18:45:23 -0400 Subject: [PATCH 426/447] drm/amd/display: DML2.1 Reintegration for Various Fixes [Why and How] DML2.1 reintegration for several fixes and updates to the DML code. 
Reviewed-by: Dillon Varone Signed-off-by: Austin Zheng Signed-off-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 3 - .../dml21/inc/bounding_boxes/dcn4_soc_bb.h | 2 +- .../dml21/inc/dml_top_soc_parameter_types.h | 1 + .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 1 - .../src/dml2_core/dml2_core_dcn4_calcs.c | 482 +++++++------ .../dml21/src/dml2_core/dml2_core_factory.c | 2 +- .../dml21/src/dml2_core/dml2_core_shared.h | 37 - .../src/dml2_core/dml2_core_shared_types.h | 22 +- .../dml21/src/dml2_core/dml2_core_utils.c | 631 ++++++++++++++++++ .../dml21/src/dml2_core/dml2_core_utils.h | 39 ++ .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 54 +- .../dml21/src/dml2_dpmm/dml2_dpmm_factory.c | 2 +- .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 20 +- .../dml21/src/dml2_pmo/dml2_pmo_factory.c | 2 +- 14 files changed, 1011 insertions(+), 287 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h create mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index cf979ab172bd..c4378e620cbf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -79,7 +79,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := $(dml2_ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags) @@ -101,7 +100,6 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o : CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) @@ -122,7 +120,6 @@ DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o DML21 += src/dml2_core/dml2_core_dcn4_calcs.o -DML21 += src/dml2_core/dml2_core_shared.o DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o DML21 += src/dml2_dpmm/dml2_dpmm_factory.o DML21 += src/dml2_mcg/dml2_mcg_dcn4.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index 898b1dd69edd..8ef7977841de 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -355,7 +355,7 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .fams2 = { .max_allow_delay_us = 100 * 1000, .scheduling_delay_us = 125, - .vertical_interrupt_ack_delay_us = 18, + .vertical_interrupt_ack_delay_us = 40, .allow_programming_delay_us = 18, .min_allow_width_us = 20, .subvp_df_throttle_delay_us = 100, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 4a46b21c3e55..ebd8abe894a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -151,6 +151,7 @@ struct dml2_soc_bb { double phy_downspread_percent; double dcn_downspread_percent; double dispclk_dppclk_vco_speed_mhz; + bool no_dfs; bool do_urgent_latency_adjustment; unsigned int mem_word_bytes; unsigned int num_dcc_mcaches; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 9375c6ae1147..698307f3ca39 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -273,7 +273,6 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in programming->fams2_required = display_cfg->stage3.fams2_required; dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); - programming->fams2_global_config.features.bits.enable = display_cfg->stage3.fams2_required; } // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c3c4d8d9525c..e2c45e498664 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -8,32 +8,55 @@ #include "dml2_debug.h" #include "lib_float_math.h" #include "dml_top_types.h" -#include "dml2_core_shared.h" -//#define DML_TVM_UPDATE_EN #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 -static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + 
return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + +static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; +} + +static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { dml2_printf("DML: ===================================== \n"); dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); - if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); - if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); - if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); - if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) - dml2_printf("DML: support: DSCOnlyIfNecessaryWithBPP = %d\n", support->DSCOnlyIfNecessaryWithBPP); - if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); - if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) @@ -42,74 +65,87 @@ static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mod dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); - if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); - if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); - if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); - if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - 
dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); - if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); - if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); - if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); - if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); - if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); - if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); - if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); - if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); - if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); - if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", 
support->DTBCLKRequiredMoreThanSupported); - if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); - if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); - if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); - if (!fail_only || support->PrefetchSupported == 0) - dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); - if (!fail_only || support->DynamicMetadataSupported == 0) - dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); - if (!fail_only || support->VRatioInPrefetchSupported == 0) - dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); if (!fail_only || support->TotalAvailablePipesSupport == 0) dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", 
support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->ModeSupport == 0) dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); dml2_printf("DML: ===================================== \n"); } @@ -2849,16 +2885,9 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->hostvm_enable == true) { + if (p->display_cfg->gpuvm_enable == true) { p->vm_group_bytes[k] = 512; p->dpte_group_bytes[k] = 512; - } else if (p->display_cfg->gpuvm_enable == true) { - p->vm_group_bytes[k] = 2048; - if (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes >= 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { - p->dpte_group_bytes[k] = 512; - } else { - p->dpte_group_bytes[k] = 2048; - } } else { p->vm_group_bytes[k] = 0; p->dpte_group_bytes[k] = 0; @@ -4556,15 +4585,6 @@ static void calculate_tdlut_setting( return; } - - if (!p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = 0; - *p->tdlut_opt_time = 0; - *p->tdlut_drain_time = 0; - *p->tdlut_bytes_per_group = 0; - return; - } - if (p->tdlut_mpc_width_flag) { tdlut_mpc_width = 33; tdlut_bytes_per_group_simple = 39*256; @@ -4624,7 +4644,7 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. 
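
/*
 * Editor's note (illustrative sketch, not part of the patch): the first hunk
 * below adds a (double) cast before dividing tdlut_bytes_per_frame by
 * tdlut_bytes_per_group. Both values are unsigned integers (they are printed
 * with %u further down), so without the cast the quotient truncates *before*
 * math_ceil2() can round it up, under-counting the groups fetched during the
 * two row times. A standalone reproduction, with a local helper standing in
 * for math_ceil2() and made-up byte counts:
 */
#include <math.h>

/* stand-in for dml's math_ceil2(x, granularity): round x up to a multiple of g */
static double sketch_ceil2(double x, double g)
{
	return ceil(x / g) * g;
}

static void sketch_tdlut_group_count(void)
{
	unsigned int bytes_per_frame = 39 * 256 * 7 + 128;	/* hypothetical */
	unsigned int bytes_per_group = 39 * 256;

	/* integer division truncates to 7.0, so the ceil has nothing to round */
	double wrong = sketch_ceil2(bytes_per_frame / bytes_per_group, 1);
	/* casting first keeps the fractional part; the ceil rounds up to 8.0 */
	double right = sketch_ceil2((double)bytes_per_frame / bytes_per_group, 1);

	(void)wrong;	/* 7.0 */
	(void)right;	/* 8.0 */
}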
if (p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2(*p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; } @@ -4637,7 +4657,7 @@ static void calculate_tdlut_setting( dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); - dml2_printf("DML::%s: tdlut_addressing_mode = %u\n", __func__, p->tdlut_addressing_mode); + dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); @@ -4703,11 +4723,12 @@ static void CalculateTarb( static double CalculateTWait( long reserved_vblank_time_ns, double UrgentLatency, - double Ttrip) + double Ttrip, + double g6_temp_read_blackout_us) { double TWait; double t_urg_trip = math_max2(UrgentLatency, Ttrip); - TWait = reserved_vblank_time_ns/1000.0 + t_urg_trip; + TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); @@ -4855,13 +4876,23 @@ static double get_urgent_bandwidth_required( } if (!exclude_this_plane) { - surface_required_bw[k] = math_max4(NumberOfDPP[k] * prefetch_vmrow_bw[k], - l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur, - l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre, - (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]); + l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; + l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; + l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; + surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw); /* export peak required bandwidth for the surface */ surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + 
dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); +#endif } else { surface_required_bw[k] = 0.0; } @@ -4870,6 +4901,8 @@ static double get_urgent_bandwidth_required( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); @@ -4883,6 +4916,8 @@ static double get_urgent_bandwidth_required( dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); @@ -5037,7 +5072,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->bytes_pp = 0.0; s->dep_bytes = 0.0; s->min_Lsw_oto = 0.0; + s->min_Lsw_equ = 0.0; s->Tsw_est1 = 0.0; + s->Tsw_est2 = 0.0; s->Tsw_est3 = 0.0; s->cursor_prefetch_bytes = 0; *p->prefetch_cursor_bw = 0; @@ -5059,7 +5096,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup); dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); @@ -5092,21 +5128,15 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; s->trip_to_mem = p->Ttrip; -#ifdef DML_TVM_UPDATE_EN *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); if (dcc_mrq_enable) *p->Tvm_trips_flip = *p->Tvm_trips; else *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#else - *p->Tvm_trips = p->ExtraLatencyPrefetch + s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)); - *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#endif *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); *p->Tr0_trips = 
math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled == true) { *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; @@ -5114,15 +5144,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tdmdl_vm = 0; *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex } -#else - if (p->DynamicMetadataVMEnabled == true) { - *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; - *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; - } else { - *p->Tdmdl_vm = 0; - *p->Tdmdl = p->TWait + p->ExtraLatencyPrefetch; // Tex - } -#endif if (p->DynamicMetadataEnable == true) { if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { @@ -5186,7 +5207,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); #endif - s->NoTimeToPrefetch = false; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); @@ -5199,14 +5219,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; } else { -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut) s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0); else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#endif + s->Tvm_trips_rounded = s->LineTime / 4.0; *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; } @@ -5235,16 +5251,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tno_bw = 0; } -#ifdef DML_TVM_UPDATE_EN if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3) *p->Tno_bw_flip = *p->Tno_bw; else *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip -#else - *p->Tno_bw_flip = 0; - if (p->display_cfg->gpuvm_enable == true) - *p->Tno_bw_flip = *p->Tno_bw; -#endif if (dml_is_420(p->myPipe->SourcePixelFormat)) { s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; @@ -5266,6 +5276,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); @@ -5289,11 +5303,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { -#ifdef DML_TVM_UPDATE_EN s->Tvm_oto = s->Tvm_trips_rounded; -#else - s->Tvm_oto = s->LineTime / 4.0; -#endif } if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || 
dcc_mrq_enable)) { @@ -5317,19 +5327,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); //Tpre_equ in line time -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; else s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; -#else - s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(s->TWait_p + p->TCalc, *p->Tdmdl - p->Ttrip)) / s->LineTime - Lo; -#endif s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); @@ -5367,6 +5374,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; +#ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); @@ -5387,18 +5395,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); - - s->dep_bytes = math_max2(vm_bytes * p->HostVMInefficiencyFactor, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); - - dml2_printf("DML::%s: dep_bytes: %f\n", __func__, s->dep_bytes); - dml2_printf("DML::%s: prefetch_sw_bytes: %f\n", __func__, s->prefetch_sw_bytes); dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); - - if (s->prefetch_sw_bytes < s->dep_bytes) { - s->prefetch_sw_bytes = 2 * s->dep_bytes; - dml2_printf("DML::%s: bump prefetch_sw_bytes to %f\n", __func__, s->prefetch_sw_bytes); - } + dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre)); + dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); +#endif *p->dst_y_per_vm_vblank = 0; *p->dst_y_per_row_vblank = 0; @@ -5411,7 +5413,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch 
*scratch // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time // So that means prefetch bw calculated can be higher since the total time availabe for prefetch is less - if (s->dst_y_prefetch_equ > 1) { + bool min_Lsw_equ_ok = s->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + + if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok) { s->prefetch_bw1 = 0.; s->prefetch_bw2 = 0.; s->prefetch_bw3 = 0.; @@ -5428,28 +5432,35 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw1 = 0; dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); - if ((p->VStartup == p->MaxVStartup) && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { + if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / - (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, s->Tpre_rounded); - dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_oto * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); - dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } // prefetch_bw2: VM + SW - if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0) + if (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / - (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded); - else + (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); + s->Tsw_est2 = s->prefetch_sw_bytes / s->prefetch_bw2; + } else s->prefetch_bw2 = 0; + dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); + if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { + s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); + dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); + } + // 
prefetch_bw3: 2*R0 + SW
	if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
		s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) /
@@ -5459,8 +5470,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
 		s->prefetch_bw3 = 0;
 	dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
-	if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && ((s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
-		s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
+	if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
+		s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
 		dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
 	}
@@ -5476,6 +5487,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
 	dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
 	dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
 	dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
+	dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
 	dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
 	dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
 	dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
@@ -5496,9 +5508,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
	// here is to make sure equ bw won't be more aggressive than the latency-based requirement.
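
/*
 * Editor's note (illustrative sketch, not part of the patch): the
 * "*_trips_rounded" values the checks below compare against are produced
 * earlier in this function by rounding each latency up to a whole number of
 * quarter-lines, e.g. math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) /
 * 4.0 * s->LineTime. The 0.25-line granularity comes from the U6.2
 * fixed-point DST_Y_* register format noted at the 63.75 clamp above. A
 * standalone equivalent:
 */
#include <math.h>

static double sketch_round_up_to_quarter_line(double t_us, double line_time_us)
{
	/* express t in quarter-lines, round up, then convert back to time */
	return ceil(4.0 * t_us / line_time_us) / 4.0 * line_time_us;
}
/* e.g. t_us = 3.1, line_time_us = 8.0: 1.55 quarter-lines -> 2 -> 4.0 us */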
// check vm time >= vm_trips // check r0 time >= r0_trips + + double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); + + dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); + dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); + if (s->prefetch_bw1 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw1 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; + double row_transfer_time = total_row_bytes / s->prefetch_bw1; + dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case1OK = true; } } @@ -5508,8 +5529,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time >= vm_trips // check r0 time < r0_trips if (s->prefetch_bw2 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw2 < s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; + double row_transfer_time = total_row_bytes / s->prefetch_bw2; + dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { Case2OK = true; } } @@ -5518,8 +5542,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time < vm_trips // check r0 time >= r0_trips if (s->prefetch_bw3 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3 < s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw3 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; + double row_transfer_time = total_row_bytes / s->prefetch_bw3; + dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case3OK = true; } } @@ -5585,13 +5612,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->TimeForFetchingVM = s->Tvm_equ; s->TimeForFetchingRowInVBlank = s->Tr0_equ; - if (p->VStartup == p->MaxVStartup) { - *p->dst_y_per_vm_vblank = math_floor2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_floor2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } else { - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) 
/ 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif @@ -5635,7 +5658,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); *p->VRatioPrefetchY = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5658,7 +5681,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); *p->VRatioPrefetchC = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5680,14 +5703,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch #endif } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); - dml2_printf("DML::%s: MyErr set, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); *p->VRatioPrefetchY = 0; *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; } - dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); @@ -5698,7 +5720,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); } else { - dml2_printf("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! 
min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + __func__, min_Lsw_equ_ok, s->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5730,7 +5754,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { prefetch_vm_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); } if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { @@ -5746,7 +5770,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { prefetch_row_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); } *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); @@ -5763,11 +5787,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; + *p->prefetch_vmrow_bw = 0; } dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + return s->NoTimeToPrefetch; } @@ -6174,7 +6203,7 @@ static void CalculateFlipSchedule( { struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; - l->dual_plane = dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; + l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; l->dpte_row_bytes = DPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ @@ -6250,7 +6279,7 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); - dml2_printf("DML::%s: total row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_row_bytes); + dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); @@ -6261,6 +6290,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: mode_support est Tr0_flip = %f 
(bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); } #endif l->lb_flip_bw = math_max3(l->lb_flip_bw, @@ -6277,7 +6307,7 @@ static void CalculateFlipSchedule( *dst_y_per_vm_flip = 1; // not used *dst_y_per_row_flip = 1; // not used - *ImmediateFlipSupportedForPipe = true; + *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded); } else { if (iflip_enable) { l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) @@ -6343,6 +6373,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); } dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); @@ -6373,6 +6404,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) { + p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + } p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ @@ -6579,13 +6616,13 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k]; - if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) + if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c)); else p->SubViewportLinesNeededInMALL[k] = 
(unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, p->meta_row_height_c[k]); + dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); @@ -6915,6 +6952,21 @@ static double get_g6_temp_read_blackout_us( return (double)blackout_us; } +static double get_max_urgent_latency_us( + struct dml2_dcn4x_soc_qos_params *dcn4x, + double uclk_freq_mhz, + double FabricClock, + unsigned int min_clk_index) +{ + double latency; + latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz + * (1 + dcn4x->umc_max_latency_margin / 100.0) + + dcn4x->mall_overhead_fclk_cycles / FabricClock + + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock + * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0); + return latency; +} + static void calculate_pstate_keepout_dst_lines( const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_watermarks *watermarks, @@ -6997,7 +7049,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); - dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); @@ -7223,12 +7274,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #endif */ - mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); if (mode_lib->ms.BytePerPixelC[k] == 0.0) { mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; } else { - mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); } @@ -7310,7 +7361,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.ViewportExceedsSurface 
= false; if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) { for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || + display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { mode_lib->ms.support.ViewportExceedsSurface = true; #if defined(__DML_VBA_DEBUG__) dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); @@ -7319,11 +7371,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); #endif - if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || - display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { - mode_lib->ms.support.ViewportExceedsSurface = true; - } + } + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || + display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; } } } @@ -7599,7 +7651,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ip.writeback_line_buffer_buffer_size)); } @@ -7684,8 +7736,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) mode_lib->ms.support.P2IWith420 = true; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary && s->OutputBpp[k] != 0) 
- mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true; if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) mode_lib->ms.support.DSC422NativeNotSupported = true; @@ -8483,7 +8533,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out { mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; - calculate_hostvm_inefficiency_factor( &s->HostVMInefficiencyFactor, &s->HostVMInefficiencyFactorPrefetch, @@ -8568,7 +8617,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->ms.UrgLatency, - mode_lib->ms.TripToMemory); + mode_lib->ms.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; @@ -8615,7 +8666,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -8697,8 +8747,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); - dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); } } @@ -8711,20 +8761,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.VRatioInPrefetchSupported = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + 
mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { mode_lib->ms.support.VRatioInPrefetchSupported = false; + dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); } } - s->AnyLinesForVMOrRowTooLarge = false; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.LinesForDPTERow[k] >= 16 || mode_lib->ms.LinesForVM[k] >= 32) { - s->AnyLinesForVMOrRowTooLarge = true; - } - } - // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok if (mode_lib->ms.support.PrefetchSupported) { for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { @@ -8961,6 +9006,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->mSOCParameters.USRRetrainingLatency = 0; s->mSOCParameters.SMNLatency = 0; s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -8980,7 +9028,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; // FIXME_STAGE2, need a new ip param? 
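
/*
 * Editor's note (illustrative sketch, not part of the patch): the
 * mSOCParameters.max_urgent_latency_us value populated above comes from
 * get_max_urgent_latency_us(), added earlier in this patch. It converts
 * cycle-denominated QoS budgets into microseconds (cycles divided by a clock
 * in MHz gives us) and inflates them by the percentage margins. With made-up
 * numbers:
 *
 *   1200 uclk cycles / 1000 MHz * 1.10  = 1.320 us  (urgent latency, +10% UMC margin)
 * +   50 fclk cycles / 1500 MHz         = 0.033 us  (MALL overhead)
 * +  300 fclk cycles / 1500 MHz * 1.05  = 0.210 us  (round trip, +5% fabric margin)
 *                                       ~ 1.563 us
 */
static double sketch_max_urgent_latency_us(void)
{
	double urgent_uclk_cycles = 1200.0;	/* hypothetical DPM-level budget */
	double uclk_freq_mhz = 1000.0;
	double umc_margin_pct = 10.0;
	double mall_overhead_fclk_cycles = 50.0;
	double round_trip_fclk_cycles = 300.0;
	double fabric_margin_pct = 5.0;
	double fabric_clock_mhz = 1500.0;

	return urgent_uclk_cycles / uclk_freq_mhz * (1.0 + umc_margin_pct / 100.0)
	     + mall_overhead_fclk_cycles / fabric_clock_mhz
	     + round_trip_fclk_cycles / fabric_clock_mhz * (1.0 + fabric_margin_pct / 100.0);
}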
CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; @@ -9011,22 +9058,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); } - + dml2_printf("DML::%s: Done prefetch calculation\n", __func__); // End of Prefetch Check - dml2_printf("DML::%s: Done prefetch calculation\n", __func__); + mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; //Re-ordering Buffer Support Check - mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 - / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { + / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -9055,15 +9095,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dram_change_vactive_det_fill_delay_us); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.max_urgent_latency_us); + dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); #endif /*Mode Support, Voltage State and SOC Configuration*/ { - // s->dram_clock_change_support = 1; - // s->f_clock_change_support = 1; - if (mode_lib->ms.support.ScaleRatioAndTapsSupport && mode_lib->ms.support.SourceFormatPixelAndScanSupport && mode_lib->ms.support.ViewportSizeSupport @@ -9074,9 +9111,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out && !mode_lib->ms.support.ExceededMultistreamSlots && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink && !mode_lib->ms.support.NotEnoughLanesForMSO - //&& mode_lib->ms.support.LinkCapacitySupport == true // FIXME_STAGE2 && !mode_lib->ms.support.P2IWith420 - && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP && !mode_lib->ms.support.DSC422NativeNotSupported && mode_lib->ms.support.DSCSlicesODMModeSupported && !mode_lib->ms.support.NotEnoughDSCUnits @@ -9144,7 +9179,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #if defined(__DML_VBA_DEBUG__) if (!mode_lib->ms.support.ModeSupport) - dml2_print_dml_mode_support_info(&mode_lib->ms.support, true); + dml2_print_mode_support_info(&mode_lib->ms.support, true); 
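
/*
 * Editor's note (illustrative sketch, not part of the patch): the re-ordering
 * buffer check earlier in this hunk passes when the ROB, less one pixel
 * chunk, can absorb the required SDP traffic for at least the worst-case
 * urgent latency. Assuming the bandwidth term is in bytes per microsecond
 * (which the comparison against a latency in us implies), the test reduces
 * to:
 */
#include <stdbool.h>

static bool sketch_rob_support(unsigned int rob_buffer_size_kbytes,
			       unsigned int pixel_chunk_size_kbytes,
			       double sdp_bytes_per_us,
			       double max_urgent_latency_us)
{
	/* how long the spare ROB space can sustain the required bandwidth */
	double drain_time_us = (rob_buffer_size_kbytes - pixel_chunk_size_kbytes)
			     * 1024.0 / sdp_bytes_per_us;

	return drain_time_us >= max_urgent_latency_us;
}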
dml2_printf("DML::%s: --- DONE --- \n", __func__); #endif @@ -9163,6 +9198,10 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + + for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -10697,7 +10736,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->mp.UrgentLatency, - mode_lib->mp.TripToMemory); + mode_lib->mp.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->mp.Dppclk[k]; myPipe->Dispclk = mode_lib->mp.Dispclk; @@ -10743,7 +10784,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -10829,9 +10869,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.dst_y_prefetch[k] < 2) s->DestinationLineTimesForPrefetchLessThan2 = true; - if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) + if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { s->VRatioPrefetchMoreThanMax = true; + dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + } if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); @@ -11165,6 +11209,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->mmSOCParameters.USRRetrainingLatency = 0; s->mmSOCParameters.SMNLatency = 0; 
s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -11184,7 +11231,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; //FIXME_STAGE2 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; @@ -11515,9 +11561,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) { + dml2_printf("DML::%s: ------------- START ----------\n", __func__); bool result = dml_core_mode_programming(in_out_params); - dml2_printf("DML::%s: ------------- START ----------\n", __func__); dml2_printf("DML::%s: result = %0d\n", __func__, result); dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -12427,7 +12473,7 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); - dml2_core_shared_div_rem(phantom_processing_delay_pix, + dml2_core_utils_div_rem(phantom_processing_delay_pix, display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, &rem); if (rem) @@ -12470,7 +12516,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; - out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP; + out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false; out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported; out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; out->informative.mode_support_info.LinkRateForMultistreamNotIndicated =
mode_lib->ms.support.LinkRateForMultistreamNotIndicated; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index 640087e862f8..28394de02885 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -10,7 +10,7 @@ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_core_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h deleted file mode 100644 index f3356b072b59..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h +++ /dev/null @@ -1,37 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_CORE_SHARED_H__ -#define __DML2_CORE_SHARED_H__ - -#define __DML_VBA_DEBUG__ -#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 // 0); + return dividend / divisor; + +} + +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + dml2_printf("DML: ===================================== \n"); + dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + if 
(!fail_only || support->MultistreamWithHDMIOreDP == 1) + dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->NotEnoughDSCUnits == 1) + dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ROBSupport == 0) + dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->OutstandingRequestsSupport == 0) + dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || 
support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + + if (!fail_only || support->ModeSupport == 0) + dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); + dml2_printf("DML: ===================================== \n"); +} + +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case
dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + + +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); +#endif + } +} + +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) +{ + switch (sw_mode) { + case (dml2_sw_linear): + return 256; break; + case (dml2_sw_256b_2d): + return 256; break; + case (dml2_sw_4kb_2d): + return 4096; break; + case (dml2_sw_64kb_2d): + return 65536; break; + case (dml2_sw_256kb_2d): + return 262144; break; + case (dml2_gfx11_sw_linear): + return 256; 
break; + case (dml2_gfx11_sw_64kb_d): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_t): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_x): + return 65536; break; + case (dml2_gfx11_sw_64kb_r_x): + return 65536; break; + case (dml2_gfx11_sw_256kb_d_x): + return 262144; break; + case (dml2_gfx11_sw_256kb_r_x): + return 262144; break; + default: + DML2_ASSERT(0); + return 256; + }; +} + + +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + + +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) { + version = 12; + } else if (sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) { + version = 11; + } else { + dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML2_ASSERT(0); + } + + return version; +} + +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) +{ + unsigned int i; + unsigned int index = 0; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, index); +#endif + return index; +} + +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = 0; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = 1; + break; + } + } + + dml2_assert(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (math_log2_approx(a) - subtrahend); +} + +static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = 
meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; + phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] 
= svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + return true; + case dml2_none: + default: + return false; + } +} +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmifrl: + return true; + case dml2_hdmi: + case dml2_none: + default: + return false; + } +} + + +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_edp: + return true; + case dml2_dp2p0: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp2p0: + return true; + case dml2_dp: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + return dml2_core_utils_is_dio_dp_encoder(stream_descriptor) + || dml2_core_utils_is_hpo_dp_encoder(stream_descriptor); +} + + +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + return true; + case dml2_dp_rate_na: + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + return true; + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + case dml2_dp_rate_na: + default: + return false; + } 
+} + +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode) +{ + switch (odm_mode) { + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + return true; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_combine_2to1: + case dml2_odm_mode_combine_3to1: + case dml2_odm_mode_combine_4to1: + default: + return false; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h new file mode 100644 index 000000000000..a5cc6a07167a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_CORE_UTILS_H__ +#define __DML2_CORE_UTILS_H__ +#include "dml2_internal_shared_types.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up); +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode); +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format); +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend); +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch); +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate); +bool 
dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode); + +#endif /* __DML2_CORE_UTILS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index f19f6ebaae13..8869ea089312 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -203,6 +203,26 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma return true; } +static bool round_to_non_dfs_granularity(unsigned long dispclk_khz, unsigned long dpprefclk_khz, unsigned long dtbrefclk_khz, + unsigned long *rounded_dispclk_khz, unsigned long *rounded_dpprefclk_khz, unsigned long *rounded_dtbrefclk_khz) +{ + unsigned long pll_frequency_khz; + + pll_frequency_khz = (unsigned long) math_max2(600000, math_ceil2(math_max3(dispclk_khz, dpprefclk_khz, dtbrefclk_khz), 1000)); + + *rounded_dispclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dispclk_khz, 32); + + *rounded_dpprefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dpprefclk_khz, 32); + + if (dtbrefclk_khz > 0) { + *rounded_dtbrefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dtbrefclk_khz, 32); + } else { + *rounded_dtbrefclk_khz = 0; + } + + return true; +} + static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table) { bool result = false; @@ -555,31 +575,39 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o // but still the required dispclk can be more than the maximum dispclk speed: dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); - // DPP Ref is always set to max of all DPP clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; } - - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); - - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 - * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); - } + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = (unsigned long) (in_out->programming->min_clocks.dcn4x.dpprefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); // DTB Ref is always set to max of all DTB clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { 
if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; } + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); + if (in_out->soc_bb->no_dfs) { + round_to_non_dfs_granularity((unsigned long)dispclk_khz, in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, + &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz); + } else { + add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); + } + + + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); + } in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index dfd01440737d..3861bc6c9621 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -20,7 +20,7 @@ bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_dpmm_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 68b333b68933..30767f330fd4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -718,7 +718,7 @@ bool 
pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; struct dml2_optimization_stage4_state *state = - &in_out->base_display_config->stage4; + &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -1444,7 +1444,7 @@ static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, /* DRR variable strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && - (pmo->options->disable_drr_clamped || + (pmo->options->disable_drr_clamped || (!stream_descriptor->timing.drr_config.enabled || (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || (pmo->options->disable_drr_clamped_when_var_active && @@ -1910,7 +1910,8 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met if (is_bit_set_in_bitfield(plane_mask, plane_index)) { plane = &display_config->display_config.plane_descriptors[plane_index]; - plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); + plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, + plane->overrides.reserved_vblank_time_ns); display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; @@ -2196,15 +2197,15 @@ bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in unsigned int i; - for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && pmo->scratch.pmo_dcn4.z8_vblank_optimizable && - in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * 1000) { success = false; break; } if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && - in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * 1000) { success = false; break; } @@ -2223,8 +2224,11 @@ bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in if (!in_out->last_candidate_failed) { if (pmo->scratch.pmo_dcn4.cur_stutter_candidate < pmo->scratch.pmo_dcn4.num_stutter_candidates) { - for (i = 0; i < in_out->optimized_display_config->display_config.num_streams; i++) { - in_out->optimized_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us = 
pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate]; + for (i = 0; i < in_out->optimized_display_config->display_config.num_planes; i++) { + /* take the max of the current and the optimal reserved time */ + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns = + (long)math_max2(pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate] * 1000, + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns); } success = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index 95f716e2641f..add51d41a515 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -26,7 +26,7 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_pmo_instance)); From e389eefe34cebc6219dbe76a923b342b2f31e3ba Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Mon, 12 Aug 2024 00:55:56 -0400 Subject: [PATCH 427/447] drm/amd/display: Promote DC to 3.2.297 - Various DML 2.1 fixes - Fix module unload - Fix construct_phy with MXM connector - Support UHBR10 link rate on eDP - Revert updated DCCG wrappers Reviewed-by: Roman Li Signed-off-by: Martin Leung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3de311533571..5bbc7d2daca6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.296" +#define DC_VER "3.2.297" #define MAX_SURFACES 3 #define MAX_PLANES 6 From c69b07f7bbc905022491c45097923d3487479529 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 19 Aug 2024 11:14:29 -0400 Subject: [PATCH 428/447] drm/amdgpu: fix eGPU hotplug regression The driver needs to wait for the on board firmware to finish its initialization before probing the card. Commit 959056982a9b ("drm/amdgpu: Fix discovery initialization failure during pci rescan") switched from using msleep() to using usleep_range() which seems to have caused init failures on some navi1x boards. Switch back to msleep(). 
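In outline, the restored wait pattern looks like the sketch below. This is illustrative only: the register, ready bit, and msleep() call come from the hunk that follows, while the helper name and the loop bound are invented for the example.

	/* Hypothetical helper: poll the firmware-ready bit with msleep(),
	 * which may sleep longer than requested; coarse timing is fine
	 * here, and it avoids the usleep_range() behavior that broke
	 * init on some navi1x boards.
	 */
	static int wait_for_fw_init_done(struct amdgpu_device *adev)
	{
		int i;

		for (i = 0; i < 1000; i++) {
			u32 msg = RREG32(mmMP0_SMN_C2PMSG_33);

			if (msg & 0x80000000)	/* firmware init complete */
				return 0;
			msleep(1);
		}
		return -ETIMEDOUT;	/* firmware never signaled ready */
	}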
Fixes: 959056982a9b ("drm/amdgpu: Fix discovery initialization failure during pci rescan") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3559 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3500 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: Ma Jun --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ac108fca64fe..7b561e8e3caf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -278,7 +278,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; - usleep_range(1000, 1100); + msleep(1); } } From bf2bc61638033d118c9ef4ab1204295ba6694401 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Mon, 19 Aug 2024 11:16:13 +0800 Subject: [PATCH 429/447] drm/amd/amdgpu: allow use kiq to do hdp flush under sriov When using the CPU to do page table updates under SRIOV at runtime, MMIO access is blocked, so KIQ has to be used to flush HDP. Change WREG32_NO_KIQ to WREG32 to allow KIQ. Signed-off-by: Victor Zhao Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 077c6d920e27..e019249883fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,7 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index a9ea23fa0def..ed7facacf2fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,7 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index ab06c2b4b20b..33736d361dd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,7 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 8d7d0813e331..1c99bb09e2a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -32,7 +32,7 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 186fb12e7a7b038c2710ceb2fb74068f1b5d55a4 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 7 Aug 2024 17:15:12 +0800 Subject: [PATCH 430/447] drm/amd/pm: ensure the fw_info is not null before using it This resolves the dereference null return value warning reported by Coverity. Signed-off-by: Tim Huang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c index ca1c7ae8d146..f06b29e33ba4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c @@ -1183,6 +1183,8 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr, fw_info = smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); + PP_ASSERT_WITH_CODE(fw_info != NULL, + "Missing firmware info!", return -EINVAL); if ((fw_info->ucTableFormatRevision == 1) && (le16_to_cpu(fw_info->usStructureSize) >= sizeof(ATOM_FIRMWARE_INFO_V1_4))) From 88c511dea151b931ba4873119b1b3555aac0ce53 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Aug 2024 10:35:49 -0400 Subject: [PATCH 431/447] drm/amd/gfx11: move the gfx mutex into the caller Otherwise we can fail to drop the software mutex when we fail to take the hardware mutex. 
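The corrected lock ordering, as a simplified sketch of the hunks below (error paths trimmed, names taken from the diff): the software mutex now brackets both hardware-mutex operations in the caller, so a failed hardware-mutex request can no longer leave the software mutex held.

	mutex_lock(&adev->gfx.reset_sem_mutex);			/* software mutex first */
	r = gfx_v11_0_request_gfx_index_mutex(adev, true);	/* then the hardware mutex */
	if (r) {
		/* hardware mutex not taken: drop the software mutex before bailing */
		mutex_unlock(&adev->gfx.reset_sem_mutex);
		return r;
	}
	/* ... access CP_VMID_RESET while both mutexes are held ... */
	r = gfx_v11_0_request_gfx_index_mutex(adev, false);	/* release the hardware mutex */
	mutex_unlock(&adev->gfx.reset_sem_mutex);		/* software mutex always released */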
Fixes: 76acba7b7f12 ("drm/amdgpu/gfx11: add a mutex for the gfx semaphore") Reported-by: Dan Carpenter Reviewed-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 5685aee479df..ee8604722467 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4747,8 +4747,6 @@ int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, { u32 i, tmp, val; - if (req) - mutex_lock(&adev->gfx.reset_sem_mutex); for (i = 0; i < adev->usec_timeout; i++) { /* Request with MeId=2, PipeId=0 */ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); @@ -4769,8 +4767,6 @@ int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, } udelay(1); } - if (!req) - mutex_unlock(&adev->gfx.reset_sem_mutex); if (i >= adev->usec_timeout) return -EINVAL; @@ -4818,8 +4814,10 @@ static int gfx_v11_0_soft_reset(void *handle) mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ + mutex_lock(&adev->gfx.reset_sem_mutex); r = gfx_v11_0_request_gfx_index_mutex(adev, true); if (r) { + mutex_unlock(&adev->gfx.reset_sem_mutex); DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); return r; } @@ -4834,6 +4832,7 @@ static int gfx_v11_0_soft_reset(void *handle) /* release the gfx mutex */ r = gfx_v11_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); if (r) { DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); return r; From db6341a9168d2a24ded526277eeab29724d76e9d Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Tue, 20 Aug 2024 13:56:32 +0800 Subject: [PATCH 432/447] drm/amdkfd: Check int source id for utcl2 poison event Traditional utcl2 fault_status polling does not work in SRIOV environment. The polling of fault status register from guest side will be dropped by hardware. Driver should switch to check utcl2 interrupt source id to identify utcl2 poison event. It is set to 1 when poisoned data interrupts are signaled. 
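In outline, the handler logic this change introduces looks like the sketch below (simplified from the diff that follows; not the full function):

	/* Under SRIOV the guest cannot rely on polling fault_status, so key
	 * off the IH source id instead. SOC15_INTSRC_VMC_UTCL2_POISON is the
	 * define this patch adds; it is set to 1 when poisoned data
	 * interrupts are signaled.
	 */
	if (client_id == SOC15_IH_CLIENTID_VMC ||
	    client_id == SOC15_IH_CLIENTID_UTCL2) {
		if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) {
			event_interrupt_poison_consumption_v9(dev, pasid, client_id);
			return;
		}
		/* otherwise fall through to normal VM fault reporting */
	}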
v2: drop the unused local variable (Tao) Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 18 +----------------- drivers/gpu/drm/amd/amdkfd/soc15_int.h | 1 + 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index a9c3580be8c9..fecdbbab9894 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -431,25 +431,9 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - /* gfxhub */ - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, - node_id); - if (hub_inst < 0) - hub_inst = 0; - } - - /* mmhub */ - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) - hub_inst = node_id / 4; - - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { + if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) { event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h index 10138676f27f..e5c0205f2618 100644 --- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -29,6 +29,7 @@ #define SOC15_INTSRC_CP_BAD_OPCODE 183 #define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 #define SOC15_INTSRC_VMC_FAULT 0 +#define SOC15_INTSRC_VMC_UTCL2_POISON 1 #define SOC15_INTSRC_SDMA_TRAP 224 #define SOC15_INTSRC_SDMA_ECC 220 #define SOC21_INTSRC_SDMA_TRAP 49 From e28604d8337eac97fa956d6682b6312741ce85a1 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 19 Aug 2024 22:23:11 +0800 Subject: [PATCH 433/447] drm/amdkfd: Drop poison handling from gfx v10 Not supported.
Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 71 ------------------- 1 file changed, 71 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 8e0d0356e810..bb8cbfc39b90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -129,63 +129,6 @@ enum SQ_INTERRUPT_ERROR_TYPE { KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \ >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT) -static void event_interrupt_poison_consumption(struct kfd_node *dev, - uint16_t pasid, uint16_t client_id) -{ - enum amdgpu_ras_block block = 0; - int old_poison, ret = -EINVAL; - uint32_t reset = 0; - struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); - - if (!p) - return; - - /* all queues of a process will be unmapped in one time */ - old_poison = atomic_cmpxchg(&p->poison, 0, 1); - kfd_unref_process(p); - if (old_poison) - return; - - switch (client_id) { - case SOC15_IH_CLIENTID_SE0SH: - case SOC15_IH_CLIENTID_SE1SH: - case SOC15_IH_CLIENTID_SE2SH: - case SOC15_IH_CLIENTID_SE3SH: - case SOC15_IH_CLIENTID_UTCL2: - ret = kfd_dqm_evict_pasid(dev->dqm, pasid); - block = AMDGPU_RAS_BLOCK__GFX; - if (ret) - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - case SOC15_IH_CLIENTID_SDMA0: - case SOC15_IH_CLIENTID_SDMA1: - case SOC15_IH_CLIENTID_SDMA2: - case SOC15_IH_CLIENTID_SDMA3: - case SOC15_IH_CLIENTID_SDMA4: - block = AMDGPU_RAS_BLOCK__SDMA; - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - default: - break; - } - - kfd_signal_poison_consumed_event(dev, pasid); - - /* resetting queue passes, do page retirement without gpu reset - * resetting queue fails, fallback to gpu reset solution - */ - if (!ret) - dev_warn(dev->adev->dev, - "RAS poison consumption, unmap queue flow succeeded: client id %d\n", - client_id); - else - dev_warn(dev->adev->dev, - "RAS poison consumption, fall back to gpu reset flow: client id %d\n", - client_id); - - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); -} - static bool event_interrupt_isr_v10(struct kfd_node *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, @@ -332,11 +275,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID), sq_intr_err_type); - if (sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && - sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; - } break; default: break; @@ -362,9 +300,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_SDMA7) { if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); - } else if (source_id == SOC15_INTSRC_SDMA_ECC) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || @@ -388,12 +323,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) hub_inst = node_id / 4; - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { - event_interrupt_poison_consumption(dev, pasid, client_id); - return; - } - info.vmid = vmid; info.mc_id = client_id; info.page_addr = ih_ring_entry[4] | From 01bfabc2d1d8aaffe5268f8df0843a6d916dcbaa Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: 
Tue, 20 Aug 2024 08:57:15 +0800 Subject: [PATCH 434/447] drm/amd/pm: update message interface for smu v14.0.2/3 update message interface for smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h | 18 ++++++++++++++---- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 1 - 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h index de2e442281ff..87ca5ceb1ece 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h @@ -92,7 +92,6 @@ //Resets #define PPSMC_MSG_PrepareMp1ForUnload 0x2E -#define PPSMC_MSG_Mode1Reset 0x2F //Set SystemVirtual DramAddrHigh #define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x30 @@ -119,11 +118,12 @@ //STB to dram log #define PPSMC_MSG_DumpSTBtoDram 0x3D -#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3E -#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3F +#define PPSMC_MSG_STBtoDramLogSetDramAddress 0x3E +#define PPSMC_MSG_DummyUndefined 0x3F #define PPSMC_MSG_STBtoDramLogSetDramSize 0x40 #define PPSMC_MSG_SetOBMTraceBufferLogging 0x41 +#define PPSMC_MSG_UseProfilingMode 0x42 #define PPSMC_MSG_AllowGfxDcs 0x43 #define PPSMC_MSG_DisallowGfxDcs 0x44 #define PPSMC_MSG_EnableAudioStutterWA 0x45 @@ -135,6 +135,16 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4B #define PPSMC_MSG_SetPriorityDeltaGain 0x4C #define PPSMC_MSG_AllowIHHostInterrupt 0x4D +#define PPSMC_MSG_EnableShadowDpm 0x4E #define PPSMC_MSG_Mode3Reset 0x4F -#define PPSMC_Message_Count 0x50 +#define PPSMC_MSG_SetDriverDramAddr 0x50 +#define PPSMC_MSG_SetToolsDramAddr 0x51 +#define PPSMC_MSG_TransferTableSmu2DramWithAddr 0x52 +#define PPSMC_MSG_TransferTableDram2SmuWithAddr 0x53 +#define PPSMC_MSG_GetAllRunningSmuFeatures 0x54 +#define PPSMC_MSG_GetSvi3Voltage 0x55 +#define PPSMC_MSG_UpdatePolicy 0x56 +#define PPSMC_MSG_ExtPwrConnSupport 0x57 +#define PPSMC_MSG_PreloadSwPstateForUclkOverDrive 0x58 +#define PPSMC_Message_Count 0x59 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 5913f9c60fe0..e000ac7b4c0e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -127,7 +127,6 @@ static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), - MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), From 75f0efbc4b3b088cca20864d055b3854a51b5af0 Mon Sep 17 00:00:00 2001 From: Rahul Jain Date: Tue, 13 Aug 2024 13:41:11 +0530 Subject: [PATCH 435/447] drm/amdgpu: Take IOMMU remapping into account for p2p checks When trying to enable p2p, amdgpu_device_is_peer_accessible() checks the condition where address_mask overlaps the aper_base and hence returns 0, which disables p2p for this platform. The IOMMU should remap the BAR addresses so the device can access them; the legacy mask test is sketched below.
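As a standalone sketch (hypothetical helper; the real check stays open-coded in the hunk below), the legacy mask test reduces to:

/* Sketch: the peer BAR is only directly addressable if both its base and
 * its last byte fit under the peer device's DMA mask; with IOMMU DMA
 * remapping active the BAR is remapped and this test can be skipped.
 */
static bool aperture_under_dma_mask(uint64_t aper_base, uint64_t aper_size,
				    uint64_t dma_mask)
{
	uint64_t address_mask = ~dma_mask;	/* bits the peer cannot drive */
	uint64_t aper_limit = aper_base + aper_size - 1;

	return !(aper_base & address_mask) && !(aper_limit & address_mask);
}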
Hence check if peer_adev is remapping DMA v5: (Felix, Alex) - fixing comment as per Alex feedback - refactor code as per Felix v4: (Alex) - fix the comment and description v3: - remove iommu_remap variable v2: (Alex) - Fix as per review comments - add new function amdgpu_device_check_iommu_remap to check if iommu remap Signed-off-by: Rahul Jain Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 43 ++++++++++++++++++---- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ad97f03f1358..da06705f0026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3957,6 +3957,27 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) adev->ram_is_direct_mapped = true; } +#if defined(CONFIG_HSA_AMD_P2P) +/** + * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled. + * + * @adev: amdgpu_device pointer + * + * return if IOMMU remapping bar address + */ +static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(adev->dev); + if (domain && (domain->type == IOMMU_DOMAIN_DMA || + domain->type == IOMMU_DOMAIN_DMA_FQ)) + return true; + + return false; +} +#endif + static const struct attribute *amdgpu_dev_attributes[] = { &dev_attr_pcie_replay_count.attr, NULL @@ -6151,18 +6172,24 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev) { #ifdef CONFIG_HSA_AMD_P2P - uint64_t address_mask = peer_adev->dev->dma_mask ? - ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); - resource_size_t aper_limit = - adev->gmc.aper_base + adev->gmc.aper_size - 1; bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); - return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && - adev->gmc.real_vram_size == adev->gmc.visible_vram_size && - !(adev->gmc.aper_base & address_mask || - aper_limit & address_mask)); + bool is_large_bar = adev->gmc.visible_vram_size && + adev->gmc.real_vram_size == adev->gmc.visible_vram_size; + bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); + + if (!p2p_addressable) { + uint64_t address_mask = peer_adev->dev->dma_mask ? + ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); + resource_size_t aper_limit = + adev->gmc.aper_base + adev->gmc.aper_size - 1; + + p2p_addressable = !(adev->gmc.aper_base & address_mask || + aper_limit & address_mask); + } + return is_large_bar && p2p_access && p2p_addressable; #else return false; #endif From b05d6476ae2dde8eb447f907ab689083499edeaa Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 19 Aug 2024 22:59:19 +0800 Subject: [PATCH 436/447] drm/amdgpu: Retire query_utcl2_poison_status callback Driver switches to interrupt source id to identify utcl2 poison event. polling interface is not needed. 
Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 ---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2 -- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 18 ------------------ drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 17 ----------------- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 17 ----------------- 7 files changed, 74 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 64a989cbc301..4f08b153cb66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -783,22 +783,6 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type) -{ - if (!hub_type) { - if (adev->gfxhub.funcs->query_utcl2_poison_status) - return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } else { - if (adev->mmhub.funcs->query_utcl2_poison_status) - return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } -} - int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) { return kgd2kfd_check_and_lock_kfd(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 825c7ffe4bc9..f9d119448442 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -350,8 +350,6 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index 103a837ccc71..c7b44aeb671b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -38,8 +38,6 @@ struct amdgpu_gfxhub_funcs { void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); void (*halt)(struct amdgpu_device *adev); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int xcc_id); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 95d676ee207f..1ca9d4ed8063 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,8 +63,6 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index d200310d1731..0e3ddea7b8e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -443,23 +443,6 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev) 
mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } -static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 status = 0; - struct amdgpu_vmhub *hub; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) - return false; - - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - status = RREG32(hub->vm_l2_pro_fault_status); - /* reset page fault status */ - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - - return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs, @@ -468,5 +451,4 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 72109abe7c86..ed8e130c7d19 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -622,22 +622,6 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) return 0; } -static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 fed, status; - - status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs, @@ -646,7 +630,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status, }; static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 915203b91c5f..b01bb759d0f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -559,22 +559,6 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } -static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst) -{ - u32 fed, status; - - status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -584,7 +568,6 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, - .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { From 
40318a2406bd426c6f4591269669c04e8eda571d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Aug 2024 13:11:22 -0400 Subject: [PATCH 437/447] drm/amdgpu/gfx12: set UNORD_DISPATCH in compute MQDs This needs to be set to 1 on GC 10.x and newer to avoid a potential deadlock; leaving it at 0 can lead to hangs in some mixed graphics and compute workloads. On GC 9.x and older, this needs to be set to 0. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3575 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f14e27f86e0e..54059cbcfc08 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3054,7 +3054,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index b7a08e7a4423..d163d92a692f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -187,6 +187,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); From 988bfa0bc67d7220ff8d9e2ba3a425727aa98af3 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 21 Aug 2024 14:40:36 +0800 Subject: [PATCH 438/447] drm/amd/display: Make core_dcn4_g6_temp_read_blackout_table static The sparse tool complains as follows: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:6853:56: warning: symbol 'core_dcn4_g6_temp_read_blackout_table' was not declared. Should it be static? This symbol is not used outside of dml2_core_dcn4_calcs.c, so mark it static. It is not modified anywhere, so mark it const as well.
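The pattern being applied (a generic sketch with made-up names and values, not this file's actual table) is:

/* File-local table: 'static' limits the symbol to this translation unit,
 * which silences the sparse warning, and 'const' lets it live in .rodata. */
static const struct example_blackout_entry {
	unsigned int uclk_khz;
	unsigned int blackout_us;
} example_blackout_table[] = {
	{ 96000, 140 },
	{ 435000, 90 },
};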
Signed-off-by: Jinjie Ruan Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index e2c45e498664..805fd783131f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -6887,7 +6887,8 @@ struct dml2_core_internal_g6_temp_read_blackouts_table { } entries[DML_MAX_CLK_TABLE_SIZE]; }; -struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = { +static const struct dml2_core_internal_g6_temp_read_blackouts_table + core_dcn4_g6_temp_read_blackout_table = { .entries = { { .uclk_khz = 96000, From 0e405395e0b162075001b9c027443dd10b723a03 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 21 Aug 2024 14:40:37 +0800 Subject: [PATCH 439/447] drm/amd/display: Make core_dcn4_ip_caps_base static The sparse tool complains as follows: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c:12:28: warning: symbol 'core_dcn4_ip_caps_base' was not declared. Should it be static? This symbol is not used outside of dml2_core_dcn4.c, so mark it static. It is not modified anywhere, so mark it const as well. Signed-off-by: Jinjie Ruan Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 698307f3ca39..0aa4e4d343b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -9,7 +9,7 @@ #include "dml2_debug.h" #include "lib_float_math.h" -struct dml2_core_ip_params core_dcn4_ip_caps_base = { +static const struct dml2_core_ip_params core_dcn4_ip_caps_base = { // Hardcoded values for DCN3x .vblank_nom_default_us = 668, .remote_iommu_outstanding_translations = 256, From 570867ef90550b01f0ca0f919dba308c3f2fb605 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 21 Aug 2024 14:40:38 +0800 Subject: [PATCH 440/447] drm/amd/display: Make dcn35_hubp_funcs static The sparse tool complains as follows: drivers/gpu/drm/amd/amdgpu/../display/dc/hubp/dcn35/dcn35_hubp.c:191:19: warning: symbol 'dcn35_hubp_funcs' was not declared. Should it be static? This symbol is not used outside of dcn35_hubp.c, so mark it static.
Signed-off-by: Jinjie Ruan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index 771fcd0d3b99..d1f05b82b3dd 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -188,7 +188,7 @@ void hubp35_program_surface_config( hubp35_program_pixel_format(hubp, format); } -struct hubp_funcs dcn35_hubp_funcs = { +static struct hubp_funcs dcn35_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, .hubp_program_surface_flip_and_addr = hubp3_program_surface_flip_and_addr, From 2845f512232de9e436b9e3b5529e906e62414013 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 21 Aug 2024 14:40:39 +0800 Subject: [PATCH 441/447] drm/amd/display: Make dcn401_dsc_funcs static The sparse tool complains as follows: drivers/gpu/drm/amd/amdgpu/../display/dc/dsc/dcn401/dcn401_dsc.c:30:24: warning: symbol 'dcn401_dsc_funcs' was not declared. Should it be static? This symbol is not used outside of dcn401_dsc.c, so mark it static. Signed-off-by: Jinjie Ruan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index 6acb6699f146..61678b0a5a1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -27,7 +27,7 @@ static void dsc401_disconnect(struct display_stream_compressor *dsc); static void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc); static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); -const struct dsc_funcs dcn401_dsc_funcs = { +static const struct dsc_funcs dcn401_dsc_funcs = { .dsc_get_enc_caps = dsc401_get_enc_caps, .dsc_read_state = dsc401_read_state, .dsc_validate_stream = dsc401_validate_stream, From 4416377ae1fdc41a90b665943152ccd7ff61d3c5 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 21 Aug 2024 14:42:41 +0800 Subject: [PATCH 442/447] drm/amdgpu: add list empty check to avoid null pointer issue Add list empty check to avoid null pointer issues in some corner cases.
- list_for_each_entry_safe() Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 929095a2e088..57bda66e85ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -80,6 +80,9 @@ static void aca_banks_release(struct aca_banks *banks) { struct aca_bank_node *node, *tmp; + if (list_empty(&banks->list)) + return; + list_for_each_entry_safe(node, tmp, &banks->list, node) { list_del(&node->node); kvfree(node); @@ -562,9 +565,13 @@ static void aca_error_fini(struct aca_error *aerr) struct aca_bank_error *bank_error, *tmp; mutex_lock(&aerr->lock); + if (list_empty(&aerr->list)) + goto out_unlock; + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) aca_bank_error_remove(aerr, bank_error); +out_unlock: mutex_destroy(&aerr->lock); } @@ -680,6 +687,9 @@ static void aca_manager_fini(struct aca_handle_manager *mgr) { struct aca_handle *handle, *tmp; + if (list_empty(&mgr->list)) + return; + list_for_each_entry_safe(handle, tmp, &mgr->list, node) amdgpu_aca_remove_handle(handle); } From 73dd0ad9e5dad53766ea3e631303430116f834b3 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 21 Aug 2024 12:27:24 +0800 Subject: [PATCH 443/447] drm/amd/display: avoid using null object of framebuffer Instead of using state->fb->obj[0] directly, get object from framebuffer by calling drm_gem_fb_get_obj() and return error code when object is null to avoid using null object of framebuffer. Fixes: 5d945cbcd4b1 ("drm/amd/display: Create a file dedicated to planes") Signed-off-by: Ma Ke Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 1ff469ef51af..a573a6639898 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "amdgpu.h" @@ -935,10 +936,14 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, } afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); - r = amdgpu_bo_reserve(rbo, true); if (r) { dev_err(adev->dev, "fail to reserve bo (%d)\n", r); From 875ff9a7ee8824200885384effa7743892a34ed6 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 22 Aug 2024 11:44:12 +0800 Subject: [PATCH 444/447] drm/amdgpu: support for gc_info table v1.3 Add gc_info table v1.3 for IP discovery. 
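Sketch of how the versioned table is consumed (condensed from the discovery changes below; the gc_info union and the le*_to_cpu accessors follow the hunks, the helper name is hypothetical):

/* Fields that only exist in newer layouts are gated on the minor version
 * carried in the table header, so older firmware images keep working. */
static void example_parse_gc_info(struct amdgpu_device *adev,
				  const union gc_info *gc_info)
{
	if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3)
		adev->gfx.config.gc_tcc_cache_line_size =
			le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size);
	/* ... the remaining v1_3-only fields are read the same way ... */
}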
Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 11 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +++ drivers/gpu/drm/amd/include/discovery.h | 42 +++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 7b561e8e3caf..4bd61c169ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1500,6 +1500,7 @@ union gc_info { struct gc_info_v1_0 v1; struct gc_info_v1_1 v1_1; struct gc_info_v1_2 v1_2; + struct gc_info_v1_3 v1_3; struct gc_info_v2_0 v2; struct gc_info_v2_1 v2_1; }; @@ -1558,6 +1559,16 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance); adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu); } + if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) { + adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu); + adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size); + adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc); + adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size); + adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc); + adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size); + adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size); + adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size); + } break; case 2: adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index e28c1ebfa98f..5644e10a86a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -245,6 +245,12 @@ struct amdgpu_gfx_config { uint32_t gc_tcp_size_per_cu; uint32_t gc_num_cu_per_sqc; uint32_t gc_tcc_size; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_cache_line_size; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 46bf19c9c5c4..710e328fad48 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -258,6 +258,48 @@ struct gc_info_v1_2 { uint32_t gc_gl2c_per_gpu; }; +struct gc_info_v1_3 { + struct gpu_info_header header; + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + 
uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; + uint32_t gc_num_tcp_per_wpg; + uint32_t gc_tcp_l1_size; + uint32_t gc_num_sqc_per_wgp; + uint32_t gc_l1_instruction_cache_size_per_sqc; + uint32_t gc_l1_data_cache_size_per_sqc; + uint32_t gc_gl1c_per_sa; + uint32_t gc_gl1c_size_per_instance; + uint32_t gc_gl2c_per_gpu; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_size; + uint32_t gc_tcc_cache_line_size; +}; + struct gc_info_v2_0 { struct gpu_info_header header; From 010cc730ace807c6d267481b5fb6ff99acc35c46 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 16 Aug 2024 14:34:17 +0530 Subject: [PATCH 445/447] drm/amd/pm: Add support for new P2S table revision Add p2s table support for a new revision of SMUv13.0.6. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Asad Kamal Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 78c3f94bb3ff..9974c9f8135e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -121,6 +121,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +#define P2S_TABLE_ID_3 0x50325303 // clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { @@ -271,14 +272,18 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t p2s_table_id = P2S_TABLE_ID_A; int ret = 0, i, p2stable_count; + int var = (adev->pdev->device & 0xF); char ucode_prefix[15]; /* No need to load P2S tables in IOV mode */ if (amdgpu_sriov_vf(adev)) return 0; - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { p2s_table_id = P2S_TABLE_ID_X; + if (var == 0x5) + p2s_table_id = P2S_TABLE_ID_3; + } amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); From 6ef29715ac06fad7b3e43086cb4df97952c3a4de Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Fri, 23 Aug 2024 02:04:09 -0500 Subject: [PATCH 446/447] drm/amdkfd: Change kfd/svm page fault drain handling When an application unmaps VM ranges (munmap), kfd/svm starts draining pending page faults and does not handle any incoming page fault for that process until a deferred work item gets executed by the default system wq. The period of not handling page faults can be long and is unpredictable, which is adverse to kfd page fault recovery performance. This patch uses the time stamp of an incoming page fault to decide whether to drop or recover it. When an application unmaps VM ranges, kfd records the current time stamp of each gpu device's ih ring. These time stamps are then used in the kfd page fault recovery routine. Any page fault that happens on an unmapped range after the unmap event is an application bug that accesses the VM range after unmap; it is not the driver's job to cover that. Since page fault time stamps make draining page faults in deferred work unnecessary, the period during which kfd does not handle page faults is reduced and can be controlled.
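The recovery-side decision can be condensed to the following sketch (hypothetical helper; checkpoint_ts and amdgpu_ih_ts_after() are from the hunks below, locking and error paths omitted):

/* Drop a retry fault raised before the per-GPU checkpoint recorded at
 * unmap time; once a fault newer than the checkpoint arrives, clear the
 * checkpoint so a later time stamp wraparound cannot skew the compare. */
static bool example_drop_stale_fault(struct svm_range_list *svms,
				     int gpuidx, uint64_t fault_ts)
{
	if (svms->checkpoint_ts[gpuidx] == 0)
		return false;	/* no unmap checkpoint pending on this GPU */

	if (amdgpu_ih_ts_after(fault_ts, svms->checkpoint_ts[gpuidx]))
		return true;	/* fault predates the unmap: drop it */

	svms->checkpoint_ts[gpuidx] = 0;
	return false;		/* newer fault: recover it normally */
}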
Signed-off-by: Xiaogang.Chen Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 95 +++++++++++++++++--------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 +- 7 files changed, 73 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1468222ea0cd..ad2e469548c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2776,7 +2776,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. */ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault) { bool is_compute_context = false; @@ -2802,7 +2802,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, write_fault)) { + node_id, addr, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 046949c4b695..d12d66dca8e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -558,7 +558,7 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f0ceab3ce5bf..9784a2892185 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -132,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, + entry->timestamp, write_fault)) return 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b73136d390cc..c76ac0dfe572 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -595,7 +595,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault); + addr, entry->timestamp, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -618,7 +618,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * tables */ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault)) + addr, entry->timestamp, write_fault)) return 1; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7bba6bed2f48..9ae9abc6eb43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -866,6 +866,8 @@ struct 
svm_range_list { struct delayed_work restore_work; DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE); struct task_struct *faulting_task; + /* check point ts decides if page fault recovery need be dropped */ + uint64_t checkpoint_ts[MAX_GPU_INSTANCE]; }; /* Process data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2339bbdf452f..ce2a5d9f90d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2262,16 +2262,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms) { struct kfd_process_device *pdd; struct kfd_process *p; - int drain; uint32_t i; p = container_of(svms, struct kfd_process, svms); -restart: - drain = atomic_read(&svms->drain_pagefaults); - if (!drain) - return; - for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { pdd = p->pdds[i]; if (!pdd) @@ -2291,8 +2285,6 @@ restart: pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } - if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) - goto restart; } static void svm_range_deferred_list_work(struct work_struct *work) @@ -2314,17 +2306,8 @@ static void svm_range_deferred_list_work(struct work_struct *work) prange->start, prange->last, prange->work_item.op); mm = prange->work_item.mm; -retry: - mmap_write_lock(mm); - /* Checking for the need to drain retry faults must be inside - * mmap write lock to serialize with munmap notifiers. - */ - if (unlikely(atomic_read(&svms->drain_pagefaults))) { - mmap_write_unlock(mm); - svm_range_drain_retry_fault(svms); - goto retry; - } + mmap_write_lock(mm); /* Remove from deferred_list must be inside mmap write lock, for * two race cases: @@ -2445,6 +2428,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, struct kfd_process *p; unsigned long s, l; bool unmap_parent; + uint32_t i; if (atomic_read(&prange->queue_refcount)) { int r; @@ -2464,11 +2448,35 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last, start, last); - /* Make sure pending page faults are drained in the deferred worker - * before the range is freed to avoid straggler interrupts on - * unmapped memory causing "phantom faults". 
+ /* calculate time stamps that are used to decide which page faults need be + * dropped or handled before unmap pages from gpu vm */ - atomic_inc(&svms->drain_pagefaults); + for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + struct amdgpu_ih_ring *ih; + uint32_t checkpoint_wptr; + + pdd = p->pdds[i]; + if (!pdd) + continue; + + adev = pdd->dev->adev; + + /* Check and drain ih1 ring if cam not available */ + ih = &adev->irq.ih1; + checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); + if (ih->rptr != checkpoint_wptr) { + svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); + continue; + } + + /* check if dev->irq.ih_soft is not empty */ + ih = &adev->irq.ih_soft; + checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); + if (ih->rptr != checkpoint_wptr) + svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); + } unmap_parent = start <= prange->start && last >= prange->last; @@ -2909,7 +2917,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault) int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t vmid, uint32_t node_id, - uint64_t addr, bool write_fault) + uint64_t addr, uint64_t ts, bool write_fault) { unsigned long start, last, size; struct mm_struct *mm = NULL; @@ -2919,7 +2927,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, ktime_t timestamp = ktime_get_boottime(); struct kfd_node *node; int32_t best_loc; - int32_t gpuidx = MAX_GPU_INSTANCE; + int32_t gpuid, gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; struct vm_area_struct *vma; bool migration = false; @@ -2940,11 +2948,38 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); if (atomic_read(&svms->drain_pagefaults)) { - pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr); r = 0; goto out; } + node = kfd_node_by_irq_ids(adev, node_id, vmid); + if (!node) { + pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, + vmid); + r = -EFAULT; + goto out; + } + + if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) { + pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id); + r = -EFAULT; + goto out; + } + + /* check if this page fault time stamp is before svms->checkpoint_ts */ + if (svms->checkpoint_ts[gpuidx] != 0) { + if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) { + pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = 0; + goto out; + } else + /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts + * to zero to avoid following ts wrap around give wrong comparing + */ + svms->checkpoint_ts[gpuidx] = 0; + } + if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); r = -EFAULT; @@ -2961,13 +2996,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - node = kfd_node_by_irq_ids(adev, node_id, vmid); - if (!node) { - pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, - vmid); - r = -EFAULT; - goto out; - } mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); @@ -3182,8 +3210,9 @@ void svm_range_list_fini(struct kfd_process *p) /* * Ensure no retry fault comes in afterwards, as page fault handler will * not find kfd process and take mm lock to recover fault. 
+ * stop kfd page fault handing, then wait pending page faults got drained */ - atomic_inc(&p->svms.drain_pagefaults); + atomic_set(&p->svms.drain_pagefaults, 1); svm_range_drain_retry_fault(&p->svms); list_for_each_entry_safe(prange, next, &p->svms.list, list) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 747325a2ea89..bddd24f04669 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -174,7 +174,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, - uint32_t vmid, uint32_t node_id, uint64_t addr, + uint32_t vmid, uint32_t node_id, uint64_t addr, uint64_t ts, bool write_fault); int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); void svm_range_add_list_work(struct svm_range_list *svms, @@ -225,7 +225,7 @@ static inline void svm_range_list_fini(struct kfd_process *p) static inline int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t client_id, uint32_t node_id, - uint64_t addr, bool write_fault) + uint64_t addr, uint64_t ts, bool write_fault) { return -EFAULT; } From 3376f922bfe070eff762164b3fc66981e3079417 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 21 Aug 2024 13:10:58 +0800 Subject: [PATCH 447/447] drm/amd/pm: Drop unsupported features on smu v14_0_2 Drop unsupported features on smu v14_0_2. Signed-off-by: Candice Li Reviewed-by: Yang Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e000ac7b4c0e..a31fae5feedf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -2114,50 +2114,6 @@ static void smu_v14_0_2_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, regMP1_SMN_C2PMSG_54); } -static int smu_v14_0_2_smu_send_bad_mem_page_num(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad page number on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetNumBadMemoryPagesRetired, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages number\n", - __func__); - - return ret; -} - -static int smu_v14_0_2_send_bad_mem_channel_flag(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad channel info on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages channel info\n", - __func__); - - return ret; -} - -static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, - void *table) -{ - int ret = 0; - - // TODO - - return ret; -} - static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -2896,12 +2852,9 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_gfx_features = smu_v14_0_2_enable_gfx_features, .set_mp1_state = smu_v14_0_2_set_mp1_state, .set_df_cstate = smu_v14_0_2_set_df_cstate, - .send_hbm_bad_pages_num = smu_v14_0_2_smu_send_bad_mem_page_num, - 
.send_hbm_bad_channel_flag = smu_v14_0_2_send_bad_mem_channel_flag, #if 0 .gpo_control = smu_v14_0_gpo_control, #endif - .get_ecc_info = smu_v14_0_2_get_ecc_info, }; void smu_v14_0_2_set_ppt_funcs(struct smu_context *smu)