amd-drm-fixes-6.6-2023-09-13:

amdgpu:
 - GC 9.4.3 fixes
 - Fix white screen issues with S/G display on system with >= 64G of ram
 - Replay fixes
 - SMU 13.0.6 fixes
 - AUX backlight fix
 - NBIO 4.3 SR-IOV fixes for HDP
 - RAS fixes
 - DP MST resume fix
 - Fix segfault on systems with no vbios
 - DPIA fixes
 
 amdkfd:
 - CWSR grace period fix
 - Unaligned doorbell fix
 - CRIU fix for GFX11
 - Add missing TLB flush on gfx10 and newer
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQgO5Idg2tXNTSZAr293/aFa7yZ2AUCZQIRSAAKCRC93/aFa7yZ
 2O/nAP4zB0fdLB46Hhz11aYsE9Zghe91b2rcmF4EYpEAQs7awwEAhSjy0Wiy6EYb
 prEGCdW0O8Tq7fdjr7+JrPmF7dasAQk=
 =SUbg
 -----END PGP SIGNATURE-----

Merge tag 'amd-drm-fixes-6.6-2023-09-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.6-2023-09-13:

amdgpu:
- GC 9.4.3 fixes
- Fix white screen issues with S/G display on systems with >= 64 GB of RAM
- Replay fixes
- SMU 13.0.6 fixes
- AUX backlight fix
- NBIO 4.3 SR-IOV fixes for HDP
- RAS fixes
- DP MST resume fix
- Fix segfault on systems with no vbios
- DPIA fixes

amdkfd:
- CWSR grace period fix
- Unaligned doorbell fix
- CRIU fix for GFX11
- Add missing TLB flush on gfx10 and newer

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230913195009.7714-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2023-09-15 09:50:34 +10:00
commit 1216d49178
44 changed files with 381 additions and 202 deletions

View File

@ -1293,7 +1293,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
bool amdgpu_device_pcie_dynamic_switching_supported(void);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
bool amdgpu_device_aspm_support_quirk(void);

View File

@ -478,7 +478,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *c
cu_info->cu_active_number = acu_info.number;
cu_info->cu_ao_mask = acu_info.ao_cu_mask;
memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
sizeof(acu_info.bitmap));
sizeof(cu_info->cu_bitmap));
cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;

View File

@ -980,8 +980,7 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst)
uint32_t *reg_data)
{
*reg_data = wait_times;

View File

@ -55,5 +55,4 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst);
uint32_t *reg_data);

View File

@ -1103,8 +1103,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst)
uint32_t *reg_data)
{
*reg_data = wait_times;
@ -1120,8 +1119,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
SCH_WAVE,
grace_period);
*reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
mmCP_IQ_WAIT_TIME2);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,

View File

@ -100,5 +100,4 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst);
uint32_t *reg_data);

View File

@ -1244,32 +1244,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
}
/*
 * Decide whether scatter/gather (S/G) display may be used on this device.
 *
 * The amdgpu_sg_display setting is checked first: 1 forces S/G on, -1
 * selects the automatic policy below, and any other value (0 included)
 * turns S/G off.
 *
 * Automatic policy: on APUs with >= 64GB white flickering has been observed
 * w/ SG enabled, so S/G is refused (with a warning) once system RAM expressed
 * in KiB plus real_vram_size / 1024 reaches 64000000, until we have a proper
 * fix.
 * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
 * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
 */
bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
{
	/* Explicit user choice wins over the memory-size heuristic. */
	if (amdgpu_sg_display == 1)
		return true;
	if (amdgpu_sg_display != -1)
		return false;

	/* Auto mode: below the threshold S/G stays enabled. */
	if ((totalram_pages() << (PAGE_SHIFT - 10)) +
	    (adev->gmc.real_vram_size / 1024) < 64000000)
		return true;

	DRM_WARN("Disabling S/G due to >=64GB RAM\n");
	return false;
}
/*
* Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
* speed switching. Until we have confirmation from Intel that a specific host

View File

@ -43,6 +43,7 @@
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
#define AMDGPU_MAX_GC_INSTANCES 8
#define KGD_MAX_QUEUES 128
#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@ -257,7 +258,7 @@ struct amdgpu_cu_info {
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
uint32_t bitmap[4][4];
uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
};
struct amdgpu_gfx_ras {

View File

@ -839,7 +839,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
sizeof(adev->gfx.cu_info.ao_cu_bitmap));
memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
sizeof(adev->gfx.cu_info.bitmap));
sizeof(dev_info->cu_bitmap));
dev_info->vram_type = adev->gmc.vram_type;
dev_info->vram_bit_width = adev->gmc.vram_width;
dev_info->vce_harvest_config = adev->vce.harvest_config;
@ -940,12 +940,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct atom_context *atom_context;
atom_context = adev->mode_info.atom_context;
memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name));
memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn));
vbios_info.version = atom_context->version;
memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
sizeof(atom_context->vbios_ver_str));
memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date));
if (atom_context) {
memcpy(vbios_info.name, atom_context->name,
sizeof(atom_context->name));
memcpy(vbios_info.vbios_pn, atom_context->vbios_pn,
sizeof(atom_context->vbios_pn));
vbios_info.version = atom_context->version;
memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
sizeof(atom_context->vbios_ver_str));
memcpy(vbios_info.date, atom_context->date,
sizeof(atom_context->date));
}
return copy_to_user(out, &vbios_info,
min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;

View File

@ -1052,7 +1052,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
info->ce_count = obj->err_data.ce_count;
if (err_data.ce_count) {
if (adev->smuio.funcs &&
if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "
@ -1072,7 +1073,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
}
}
if (err_data.ue_count) {
if (adev->smuio.funcs &&
if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "

View File

@ -9449,7 +9449,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
cu_info->bitmap[i][j] = bitmap;
cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {

View File

@ -6368,7 +6368,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
* SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
* SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
*/
cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask)

View File

@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
gfx_v6_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v6_0_get_cu_enabled(adev);
cu_info->bitmap[i][j] = bitmap;
cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {

View File

@ -5119,7 +5119,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
gfx_v7_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap;
cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {

View File

@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
gfx_v8_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap;
cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {

View File

@ -1499,7 +1499,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (cu_info->bitmap[i][j] & mask) {
if (cu_info->bitmap[0][i][j] & mask) {
if (counter == pg_always_on_cu_num)
WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
if (counter < always_on_cu_num)
@ -7233,7 +7233,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
* SE6,SH0 --> bitmap[2][1]
* SE7,SH0 --> bitmap[3][1]
*/
cu_info->bitmap[i % 4][j + i / 4] = bitmap;
cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {

View File

@ -4259,7 +4259,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
}
static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
u32 bitmap)
u32 bitmap, int xcc_id)
{
u32 data;
@ -4269,15 +4269,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
}
static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id)
{
u32 data, mask;
data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG);
data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG);
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
@ -4290,7 +4290,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info)
{
int i, j, k, counter, active_cu_number = 0;
int i, j, k, counter, xcc_id, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
unsigned disable_masks[4 * 4];
@ -4309,46 +4309,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
adev->gfx.config.max_sh_per_se);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
ao_bitmap = 0;
counter = 0;
gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0);
gfx_v9_4_3_set_user_cu_inactive_bitmap(
adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev);
for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
ao_bitmap = 0;
counter = 0;
gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
gfx_v9_4_3_set_user_cu_inactive_bitmap(
adev,
disable_masks[i * adev->gfx.config.max_sh_per_se + j],
xcc_id);
bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id);
/*
* The bitmap (and ao_cu_bitmap) in the cu_info structure is a
* 4x4 array, which normally suits Vega ASICs that have a
* 4*2 SE/SH layout.
* For Arcturus, however, the SE/SH layout changes to 8*1.
* To minimize the impact, we keep it compatible with the
* current bitmap array as below:
* SE4,SH0 --> bitmap[0][1]
* SE5,SH0 --> bitmap[1][1]
* SE6,SH0 --> bitmap[2][1]
* SE7,SH0 --> bitmap[3][1]
*/
cu_info->bitmap[i % 4][j + i / 4] = bitmap;
cu_info->bitmap[xcc_id][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
if (counter < adev->gfx.config.max_cu_per_sh)
ao_bitmap |= mask;
counter++;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
if (counter < adev->gfx.config.max_cu_per_sh)
ao_bitmap |= mask;
counter++;
}
mask <<= 1;
}
mask <<= 1;
active_cu_number += counter;
if (i < 2 && j < 2)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
active_cu_number += counter;
if (i < 2 && j < 2)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
}
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
xcc_id);
}
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;

View File

@ -345,6 +345,9 @@ static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
}
if (amdgpu_sriov_vf(adev))
adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)

View File

@ -766,7 +766,7 @@ static int soc21_common_hw_init(void *handle)
* for the purpose of expose those registers
* to process space
*/
if (adev->nbio.funcs->remap_hdp_registers)
if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
adev->nbio.funcs->enable_doorbell_aperture(adev, true);

View File

@ -2087,7 +2087,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
cu->num_simd_per_cu = cu_info.simd_per_cu;
cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
cu->num_simd_cores = cu_info.simd_per_cu *
(cu_info.cu_active_number / kdev->kfd->num_nodes);
cu->max_waves_simd = cu_info.max_waves_per_simd;
cu->wave_front_size = cu_info.wave_front_size;

View File

@ -79,6 +79,10 @@ struct crat_header {
#define CRAT_SUBTYPE_IOLINK_AFFINITY 5
#define CRAT_SUBTYPE_MAX 6
/*
* Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
* as it breaks the ABI.
*/
#define CRAT_SIBLINGMAP_SIZE 32
/*

View File

@ -1677,8 +1677,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->dev->kfd2kgd->build_grace_period_packet_info(
dqm->dev->adev, dqm->wait_times,
grace_period, &reg_offset,
&dqm->wait_times,
ffs(dqm->dev->xcc_mask) - 1);
&dqm->wait_times);
}
dqm_unlock(dqm);

View File

@ -162,6 +162,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
return NULL;
*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
inx *= 2;
pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
@ -176,6 +177,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
unsigned int inx;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
inx /= 2;
mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_bitmap);

View File

@ -97,18 +97,22 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask)
uint32_t *se_mask, uint32_t inst)
{
struct kfd_cu_info cu_info;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 2 : 1;
uint32_t cu_active_per_node;
int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
if (cu_mask_count > cu_info.cu_active_number)
cu_mask_count = cu_info.cu_active_number;
cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes;
if (cu_mask_count > cu_active_per_node)
cu_mask_count = cu_active_per_node;
/* Exceeding these bounds corrupts the stack and indicates a coding error.
* Returning with no CU's enabled will hang the queue, which should be
@ -141,7 +145,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
for (se = 0; se < cu_info.num_shader_engines; se++)
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
cu_per_sh[se][sh] = hweight32(
cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) *
cu_bitmap_sh_mul]);
/* Symmetrically map cu_mask to all SEs & SHs:
* se_mask programs up to 2 SH in the upper and lower 16 bits.
@ -164,20 +169,33 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
* ...
*
* For GFX 9.4.3, the following code only looks at a
* subset of the cu_mask corresponding to the inst parameter.
* If we have n XCCs under one GPU node
* cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0)
* cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0)
* ..
* cu_mask[0] bitn -> XCCn se_mask[0] bit0 (XCCn,SE0,SH0,CU0)
* cu_mask[0] bit n+1 -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0)
*
* For example, if there are 6 XCCs under 1 KFD node, this code
* running for each inst, will look at the bits as:
* inst, inst + 6, inst + 12...
*
* First ensure all CUs are disabled, then enable user specified CUs.
*/
for (i = 0; i < cu_info.num_shader_engines; i++)
se_mask[i] = 0;
i = 0;
for (cu = 0; cu < 16; cu += inc) {
i = inst;
for (cu = 0; cu < 16; cu += cu_inc) {
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
for (se = 0; se < cu_info.num_shader_engines; se++) {
if (cu_per_sh[se][sh] > cu) {
if (cu_mask[i / 32] & (en_mask << (i % 32)))
se_mask[se] |= en_mask << (cu + sh * 16);
i += inc;
if (i == cu_mask_count)
if (i >= cu_mask_count)
return;
}
}

View File

@ -138,7 +138,7 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
uint32_t *se_mask, uint32_t inst);
int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,

View File

@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];

View File

@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];

View File

@ -71,7 +71,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
}
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m->compute_static_thread_mgmt_se0 = se_mask[0];
m->compute_static_thread_mgmt_se1 = se_mask[1];
@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
/*
 * Snapshot a v11 compute MQD for checkpointing.
 *
 * Copies sizeof(struct v11_compute_mqd) bytes from the MQD obtained via
 * get_mqd(@mqd) into @mqd_dst. @mm and @ctl_stack_dst are not used here.
 */
static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
{
	struct v11_compute_mqd *live = get_mqd(mqd);

	memcpy(mqd_dst, live, sizeof(*live));
}
/*
 * Rebuild a v11 compute MQD from a previously checkpointed image.
 *
 * @mqd:         receives the CPU pointer of the restored MQD
 * @mqd_mem_obj: backing memory object; its cpu_ptr is the copy destination
 * @gart_addr:   optional; when non-NULL receives mqd_mem_obj->gpu_addr
 * @qp:          queue properties; supplies doorbell_off, and is_active is
 *               cleared on return
 * @mqd_src:     saved MQD image, sizeof(struct v11_compute_mqd) bytes are read
 *
 * @mm, @ctl_stack_src and @ctl_stack_size are not used here.
 */
static void restore_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, const u32 ctl_stack_size)
{
	struct v11_compute_mqd *restored =
		(struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
	uint64_t gpu_va = mqd_mem_obj->gpu_addr;

	memcpy(restored, mqd_src, sizeof(*restored));

	*mqd = restored;
	if (gart_addr)
		*gart_addr = gpu_va;

	/*
	 * The doorbell control word from the saved image is replaced with one
	 * built from the doorbell offset in @qp.
	 */
	restored->cp_hqd_pq_doorbell_control = qp->doorbell_off <<
		CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;

	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
		 restored->cp_hqd_pq_doorbell_control);

	/* The queue is left marked inactive after the restore. */
	qp->is_active = 0;
}
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@ -458,6 +495,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->mqd_size = sizeof(struct v11_compute_mqd);
mqd->get_wave_state = get_wave_state;
mqd->mqd_stride = kfd_mqd_stride;
mqd->checkpoint_mqd = checkpoint_mqd;
mqd->restore_mqd = restore_mqd;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@ -502,6 +541,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
mqd->is_occupied = kfd_is_occupied_sdma;
mqd->checkpoint_mqd = checkpoint_mqd;
mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)

View File

@ -60,7 +60,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
}
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
struct mqd_update_info *minfo)
struct mqd_update_info *minfo, uint32_t inst)
{
struct v9_mqd *m;
uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
@ -69,27 +69,36 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
m->compute_static_thread_mgmt_se1 = se_mask[1];
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
m->compute_static_thread_mgmt_se7 = se_mask[7];
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) {
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
m->compute_static_thread_mgmt_se7 = se_mask[7];
pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
m->compute_static_thread_mgmt_se0,
m->compute_static_thread_mgmt_se1,
m->compute_static_thread_mgmt_se2,
m->compute_static_thread_mgmt_se3,
m->compute_static_thread_mgmt_se4,
m->compute_static_thread_mgmt_se5,
m->compute_static_thread_mgmt_se6,
m->compute_static_thread_mgmt_se7);
pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
m->compute_static_thread_mgmt_se0,
m->compute_static_thread_mgmt_se1,
m->compute_static_thread_mgmt_se2,
m->compute_static_thread_mgmt_se3,
m->compute_static_thread_mgmt_se4,
m->compute_static_thread_mgmt_se5,
m->compute_static_thread_mgmt_se6,
m->compute_static_thread_mgmt_se7);
} else {
pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
inst, m->compute_static_thread_mgmt_se0,
m->compute_static_thread_mgmt_se1,
m->compute_static_thread_mgmt_se2,
m->compute_static_thread_mgmt_se3);
}
}
static void set_priority(struct v9_mqd *m, struct queue_properties *q)
@ -290,7 +299,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
m->cp_hqd_ctx_save_control = 0;
update_cu_mask(mm, mqd, minfo);
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3))
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
q->is_active = QUEUE_IS_ACTIVE(*q);
@ -676,6 +686,8 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);
update_cu_mask(mm, mqd, minfo, xcc);
if (q->format == KFD_QUEUE_FORMAT_AQL) {
switch (xcc) {
case 0:

View File

@ -55,7 +55,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];

View File

@ -299,8 +299,7 @@ static int pm_set_grace_period_v9(struct packet_manager *pm,
pm->dqm->wait_times,
grace_period,
&reg_offset,
&reg_data,
0);
&reg_data);
if (grace_period == USE_DEFAULT_GRACE_PERIOD)
reg_data = pm->dqm->wait_times;

View File

@ -1466,8 +1466,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}

View File

@ -450,8 +450,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, offs, "simd_count",
dev->gpu ? (dev->node_props.simd_count *
NUM_XCC(dev->gpu->xcc_mask)) : 0);
dev->gpu ? dev->node_props.simd_count : 0);
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, offs, "caches_count",
@ -1597,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
int cache_type, unsigned int cu_processor_id)
int cache_type, unsigned int cu_processor_id,
struct kfd_node *knode)
{
unsigned int cu_sibling_map_mask;
int first_active_cu;
int i, j, k;
int i, j, k, xcc, start, end;
struct kfd_cache_properties *pcache = NULL;
cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
start = ffs(knode->xcc_mask) - 1;
end = start + NUM_XCC(knode->xcc_mask);
cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
@ -1639,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
k = 0;
for (i = 0; i < cu_info->num_shader_engines; i++) {
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
k += 4;
for (xcc = start; xcc < end; xcc++) {
for (i = 0; i < cu_info->num_shader_engines; i++) {
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
k += 4;
cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
}
}
}
pcache->sibling_map_size = k;
@ -1666,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
{
struct kfd_gpu_cache_info *pcache_info = NULL;
int i, j, k;
int i, j, k, xcc, start, end;
int ct = 0;
unsigned int cu_processor_id;
int ret;
@ -1700,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
* then it will consider only one CU from
* the shared unit
*/
start = ffs(kdev->xcc_mask) - 1;
end = start + NUM_XCC(kdev->xcc_mask);
for (ct = 0; ct < num_of_cache_types; ct++) {
cu_processor_id = gpu_processor_id;
if (pcache_info[ct].cache_level == 1) {
for (i = 0; i < pcu_info->num_shader_engines; i++) {
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
for (xcc = start; xcc < end; xcc++) {
for (i = 0; i < pcu_info->num_shader_engines; i++) {
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
cu_processor_id, k);
if (ret < 0)
break;
if (ret < 0)
break;
if (!ret) {
num_of_entries++;
list_add_tail(&props_ext->list, &dev->cache_props);
if (!ret) {
num_of_entries++;
list_add_tail(&props_ext->list, &dev->cache_props);
}
/* Move to next CU block */
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
pcu_info->num_cu_per_sh) ?
pcache_info[ct].num_cu_shared :
(pcu_info->num_cu_per_sh - k);
cu_processor_id += num_cu_shared;
}
/* Move to next CU block */
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
pcu_info->num_cu_per_sh) ?
pcache_info[ct].num_cu_shared :
(pcu_info->num_cu_per_sh - k);
cu_processor_id += num_cu_shared;
}
}
}
} else {
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
pcu_info, ct, cu_processor_id);
pcu_info, ct, cu_processor_id, kdev);
if (ret < 0)
break;

View File

@ -89,7 +89,7 @@ struct kfd_mem_properties {
struct attribute attr;
};
#define CACHE_SIBLINGMAP_SIZE 64
#define CACHE_SIBLINGMAP_SIZE 128
struct kfd_cache_properties {
struct list_head list;

View File

@ -1274,11 +1274,15 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF;
page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12);
page_table_end.high_part = (u32)(adev->gmc.gart_end >> 44) & 0xF;
page_table_end.low_part = (u32)(adev->gmc.gart_end >> 12);
page_table_base.high_part = upper_32_bits(pt_base) & 0xF;
page_table_start.high_part = upper_32_bits(adev->gmc.gart_start >>
AMDGPU_GPU_PAGE_SHIFT);
page_table_start.low_part = lower_32_bits(adev->gmc.gart_start >>
AMDGPU_GPU_PAGE_SHIFT);
page_table_end.high_part = upper_32_bits(adev->gmc.gart_end >>
AMDGPU_GPU_PAGE_SHIFT);
page_table_end.low_part = lower_32_bits(adev->gmc.gart_end >>
AMDGPU_GPU_PAGE_SHIFT);
page_table_base.high_part = upper_32_bits(pt_base);
page_table_base.low_part = lower_32_bits(pt_base);
pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18;
@ -1640,8 +1644,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
break;
}
if (init_data.flags.gpu_vm_support)
init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev);
if (init_data.flags.gpu_vm_support &&
(amdgpu_sg_display == 0))
init_data.flags.gpu_vm_support = false;
if (init_data.flags.gpu_vm_support)
adev->mode_info.gpu_vm_support = true;
@ -2335,14 +2340,62 @@ static int dm_late_init(void *handle)
return detect_mst_link_for_all_connectors(adev_to_drm(adev));
}
/*
 * Restore the MST primary branch's DPCD state after resume.
 *
 * With mgr->lock held this:
 *   1. bails out if there is no primary branch device,
 *   2. re-reads the DPCD capabilities into mgr->dpcd,
 *   3. re-enables MST, up-requests and "upstream is source" in DP_MSTM_CTRL,
 *   4. reads back the branch GUID and, if it is all zeros, writes a new one
 *      built from the current jiffies value,
 *   5. stores the resulting GUID in mgr->mst_primary->guid.
 *
 * Any AUX failure is logged at debug level and leaves the cached GUID
 * untouched; nothing is reported to the caller.
 */
static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr)
{
	u8 branch_guid[16];
	u64 stamp;
	int rc;

	mutex_lock(&mgr->lock);

	/* Nothing to restore when no primary branch is attached. */
	if (mgr->mst_primary == NULL)
		goto unlock;

	if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) {
		drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
		goto unlock;
	}

	rc = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
				DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
	if (rc < 0) {
		drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n");
		goto unlock;
	}

	/* Some hubs forget their guids after they resume */
	rc = drm_dp_dpcd_read(mgr->aux, DP_GUID, branch_guid, sizeof(branch_guid));
	if (rc != (int)sizeof(branch_guid)) {
		drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
		goto unlock;
	}

	/* memchr_inv() returns NULL when every byte matches, i.e. GUID is all zeros. */
	if (!memchr_inv(branch_guid, 0, sizeof(branch_guid))) {
		/* Fill both 8-byte halves of the GUID with the same jiffies stamp. */
		stamp = get_jiffies_64();
		memcpy(branch_guid, &stamp, sizeof(stamp));
		memcpy(branch_guid + sizeof(stamp), &stamp, sizeof(stamp));

		rc = drm_dp_dpcd_write(mgr->aux, DP_GUID, branch_guid, sizeof(branch_guid));
		if (rc != (int)sizeof(branch_guid)) {
			drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n");
			goto unlock;
		}
	}

	memcpy(mgr->mst_primary->guid, branch_guid, sizeof(branch_guid));

unlock:
	mutex_unlock(&mgr->lock);
}
static void s3_handle_mst(struct drm_device *dev, bool suspend)
{
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct drm_dp_mst_topology_mgr *mgr;
int ret;
bool need_hotplug = false;
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
@ -2364,18 +2417,15 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
if (!dp_is_lttpr_present(aconnector->dc_link))
try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
ret = drm_dp_mst_topology_mgr_resume(mgr, true);
if (ret < 0) {
dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
aconnector->dc_link);
need_hotplug = true;
}
/* TODO: move resume_mst_branch_status() back into drm mst resume
* once topology probing has been split out of mst resume into a
* second resume step. That second step should be called after the
* old state has been restored (i.e. after drm_atomic_helper_resume()).
*/
resume_mst_branch_status(mgr);
}
}
drm_connector_list_iter_end(&iter);
if (need_hotplug)
drm_kms_helper_hotplug_event(dev);
}
static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
@ -2769,7 +2819,8 @@ static int dm_resume(void *handle)
struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
enum dc_connection_type new_connection_type = dc_connection_none;
struct dc_state *dc_state;
int i, r, j;
int i, r, j, ret;
bool need_hotplug = false;
if (amdgpu_in_reset(adev)) {
dc_state = dm->cached_dc_state;
@ -2867,7 +2918,7 @@ static int dm_resume(void *handle)
continue;
/*
* this is the case when traversing through already created
* this is the case when traversing through already created end sink
* MST connectors, should be skipped
*/
if (aconnector && aconnector->mst_root)
@ -2927,6 +2978,27 @@ static int dm_resume(void *handle)
dm->cached_state = NULL;
/* Do mst topology probing after resuming cached state*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch ||
aconnector->mst_root)
continue;
ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true);
if (ret < 0) {
dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
aconnector->dc_link);
need_hotplug = true;
}
}
drm_connector_list_iter_end(&iter);
if (need_hotplug)
drm_kms_helper_hotplug_event(ddev);
amdgpu_dm_irq_resume_late(adev);
amdgpu_dm_smu_write_watermarks_table(adev);
@ -8073,7 +8145,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->surface_updates[planes_count].plane_info =
&bundle->plane_infos[planes_count];
if (acrtc_state->stream->link->psr_settings.psr_feature_enabled) {
if (acrtc_state->stream->link->psr_settings.psr_feature_enabled ||
acrtc_state->stream->link->replay_settings.replay_feature_enabled) {
fill_dc_dirty_rects(plane, old_plane_state,
new_plane_state, new_crtc_state,
&bundle->flip_addrs[planes_count],

View File

@ -620,7 +620,7 @@ struct amdgpu_hdmi_vsdb_info {
unsigned int max_refresh_rate_hz;
/**
* @replay mode: Replay supported
* @replay_mode: Replay supported
*/
bool replay_mode;
};

View File

@ -169,11 +169,23 @@ static void add_link_enc_assignment(
/* Return first available DIG link encoder. */
static enum engine_id find_first_avail_link_enc(
const struct dc_context *ctx,
const struct dc_state *state)
const struct dc_state *state,
enum engine_id eng_id_requested)
{
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
int i;
if (eng_id_requested != ENGINE_ID_UNKNOWN) {
for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
if (eng_id == eng_id_requested)
return eng_id;
}
}
eng_id = ENGINE_ID_UNKNOWN;
for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
if (eng_id != ENGINE_ID_UNKNOWN)
@ -287,7 +299,7 @@ void link_enc_cfg_link_encs_assign(
struct dc_stream_state *streams[],
uint8_t stream_count)
{
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
enum engine_id eng_id = ENGINE_ID_UNKNOWN, eng_id_req = ENGINE_ID_UNKNOWN;
int i;
int j;
@ -377,8 +389,14 @@ void link_enc_cfg_link_encs_assign(
* assigned to that endpoint.
*/
link_enc = get_link_enc_used_by_link(state, stream->link);
if (link_enc == NULL)
eng_id = find_first_avail_link_enc(stream->ctx, state);
if (link_enc == NULL) {
if (stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
stream->link->dpia_preferred_eng_id != ENGINE_ID_UNKNOWN)
eng_id_req = stream->link->dpia_preferred_eng_id;
eng_id = find_first_avail_link_enc(stream->ctx, state, eng_id_req);
}
else
eng_id = link_enc->preferred_engine;
@ -402,7 +420,9 @@ void link_enc_cfg_link_encs_assign(
DC_LOG_DEBUG("%s: CUR %s(%d) - enc_id(%d)\n",
__func__,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
assignment.ep_id.link_id.enum_id - 1,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
assignment.ep_id.link_id.enum_id :
assignment.ep_id.link_id.enum_id - 1,
assignment.eng_id);
}
for (i = 0; i < MAX_PIPES; i++) {
@ -413,7 +433,9 @@ void link_enc_cfg_link_encs_assign(
DC_LOG_DEBUG("%s: NEW %s(%d) - enc_id(%d)\n",
__func__,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
assignment.ep_id.link_id.enum_id - 1,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
assignment.ep_id.link_id.enum_id :
assignment.ep_id.link_id.enum_id - 1,
assignment.eng_id);
}
@ -478,7 +500,6 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(
if (stream)
link = stream->link;
// dm_output_to_console("%s: No link using DIG(%d).\n", __func__, eng_id);
return link;
}

View File

@ -1496,6 +1496,7 @@ struct dc_link {
* object creation.
*/
enum engine_id eng_id;
enum engine_id dpia_preferred_eng_id;
bool test_pattern_enabled;
enum dp_test_pattern current_test_pattern;

View File

@ -964,7 +964,9 @@ void dce110_edp_backlight_control(
return;
}
if (link->panel_cntl) {
if (link->panel_cntl && !(link->dpcd_sink_ext_caps.bits.oled ||
link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 ||
link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) {
bool is_backlight_on = link->panel_cntl->funcs->is_panel_backlight_on(link->panel_cntl);
if ((enable && is_backlight_on) || (!enable && !is_backlight_on)) {

View File

@ -1032,6 +1032,28 @@ static const struct dce_i2c_mask i2c_masks = {
I2C_COMMON_MASK_SH_LIST_DCN30(_MASK)
};
/* ========================================================== */
/*
* DPIA index | Preferred Encoder | Host Router
* 0 | C | 0
* 1 | First Available | 0
* 2 | D | 1
* 3 | First Available | 1
*/
/* ========================================================== */
static const enum engine_id dpia_to_preferred_enc_id_table[] = {
ENGINE_ID_DIGC,
ENGINE_ID_DIGC,
ENGINE_ID_DIGD,
ENGINE_ID_DIGD
};
static enum engine_id dcn314_get_preferred_eng_id_dpia(unsigned int dpia_index)
{
return dpia_to_preferred_enc_id_table[dpia_index];
}
static struct dce_i2c_hw *dcn31_i2c_hw_create(
struct dc_context *ctx,
uint32_t inst)
@ -1785,6 +1807,7 @@ static struct resource_funcs dcn314_res_pool_funcs = {
.update_bw_bounding_box = dcn314_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn314_get_panel_config_defaults,
.get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia,
};
static struct clock_source *dcn30_clock_source_create(

View File

@ -65,6 +65,7 @@ struct resource_context;
struct clk_bw_params;
struct resource_funcs {
enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index);
void (*destroy)(struct resource_pool **pool);
void (*link_init)(struct dc_link *link);
struct panel_cntl*(*panel_cntl_create)(

View File

@ -791,6 +791,10 @@ static bool construct_dpia(struct dc_link *link,
/* Set dpia port index : 0 to number of dpia ports */
link->ddc_hw_inst = init_params->connector_index;
// Assign Dpia preferred eng_id
if (link->dc->res_pool->funcs->get_preferred_eng_id_dpia)
link->dpia_preferred_eng_id = link->dc->res_pool->funcs->get_preferred_eng_id_dpia(link->ddc_hw_inst);
/* TODO: Create link encoder */
link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;

View File

@ -31,12 +31,12 @@
#include <linux/types.h>
#include <linux/bitmap.h>
#include <linux/dma-fence.h>
#include "amdgpu_irq.h"
#include "amdgpu_gfx.h"
struct pci_dev;
struct amdgpu_device;
#define KGD_MAX_QUEUES 128
struct kfd_dev;
struct kgd_mem;
@ -68,7 +68,7 @@ struct kfd_cu_info {
uint32_t wave_front_size;
uint32_t max_scratch_slots_per_cu;
uint32_t lds_size;
uint32_t cu_bitmap[4][4];
uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
};
/* For getting GPU local memory information from KGD */
@ -326,8 +326,7 @@ struct kfd2kgd_calls {
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst);
uint32_t *reg_data);
void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
int *wave_cnt, int *max_waves_per_cu, uint32_t inst);
void (*program_trap_handler_settings)(struct amdgpu_device *adev,

View File

@ -336,7 +336,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
/* Store one-time values in driver PPTable */
if (!pptable->Init) {
while (retry--) {
while (--retry) {
ret = smu_v13_0_6_get_metrics_table(smu, NULL, true);
if (ret)
return ret;