Merge tag 'drm-next-5.5-2019-12-03' of git://people.freedesktop.org/~agd5f/linux into drm-next

drm-next-5.5-2019-12-03:

amdgpu:
- Fix vram lost handling with BACO on VI/CI asics
- DC fixes for Navi14
- Misc gfx10 fixes
- SR-IOV fixes
- Fix driver unload
- Fix XGMI limits on Arcturus

amdkfd:
- Enable KFD on PPC
- Optimize KFD page table reservations

radeon:
- Fix register checker for r1xx/r2xx

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191203204135.5437-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2019-12-04 10:13:16 +10:00
commit 909a606526
31 changed files with 237 additions and 305 deletions

View File

@ -856,7 +856,6 @@ S: Maintained
F: drivers/i2c/busses/i2c-amd-mp2*
AMD POWERPLAY
M: Rex Zhu <rex.zhu@amd.com>
M: Evan Quan <evan.quan@amd.com>
L: amd-gfx@lists.freedesktop.org
S: Supported

View File

@ -105,11 +105,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
/* Estimate page table size needed to represent a given memory size
*
* With 4KB pages, we need one 8 byte PTE for each 4KB of memory
* (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
* of memory (factor 256K, >> 18). ROCm user mode tries to optimize
* for 2MB pages for TLB efficiency. However, small allocations and
* fragmented system memory still need some 4KB pages. We choose a
* compromise that should work in most cases without reserving too
* much memory for page tables unnecessarily (factor 16K, >> 14).
*/
#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain, bool sg)
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
int ret = 0;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,

View File

@ -1487,8 +1487,8 @@ out:
return ret;
/* Start rlc autoload after psp recieved all the gfx firmware */
if (psp->autoload_supported && ucode->ucode_id ==
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
ret = psp_rlc_autoload(psp);
if (ret) {
DRM_ERROR("Failed to start rlc autoload\n");

View File

@ -27,7 +27,8 @@
#include <linux/bits.h>
#include "smu_v11_0_i2c.h"
#define EEPROM_I2C_TARGET_ADDR 0xA0
#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8
#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0
/*
* The 2 macros bellow represent the actual size in bytes that
@ -83,7 +84,7 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
{
int ret = 0;
struct i2c_msg msg = {
.addr = EEPROM_I2C_TARGET_ADDR,
.addr = 0,
.flags = 0,
.len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
.buf = buff,
@ -93,6 +94,8 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
*(uint16_t *)buff = EEPROM_HDR_START;
__encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE);
msg.addr = control->i2c_address;
ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
if (ret < 1)
DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret);
@ -203,7 +206,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
struct i2c_msg msg = {
.addr = EEPROM_I2C_TARGET_ADDR,
.addr = 0,
.flags = I2C_M_RD,
.len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
.buf = buff,
@ -213,10 +216,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
switch (adev->asic_type) {
case CHIP_VEGA20:
control->i2c_address = EEPROM_I2C_TARGET_ADDR_VEGA20;
ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor);
break;
case CHIP_ARCTURUS:
control->i2c_address = EEPROM_I2C_TARGET_ADDR_ARCTURUS;
ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor);
break;
@ -229,6 +234,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
return ret;
}
msg.addr = control->i2c_address;
/* Read/Create table header from EEPROM address 0 */
ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
if (ret < 1) {
@ -408,8 +415,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
* Update bits 16,17 of EEPROM address in I2C address by setting them
* to bits 1,2 of Device address byte
*/
msg->addr = EEPROM_I2C_TARGET_ADDR |
((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
msg->addr = control->i2c_address |
((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
msg->flags = write ? 0 : I2C_M_RD;
msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
msg->buf = buff;

View File

@ -50,6 +50,7 @@ struct amdgpu_ras_eeprom_control {
struct mutex tbl_mutex;
bool bus_locked;
uint32_t tbl_byte_sum;
uint16_t i2c_address; // 8-bit represented address
};
/*

View File

@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
*/
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
{
volatile u32 *dst_ptr;
u32 dws;
int r;
/* allocate clear state block */
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
return r;
}
/* set up the cs buffer */
dst_ptr = adev->gfx.rlc.cs_ptr;
adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
return 0;
}

View File

@ -1346,10 +1346,13 @@ static int cik_asic_reset(struct amdgpu_device *adev)
{
int r;
if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
if (!adev->in_suspend)
amdgpu_inc_vram_lost(adev);
r = smu7_asic_baco_reset(adev);
else
} else {
r = cik_asic_pci_config_reset(adev);
}
return r;
}

View File

@ -690,60 +690,62 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
if (version_major == 2 && version_minor == 1)
adev->gfx.rlc.is_rlc_v2_1 = true;
if (!amdgpu_sriov_vf(adev)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
if (version_major == 2 && version_minor == 1)
adev->gfx.rlc.is_rlc_v2_1 = true;
adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
adev->gfx.rlc.save_and_restore_offset =
adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
adev->gfx.rlc.save_and_restore_offset =
le32_to_cpu(rlc_hdr->save_and_restore_offset);
adev->gfx.rlc.clear_state_descriptor_offset =
adev->gfx.rlc.clear_state_descriptor_offset =
le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
adev->gfx.rlc.avail_scratch_ram_locations =
adev->gfx.rlc.avail_scratch_ram_locations =
le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
adev->gfx.rlc.reg_restore_list_size =
adev->gfx.rlc.reg_restore_list_size =
le32_to_cpu(rlc_hdr->reg_restore_list_size);
adev->gfx.rlc.reg_list_format_start =
adev->gfx.rlc.reg_list_format_start =
le32_to_cpu(rlc_hdr->reg_list_format_start);
adev->gfx.rlc.reg_list_format_separate_start =
adev->gfx.rlc.reg_list_format_separate_start =
le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
adev->gfx.rlc.starting_offsets_start =
adev->gfx.rlc.starting_offsets_start =
le32_to_cpu(rlc_hdr->starting_offsets_start);
adev->gfx.rlc.reg_list_format_size_bytes =
adev->gfx.rlc.reg_list_format_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
adev->gfx.rlc.reg_list_size_bytes =
adev->gfx.rlc.reg_list_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_size_bytes);
adev->gfx.rlc.register_list_format =
adev->gfx.rlc.register_list_format =
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
if (!adev->gfx.rlc.register_list_format) {
err = -ENOMEM;
goto out;
adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
if (!adev->gfx.rlc.register_list_format) {
err = -ENOMEM;
goto out;
}
tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->gfx.rlc.is_rlc_v2_1)
gfx_v10_0_init_rlc_ext_microcode(adev);
}
tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->gfx.rlc.is_rlc_v2_1)
gfx_v10_0_init_rlc_ext_microcode(adev);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
if (err)
@ -993,39 +995,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
{
int r;
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
if (unlikely(r != 0))
return r;
r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
AMDGPU_GEM_DOMAIN_VRAM);
if (!r)
adev->gfx.rlc.clear_state_gpu_addr =
amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
return r;
}
static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
{
int r;
if (!adev->gfx.rlc.clear_state_obj)
return;
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
if (likely(r == 0)) {
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
}
static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@ -1787,25 +1756,7 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
{
int r;
if (adev->in_gpu_reset) {
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
if (r)
return r;
r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj,
(void **)&adev->gfx.rlc.cs_ptr);
if (!r) {
adev->gfx.rlc.funcs->get_csb_buffer(adev,
adev->gfx.rlc.cs_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
}
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
if (r)
return r;
}
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
@ -1817,22 +1768,6 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
return 0;
}
static int gfx_v10_0_init_pg(struct amdgpu_device *adev)
{
int i;
int r;
r = gfx_v10_0_init_csb(adev);
if (r)
return r;
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
/* TODO: init power gating */
return 0;
}
void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
@ -1925,21 +1860,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
{
int r;
if (amdgpu_sriov_vf(adev))
return 0;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
if (r)
return r;
r = gfx_v10_0_init_pg(adev);
if (r)
return r;
/* enable RLC SRM */
gfx_v10_0_rlc_enable_srm(adev);
gfx_v10_0_init_csb(adev);
if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
gfx_v10_0_rlc_enable_srm(adev);
} else {
adev->gfx.rlc.funcs->stop(adev);
@ -1961,9 +1891,7 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
r = gfx_v10_0_init_pg(adev);
if (r)
return r;
gfx_v10_0_init_csb(adev);
adev->gfx.rlc.funcs->start(adev);
@ -2825,7 +2753,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
/* Init gfx ring 0 for pipe 0 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
mutex_unlock(&adev->srbm_mutex);
/* Set ring buffer size */
ring = &adev->gfx.gfx_ring[0];
rb_bufsz = order_base_2(ring->ring_size / 8);
@ -2863,11 +2791,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
mutex_unlock(&adev->srbm_mutex);
/* Init gfx ring 1 for pipe 1 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
mutex_unlock(&adev->srbm_mutex);
ring = &adev->gfx.gfx_ring[1];
rb_bufsz = order_base_2(ring->ring_size / 8);
tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
@ -2897,6 +2825,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
mutex_unlock(&adev->srbm_mutex);
/* Switch to pipe 0 */
mutex_lock(&adev->srbm_mutex);
@ -3775,10 +3704,6 @@ static int gfx_v10_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = gfx_v10_0_csb_vram_pin(adev);
if (r)
return r;
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
@ -3861,12 +3786,11 @@ static int gfx_v10_0_hw_fini(void *handle)
if (amdgpu_gfx_disable_kcq(adev))
DRM_ERROR("KCQ disable failed\n");
if (amdgpu_sriov_vf(adev)) {
pr_debug("For SRIOV client, shouldn't do anything.\n");
gfx_v10_0_cp_gfx_enable(adev, false);
return 0;
}
gfx_v10_0_cp_enable(adev, false);
gfx_v10_0_enable_gui_idle_interrupt(adev, false);
gfx_v10_0_csb_vram_unpin(adev);
return 0;
}

View File

@ -4554,6 +4554,8 @@ static int gfx_v7_0_hw_init(void *handle)
gfx_v7_0_constants_init(adev);
/* init CSB */
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* init rlc */
r = adev->gfx.rlc.funcs->resume(adev);
if (r)

View File

@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
{
int r;
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
if (unlikely(r != 0))
return r;
r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
AMDGPU_GEM_DOMAIN_VRAM);
if (!r)
adev->gfx.rlc.clear_state_gpu_addr =
amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
return r;
}
static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
{
int r;
if (!adev->gfx.rlc.clear_state_obj)
return;
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
if (likely(r == 0)) {
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
}
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@ -3917,6 +3884,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32(mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
@ -4837,10 +4805,6 @@ static int gfx_v8_0_hw_init(void *handle)
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
r = gfx_v8_0_csb_vram_pin(adev);
if (r)
return r;
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@ -4958,8 +4922,6 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_err("rlc is busy, skip halt rlc\n");
amdgpu_gfx_rlc_exit_safe_mode(adev);
gfx_v8_0_csb_vram_unpin(adev);
return 0;
}

View File

@ -1695,39 +1695,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
int r;
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
if (unlikely(r != 0))
return r;
r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
AMDGPU_GEM_DOMAIN_VRAM);
if (!r)
adev->gfx.rlc.clear_state_gpu_addr =
amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
return r;
}
static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
int r;
if (!adev->gfx.rlc.clear_state_obj)
return;
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
if (likely(r == 0)) {
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
}
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@ -2415,6 +2382,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
adev->gfx.rlc.clear_state_gpu_addr >> 32);
@ -3706,10 +3674,6 @@ static int gfx_v9_0_hw_init(void *handle)
gfx_v9_0_constants_init(adev);
r = gfx_v9_0_csb_vram_pin(adev);
if (r)
return r;
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@ -3791,8 +3755,6 @@ static int gfx_v9_0_hw_fini(void *handle)
gfx_v9_0_cp_enable(adev, false);
adev->gfx.rlc.funcs->stop(adev);
gfx_v9_0_csb_vram_unpin(adev);
return 0;
}

View File

@ -33,16 +33,31 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
u32 max_region =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
u32 max_num_physical_nodes = 0;
u32 max_physical_node_id = 0;
switch (adev->asic_type) {
case CHIP_VEGA20:
max_num_physical_nodes = 4;
max_physical_node_id = 3;
break;
case CHIP_ARCTURUS:
max_num_physical_nodes = 8;
max_physical_node_id = 7;
break;
default:
return -EINVAL;
}
/* PF_MAX_REGION=0 means xgmi is disabled */
if (max_region) {
adev->gmc.xgmi.num_physical_nodes = max_region + 1;
if (adev->gmc.xgmi.num_physical_nodes > 4)
if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
return -EINVAL;
adev->gmc.xgmi.physical_node_id =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
if (adev->gmc.xgmi.physical_node_id > 3)
if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
return -EINVAL;
adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),

View File

@ -326,7 +326,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
if (!adev->mman.buffer_funcs_enabled ||
!adev->ib_pool_ready ||
adev->in_gpu_reset) {
adev->in_gpu_reset ||
ring->sched.ready == false) {
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
mutex_unlock(&adev->mman.gtt_window_lock);
return;

View File

@ -783,10 +783,13 @@ static int vi_asic_reset(struct amdgpu_device *adev)
{
int r;
if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
if (!adev->in_suspend)
amdgpu_inc_vram_lost(adev);
r = smu7_asic_baco_reset(adev);
else
} else {
r = vi_asic_pci_config_reset(adev);
}
return r;
}

View File

@ -5,7 +5,7 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && (X86_64 || ARM64)
depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
imply AMD_IOMMU_V2 if X86_64
select MMU_NOTIFIER
help

View File

@ -342,7 +342,8 @@ bool dm_pp_get_clock_levels_by_type(
if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) {
if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle,
dc_to_pp_clock_type(clk_type), &pp_clks)) {
/* Error in pplib. Provide default values. */
/* Error in pplib. Provide default values. */
get_default_clock_levels(clk_type, dc_clks);
return true;
}
} else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type) {

View File

@ -1037,6 +1037,25 @@ void dcn20_pipe_control_lock(
if (pipe->plane_state != NULL)
flip_immediate = pipe->plane_state->flip_immediate;
if (flip_immediate && lock) {
const int TIMEOUT_FOR_FLIP_PENDING = 100000;
int i;
for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) {
if (!pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->plane_res.hubp))
break;
udelay(1);
}
if (pipe->bottom_pipe != NULL) {
for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) {
if (!pipe->bottom_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->bottom_pipe->plane_res.hubp))
break;
udelay(1);
}
}
}
/* In flip immediate and pipe splitting case, we need to use GSL
* for synchronization. Only do setup on locking and on flip type change.
*/

View File

@ -157,6 +157,74 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = {
.xfc_fill_constant_bytes = 0,
};
struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = {
.odm_capable = 1,
.gpuvm_enable = 0,
.hostvm_enable = 0,
.gpuvm_max_page_table_levels = 4,
.hostvm_max_page_table_levels = 4,
.hostvm_cached_page_table_levels = 0,
.num_dsc = 5,
.rob_buffer_size_kbytes = 168,
.det_buffer_size_kbytes = 164,
.dpte_buffer_size_in_pte_reqs_luma = 84,
.dpte_buffer_size_in_pte_reqs_chroma = 42,//todo
.dpp_output_buffer_pixels = 2560,
.opp_output_buffer_lines = 1,
.pixel_chunk_size_kbytes = 8,
.pte_enable = 1,
.max_page_table_levels = 4,
.pte_chunk_size_kbytes = 2,
.meta_chunk_size_kbytes = 2,
.writeback_chunk_size_kbytes = 2,
.line_buffer_size_bits = 789504,
.is_line_buffer_bpp_fixed = 0,
.line_buffer_fixed_bpp = 0,
.dcc_supported = true,
.max_line_buffer_lines = 12,
.writeback_luma_buffer_size_kbytes = 12,
.writeback_chroma_buffer_size_kbytes = 8,
.writeback_chroma_line_buffer_width_pixels = 4,
.writeback_max_hscl_ratio = 1,
.writeback_max_vscl_ratio = 1,
.writeback_min_hscl_ratio = 1,
.writeback_min_vscl_ratio = 1,
.writeback_max_hscl_taps = 12,
.writeback_max_vscl_taps = 12,
.writeback_line_buffer_luma_buffer_size = 0,
.writeback_line_buffer_chroma_buffer_size = 14643,
.cursor_buffer_size = 8,
.cursor_chunk_size = 2,
.max_num_otg = 5,
.max_num_dpp = 5,
.max_num_wb = 1,
.max_dchub_pscl_bw_pix_per_clk = 4,
.max_pscl_lb_bw_pix_per_clk = 2,
.max_lb_vscl_bw_pix_per_clk = 4,
.max_vscl_hscl_bw_pix_per_clk = 4,
.max_hscl_ratio = 8,
.max_vscl_ratio = 8,
.hscl_mults = 4,
.vscl_mults = 4,
.max_hscl_taps = 8,
.max_vscl_taps = 8,
.dispclk_ramp_margin_percent = 1,
.underscan_factor = 1.10,
.min_vblank_lines = 32, //
.dppclk_delay_subtotal = 77, //
.dppclk_delay_scl_lb_only = 16,
.dppclk_delay_scl = 50,
.dppclk_delay_cnvc_formatter = 8,
.dppclk_delay_cnvc_cursor = 6,
.dispclk_delay_subtotal = 87, //
.dcfclk_cstate_latency = 10, // SRExitTime
.max_inter_dcn_tile_repeaters = 8,
.xfc_supported = true,
.xfc_fill_bw_overhead_percent = 10.0,
.xfc_fill_constant_bytes = 0,
.ptoi_supported = 0
};
struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = {
/* Defaults that get patched on driver load from firmware. */
.clock_limits = {
@ -854,6 +922,8 @@ static const struct resource_caps res_cap_nv14 = {
.num_pll = 5,
.num_dwb = 1,
.num_ddc = 5,
.num_vmid = 16,
.num_dsc = 5,
};
static const struct dc_debug_options debug_defaults_drv = {
@ -3212,6 +3282,10 @@ static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params(
uint32_t hw_internal_rev)
{
/* NV14 */
if (ASICREV_IS_NAVI14_M(hw_internal_rev))
return &dcn2_0_nv14_ip;
/* NV12 and NV10 */
return &dcn2_0_ip;
}

View File

@ -2548,3 +2548,12 @@ uint32_t smu_get_pptable_power_limit(struct smu_context *smu)
return ret;
}
int smu_send_smc_msg(struct smu_context *smu,
enum smu_message_type msg)
{
int ret;
ret = smu_send_smc_msg_with_param(smu, msg, 0);
return ret;
}

View File

@ -2130,7 +2130,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
.set_tool_table_location = smu_v11_0_set_tool_table_location,
.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
.system_features_control = smu_v11_0_system_features_control,
.send_smc_msg = smu_v11_0_send_msg,
.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
.read_smc_arg = smu_v11_0_read_arg,
.init_display_count = smu_v11_0_init_display_count,

View File

@ -497,8 +497,8 @@ struct pptable_funcs {
int (*notify_memory_pool_location)(struct smu_context *smu);
int (*set_last_dcef_min_deep_sleep_clk)(struct smu_context *smu);
int (*system_features_control)(struct smu_context *smu, bool en);
int (*send_smc_msg)(struct smu_context *smu, uint16_t msg);
int (*send_smc_msg_with_param)(struct smu_context *smu, uint16_t msg, uint32_t param);
int (*send_smc_msg_with_param)(struct smu_context *smu,
enum smu_message_type msg, uint32_t param);
int (*read_smc_arg)(struct smu_context *smu, uint32_t *arg);
int (*init_display_count)(struct smu_context *smu, uint32_t count);
int (*set_allowed_mask)(struct smu_context *smu);

View File

@ -177,10 +177,9 @@ int smu_v11_0_notify_memory_pool_location(struct smu_context *smu);
int smu_v11_0_system_features_control(struct smu_context *smu,
bool en);
int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg);
int
smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
smu_v11_0_send_msg_with_param(struct smu_context *smu,
enum smu_message_type msg,
uint32_t param);
int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg);

View File

@ -44,10 +44,9 @@ int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg);
int smu_v12_0_wait_for_response(struct smu_context *smu);
int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg);
int
smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
smu_v12_0_send_msg_with_param(struct smu_context *smu,
enum smu_message_type msg,
uint32_t param);
int smu_v12_0_check_fw_status(struct smu_context *smu);

View File

@ -2055,7 +2055,6 @@ static const struct pptable_funcs navi10_ppt_funcs = {
.set_tool_table_location = smu_v11_0_set_tool_table_location,
.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
.system_features_control = smu_v11_0_system_features_control,
.send_smc_msg = smu_v11_0_send_msg,
.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
.read_smc_arg = smu_v11_0_read_arg,
.init_display_count = smu_v11_0_init_display_count,

View File

@ -697,7 +697,6 @@ static const struct pptable_funcs renoir_ppt_funcs = {
.check_fw_version = smu_v12_0_check_fw_version,
.powergate_sdma = smu_v12_0_powergate_sdma,
.powergate_vcn = smu_v12_0_powergate_vcn,
.send_smc_msg = smu_v12_0_send_msg,
.send_smc_msg_with_param = smu_v12_0_send_msg_with_param,
.read_smc_arg = smu_v12_0_read_arg,
.set_gfx_cgpg = smu_v12_0_set_gfx_cgpg,

View File

@ -75,8 +75,8 @@
#define smu_set_default_od_settings(smu, initialize) \
((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0)
#define smu_send_smc_msg(smu, msg) \
((smu)->ppt_funcs->send_smc_msg? (smu)->ppt_funcs->send_smc_msg((smu), (msg)) : 0)
int smu_send_smc_msg(struct smu_context *smu, enum smu_message_type msg);
#define smu_send_smc_msg_with_param(smu, msg, param) \
((smu)->ppt_funcs->send_smc_msg_with_param? (smu)->ppt_funcs->send_smc_msg_with_param((smu), (msg), (param)) : 0)
#define smu_read_smc_arg(smu, arg) \

View File

@ -90,36 +90,11 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu)
return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO;
}
int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg)
{
struct amdgpu_device *adev = smu->adev;
int ret = 0, index = 0;
index = smu_msg_get_index(smu, msg);
if (index < 0)
return index;
smu_v11_0_wait_for_response(smu);
WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index);
ret = smu_v11_0_wait_for_response(smu);
if (ret)
pr_err("failed send message: %10s (%d) response %#x\n",
smu_get_message_name(smu, msg), index, ret);
return ret;
}
int
smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
smu_v11_0_send_msg_with_param(struct smu_context *smu,
enum smu_message_type msg,
uint32_t param)
{
struct amdgpu_device *adev = smu->adev;
int ret = 0, index = 0;

View File

@ -77,33 +77,9 @@ int smu_v12_0_wait_for_response(struct smu_context *smu)
return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO;
}
int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg)
{
struct amdgpu_device *adev = smu->adev;
int ret = 0, index = 0;
index = smu_msg_get_index(smu, msg);
if (index < 0)
return index;
smu_v12_0_wait_for_response(smu);
WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
smu_v12_0_send_msg_without_waiting(smu, (uint16_t)index);
ret = smu_v12_0_wait_for_response(smu);
if (ret)
pr_err("Failed to send message 0x%x, response 0x%x\n", index,
ret);
return ret;
}
int
smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
smu_v12_0_send_msg_with_param(struct smu_context *smu,
enum smu_message_type msg,
uint32_t param)
{
struct amdgpu_device *adev = smu->adev;

View File

@ -3231,7 +3231,6 @@ static const struct pptable_funcs vega20_ppt_funcs = {
.set_tool_table_location = smu_v11_0_set_tool_table_location,
.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
.system_features_control = smu_v11_0_system_features_control,
.send_smc_msg = smu_v11_0_send_msg,
.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
.read_smc_arg = smu_v11_0_read_arg,
.init_display_count = smu_v11_0_init_display_count,

View File

@ -1826,8 +1826,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
track->textures[i].use_pitch = 1;
} else {
track->textures[i].use_pitch = 0;
track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
}
if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
track->textures[i].tex_coord_type = 2;

View File

@ -476,8 +476,8 @@ int r200_packet0_check(struct radeon_cs_parser *p,
track->textures[i].use_pitch = 1;
} else {
track->textures[i].use_pitch = 0;
track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
}
if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE)
track->textures[i].lookup_disable = true;