This tag contains the following fixes:

- Two fixes to the reset process of the ASIC. Without these fixes, the
  reset process might take a long time and produce a kernel panic.
  Alternatively, the ASIC could get stuck.

- Fix to reference counting of a command buffer object. It was kref_put
  one more time than it should have been.
 -----BEGIN PGP SIGNATURE-----
 
 iQFKBAABCgA0FiEE7TEboABC71LctBLFZR1NuKta54AFAl5CcQoWHG9kZWQuZ2Fi
 YmF5QGdtYWlsLmNvbQAKCRBlHU24q1rngERpB/4ssOGhoKI43eb5ARKX4NPkBiUv
 GiWFYyasmN6+kWbC8gKNsWyEw1c88TYvjkgWllMURJujWrxA1bbjwt9jpaXO0nVI
 gXgqDaR1rASoQRqK0LYjxc1bdZ+MtWIWBPAie3NkJaggkvXKE/URQ4NiayaTqN5j
 H0kib1h0yC9mvAuDObcVyzWAAIP/dZB2MYlGhwD+IKvMQd/Jzj7zH8QCn0I7V1rR
 hF/9ds4jqDrQT7sKnzVzYvLvD4mGOunvbrOTrFre3LSYD9HZf+b29R1q7fKuZA5f
 TjElN9bOkHlKJ1PLiZv5QacAJos1neOibPOav42hOpiArXdY44sE50DObO8d
 =B93X
 -----END PGP SIGNATURE-----

Merge tag 'misc-habanalabs-fixes-2020-02-11' of git://people.freedesktop.org/~gabbayo/linux into char-misc-linus

Oded writes:

This tag contains the following fixes:

- Two fixes to the reset process of the ASIC. Without these fixes, the
  reset process might take a long time and produce a kernel panic.
  Alternatively, the ASIC could get stuck.

- Fix to reference counting of a command buffer object. It was kref_put
  one more time than it should have been.

* tag 'misc-habanalabs-fixes-2020-02-11' of git://people.freedesktop.org/~gabbayo/linux:
  habanalabs: patched cb equals user cb in device memset
  habanalabs: do not halt CoreSight during hard reset
  habanalabs: halt the engines before hard-reset
This commit is contained in:
Greg Kroah-Hartman 2020-02-17 11:58:16 +01:00
commit 74ba569a15
2 changed files with 46 additions and 3 deletions

View File

@ -598,7 +598,9 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
goto out;
}
hdev->asic_funcs->halt_coresight(hdev);
if (!hdev->hard_reset_pending)
hdev->asic_funcs->halt_coresight(hdev);
hdev->in_debug = 0;
goto out;
@ -1189,6 +1191,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->halt_engines(hdev, true);
hdev->asic_funcs->hw_fini(hdev, true);
}

View File

@ -895,6 +895,11 @@ void goya_init_dma_qmans(struct hl_device *hdev)
*/
static void goya_disable_external_queues(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
if (!(goya->hw_cap_initialized & HW_CAP_DMA))
return;
WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
@ -956,6 +961,11 @@ static int goya_stop_external_queues(struct hl_device *hdev)
{
int rc, retval = 0;
struct goya_device *goya = hdev->asic_specific;
if (!(goya->hw_cap_initialized & HW_CAP_DMA))
return retval;
rc = goya_stop_queue(hdev,
mmDMA_QM_0_GLBL_CFG1,
mmDMA_QM_0_CP_STS,
@ -1744,9 +1754,18 @@ void goya_init_tpc_qmans(struct hl_device *hdev)
*/
static void goya_disable_internal_queues(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
if (!(goya->hw_cap_initialized & HW_CAP_MME))
goto disable_tpc;
WREG32(mmMME_QM_GLBL_CFG0, 0);
WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
disable_tpc:
if (!(goya->hw_cap_initialized & HW_CAP_TPC))
return;
WREG32(mmTPC0_QM_GLBL_CFG0, 0);
WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
@ -1782,8 +1801,12 @@ static void goya_disable_internal_queues(struct hl_device *hdev)
*/
static int goya_stop_internal_queues(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
int rc, retval = 0;
if (!(goya->hw_cap_initialized & HW_CAP_MME))
goto stop_tpc;
/*
* Each queue (QMAN) is a separate H/W logic. That means that each
* QMAN can be stopped independently and failure to stop one does NOT
@ -1810,6 +1833,10 @@ static int goya_stop_internal_queues(struct hl_device *hdev)
retval = -EIO;
}
stop_tpc:
if (!(goya->hw_cap_initialized & HW_CAP_TPC))
return retval;
rc = goya_stop_queue(hdev,
mmTPC0_QM_GLBL_CFG1,
mmTPC0_QM_CP_STS,
@ -1975,6 +2002,11 @@ static int goya_stop_internal_queues(struct hl_device *hdev)
static void goya_dma_stall(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
if (!(goya->hw_cap_initialized & HW_CAP_DMA))
return;
WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
@ -1984,6 +2016,11 @@ static void goya_dma_stall(struct hl_device *hdev)
static void goya_tpc_stall(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
if (!(goya->hw_cap_initialized & HW_CAP_TPC))
return;
WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
@ -1996,6 +2033,11 @@ static void goya_tpc_stall(struct hl_device *hdev)
/*
 * goya_mme_stall - write the MME stall register, if the MME was initialized
 *
 * @hdev: pointer to the habanalabs device structure
 *
 * Writes 0xFFFFFFFF to mmMME_STALL only when the HW_CAP_MME bit is set in
 * the device's hw_cap_initialized mask; otherwise the register is left
 * untouched and the function does nothing.
 */
static void goya_mme_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/* Skip the register write entirely when the MME capability bit is clear. */
	if (goya->hw_cap_initialized & HW_CAP_MME)
		WREG32(mmMME_STALL, 0xFFFFFFFF);
}
@ -4648,8 +4690,6 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
rc = goya_send_job_on_qman0(hdev, job);
hl_cb_put(job->patched_cb);
hl_debugfs_remove_job(hdev, job);
kfree(job);
cb->cs_cnt--;