Merge tag 'char-misc-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc and other driver updates from Greg KH:
 "Here is the big set of char/misc and other small driver subsystem
  updates for 5.18-rc1.

  Included in here are merges from driver subsystems which contain:

   - iio driver updates and new drivers
   - fsi driver updates
   - fpga driver updates
   - habanalabs driver updates and support for new hardware
   - soundwire driver updates and new drivers
   - phy driver updates and new drivers
   - coresight driver updates
   - icc driver updates

  Individual changes include:

   - mei driver updates
   - interconnect driver updates
   - new PECI driver subsystem added
   - vmci driver updates
   - lots of tiny misc/char driver updates

  All of these have been in linux-next for a while with no reported
  problems"

* tag 'char-misc-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (556 commits)
  firmware: google: Properly state IOMEM dependency
  kgdbts: fix return value of __setup handler
  firmware: sysfb: fix platform-device leak in error path
  firmware: stratix10-svc: add missing callback parameter on RSU
  arm64: dts: qcom: add non-secure domain property to fastrpc nodes
  misc: fastrpc: Add dma handle implementation
  misc: fastrpc: Add fdlist implementation
  misc: fastrpc: Add helper function to get list and page
  misc: fastrpc: Add support to secure memory map
  dt-bindings: misc: add fastrpc domain vmid property
  misc: fastrpc: check before loading process to the DSP
  misc: fastrpc: add secure domain support
  dt-bindings: misc: add property to support non-secure DSP
  misc: fastrpc: Add support to get DSP capabilities
  misc: fastrpc: add support for FASTRPC_IOCTL_MEM_MAP/UNMAP
  misc: fastrpc: separate fastrpc device from channel context
  dt-bindings: nvmem: brcm,nvram: add basic NVMEM cells
  dt-bindings: nvmem: make "reg" property optional
  nvmem: brcm_nvram: parse NVRAM content into NVMEM cells
  nvmem: dt-bindings: Fix the error of dt-bindings check
  ...
@@ -259,6 +259,7 @@ config QCOM_FASTRPC
depends on ARCH_QCOM || COMPILE_TEST
depends on RPMSG
select DMA_SHARED_BUFFER
select QCOM_SCM
help
Provides a communication mechanism that allows for clients to
make remote method invocations across processor boundary to

@@ -470,6 +471,18 @@ config HISI_HIKEY_USB
switching between the dual-role USB-C port and the USB-A host ports
using only one USB controller.

config OPEN_DICE
tristate "Open Profile for DICE driver"
depends on OF_RESERVED_MEM
help
This driver exposes a DICE reserved memory region to userspace via
a character device. The memory region contains Compound Device
Identifiers (CDIs) generated by firmware as an output of DICE
measured boot flow. Userspace can use CDIs for remote attestation
and sealing.

If unsure, say N.

source "drivers/misc/c2port/Kconfig"
source "drivers/misc/eeprom/Kconfig"
source "drivers/misc/cb710/Kconfig"

@@ -59,3 +59,4 @@ obj-$(CONFIG_UACCE) += uacce/
obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
obj-$(CONFIG_HI6421V600_IRQ) += hi6421v600-irq.o
obj-$(CONFIG_OPEN_DICE) += open-dice.o
@@ -1633,7 +1633,6 @@ static void bcm_vk_shutdown(struct pci_dev *pdev)

static const struct pci_device_id bcm_vk_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VALKYRIE), },
{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VIPER), },
{ }
};
MODULE_DEVICE_TABLE(pci, bcm_vk_ids);
@@ -266,7 +266,7 @@ static int alcor_pci_probe(struct pci_dev *pdev,
if (!priv)
return -ENOMEM;

ret = ida_simple_get(&alcor_pci_idr, 0, 0, GFP_KERNEL);
ret = ida_alloc(&alcor_pci_idr, GFP_KERNEL);
if (ret < 0)
return ret;
priv->id = ret;

@@ -280,7 +280,8 @@ static int alcor_pci_probe(struct pci_dev *pdev,
ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI);
if (ret) {
dev_err(&pdev->dev, "Cannot request region\n");
return -ENOMEM;
ret = -ENOMEM;
goto error_free_ida;
}

if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {

@@ -324,6 +325,8 @@ static int alcor_pci_probe(struct pci_dev *pdev,

error_release_regions:
pci_release_regions(pdev);
error_free_ida:
ida_free(&alcor_pci_idr, priv->id);
return ret;
}

@@ -337,7 +340,7 @@ static void alcor_pci_remove(struct pci_dev *pdev)

mfd_remove_devices(&pdev->dev);

ida_simple_remove(&alcor_pci_idr, priv->id);
ida_free(&alcor_pci_idr, priv->id);

pci_release_regions(pdev);
pci_set_drvdata(pdev, NULL);
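For readers unfamiliar with the IDA API change in the alcor_pci hunks above: ida_simple_get()/ida_simple_remove() are deprecated wrappers that drivers are being converted away from in favour of ida_alloc()/ida_free(). A minimal sketch of the new pattern follows; the names example_ida and example_use_id() are hypothetical and not taken from this diff.

#include <linux/idr.h>

static DEFINE_IDA(example_ida);	/* hypothetical IDA instance, for illustration only */

static int example_use_id(void)
{
	/* ida_alloc() returns an id >= 0 on success or a negative errno on failure */
	int id = ida_alloc(&example_ida, GFP_KERNEL);

	if (id < 0)
		return id;	/* e.g. -ENOMEM or -ENOSPC */

	/* ... use id ... */

	ida_free(&example_ida, id);	/* every successful ida_alloc() is balanced by ida_free() */
	return 0;
}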
@@ -76,7 +76,7 @@ static void rtl8411b_fetch_vendor_settings(struct rtsx_pcr *pcr)
map_sd_drive(rtl8411b_reg_to_sd30_drive_sel_3v3(reg));
}

static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07);
}

@@ -47,7 +47,7 @@ static void rts5209_fetch_vendor_settings(struct rtsx_pcr *pcr)
}
}

static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07);
}
@@ -72,6 +72,8 @@ static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr)

pci_read_config_dword(pdev, PCR_SETTING_REG2, &reg);
pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
if (CHK_PCI_PID(pcr, 0x522A))
pcr->rtd3_en = rtsx_reg_to_rtd3(reg);
if (rtsx_check_mmc_support(reg))
pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg);

@@ -171,6 +173,28 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
else
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x00);

if (CHK_PCI_PID(pcr, 0x522A))
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);

if (pcr->rtd3_en) {
if (CHK_PCI_PID(pcr, 0x522A)) {
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PM_CTRL3, 0x01, 0x01);
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PME_FORCE_CTL, 0x30, 0x30);
} else {
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CTRL3, 0x01, 0x01);
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PME_FORCE_CTL, 0xFF, 0x33);
}
} else {
if (CHK_PCI_PID(pcr, 0x522A)) {
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PM_CTRL3, 0x01, 0x00);
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PME_FORCE_CTL, 0x30, 0x20);
} else {
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PME_FORCE_CTL, 0xFF, 0x30);
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CTRL3, 0x01, 0x00);
}
}

if (option->force_clkreq_0)
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);

@@ -438,6 +462,28 @@ static int rts522a_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
return rtsx_pci_send_cmd(pcr, 100);
}

static void rts522a_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0);
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3,
RELINK_TIME_MASK, 0);

rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3,
D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);

if (!runtime) {
rtsx_pci_write_register(pcr, RTS522A_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, 0);
rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS522A_PME_FORCE_CTL, 0x30, 0x20);
}

rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
}

static void rts522a_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active)
{
struct rtsx_cr_option *option = &pcr->option;

@@ -473,6 +519,7 @@ static const struct pcr_ops rts522a_pcr_ops = {
.card_power_on = rts5227_card_power_on,
.card_power_off = rts5227_card_power_off,
.switch_output_voltage = rts522a_switch_output_voltage,
.force_power_down = rts522a_force_power_down,
.cd_deglitch = NULL,
.conv_clk_and_div_n = NULL,
.set_l1off_cfg_sub_d0 = rts522a_set_l1off_cfg_sub_d0,
@@ -91,7 +91,7 @@ static int rts5228_optimize_phy(struct rtsx_pcr *pcr)
return rtsx_pci_write_phy_register(pcr, 0x07, 0x8F40);
}

static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);

@@ -102,6 +102,14 @@ static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3,
D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);

if (!runtime) {
rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, 0);
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
}

rtsx_pci_write_register(pcr, FPDCTL,
SSC_POWER_DOWN, SSC_POWER_DOWN);
}

@@ -480,9 +488,18 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);

rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);

if (pcr->rtd3_en) {
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x01);
rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE,
FORCE_PM_CONTROL | FORCE_PM_VALUE);
} else {
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
}
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, D3_DELINK_MODE_EN, 0x00);

return 0;
}
@@ -44,7 +44,7 @@ static void rts5229_fetch_vendor_settings(struct rtsx_pcr *pcr)
map_sd_drive(rtsx_reg_to_sd30_drive_sel_3v3(reg));
}

static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03);
}
@@ -74,7 +74,8 @@ static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr)
pci_read_config_dword(pdev, PCR_SETTING_REG2, &reg);
pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);

pcr->rtd3_en = rtsx_reg_to_rtd3_uhsii(reg);
if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A))
pcr->rtd3_en = rtsx_reg_to_rtd3_uhsii(reg);

if (rtsx_check_mmc_support(reg))
pcr->extra_caps |= EXTRA_CAPS_NO_MMC;

@@ -143,6 +144,27 @@ static int rts5249_init_from_hw(struct rtsx_pcr *pcr)
return 0;
}

static void rts52xa_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0);
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3,
RELINK_TIME_MASK, 0);

rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3,
D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);

if (!runtime) {
rtsx_pci_write_register(pcr, RTS524A_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, 0);
rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, 0x30, 0x20);
}

rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
}

static void rts52xa_save_content_from_efuse(struct rtsx_pcr *pcr)
{
u8 cnt, sv;

@@ -281,8 +303,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr)

rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF);

if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A))
if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) {
rtsx_pci_write_register(pcr, REG_VREF, PWD_SUSPND_EN, PWD_SUSPND_EN);
rtsx_pci_write_register(pcr, RTS524A_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
}

if (pcr->rtd3_en) {
if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) {

@@ -724,6 +749,7 @@ static const struct pcr_ops rts524a_pcr_ops = {
.card_power_on = rtsx_base_card_power_on,
.card_power_off = rtsx_base_card_power_off,
.switch_output_voltage = rtsx_base_switch_output_voltage,
.force_power_down = rts52xa_force_power_down,
.set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0,
};

@@ -841,6 +867,7 @@ static const struct pcr_ops rts525a_pcr_ops = {
.card_power_on = rts525a_card_power_on,
.card_power_off = rtsx_base_card_power_off,
.switch_output_voltage = rts525a_switch_output_voltage,
.force_power_down = rts52xa_force_power_down,
.set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0,
};
@@ -91,7 +91,7 @@ static void rtsx5261_fetch_vendor_settings(struct rtsx_pcr *pcr)
pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(reg);
}

static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);

@@ -103,6 +103,24 @@ static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3,
D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);

if (!runtime) {
rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, 0);
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);

} else {
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, 0);

rtsx_pci_write_register(pcr, RTS5261_FW_CTL,
RTS5261_INFORM_RTD3_COLD, RTS5261_INFORM_RTD3_COLD);
rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4,
RTS5261_FORCE_PRSNT_LOW, RTS5261_FORCE_PRSNT_LOW);

}

rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL,
SSC_POWER_DOWN, SSC_POWER_DOWN);
}

@@ -536,9 +554,18 @@ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);

rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);

if (pcr->rtd3_en) {
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x01);
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE,
FORCE_PM_CONTROL | FORCE_PM_VALUE);
} else {
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
}
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, D3_DELINK_MODE_EN, 0x00);

/* Clear Enter RTD3_cold Information*/
rtsx_pci_write_register(pcr, RTS5261_FW_CTL,
@@ -152,20 +152,12 @@ void rtsx_pci_start_run(struct rtsx_pcr *pcr)
if (pcr->remove_pci)
return;

if (pcr->rtd3_en)
if (pcr->is_runtime_suspended) {
pm_runtime_get(&(pcr->pci->dev));
pcr->is_runtime_suspended = false;
}

if (pcr->state != PDEV_STAT_RUN) {
pcr->state = PDEV_STAT_RUN;
if (pcr->ops->enable_auto_blink)
pcr->ops->enable_auto_blink(pcr);
rtsx_pm_full_on(pcr);
}

mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200));
}
EXPORT_SYMBOL_GPL(rtsx_pci_start_run);

@@ -1062,73 +1054,7 @@ static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr)
return 0;
}

static void rtsx_enable_aspm(struct rtsx_pcr *pcr)
{
if (pcr->ops->set_aspm)
pcr->ops->set_aspm(pcr, true);
else
rtsx_comm_set_aspm(pcr, true);
}

static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr)
{
struct rtsx_cr_option *option = &pcr->option;

if (option->ltr_enabled) {
u32 latency = option->ltr_l1off_latency;

if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN))
mdelay(option->l1_snooze_delay);

rtsx_set_ltr_latency(pcr, latency);
}

if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN))
rtsx_set_l1off_sub_cfg_d0(pcr, 0);

rtsx_enable_aspm(pcr);
}

static void rtsx_pm_power_saving(struct rtsx_pcr *pcr)
{
rtsx_comm_pm_power_saving(pcr);
}

static void rtsx_pci_rtd3_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, rtd3_work);

pcr_dbg(pcr, "--> %s\n", __func__);
if (!pcr->is_runtime_suspended)
pm_runtime_put(&(pcr->pci->dev));
}

static void rtsx_pci_idle_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work);

pcr_dbg(pcr, "--> %s\n", __func__);

mutex_lock(&pcr->pcr_mutex);

pcr->state = PDEV_STAT_IDLE;

if (pcr->ops->disable_auto_blink)
pcr->ops->disable_auto_blink(pcr);
if (pcr->ops->turn_off_led)
pcr->ops->turn_off_led(pcr);

rtsx_pm_power_saving(pcr);

mutex_unlock(&pcr->pcr_mutex);

if (pcr->rtd3_en)
mod_delayed_work(system_wq, &pcr->rtd3_work, msecs_to_jiffies(10000));
}

static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rtsx_base_force_power_down(struct rtsx_pcr *pcr)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
@@ -1142,7 +1068,7 @@ static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
}

static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state)
static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
if (pcr->ops->turn_off_led)
pcr->ops->turn_off_led(pcr);

@@ -1154,9 +1080,9 @@ static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state)
rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, pm_state);

if (pcr->ops->force_power_down)
pcr->ops->force_power_down(pcr, pm_state);
pcr->ops->force_power_down(pcr, pm_state, runtime);
else
rtsx_base_force_power_down(pcr, pm_state);
rtsx_base_force_power_down(pcr);
}

void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr)

@@ -1598,7 +1524,6 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
pcr->card_inserted = 0;
pcr->card_removed = 0;
INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect);
INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work);

pcr->msi_en = msi_en;
if (pcr->msi_en) {

@@ -1623,20 +1548,14 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
rtsx_pcr_cells[i].pdata_size = sizeof(*handle);
}

if (pcr->rtd3_en) {
INIT_DELAYED_WORK(&pcr->rtd3_work, rtsx_pci_rtd3_work);
pm_runtime_allow(&pcidev->dev);
pm_runtime_enable(&pcidev->dev);
pcr->is_runtime_suspended = false;
}

ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells,
ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL);
if (ret < 0)
goto free_slots;

schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
pm_runtime_allow(&pcidev->dev);
pm_runtime_put(&pcidev->dev);

return 0;
@@ -1668,11 +1587,11 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;

if (pcr->rtd3_en)
pm_runtime_get_noresume(&pcr->pci->dev);

pcr->remove_pci = true;

pm_runtime_get_sync(&pcidev->dev);
pm_runtime_forbid(&pcidev->dev);

/* Disable interrupts at the pcr level */
spin_lock_irq(&pcr->lock);
rtsx_pci_writel(pcr, RTSX_BIER, 0);

@@ -1680,9 +1599,6 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
spin_unlock_irq(&pcr->lock);

cancel_delayed_work_sync(&pcr->carddet_work);
cancel_delayed_work_sync(&pcr->idle_work);
if (pcr->rtd3_en)
cancel_delayed_work_sync(&pcr->rtd3_work);

mfd_remove_devices(&pcidev->dev);

@@ -1700,11 +1616,6 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
idr_remove(&rtsx_pci_idr, pcr->id);
spin_unlock(&rtsx_pci_lock);

if (pcr->rtd3_en) {
pm_runtime_disable(&pcr->pci->dev);
pm_runtime_put_noidle(&pcr->pci->dev);
}

kfree(pcr->slots);
kfree(pcr);
kfree(handle);

@@ -1717,22 +1628,16 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
static int __maybe_unused rtsx_pci_suspend(struct device *dev_d)
{
struct pci_dev *pcidev = to_pci_dev(dev_d);
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;

dev_dbg(&(pcidev->dev), "--> %s\n", __func__);

handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;

cancel_delayed_work(&pcr->carddet_work);
cancel_delayed_work(&pcr->idle_work);
cancel_delayed_work_sync(&pcr->carddet_work);

mutex_lock(&pcr->pcr_mutex);

rtsx_pci_power_off(pcr, HOST_ENTER_S3);

device_wakeup_disable(dev_d);
rtsx_pci_power_off(pcr, HOST_ENTER_S3, false);

mutex_unlock(&pcr->pcr_mutex);
return 0;
@@ -1741,15 +1646,12 @@ static int __maybe_unused rtsx_pci_suspend(struct device *dev_d)
static int __maybe_unused rtsx_pci_resume(struct device *dev_d)
{
struct pci_dev *pcidev = to_pci_dev(dev_d);
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;
int ret = 0;

dev_dbg(&(pcidev->dev), "--> %s\n", __func__);

handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;

mutex_lock(&pcr->pcr_mutex);

ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);

@@ -1760,8 +1662,6 @@ static int __maybe_unused rtsx_pci_resume(struct device *dev_d)
if (ret)
goto out;

schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));

out:
mutex_unlock(&pcr->pcr_mutex);
return ret;

@@ -1769,16 +1669,46 @@ out:

#ifdef CONFIG_PM

static void rtsx_enable_aspm(struct rtsx_pcr *pcr)
{
if (pcr->ops->set_aspm)
pcr->ops->set_aspm(pcr, true);
else
rtsx_comm_set_aspm(pcr, true);
}

static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr)
{
struct rtsx_cr_option *option = &pcr->option;

if (option->ltr_enabled) {
u32 latency = option->ltr_l1off_latency;

if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN))
mdelay(option->l1_snooze_delay);

rtsx_set_ltr_latency(pcr, latency);
}

if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN))
rtsx_set_l1off_sub_cfg_d0(pcr, 0);

rtsx_enable_aspm(pcr);
}

static void rtsx_pm_power_saving(struct rtsx_pcr *pcr)
{
rtsx_comm_pm_power_saving(pcr);
}

static void rtsx_pci_shutdown(struct pci_dev *pcidev)
{
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;

dev_dbg(&(pcidev->dev), "--> %s\n", __func__);

handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;
rtsx_pci_power_off(pcr, HOST_ENTER_S1);
rtsx_pci_power_off(pcr, HOST_ENTER_S1, false);

pci_disable_device(pcidev);
free_irq(pcr->irq, (void *)pcr);
@@ -1786,26 +1716,47 @@ static void rtsx_pci_shutdown(struct pci_dev *pcidev)
pci_disable_msi(pcr->pci);
}

static int rtsx_pci_runtime_suspend(struct device *device)
static int rtsx_pci_runtime_idle(struct device *device)
{
struct pci_dev *pcidev = to_pci_dev(device);
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;

handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;
dev_dbg(&(pcidev->dev), "--> %s\n", __func__);

cancel_delayed_work(&pcr->carddet_work);
cancel_delayed_work(&pcr->rtd3_work);
cancel_delayed_work(&pcr->idle_work);
dev_dbg(device, "--> %s\n", __func__);

mutex_lock(&pcr->pcr_mutex);
rtsx_pci_power_off(pcr, HOST_ENTER_S3);

pcr->state = PDEV_STAT_IDLE;

if (pcr->ops->disable_auto_blink)
pcr->ops->disable_auto_blink(pcr);
if (pcr->ops->turn_off_led)
pcr->ops->turn_off_led(pcr);

rtsx_pm_power_saving(pcr);

mutex_unlock(&pcr->pcr_mutex);

pcr->is_runtime_suspended = true;
if (pcr->rtd3_en)
pm_schedule_suspend(device, 10000);

return -EBUSY;
}

static int rtsx_pci_runtime_suspend(struct device *device)
{
struct pci_dev *pcidev = to_pci_dev(device);
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;

dev_dbg(device, "--> %s\n", __func__);

cancel_delayed_work_sync(&pcr->carddet_work);

mutex_lock(&pcr->pcr_mutex);
rtsx_pci_power_off(pcr, HOST_ENTER_S3, true);

mutex_unlock(&pcr->pcr_mutex);

return 0;
}

@@ -1813,20 +1764,15 @@ static int rtsx_pci_runtime_suspend(struct device *device)
static int rtsx_pci_runtime_resume(struct device *device)
{
struct pci_dev *pcidev = to_pci_dev(device);
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;

handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;
dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
dev_dbg(device, "--> %s\n", __func__);

mutex_lock(&pcr->pcr_mutex);

rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);

if (pcr->ops->fetch_vendor_settings)
pcr->ops->fetch_vendor_settings(pcr);

rtsx_pci_init_hw(pcr);

if (pcr->slots[RTSX_SD_CARD].p_dev != NULL) {

@@ -1834,8 +1780,6 @@ static int rtsx_pci_runtime_resume(struct device *device)
pcr->slots[RTSX_SD_CARD].p_dev);
}

schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));

mutex_unlock(&pcr->pcr_mutex);
return 0;
}

@@ -1850,7 +1794,7 @@ static int rtsx_pci_runtime_resume(struct device *device)

static const struct dev_pm_ops rtsx_pci_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(rtsx_pci_suspend, rtsx_pci_resume)
SET_RUNTIME_PM_OPS(rtsx_pci_runtime_suspend, rtsx_pci_runtime_resume, NULL)
SET_RUNTIME_PM_OPS(rtsx_pci_runtime_suspend, rtsx_pci_runtime_resume, rtsx_pci_runtime_idle)
};

static struct pci_driver rtsx_pci_driver = {
@@ -15,6 +15,8 @@
#define MIN_DIV_N_PCR 80
#define MAX_DIV_N_PCR 208

#define RTS522A_PME_FORCE_CTL 0xFF78
#define RTS522A_AUTOLOAD_CFG1 0xFF7C
#define RTS522A_PM_CTRL3 0xFF7E

#define RTS524A_PME_FORCE_CTL 0xFF78

@@ -25,6 +27,7 @@
#define REG_EFUSE_POWEROFF 0x00
#define RTS5250_CLK_CFG3 0xFF79
#define RTS525A_CFG_MEM_PD 0xF0
#define RTS524A_AUTOLOAD_CFG1 0xFF7C
#define RTS524A_PM_CTRL3 0xFF7E
#define RTS525A_BIOS_CFG 0xFF2D
#define RTS525A_LOAD_BIOS_FLAG 0x01
@@ -309,7 +309,7 @@ static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip)
u32 val;
int err;

strncpy(chip->name, "at25", sizeof(chip->name));
strscpy(chip->name, "at25", sizeof(chip->name));

err = device_property_read_u32(dev, "size", &val);
if (err)

@@ -370,7 +370,7 @@ static int at25_fram_to_chip(struct device *dev, struct spi_eeprom *chip)
u8 id[FM25_ID_LEN];
int i;

strncpy(chip->name, "fm25", sizeof(chip->name));
strscpy(chip->name, "fm25", sizeof(chip->name));

/* Get ID of chip */
fm25_aux_read(at25, id, FM25_RDID, FM25_ID_LEN);
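As background on the strncpy()-to-strscpy() conversion in the at25 hunks above: strscpy() always NUL-terminates the destination (for a non-zero size) and reports truncation, which strncpy() does not. A minimal sketch of the difference follows; example_copy_name() and its message are hypothetical and not part of this diff.

#include <linux/string.h>
#include <linux/printk.h>

static void example_copy_name(char *dst, size_t dst_size, const char *src)
{
	/* strscpy() copies at most dst_size - 1 bytes, always writes a trailing NUL,
	 * and returns the number of bytes copied or -E2BIG when src was truncated;
	 * strncpy() can leave dst unterminated when src is too long. */
	ssize_t n = strscpy(dst, src, dst_size);

	if (n == -E2BIG)
		pr_warn("example: name '%s' truncated\n", src);	/* hypothetical handling */
}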
@@ -17,6 +17,7 @@
#include <linux/rpmsg.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/qcom_scm.h>
#include <uapi/misc/fastrpc.h>

#define ADSP_DOMAIN_ID (0)

@@ -25,16 +26,22 @@
#define CDSP_DOMAIN_ID (3)
#define FASTRPC_DEV_MAX 4 /* adsp, mdsp, slpi, cdsp*/
#define FASTRPC_MAX_SESSIONS 13 /*12 compute, 1 cpz*/
#define FASTRPC_MAX_VMIDS 16
#define FASTRPC_ALIGN 128
#define FASTRPC_MAX_FDLIST 16
#define FASTRPC_MAX_CRCLIST 64
#define FASTRPC_PHYS(p) ((p) & 0xffffffff)
#define FASTRPC_CTX_MAX (256)
#define FASTRPC_INIT_HANDLE 1
#define FASTRPC_DSP_UTILITIES_HANDLE 2
#define FASTRPC_CTXID_MASK (0xFF0)
#define INIT_FILELEN_MAX (2 * 1024 * 1024)
#define FASTRPC_DEVICE_NAME "fastrpc"
#define ADSP_MMAP_ADD_PAGES 0x1000
#define DSP_UNSUPPORTED_API (0x80000414)
/* MAX NUMBER of DSP ATTRIBUTES SUPPORTED */
#define FASTRPC_MAX_DSP_ATTRIBUTES (256)
#define FASTRPC_MAX_DSP_ATTRIBUTES_LEN (sizeof(u32) * FASTRPC_MAX_DSP_ATTRIBUTES)

/* Retrives number of input buffers from the scalars parameter */
#define REMOTE_SCALARS_INBUFS(sc) (((sc) >> 16) & 0x0ff)

@@ -72,13 +79,15 @@
#define FASTRPC_RMID_INIT_CREATE 6
#define FASTRPC_RMID_INIT_CREATE_ATTR 7
#define FASTRPC_RMID_INIT_CREATE_STATIC 8
#define FASTRPC_RMID_INIT_MEM_MAP 10
#define FASTRPC_RMID_INIT_MEM_UNMAP 11

/* Protection Domain(PD) ids */
#define AUDIO_PD (0) /* also GUEST_OS PD? */
#define USER_PD (1)
#define SENSORS_PD (2)

#define miscdev_to_cctx(d) container_of(d, struct fastrpc_channel_ctx, miscdev)
#define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)

static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
"sdsp", "cdsp"};

@@ -92,9 +101,20 @@ struct fastrpc_invoke_buf {
u32 pgidx; /* index to start of contiguous region */
};

struct fastrpc_remote_arg {
u64 pv;
u64 len;
struct fastrpc_remote_dmahandle {
s32 fd; /* dma handle fd */
u32 offset; /* dma handle offset */
u32 len; /* dma handle length */
};

struct fastrpc_remote_buf {
u64 pv; /* buffer pointer */
u64 len; /* length of buffer */
};

union fastrpc_remote_arg {
struct fastrpc_remote_buf buf;
struct fastrpc_remote_dmahandle dma;
};

struct fastrpc_mmap_rsp_msg {

@@ -108,12 +128,29 @@ struct fastrpc_mmap_req_msg {
s32 num;
};

struct fastrpc_mem_map_req_msg {
s32 pgid;
s32 fd;
s32 offset;
u32 flags;
u64 vaddrin;
s32 num;
s32 data_len;
};

struct fastrpc_munmap_req_msg {
s32 pgid;
u64 vaddr;
u64 size;
};

struct fastrpc_mem_unmap_req_msg {
s32 pgid;
s32 fd;
u64 vaddrin;
u64 len;
};

struct fastrpc_msg {
int pid; /* process group id */
int tid; /* thread id */

@@ -170,6 +207,8 @@ struct fastrpc_map {
u64 size;
void *va;
u64 len;
u64 raddr;
u32 attr;
struct kref refcount;
};

@@ -189,7 +228,7 @@ struct fastrpc_invoke_ctx {
struct work_struct put_work;
struct fastrpc_msg msg;
struct fastrpc_user *fl;
struct fastrpc_remote_arg *rpra;
union fastrpc_remote_arg *rpra;
struct fastrpc_map **maps;
struct fastrpc_buf *buf;
struct fastrpc_invoke_args *args;

@@ -207,13 +246,28 @@ struct fastrpc_session_ctx {
struct fastrpc_channel_ctx {
int domain_id;
int sesscount;
int vmcount;
u32 perms;
struct qcom_scm_vmperm vmperms[FASTRPC_MAX_VMIDS];
struct rpmsg_device *rpdev;
struct fastrpc_session_ctx session[FASTRPC_MAX_SESSIONS];
spinlock_t lock;
struct idr ctx_idr;
struct list_head users;
struct miscdevice miscdev;
struct kref refcount;
/* Flag if dsp attributes are cached */
bool valid_attributes;
u32 dsp_attributes[FASTRPC_MAX_DSP_ATTRIBUTES];
struct fastrpc_device *secure_fdevice;
struct fastrpc_device *fdevice;
bool secure;
bool unsigned_support;
};

struct fastrpc_device {
struct fastrpc_channel_ctx *cctx;
struct miscdevice miscdev;
bool secure;
};

struct fastrpc_user {

@@ -228,6 +282,7 @@ struct fastrpc_user {

int tgid;
int pd;
bool is_secure_dev;
/* Lock for lists */
spinlock_t lock;
/* lock for allocations */
@@ -241,6 +296,20 @@ static void fastrpc_free_map(struct kref *ref)
map = container_of(ref, struct fastrpc_map, refcount);

if (map->table) {
if (map->attr & FASTRPC_ATTR_SECUREMAP) {
struct qcom_scm_vmperm perm;
int err = 0;

perm.vmid = QCOM_SCM_VMID_HLOS;
perm.perm = QCOM_SCM_PERM_RWX;
err = qcom_scm_assign_mem(map->phys, map->size,
&(map->fl->cctx->vmperms[0].vmid), &perm, 1);
if (err) {
dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
map->phys, map->size, err);
return;
}
}
dma_buf_unmap_attachment(map->attach, map->table,
DMA_BIDIRECTIONAL);
dma_buf_detach(map->buf, map->attach);

@@ -262,7 +331,8 @@ static void fastrpc_map_get(struct fastrpc_map *map)
kref_get(&map->refcount);
}

static int fastrpc_map_find(struct fastrpc_user *fl, int fd,

static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd,
struct fastrpc_map **ppmap)
{
struct fastrpc_map *map = NULL;

@@ -270,7 +340,6 @@ static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
mutex_lock(&fl->mutex);
list_for_each_entry(map, &fl->maps, node) {
if (map->fd == fd) {
fastrpc_map_get(map);
*ppmap = map;
mutex_unlock(&fl->mutex);
return 0;

@@ -281,6 +350,17 @@ static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
return -ENOENT;
}

static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
struct fastrpc_map **ppmap)
{
int ret = fastrpc_map_lookup(fl, fd, ppmap);

if (!ret)
fastrpc_map_get(*ppmap);

return ret;
}

static void fastrpc_buf_free(struct fastrpc_buf *buf)
{
dma_free_coherent(buf->dev, buf->size, buf->virt,

@@ -353,7 +433,7 @@ static void fastrpc_context_free(struct kref *ref)
ctx = container_of(ref, struct fastrpc_invoke_ctx, refcount);
cctx = ctx->cctx;

for (i = 0; i < ctx->nscalars; i++)
for (i = 0; i < ctx->nbufs; i++)
fastrpc_map_put(ctx->maps[i]);

if (ctx->buf)

@@ -617,7 +697,7 @@ static const struct dma_buf_ops fastrpc_dma_buf_ops = {
};

static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
u64 len, struct fastrpc_map **ppmap)
u64 len, u32 attr, struct fastrpc_map **ppmap)
{
struct fastrpc_session_ctx *sess = fl->sctx;
struct fastrpc_map *map = NULL;

@@ -659,6 +739,22 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
map->len = len;
kref_init(&map->refcount);

if (attr & FASTRPC_ATTR_SECUREMAP) {
/*
* If subsystem VMIDs are defined in DTSI, then do
* hyp_assign from HLOS to those VM(s)
*/
unsigned int perms = BIT(QCOM_SCM_VMID_HLOS);

map->attr = attr;
err = qcom_scm_assign_mem(map->phys, (u64)map->size, &perms,
fl->cctx->vmperms, fl->cctx->vmcount);
if (err) {
dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d",
map->phys, map->size, err);
goto map_err;
}
}
spin_lock(&fl->lock);
list_add_tail(&map->node, &fl->maps);
spin_unlock(&fl->lock);
@@ -682,7 +778,7 @@ get_err:
* >>>>>> START of METADATA <<<<<<<<<
* +---------------------------------+
* | Arguments |
* | type:(struct fastrpc_remote_arg)|
* | type:(union fastrpc_remote_arg)|
* | (0 - N) |
* +---------------------------------+
* | Invoke Buffer list |

@@ -707,7 +803,7 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
{
int size = 0;

size = (sizeof(struct fastrpc_remote_arg) +
size = (sizeof(struct fastrpc_remote_buf) +
sizeof(struct fastrpc_invoke_buf) +
sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
sizeof(u64) * FASTRPC_MAX_FDLIST +

@@ -743,16 +839,13 @@ static int fastrpc_create_maps(struct fastrpc_invoke_ctx *ctx)
int i, err;

for (i = 0; i < ctx->nscalars; ++i) {
/* Make sure reserved field is set to 0 */
if (ctx->args[i].reserved)
return -EINVAL;

if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1 ||
ctx->args[i].length == 0)
continue;

err = fastrpc_map_create(ctx->fl, ctx->args[i].fd,
ctx->args[i].length, &ctx->maps[i]);
ctx->args[i].length, ctx->args[i].attr, &ctx->maps[i]);
if (err) {
dev_err(dev, "Error Creating map %d\n", err);
return -EINVAL;

@@ -762,10 +855,20 @@ static int fastrpc_create_maps(struct fastrpc_invoke_ctx *ctx)
return 0;
}

static struct fastrpc_invoke_buf *fastrpc_invoke_buf_start(union fastrpc_remote_arg *pra, int len)
{
return (struct fastrpc_invoke_buf *)(&pra[len]);
}

static struct fastrpc_phy_page *fastrpc_phy_page_start(struct fastrpc_invoke_buf *buf, int len)
{
return (struct fastrpc_phy_page *)(&buf[len]);
}

static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
{
struct device *dev = ctx->fl->sctx->dev;
struct fastrpc_remote_arg *rpra;
union fastrpc_remote_arg *rpra;
struct fastrpc_invoke_buf *list;
struct fastrpc_phy_page *pages;
int inbufs, i, oix, err = 0;

@@ -789,9 +892,8 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
return err;

rpra = ctx->buf->virt;
list = ctx->buf->virt + ctx->nscalars * sizeof(*rpra);
pages = ctx->buf->virt + ctx->nscalars * (sizeof(*list) +
sizeof(*rpra));
list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);
pages = fastrpc_phy_page_start(list, ctx->nscalars);
args = (uintptr_t)ctx->buf->virt + metalen;
rlen = pkt_size - metalen;
ctx->rpra = rpra;

@@ -802,8 +904,8 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
i = ctx->olaps[oix].raix;
len = ctx->args[i].length;

rpra[i].pv = 0;
rpra[i].len = len;
rpra[i].buf.pv = 0;
rpra[i].buf.len = len;
list[i].num = len ? 1 : 0;
list[i].pgidx = i;

@@ -813,7 +915,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
if (ctx->maps[i]) {
struct vm_area_struct *vma = NULL;

rpra[i].pv = (u64) ctx->args[i].ptr;
rpra[i].buf.pv = (u64) ctx->args[i].ptr;
pages[i].addr = ctx->maps[i]->phys;

mmap_read_lock(current->mm);

@@ -840,7 +942,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
if (rlen < mlen)
goto bail;

rpra[i].pv = args - ctx->olaps[oix].offset;
rpra[i].buf.pv = args - ctx->olaps[oix].offset;
pages[i].addr = ctx->buf->phys -
ctx->olaps[oix].offset +
(pkt_size - rlen);

@@ -854,7 +956,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
}

if (i < inbufs && !ctx->maps[i]) {
void *dst = (void *)(uintptr_t)rpra[i].pv;
void *dst = (void *)(uintptr_t)rpra[i].buf.pv;
void *src = (void *)(uintptr_t)ctx->args[i].ptr;

if (!kernel) {

@@ -870,12 +972,15 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
}

for (i = ctx->nbufs; i < ctx->nscalars; ++i) {
rpra[i].pv = (u64) ctx->args[i].ptr;
rpra[i].len = ctx->args[i].length;
list[i].num = ctx->args[i].length ? 1 : 0;
list[i].pgidx = i;
pages[i].addr = ctx->maps[i]->phys;
pages[i].size = ctx->maps[i]->size;
if (ctx->maps[i]) {
pages[i].addr = ctx->maps[i]->phys;
pages[i].size = ctx->maps[i]->size;
}
rpra[i].dma.fd = ctx->args[i].fd;
rpra[i].dma.len = ctx->args[i].length;
rpra[i].dma.offset = (u64) ctx->args[i].ptr;
}

bail:
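For orientation, the two helpers introduced above encode the metadata layout sketched in the driver comment: the shared metadata buffer packs the remote-argument array, the invoke-buffer list, the physical-page list and the fd list back to back. A rough walk of that layout follows; it is an illustrative sketch, not verbatim driver code, and it assumes ctx->nscalars counts all buffers and handles.

union fastrpc_remote_arg *rpra = ctx->buf->virt;	/* nscalars elements at the start of the buffer */
struct fastrpc_invoke_buf *list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);	/* immediately after rpra[] */
struct fastrpc_phy_page *pages = fastrpc_phy_page_start(list, ctx->nscalars);		/* immediately after list[] */
u64 *fdlist = (u64 *)&pages[ctx->nscalars];		/* FASTRPC_MAX_FDLIST entries follow pages[] */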
@@ -888,16 +993,26 @@ bail:
static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
u32 kernel)
{
struct fastrpc_remote_arg *rpra = ctx->rpra;
int i, inbufs;
union fastrpc_remote_arg *rpra = ctx->rpra;
struct fastrpc_user *fl = ctx->fl;
struct fastrpc_map *mmap = NULL;
struct fastrpc_invoke_buf *list;
struct fastrpc_phy_page *pages;
u64 *fdlist;
int i, inbufs, outbufs, handles;

inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc);
handles = REMOTE_SCALARS_INHANDLES(ctx->sc) + REMOTE_SCALARS_OUTHANDLES(ctx->sc);
list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);
pages = fastrpc_phy_page_start(list, ctx->nscalars);
fdlist = (uint64_t *)(pages + inbufs + outbufs + handles);

for (i = inbufs; i < ctx->nbufs; ++i) {
if (!ctx->maps[i]) {
void *src = (void *)(uintptr_t)rpra[i].pv;
void *src = (void *)(uintptr_t)rpra[i].buf.pv;
void *dst = (void *)(uintptr_t)ctx->args[i].ptr;
u64 len = rpra[i].len;
u64 len = rpra[i].buf.len;

if (!kernel) {
if (copy_to_user((void __user *)dst, src, len))

@@ -908,6 +1023,13 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
}
}

for (i = 0; i < FASTRPC_MAX_FDLIST; i++) {
if (!fdlist[i])
break;
if (!fastrpc_map_lookup(fl, (int)fdlist[i], &mmap))
fastrpc_map_put(mmap);
}

return 0;
}

@@ -1016,6 +1138,24 @@ bail:
return err;
}

static bool is_session_rejected(struct fastrpc_user *fl, bool unsigned_pd_request)
{
/* Check if the device node is non-secure and channel is secure*/
if (!fl->is_secure_dev && fl->cctx->secure) {
/*
* Allow untrusted applications to offload only to Unsigned PD when
* channel is configured as secure and block untrusted apps on channel
* that does not support unsigned PD offload
*/
if (!fl->cctx->unsigned_support || !unsigned_pd_request) {
dev_err(&fl->cctx->rpdev->dev, "Error: Untrusted application trying to offload to signed PD");
return true;
}
}

return false;
}

static int fastrpc_init_create_process(struct fastrpc_user *fl,
char __user *argp)
{

@@ -1035,6 +1175,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl,
u32 siglen;
} inbuf;
u32 sc;
bool unsigned_module = false;

args = kcalloc(FASTRPC_CREATE_PROCESS_NARGS, sizeof(*args), GFP_KERNEL);
if (!args)

@@ -1045,6 +1186,14 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl,
goto err;
}

if (init.attrs & FASTRPC_MODE_UNSIGNED_MODULE)
unsigned_module = true;

if (is_session_rejected(fl, unsigned_module)) {
err = -ECONNREFUSED;
goto err;
}

if (init.filelen > INIT_FILELEN_MAX) {
err = -EINVAL;
goto err;

@@ -1059,7 +1208,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl,
fl->pd = USER_PD;

if (init.filelen && init.filefd) {
err = fastrpc_map_create(fl, init.filefd, init.filelen, &map);
err = fastrpc_map_create(fl, init.filefd, init.filelen, 0, &map);
if (err)
goto err;
}

@@ -1168,7 +1317,6 @@ static int fastrpc_release_current_dsp_process(struct fastrpc_user *fl)
args[0].ptr = (u64)(uintptr_t) &tgid;
args[0].length = sizeof(tgid);
args[0].fd = -1;
args[0].reserved = 0;
sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_RELEASE, 1, 0);

return fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE,

@@ -1220,10 +1368,14 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)

static int fastrpc_device_open(struct inode *inode, struct file *filp)
{
struct fastrpc_channel_ctx *cctx = miscdev_to_cctx(filp->private_data);
struct fastrpc_channel_ctx *cctx;
struct fastrpc_device *fdevice;
struct fastrpc_user *fl = NULL;
unsigned long flags;

fdevice = miscdev_to_fdevice(filp->private_data);
cctx = fdevice->cctx;

fl = kzalloc(sizeof(*fl), GFP_KERNEL);
if (!fl)
return -ENOMEM;

@@ -1240,6 +1392,7 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp)
INIT_LIST_HEAD(&fl->user);
fl->tgid = current->tgid;
fl->cctx = cctx;
fl->is_secure_dev = fdevice->secure;

fl->sctx = fastrpc_session_alloc(cctx);
if (!fl->sctx) {

@@ -1311,7 +1464,6 @@ static int fastrpc_init_attach(struct fastrpc_user *fl, int pd)
args[0].ptr = (u64)(uintptr_t) &tgid;
args[0].length = sizeof(tgid);
args[0].fd = -1;
args[0].reserved = 0;
sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0);
fl->pd = pd;
@@ -1349,6 +1501,107 @@ static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp)
return err;
}

static int fastrpc_get_info_from_dsp(struct fastrpc_user *fl, uint32_t *dsp_attr_buf,
uint32_t dsp_attr_buf_len)
{
struct fastrpc_invoke_args args[2] = { 0 };

/* Capability filled in userspace */
dsp_attr_buf[0] = 0;

args[0].ptr = (u64)(uintptr_t)&dsp_attr_buf_len;
args[0].length = sizeof(dsp_attr_buf_len);
args[0].fd = -1;
args[1].ptr = (u64)(uintptr_t)&dsp_attr_buf[1];
args[1].length = dsp_attr_buf_len;
args[1].fd = -1;
fl->pd = 1;

return fastrpc_internal_invoke(fl, true, FASTRPC_DSP_UTILITIES_HANDLE,
FASTRPC_SCALARS(0, 1, 1), args);
}

static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
struct fastrpc_user *fl)
{
struct fastrpc_channel_ctx *cctx = fl->cctx;
uint32_t attribute_id = cap->attribute_id;
uint32_t *dsp_attributes;
unsigned long flags;
uint32_t domain = cap->domain;
int err;

spin_lock_irqsave(&cctx->lock, flags);
/* check if we already have queried dsp for attributes */
if (cctx->valid_attributes) {
spin_unlock_irqrestore(&cctx->lock, flags);
goto done;
}
spin_unlock_irqrestore(&cctx->lock, flags);

dsp_attributes = kzalloc(FASTRPC_MAX_DSP_ATTRIBUTES_LEN, GFP_KERNEL);
if (!dsp_attributes)
return -ENOMEM;

err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN);
if (err == DSP_UNSUPPORTED_API) {
dev_info(&cctx->rpdev->dev,
"Warning: DSP capabilities not supported on domain: %d\n", domain);
kfree(dsp_attributes);
return -EOPNOTSUPP;
} else if (err) {
dev_err(&cctx->rpdev->dev, "Error: dsp information is incorrect err: %d\n", err);
kfree(dsp_attributes);
return err;
}

spin_lock_irqsave(&cctx->lock, flags);
memcpy(cctx->dsp_attributes, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN);
cctx->valid_attributes = true;
spin_unlock_irqrestore(&cctx->lock, flags);
kfree(dsp_attributes);
done:
cap->capability = cctx->dsp_attributes[attribute_id];
return 0;
}

static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
{
struct fastrpc_ioctl_capability cap = {0};
int err = 0;

if (copy_from_user(&cap, argp, sizeof(cap)))
return -EFAULT;

cap.capability = 0;
if (cap.domain >= FASTRPC_DEV_MAX) {
dev_err(&fl->cctx->rpdev->dev, "Error: Invalid domain id:%d, err:%d\n",
cap.domain, err);
return -ECHRNG;
}

/* Fastrpc Capablities does not support modem domain */
if (cap.domain == MDSP_DOMAIN_ID) {
dev_err(&fl->cctx->rpdev->dev, "Error: modem not supported %d\n", err);
return -ECHRNG;
}

if (cap.attribute_id >= FASTRPC_MAX_DSP_ATTRIBUTES) {
dev_err(&fl->cctx->rpdev->dev, "Error: invalid attribute: %d, err: %d\n",
cap.attribute_id, err);
return -EOVERFLOW;
}

err = fastrpc_get_info_from_kernel(&cap, fl);
if (err)
return err;

if (copy_to_user(argp, &cap.capability, sizeof(cap.capability)))
return -EFAULT;

return 0;
}

static int fastrpc_req_munmap_impl(struct fastrpc_user *fl,
struct fastrpc_req_munmap *req)
{
@@ -1491,6 +1744,134 @@ err_invoke:
return err;
}

static int fastrpc_req_mem_unmap_impl(struct fastrpc_user *fl, struct fastrpc_mem_unmap *req)
{
struct fastrpc_invoke_args args[1] = { [0] = { 0 } };
struct fastrpc_map *map = NULL, *m;
struct fastrpc_mem_unmap_req_msg req_msg = { 0 };
int err = 0;
u32 sc;
struct device *dev = fl->sctx->dev;

spin_lock(&fl->lock);
list_for_each_entry_safe(map, m, &fl->maps, node) {
if ((req->fd < 0 || map->fd == req->fd) && (map->raddr == req->vaddr))
break;
map = NULL;
}

spin_unlock(&fl->lock);

if (!map) {
dev_err(dev, "map not in list\n");
return -EINVAL;
}

req_msg.pgid = fl->tgid;
req_msg.len = map->len;
req_msg.vaddrin = map->raddr;
req_msg.fd = map->fd;

args[0].ptr = (u64) (uintptr_t) &req_msg;
args[0].length = sizeof(req_msg);

sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MEM_UNMAP, 1, 0);
err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc,
&args[0]);
fastrpc_map_put(map);
if (err)
dev_err(dev, "unmmap\tpt fd = %d, 0x%09llx error\n", map->fd, map->raddr);

return err;
}

static int fastrpc_req_mem_unmap(struct fastrpc_user *fl, char __user *argp)
{
struct fastrpc_mem_unmap req;

if (copy_from_user(&req, argp, sizeof(req)))
return -EFAULT;

return fastrpc_req_mem_unmap_impl(fl, &req);
}

static int fastrpc_req_mem_map(struct fastrpc_user *fl, char __user *argp)
{
struct fastrpc_invoke_args args[4] = { [0 ... 3] = { 0 } };
struct fastrpc_mem_map_req_msg req_msg = { 0 };
struct fastrpc_mmap_rsp_msg rsp_msg = { 0 };
struct fastrpc_mem_unmap req_unmap = { 0 };
struct fastrpc_phy_page pages = { 0 };
struct fastrpc_mem_map req;
struct device *dev = fl->sctx->dev;
struct fastrpc_map *map = NULL;
int err;
u32 sc;

if (copy_from_user(&req, argp, sizeof(req)))
return -EFAULT;

/* create SMMU mapping */
err = fastrpc_map_create(fl, req.fd, req.length, 0, &map);
if (err) {
dev_err(dev, "failed to map buffer, fd = %d\n", req.fd);
return err;
}

req_msg.pgid = fl->tgid;
req_msg.fd = req.fd;
req_msg.offset = req.offset;
req_msg.vaddrin = req.vaddrin;
map->va = (void *) (uintptr_t) req.vaddrin;
req_msg.flags = req.flags;
req_msg.num = sizeof(pages);
req_msg.data_len = 0;

args[0].ptr = (u64) (uintptr_t) &req_msg;
args[0].length = sizeof(req_msg);

pages.addr = map->phys;
pages.size = map->size;

args[1].ptr = (u64) (uintptr_t) &pages;
args[1].length = sizeof(pages);

args[2].ptr = (u64) (uintptr_t) &pages;
args[2].length = 0;

args[3].ptr = (u64) (uintptr_t) &rsp_msg;
args[3].length = sizeof(rsp_msg);

sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MEM_MAP, 3, 1);
err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, &args[0]);
if (err) {
dev_err(dev, "mem mmap error, fd %d, vaddr %llx, size %lld\n",
req.fd, req.vaddrin, map->size);
goto err_invoke;
}

/* update the buffer to be able to deallocate the memory on the DSP */
map->raddr = rsp_msg.vaddr;

/* let the client know the address to use */
req.vaddrout = rsp_msg.vaddr;

if (copy_to_user((void __user *)argp, &req, sizeof(req))) {
/* unmap the memory and release the buffer */
req_unmap.vaddr = (uintptr_t) rsp_msg.vaddr;
req_unmap.length = map->size;
fastrpc_req_mem_unmap_impl(fl, &req_unmap);
return -EFAULT;
}

return 0;

err_invoke:
fastrpc_map_put(map);

return err;
}

static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1520,6 +1901,15 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
|
||||
case FASTRPC_IOCTL_MUNMAP:
|
||||
err = fastrpc_req_munmap(fl, argp);
|
||||
break;
|
||||
case FASTRPC_IOCTL_MEM_MAP:
|
||||
err = fastrpc_req_mem_map(fl, argp);
|
||||
break;
|
||||
case FASTRPC_IOCTL_MEM_UNMAP:
|
||||
err = fastrpc_req_mem_unmap(fl, argp);
|
||||
break;
|
||||
case FASTRPC_IOCTL_GET_DSP_INFO:
|
||||
err = fastrpc_get_dsp_info(fl, argp);
|
||||
break;
|
||||
default:
|
||||
err = -ENOTTY;
|
||||
break;
|
||||
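As an aside, the MEM_MAP/MEM_UNMAP pair above is driven entirely from userspace through the fastrpc character device. Below is a minimal, hedged sketch of a caller; the struct field names mirror what the driver reads in this patch, but the uapi header path and exact layout are assumed from include/uapi/misc/fastrpc.h and are not part of this change:

	#include <fcntl.h>
	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <misc/fastrpc.h>	/* FASTRPC_IOCTL_MEM_MAP/UNMAP; install path assumed */

	/* Map a dmabuf into the DSP address space, then unmap it again. */
	static int map_and_unmap(int dev_fd, int buf_fd, uint64_t len)
	{
		struct fastrpc_mem_map map;
		struct fastrpc_mem_unmap unmap;

		memset(&map, 0, sizeof(map));
		map.fd = buf_fd;	/* dmabuf handed to the DSP */
		map.length = len;

		if (ioctl(dev_fd, FASTRPC_IOCTL_MEM_MAP, &map) < 0)
			return -1;

		/* The driver fills vaddrout with the DSP-side address. */
		memset(&unmap, 0, sizeof(unmap));
		unmap.fd = buf_fd;
		unmap.vaddr = map.vaddrout;
		unmap.length = len;
		return ioctl(dev_fd, FASTRPC_IOCTL_MEM_UNMAP, &unmap);
	}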
@@ -1615,12 +2005,41 @@ static struct platform_driver fastrpc_cb_driver = {
	},
};

static int fastrpc_device_register(struct device *dev, struct fastrpc_channel_ctx *cctx,
				   bool is_secured, const char *domain)
{
	struct fastrpc_device *fdev;
	int err;

	fdev = devm_kzalloc(dev, sizeof(*fdev), GFP_KERNEL);
	if (!fdev)
		return -ENOMEM;

	fdev->secure = is_secured;
	fdev->cctx = cctx;
	fdev->miscdev.minor = MISC_DYNAMIC_MINOR;
	fdev->miscdev.fops = &fastrpc_fops;
	fdev->miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "fastrpc-%s%s",
					    domain, is_secured ? "-secure" : "");
	err = misc_register(&fdev->miscdev);
	if (!err) {
		if (is_secured)
			cctx->secure_fdevice = fdev;
		else
			cctx->fdevice = fdev;
	}

	return err;
}

static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
{
	struct device *rdev = &rpdev->dev;
	struct fastrpc_channel_ctx *data;
	int i, err, domain_id = -1;
	int i, err, domain_id = -1, vmcount;
	const char *domain;
	bool secure_dsp;
	unsigned int vmids[FASTRPC_MAX_VMIDS];

	err = of_property_read_string(rdev->of_node, "label", &domain);
	if (err) {
@@ -1640,18 +2059,53 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
		return -EINVAL;
	}

	vmcount = of_property_read_variable_u32_array(rdev->of_node,
				"qcom,vmids", &vmids[0], 0, FASTRPC_MAX_VMIDS);
	if (vmcount < 0)
		vmcount = 0;
	else if (!qcom_scm_is_available())
		return -EPROBE_DEFER;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	data->miscdev.minor = MISC_DYNAMIC_MINOR;
	data->miscdev.name = devm_kasprintf(rdev, GFP_KERNEL, "fastrpc-%s",
					    domains[domain_id]);
	data->miscdev.fops = &fastrpc_fops;
	err = misc_register(&data->miscdev);
	if (err) {
		kfree(data);
		return err;
	if (vmcount) {
		data->vmcount = vmcount;
		data->perms = BIT(QCOM_SCM_VMID_HLOS);
		for (i = 0; i < data->vmcount; i++) {
			data->vmperms[i].vmid = vmids[i];
			data->vmperms[i].perm = QCOM_SCM_PERM_RWX;
		}
	}

	secure_dsp = !(of_property_read_bool(rdev->of_node, "qcom,non-secure-domain"));
	data->secure = secure_dsp;

	switch (domain_id) {
	case ADSP_DOMAIN_ID:
	case MDSP_DOMAIN_ID:
	case SDSP_DOMAIN_ID:
		/* Unsigned PD offloading is only supported on CDSP*/
		data->unsigned_support = false;
		err = fastrpc_device_register(rdev, data, secure_dsp, domains[domain_id]);
		if (err)
			goto fdev_error;
		break;
	case CDSP_DOMAIN_ID:
		data->unsigned_support = true;
		/* Create both device nodes so that we can allow both Signed and Unsigned PD */
		err = fastrpc_device_register(rdev, data, true, domains[domain_id]);
		if (err)
			goto fdev_error;

		err = fastrpc_device_register(rdev, data, false, domains[domain_id]);
		if (err)
			goto fdev_error;
		break;
	default:
		err = -EINVAL;
		goto fdev_error;
	}

	kref_init(&data->refcount);
@@ -1665,6 +2119,9 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
	data->rpdev = rpdev;

	return of_platform_populate(rdev->of_node, NULL, NULL, rdev);
fdev_error:
	kfree(data);
	return err;
}

static void fastrpc_notify_users(struct fastrpc_user *user)
@@ -1688,7 +2145,12 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
		fastrpc_notify_users(user);
	spin_unlock_irqrestore(&cctx->lock, flags);

	misc_deregister(&cctx->miscdev);
	if (cctx->fdevice)
		misc_deregister(&cctx->fdevice->miscdev);

	if (cctx->secure_fdevice)
		misc_deregister(&cctx->secure_fdevice->miscdev);

	of_platform_depopulate(&rpdev->dev);

	cctx->rpdev = NULL;

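With fastrpc_device_register() above, a CDSP channel now exposes two misc device nodes built from the "fastrpc-%s%s" format: one for signed/secure PDs and one with the "-secure" suffix dropped for non-secure use. A rough userspace sketch of picking a node (the /dev paths follow from the format string and the "cdsp" domain label, but are illustrative, not guaranteed by this patch):

	#include <fcntl.h>
	#include <unistd.h>

	/* Open the CDSP fastrpc node; fall back to the secure node when the
	 * non-secure one is not exposed on this platform. */
	static int open_cdsp(void)
	{
		int fd = open("/dev/fastrpc-cdsp", O_RDWR);

		if (fd < 0)
			fd = open("/dev/fastrpc-cdsp-secure", O_RDWR);
		return fd;
	}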
@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
		common/command_buffer.o common/hw_queue.o common/irq.o \
		common/sysfs.o common/hwmon.o common/memory.o \
		common/command_submission.o common/firmware_if.o \
		common/state_dump.o common/hwmgr.o
		common/state_dump.o

@@ -424,8 +424,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle = 0, device_va = 0;
	enum hl_device_status status;
	u64 handle = 0, device_va;
	u32 usage_cnt = 0;
	int rc;

@@ -464,6 +464,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
					args->in.flags,
					&usage_cnt,
					&device_va);
		if (rc)
			break;

		memset(&args->out, 0, sizeof(args->out));

@@ -14,6 +14,8 @@
#define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
				HL_CS_FLAGS_COLLECTIVE_WAIT)

#define MAX_TS_ITER_NUM 10

/**
 * enum hl_cs_wait_status - cs wait status
 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
@@ -919,18 +921,21 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
		complete_job(hdev, job);
}

void hl_cs_rollback_all(struct hl_device *hdev)
void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
{
	int i;
	struct hl_cs *cs, *tmp;

	flush_workqueue(hdev->sob_reset_wq);
	if (!skip_wq_flush) {
		flush_workqueue(hdev->ts_free_obj_wq);

	/* flush all completions before iterating over the CS mirror list in
	 * order to avoid a race with the release functions
	 */
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		flush_workqueue(hdev->cq_wq[i]);
		/* flush all completions before iterating over the CS mirror list in
		 * order to avoid a race with the release functions
		 */
		for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
			flush_workqueue(hdev->cq_wq[i]);

	}

	/* Make sure we don't have leftovers in the CS mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
@@ -948,13 +953,19 @@ void hl_cs_rollback_all(struct hl_device *hdev)
static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
	struct hl_user_pending_interrupt *pend;
	struct hl_user_pending_interrupt *pend, *temp;
	unsigned long flags;

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
		pend->fence.error = -EIO;
		complete_all(&pend->fence.completion);
	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
		if (pend->ts_reg_info.ts_buff) {
			list_del(&pend->wait_list_node);
			hl_ts_put(pend->ts_reg_info.ts_buff);
			hl_cb_put(pend->ts_reg_info.cq_cb);
		} else {
			pend->fence.error = -EIO;
			complete_all(&pend->fence.completion);
		}
	}
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
}
@@ -2063,13 +2074,16 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
		idp = &ctx->sig_mgr.handles;
		idr_for_each_entry(idp, encaps_sig_hdl, id) {
			if (encaps_sig_hdl->cs_seq == signal_seq) {
				handle_found = true;
				/* get refcount to protect removing
				 * this handle from idr, needed when
				 * multiple wait cs are used with offset
				/* get refcount to protect removing this handle from idr,
				 * needed when multiple wait cs are used with offset
				 * to wait on reserved encaps signals.
				 * Since kref_put of this handle is executed outside the
				 * current lock, it is possible that the handle refcount
				 * is 0 but it yet to be removed from the list. In this
				 * case need to consider the handle as not valid.
				 */
				kref_get(&encaps_sig_hdl->refcount);
				if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
					handle_found = true;
				break;
			}
		}
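The switch from kref_get() to kref_get_unless_zero() above matters because the handle's final kref_put() runs outside the lock that protects the idr walk, so a handle can still be visible while its refcount has already dropped to zero. A generic, self-contained sketch of that lookup pattern (not habanalabs-specific code):

	#include <linux/kref.h>
	#include <linux/list.h>

	struct pool_obj {
		struct list_head node;
		struct kref refcount;
		int id;
	};

	/* Caller holds the lock protecting @pool. Returns a referenced object
	 * or NULL; an object whose refcount already hit zero is treated as
	 * not found even though it is still linked into the list. */
	static struct pool_obj *pool_find_get(struct list_head *pool, int wanted_id)
	{
		struct pool_obj *obj;

		list_for_each_entry(obj, pool, node) {
			if (obj->id != wanted_id)
				continue;
			if (kref_get_unless_zero(&obj->refcount))
				return obj;
			break;
		}
		return NULL;
	}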
@@ -2739,7 +2753,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		mcs_data.update_ts = false;
		rc = hl_cs_poll_fences(&mcs_data, mcs_compl);

		if (mcs_data.completion_bitmap)
		if (rc || mcs_data.completion_bitmap)
			break;

		/*
@@ -2854,64 +2868,174 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
	return 0;
}

static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				struct hl_cb_mgr *cb_mgr, u64 timeout_us,
				u64 cq_counters_handle, u64 cq_counters_offset,
				u64 target_value, struct hl_user_interrupt *interrupt,
				u32 *status,
				u64 *timestamp)
static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
				struct hl_cb *cq_cb,
				u64 ts_offset, u64 cq_offset, u64 target_value,
				spinlock_t *wait_list_lock,
				struct hl_user_pending_interrupt **pend)
{
	struct hl_user_pending_interrupt *requested_offset_record =
			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
			ts_offset;
	struct hl_user_pending_interrupt *cb_last =
			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
	unsigned long flags, iter_counter = 0;
	u64 current_cq_counter;

	/* Validate ts_offset not exceeding last max */
	if (requested_offset_record > cb_last) {
		dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
				(u64)(uintptr_t)cb_last);
		return -EINVAL;
	}

start_over:
	spin_lock_irqsave(wait_list_lock, flags);

	/* Unregister only if we didn't reach the target value
	 * since in this case there will be no handling in irq context
	 * and then it's safe to delete the node out of the interrupt list
	 * then re-use it on other interrupt
	 */
	if (requested_offset_record->ts_reg_info.in_use) {
		current_cq_counter = *requested_offset_record->cq_kernel_addr;
		if (current_cq_counter < requested_offset_record->cq_target_value) {
			list_del(&requested_offset_record->wait_list_node);
			spin_unlock_irqrestore(wait_list_lock, flags);

			hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);

			dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
		} else {
			dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");

			/* irq handling in the middle give it time to finish */
			spin_unlock_irqrestore(wait_list_lock, flags);
			usleep_range(1, 10);
			if (++iter_counter == MAX_TS_ITER_NUM) {
				dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n");
				return -EINVAL;
			}

			goto start_over;
		}
	} else {
		spin_unlock_irqrestore(wait_list_lock, flags);
	}

	/* Fill up the new registration node info */
	requested_offset_record->ts_reg_info.in_use = 1;
	requested_offset_record->ts_reg_info.ts_buff = ts_buff;
	requested_offset_record->ts_reg_info.cq_cb = cq_cb;
	requested_offset_record->ts_reg_info.timestamp_kernel_addr =
			(u64 *) ts_buff->user_buff_address + ts_offset;
	requested_offset_record->cq_kernel_addr =
			(u64 *) cq_cb->kernel_address + cq_offset;
	requested_offset_record->cq_target_value = target_value;

	*pend = requested_offset_record;

	dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
		(u64)(uintptr_t)requested_offset_record);
	return 0;
}

static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr,
				u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
				u64 target_value, struct hl_user_interrupt *interrupt,
				bool register_ts_record, u64 ts_handle, u64 ts_offset,
				u32 *status, u64 *timestamp)
{
	u32 cq_patched_handle, ts_patched_handle;
	struct hl_user_pending_interrupt *pend;
	struct hl_ts_buff *ts_buff;
	struct hl_cb *cq_cb;
	unsigned long timeout, flags;
	long completion_rc;
	struct hl_cb *cb;
	int rc = 0;
	u32 handle;

	timeout = hl_usecs64_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	cq_counters_handle >>= PAGE_SHIFT;
	handle = (u32) cq_counters_handle;

	cb = hl_cb_get(hdev, cb_mgr, handle);
	if (!cb) {
		hl_ctx_put(ctx);
		return -EINVAL;
	cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT);
	cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle);
	if (!cq_cb) {
		rc = -EINVAL;
		goto put_ctx;
	}

	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
	if (!pend) {
		hl_cb_put(cb);
		hl_ctx_put(ctx);
		return -ENOMEM;
	if (register_ts_record) {
		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
					interrupt->interrupt_id, ts_offset, cq_counters_offset);

		ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
		ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
		if (!ts_buff) {
			rc = -EINVAL;
			goto put_cq_cb;
		}

		/* Find first available record */
		rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
						cq_counters_offset, target_value,
						&interrupt->wait_list_lock, &pend);
		if (rc)
			goto put_ts_buff;
	} else {
		pend = kzalloc(sizeof(*pend), GFP_KERNEL);
		if (!pend) {
			rc = -ENOMEM;
			goto put_cq_cb;
		}
		hl_fence_init(&pend->fence, ULONG_MAX);
		pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
		pend->cq_target_value = target_value;
	}

	hl_fence_init(&pend->fence, ULONG_MAX);

	pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
	pend->cq_target_value = target_value;
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
	 */
	if (*pend->cq_kernel_addr >= target_value) {
		if (register_ts_record)
			pend->ts_reg_info.in_use = 0;
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		*status = HL_WAIT_CS_STATUS_COMPLETED;
		/* There was no interrupt, we assume the completion is now. */

		if (register_ts_record) {
			*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
			goto put_ts_buff;
		} else {
			pend->fence.timestamp = ktime_get();
			goto set_timestamp;
		}
	} else if (!timeout_us) {
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
		*status = HL_WAIT_CS_STATUS_BUSY;
		pend->fence.timestamp = ktime_get();
		goto set_timestamp;
	}

	if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
		goto set_timestamp;

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 * handler to monitor.
	 * Note that we cannot have sorted list by target value,
	 * in order to shorten the list pass loop, since
	 * same list could have nodes for different cq counter handle.
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	if (register_ts_record) {
		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
		goto ts_registration_exit;
	}

	/* Wait for interrupt handler to signal completion */
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
								timeout);
@@ -2932,23 +3056,41 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
			rc = -EIO;
			*status = HL_WAIT_CS_STATUS_ABORTED;
		} else {
			dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
				interrupt->interrupt_id);
			rc = -ETIMEDOUT;
			/* The wait has timed-out. We don't know anything beyond that
			 * because the workload wasn't submitted through the driver.
			 * Therefore, from driver's perspective, the workload is still
			 * executing.
			 */
			rc = 0;
			*status = HL_WAIT_CS_STATUS_BUSY;
		}
		*status = HL_WAIT_CS_STATUS_BUSY;
		}
	}

	/*
	 * We keep removing the node from list here, and not at the irq handler
	 * for completion timeout case. and if it's a registration
	 * for ts record, the node will be deleted in the irq handler after
	 * we reach the target value.
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

set_timestamp:
	*timestamp = ktime_to_ns(pend->fence.timestamp);

	kfree(pend);
	hl_cb_put(cb);
	hl_cb_put(cq_cb);
ts_registration_exit:
	hl_ctx_put(ctx);

	return rc;

put_ts_buff:
	hl_ts_put(ts_buff);
put_cq_cb:
	hl_cb_put(cq_cb);
put_ctx:
	hl_ctx_put(ctx);

	return rc;
@@ -3049,6 +3191,12 @@ wait_again:
				interrupt->interrupt_id);
			rc = -EINTR;
		} else {
			/* The wait has timed-out. We don't know anything beyond that
			 * because the workload wasn't submitted through the driver.
			 * Therefore, from driver's perspective, the workload is still
			 * executing.
			 */
			rc = 0;
			*status = HL_WAIT_CS_STATUS_BUSY;
		}

@@ -3101,23 +3249,20 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
	interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];

	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
				args->in.cq_counters_offset,
				args->in.target, interrupt, &status,
				&timestamp);
				args->in.target, interrupt,
				!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
				args->in.timestamp_handle, args->in.timestamp_offset,
				&status, &timestamp);
	else
		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
				args->in.interrupt_timeout_us, args->in.addr,
				args->in.target, interrupt, &status,
				&timestamp);
	if (rc) {
		if (rc != -EINTR)
			dev_err_ratelimited(hdev->dev,
				"interrupt_wait_ioctl failed (%d)\n", rc);

	if (rc)
		return rc;
	}

	memset(args, 0, sizeof(*args));
	args->out.status = status;

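The registration path above is selected purely by ioctl arguments. A heavily hedged userspace sketch of arming a timestamp record, using only the argument names visible in the hunk above; the ioctl number HL_IOCTL_WAIT_CS and the exact layout of union hl_wait_cs_args are assumptions taken from the habanalabs uapi header, not from this patch:

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <misc/habanalabs.h>	/* uapi header; exact layout assumed */

	/* Ask the driver to write ktime_get_ns() into the mmapped timestamp
	 * buffer at @ts_offset once the CQ counter reaches @target. */
	static int register_ts(int fd, uint64_t cq_handle, uint64_t cq_offset,
			       uint64_t ts_handle, uint64_t ts_offset,
			       uint64_t target, uint32_t interrupt_flags,
			       uint64_t timeout_us)
	{
		union hl_wait_cs_args args;

		memset(&args, 0, sizeof(args));
		args.in.flags = interrupt_flags |
				HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ |
				HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT;
		args.in.interrupt_timeout_us = timeout_us;
		args.in.cq_counters_handle = cq_handle;
		args.in.cq_counters_offset = cq_offset;
		args.in.timestamp_handle = ts_handle;
		args.in.timestamp_offset = ts_offset;
		args.in.target = target;

		return ioctl(fd, HL_IOCTL_WAIT_CS, &args);
	}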
@@ -890,6 +890,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
		pci_set_power_state(hdev->pdev, PCI_D0);
		pci_restore_state(hdev->pdev);
		rc = pci_enable_device(hdev->pdev);
		if (rc < 0)
			return rc;
	} else if (value == 2) {
		pci_save_state(hdev->pdev);
		pci_disable_device(hdev->pdev);
@@ -1054,42 +1056,12 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf,
static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
	struct hl_device *hdev = entry->hdev;
	char tmp_buf[200];
	ssize_t rc;

	if (*ppos)
		return 0;

	sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask);
	rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
			strlen(tmp_buf) + 1);

	return rc;
	return 0;
}

static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
	struct hl_device *hdev = entry->hdev;
	u64 value;
	ssize_t rc;

	if (hdev->reset_info.in_reset) {
		dev_warn_ratelimited(hdev->dev,
				"Can't change clock gating during reset\n");
		return 0;
	}

	rc = kstrtoull_from_user(buf, count, 16, &value);
	if (rc)
		return rc;

	hdev->clock_gating_mask = value;
	hdev->asic_funcs->set_clock_gating(hdev);

	return count;
}

@@ -1101,6 +1073,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
	char tmp_buf[200];
	ssize_t rc;

	if (!hdev->asic_prop.configurable_stop_on_err)
		return -EOPNOTSUPP;

	if (*ppos)
		return 0;

@@ -1119,6 +1094,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
	u32 value;
	ssize_t rc;

	if (!hdev->asic_prop.configurable_stop_on_err)
		return -EOPNOTSUPP;

	if (hdev->reset_info.in_reset) {
		dev_warn_ratelimited(hdev->dev,
				"Can't change stop on error during reset\n");

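hl_clk_gate_read()/hl_clk_gate_write() above are gutted because clock gating is no longer user-controllable; the surviving debugfs readers (such as hl_stop_on_err_read) keep the usual one-shot read pattern. A generic sketch of that pattern, not a copy of any driver handler:

	#include <linux/debugfs.h>
	#include <linux/fs.h>
	#include <linux/string.h>
	#include <linux/uaccess.h>

	static ssize_t example_read(struct file *f, char __user *buf,
				    size_t count, loff_t *ppos)
	{
		char tmp_buf[32];

		if (*ppos)	/* second read() call: report EOF */
			return 0;

		snprintf(tmp_buf, sizeof(tmp_buf), "0x%llx\n", 0x1234ULL);
		return simple_read_from_buffer(buf, count, ppos, tmp_buf,
					       strlen(tmp_buf) + 1);
	}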
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2021 HabanaLabs, Ltd.
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

@@ -13,6 +13,8 @@
#include <linux/pci.h>
#include <linux/hwmon.h>

#define HL_RESET_DELAY_USEC 10000 /* 10ms */

enum hl_device_status hl_device_status(struct hl_device *hdev)
{
	enum hl_device_status status;
@@ -145,6 +147,7 @@ static int hl_device_release(struct inode *inode, struct file *filp)
	hl_release_pending_user_interrupts(hpriv->hdev);

	hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
	hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

	if (!hl_hpriv_put(hpriv))
@@ -209,6 +212,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)

	case HL_MMAP_TYPE_BLOCK:
		return hl_hw_block_mmap(hpriv, vma);

	case HL_MMAP_TYPE_TS_BUFF:
		return hl_ts_mmap(hpriv, vma);
	}

	return -EINVAL;
@@ -410,10 +416,10 @@ static int device_early_init(struct hl_device *hdev)
		goto free_cq_wq;
	}

	hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
	if (!hdev->sob_reset_wq) {
	hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
	if (!hdev->ts_free_obj_wq) {
		dev_err(hdev->dev,
			"Failed to allocate SOB reset workqueue\n");
			"Failed to allocate Timestamp registration free workqueue\n");
		rc = -ENOMEM;
		goto free_eq_wq;
	}
@@ -422,7 +428,7 @@ static int device_early_init(struct hl_device *hdev)
					GFP_KERNEL);
	if (!hdev->hl_chip_info) {
		rc = -ENOMEM;
		goto free_sob_reset_wq;
		goto free_ts_free_wq;
	}

	rc = hl_mmu_if_set_funcs(hdev);
@@ -461,8 +467,8 @@ free_cb_mgr:
	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_sob_reset_wq:
	destroy_workqueue(hdev->sob_reset_wq);
free_ts_free_wq:
	destroy_workqueue(hdev->ts_free_obj_wq);
free_eq_wq:
	destroy_workqueue(hdev->eq_wq);
free_cq_wq:
@@ -501,7 +507,7 @@ static void device_early_fini(struct hl_device *hdev)

	kfree(hdev->hl_chip_info);

	destroy_workqueue(hdev->sob_reset_wq);
	destroy_workqueue(hdev->ts_free_obj_wq);
	destroy_workqueue(hdev->eq_wq);
	destroy_workqueue(hdev->device_reset_work.wq);

@@ -610,7 +616,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
	u64 max_power, curr_power, dc_power, dividend;
	int rc;

	max_power = hdev->asic_prop.max_power_default;
	max_power = hdev->max_power;
	dc_power = hdev->asic_prop.dc_power_default;
	rc = hl_fw_cpucp_power_get(hdev, &curr_power);

@@ -644,9 +650,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en

		hdev->in_debug = 0;

		if (!hdev->reset_info.hard_reset_pending)
			hdev->asic_funcs->set_clock_gating(hdev);

		goto out;
	}

@@ -657,7 +660,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en
		goto out;
	}

	hdev->asic_funcs->disable_clock_gating(hdev);
	hdev->in_debug = 1;

out:
@@ -685,7 +687,8 @@ static void take_release_locks(struct hl_device *hdev)
	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}

static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
				bool skip_wq_flush)
{
	if (hard_reset)
		device_late_fini(hdev);
@@ -698,7 +701,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r
	hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev);
	hl_cs_rollback_all(hdev, skip_wq_flush);

	/* Release all pending user interrupts, each pending user interrupt
	 * holds a reference to user context
@@ -978,7 +981,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
			reset_upon_device_release = false, schedule_hard_reset = false;
			reset_upon_device_release = false, schedule_hard_reset = false,
			skip_wq_flush, delay_reset;
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	struct hl_ctx *ctx;
	int i, rc;
@@ -991,6 +995,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
	hard_reset = !!(flags & HL_DRV_RESET_HARD);
	from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
	fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
	skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
	delay_reset = !!(flags & HL_DRV_RESET_DELAY);

	if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
		hard_instead_soft = true;
@@ -1040,6 +1046,9 @@ do_reset:
		hdev->reset_info.in_reset = 1;
		spin_unlock(&hdev->reset_info.lock);

		if (delay_reset)
			usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);

		handle_reset_trigger(hdev, flags);

		/* This still allows the completion of some KDMA ops */
@@ -1076,7 +1085,7 @@ again:
		return 0;
	}

	cleanup_resources(hdev, hard_reset, fw_reset);
	cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);

kill_processes:
	if (hard_reset) {
@@ -1232,7 +1241,7 @@ kill_processes:
			goto out_err;
		}

		hl_set_max_power(hdev);
		hl_fw_set_max_power(hdev);
	} else {
		rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
		if (rc) {
@@ -1297,11 +1306,14 @@ out_err:
		hdev->reset_info.hard_reset_cnt++;
	} else if (reset_upon_device_release) {
		dev_err(hdev->dev, "Failed to reset device after user release\n");
		flags |= HL_DRV_RESET_HARD;
		flags &= ~HL_DRV_RESET_DEV_RELEASE;
		hard_reset = true;
		goto again;
	} else {
		dev_err(hdev->dev, "Failed to do soft-reset\n");
		hdev->reset_info.soft_reset_cnt++;
		flags |= HL_DRV_RESET_HARD;
		hard_reset = true;
		goto again;
	}
@@ -1538,7 +1550,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
	/* Need to call this again because the max power might change,
	 * depending on card type for certain ASICs
	 */
	hl_set_max_power(hdev);
	if (hdev->asic_prop.set_max_power_on_device_init)
		hl_fw_set_max_power(hdev);

	/*
	 * hl_hwmon_init() must be called after device_late_init(), because only
@@ -1682,7 +1695,7 @@ void hl_device_fini(struct hl_device *hdev)

	hl_hwmon_fini(hdev);

	cleanup_resources(hdev, true, false);
	cleanup_resources(hdev, true, false, false);

	/* Kill processes here after CS rollback. This is because the process
	 * can't really exit until all its CSs are done, which is what we

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2021 HabanaLabs, Ltd.
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

@@ -214,7 +214,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
	dma_addr_t pkt_dma_addr;
	struct hl_bd *sent_bd;
	u32 tmp, expected_ack_val, pi;
	int rc = 0;
	int rc;

	pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
								&pkt_dma_addr);
@@ -228,8 +228,11 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,

	mutex_lock(&hdev->send_cpu_message_lock);

	if (hdev->disabled)
	/* CPU-CP messages can be sent during soft-reset */
	if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
		rc = 0;
		goto out;
	}

	if (hdev->device_cpu_disabled) {
		rc = -EIO;
@@ -958,15 +961,17 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
						HL_CPUCP_INFO_TIMEOUT_USEC, &result);
	if (rc)
	if (rc) {
		dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
		return rc;
	}

	pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
	pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
	pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
	pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);

	return rc;
	return 0;
}

int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
@@ -1202,8 +1207,6 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
		hdev,
		cpu_boot_status_reg,
		status,
		(status == CPU_BOOT_STATUS_IN_UBOOT) ||
		(status == CPU_BOOT_STATUS_DRAM_RDY) ||
		(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
		(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
@@ -2682,3 +2685,138 @@ int hl_fw_init_cpu(struct hl_device *hdev)
		hl_fw_dynamic_init_cpu(hdev, fw_loader) :
		hl_fw_static_init_cpu(hdev, fw_loader);
}

void hl_fw_set_pll_profile(struct hl_device *hdev)
{
	hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
				hdev->asic_prop.max_freq_value);
}

int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
	long value;

	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;

	if (!hdev->pdev) {
		*cur_clk = 0;
		*max_clk = 0;
		return 0;
	}

	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);

	if (value < 0) {
		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", value);
		return value;
	}

	*max_clk = (value / 1000 / 1000);

	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);

	if (value < 0) {
		dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", value);
		return value;
	}

	*cur_clk = (value / 1000 / 1000);

	return 0;
}

long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
{
	struct cpucp_packet pkt;
	u32 used_pll_idx;
	u64 result;
	int rc;

	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
	if (rc)
		return rc;

	memset(&pkt, 0, sizeof(pkt));

	if (curr)
		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
						CPUCP_PKT_CTL_OPCODE_SHIFT);
	else
		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);

	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);

	if (rc) {
		dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n",
			used_pll_idx, rc);
		return rc;
	}

	return (long) result;
}

void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
{
	struct cpucp_packet pkt;
	u32 used_pll_idx;
	int rc;

	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
	if (rc)
		return;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
	pkt.value = cpu_to_le64(freq);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);

	if (rc)
		dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n",
			used_pll_idx, rc);
}

long hl_fw_get_max_power(struct hl_device *hdev)
{
	struct cpucp_packet pkt;
	u64 result;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);

	if (rc) {
		dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
		return rc;
	}

	return result;
}

void hl_fw_set_max_power(struct hl_device *hdev)
{
	struct cpucp_packet pkt;
	int rc;

	/* TODO: remove this after simulator supports this packet */
	if (!hdev->pdev)
		return;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.value = cpu_to_le64(hdev->max_power);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);

	if (rc)
		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
}

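hl_fw_get_max_power()/hl_fw_set_max_power() above replace the old hwmgr helpers, and a sysfs attribute pair is their typical consumer. A rough sketch of such a pair, with the attribute wiring omitted; the drvdata lookup and error handling are assumptions for illustration, not the driver's actual sysfs code:

	static ssize_t max_power_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
	{
		struct hl_device *hdev = dev_get_drvdata(dev);	/* assumed drvdata */
		long val = hl_fw_get_max_power(hdev);

		if (val < 0)
			return val;
		return sprintf(buf, "%lu\n", (unsigned long) val);
	}

	static ssize_t max_power_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
	{
		struct hl_device *hdev = dev_get_drvdata(dev);
		unsigned long value;
		int rc = kstrtoul(buf, 0, &value);

		if (rc)
			return rc;

		hdev->max_power = value;	/* cached so it survives hard reset */
		hl_fw_set_max_power(hdev);
		return count;
	}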
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright 2016-2021 HabanaLabs, Ltd.
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */
@@ -31,14 +31,15 @@
#define HL_NAME "habanalabs"

/* Use upper bits of mmap offset to store habana driver specific information.
 * bits[63:61] - Encode mmap type
 * bits[63:59] - Encode mmap type
 * bits[45:0]  - mmap offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
 *  defines are w.r.t to PAGE_SIZE
 */
#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT)
#define HL_MMAP_TYPE_MASK (0x7ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_SHIFT (59 - PAGE_SHIFT)
#define HL_MMAP_TYPE_MASK (0x1full << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_TS_BUFF (0x10ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT)

@@ -141,6 +142,9 @@ enum hl_mmu_page_table_location {
 *
 * - HL_DRV_RESET_FW_FATAL_ERR
 *       Set if reset is due to a fatal error from FW
 *
 * - HL_DRV_RESET_DELAY
 *       Set if a delay should be added before the reset
 */

#define HL_DRV_RESET_HARD (1 << 0)
@@ -150,6 +154,7 @@ enum hl_mmu_page_table_location {
#define HL_DRV_RESET_DEV_RELEASE (1 << 4)
#define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5)
#define HL_DRV_RESET_FW_FATAL_ERR (1 << 6)
#define HL_DRV_RESET_DELAY (1 << 7)

#define HL_MAX_SOBS_PER_MONITOR 8

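Widening the type field from bits [63:61] to [63:59] is what makes room for HL_MMAP_TYPE_TS_BUFF alongside the existing CB and BLOCK types; the shifts subtract PAGE_SHIFT because vm_pgoff is expressed in pages. A small sketch of how an mmap handler classifies a request with these defines (illustration only, mirroring the pattern used in hl_mmap):

	#include <linux/mm.h>

	/* Classify an incoming mmap() by the type bits stored in vm_pgoff. */
	static int example_classify_mmap(struct vm_area_struct *vma)
	{
		u64 type = vma->vm_pgoff & HL_MMAP_TYPE_MASK;

		if (type == HL_MMAP_TYPE_TS_BUFF)
			return 1;	/* timestamp buffer mapping */
		if (type == HL_MMAP_TYPE_CB || type == HL_MMAP_TYPE_BLOCK)
			return 0;	/* existing mapping types */
		return -EINVAL;
	}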
@@ -402,8 +407,11 @@ enum hl_device_hw_state {
 * @hop4_mask: mask to get the PTE address in hop 4.
 * @hop5_mask: mask to get the PTE address in hop 5.
 * @last_mask: mask to get the bit indicating this is the last hop.
 * @pgt_size: size for page tables.
 * @page_size: default page size used to allocate memory.
 * @num_hops: The amount of hops supported by the translation table.
 * @hop_table_size: HOP table size.
 * @hop0_tables_total_size: total size for all HOP0 tables.
 * @host_resident: Should the MMU page table reside in host memory or in the
 *                 device DRAM.
 */
@@ -423,8 +431,11 @@ struct hl_mmu_properties {
	u64 hop4_mask;
	u64 hop5_mask;
	u64 last_mask;
	u64 pgt_size;
	u32 page_size;
	u32 num_hops;
	u32 hop_table_size;
	u32 hop0_tables_total_size;
	u8 host_resident;
};

@@ -554,6 +565,9 @@ struct hl_hints_range {
 *                              use-case of doing soft-reset in training (due
 *                              to the fact that training runs on multiple
 *                              devices)
 * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
 * @set_max_power_on_device_init: true if need to set max power in F/W on device init.
 * @supports_user_set_page_size: true if user can set the allocation page size.
 */
struct asic_fixed_properties {
	struct hw_queue_properties *hw_queues_props;
@@ -637,6 +651,9 @@ struct asic_fixed_properties {
	u8 use_get_power_for_reset_history;
	u8 supports_soft_reset;
	u8 allow_inference_soft_reset;
	u8 configurable_stop_on_err;
	u8 set_max_power_on_device_init;
	u8 supports_user_set_page_size;
};

/**
@@ -703,6 +720,40 @@ struct hl_cb_mgr {
	struct idr cb_handles; /* protected by cb_lock */
};

/**
 * struct hl_ts_mgr - describes the timestamp registration memory manager.
 * @ts_lock: protects ts_handles.
 * @ts_handles: an idr to hold all ts bufferes handles.
 */
struct hl_ts_mgr {
	spinlock_t ts_lock;
	struct idr ts_handles;
};

/**
 * struct hl_ts_buff - describes a timestamp buffer.
 * @refcount: reference counter for usage of the buffer.
 * @hdev: pointer to device this buffer belongs to.
 * @mmap: true if the buff is currently mapped to user.
 * @kernel_buff_address: Holds the internal buffer's kernel virtual address.
 * @user_buff_address: Holds the user buffer's kernel virtual address.
 * @id: the buffer ID.
 * @mmap_size: Holds the buffer size that was mmaped.
 * @kernel_buff_size: Holds the internal kernel buffer size.
 * @user_buff_size: Holds the user buffer size.
 */
struct hl_ts_buff {
	struct kref refcount;
	struct hl_device *hdev;
	atomic_t mmap;
	void *kernel_buff_address;
	void *user_buff_address;
	u32 id;
	u32 mmap_size;
	u32 kernel_buff_size;
	u32 user_buff_size;
};

/**
 * struct hl_cb - describes a Command Buffer.
 * @refcount: reference counter for usage of the CB.
@@ -880,9 +931,54 @@ struct hl_user_interrupt {
	u32 interrupt_id;
};

/**
 * struct timestamp_reg_free_node - holds the timestamp registration free objects node
 * @free_objects_node: node in the list free_obj_jobs
 * @cq_cb: pointer to cq command buffer to be freed
 * @ts_buff: pointer to timestamp buffer to be freed
 */
struct timestamp_reg_free_node {
	struct list_head free_objects_node;
	struct hl_cb *cq_cb;
	struct hl_ts_buff *ts_buff;
};

/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
 * the job will be to pass over the free_obj_jobs list and put refcount to objects
 * in each node of the list
 * @free_obj: workqueue object to free timestamp registration node objects
 * @hdev: pointer to the device structure
 * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
 */
struct timestamp_reg_work_obj {
	struct work_struct free_obj;
	struct hl_device *hdev;
	struct list_head *free_obj_head;
};

/* struct timestamp_reg_info - holds the timestamp registration related data.
 * @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers.
 *           relevant only when doing timestamps records registration.
 * @cq_cb: pointer to CQ counter CB.
 * @timestamp_kernel_addr: timestamp handle address, where to set timestamp
 *                         relevant only when doing timestamps records
 *                         registration.
 * @in_use: indicates if the node already in use. relevant only when doing
 *          timestamps records registration, since in this case the driver
 *          will have it's own buffer which serve as a records pool instead of
 *          allocating records dynamically.
 */
struct timestamp_reg_info {
	struct hl_ts_buff *ts_buff;
	struct hl_cb *cq_cb;
	u64 *timestamp_kernel_addr;
	u8 in_use;
};

/**
 * struct hl_user_pending_interrupt - holds a context to a user thread
 *                                    pending on an interrupt
 * @ts_reg_info: holds the timestamps registration nodes info
 * @wait_list_node: node in the list of user threads pending on an interrupt
 * @fence: hl fence object for interrupt completion
 * @cq_target_value: CQ target value
@@ -890,10 +986,11 @@ struct hl_user_interrupt {
 *                  handler for taget value comparison
 */
struct hl_user_pending_interrupt {
	struct list_head wait_list_node;
	struct hl_fence fence;
	u64 cq_target_value;
	u64 *cq_kernel_addr;
	struct timestamp_reg_info ts_reg_info;
	struct list_head wait_list_node;
	struct hl_fence fence;
	u64 cq_target_value;
	u64 *cq_kernel_addr;
};

/**
@@ -1155,7 +1252,6 @@ struct fw_load_mgr {
 *                         internal memory via DMA engine.
 * @add_device_attr: add ASIC specific device attributes.
 * @handle_eqe: handle event queue entry (IRQ) from CPU-CP.
 * @set_pll_profile: change PLL profile (manual/automatic).
 * @get_events_stat: retrieve event queue entries histogram.
 * @read_pte: read MMU page table entry from DRAM.
 * @write_pte: write MMU page table entry to DRAM.
@@ -1164,9 +1260,6 @@ struct fw_load_mgr {
 * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
 *                              ASID-VA-size mask.
 * @send_heartbeat: send is-alive packet to CPU-CP and verify response.
 * @set_clock_gating: enable/disable clock gating per engine according to
 *                    clock gating mask in hdev
 * @disable_clock_gating: disable clock gating completely
 * @debug_coresight: perform certain actions on Coresight for debugging.
 * @is_device_idle: return true if device is idle, false otherwise.
 * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset
@@ -1187,7 +1280,6 @@ struct fw_load_mgr {
 * @halt_coresight: stop the ETF and ETR traces.
 * @ctx_init: context dependent initialization.
 * @ctx_fini: context dependent cleanup.
 * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
 * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
 * @load_firmware_to_device: load the firmware to the device's memory
 * @load_boot_fit_to_device: load boot fit to device's memory
@@ -1225,6 +1317,8 @@ struct fw_load_mgr {
 * @get_sob_addr: get SOB base address offset.
 * @set_pci_memory_regions: setting properties of PCI memory regions
 * @get_stream_master_qid_arr: get pointer to stream masters QID array
 * @is_valid_dram_page_size: return true if page size is supported in device
 *                           memory allocation, otherwise false.
 */
struct hl_asic_funcs {
	int (*early_init)(struct hl_device *hdev);
@@ -1285,12 +1379,10 @@ struct hl_asic_funcs {
				bool user_address, u64 val);
	int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr);
	void (*add_device_attr)(struct hl_device *hdev,
				struct attribute_group *dev_attr_grp);
	void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp);
	void (*handle_eqe)(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry);
	void (*set_pll_profile)(struct hl_device *hdev,
			enum hl_pll_frequency freq);
	void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
				u32 *size);
	u64 (*read_pte)(struct hl_device *hdev, u64 addr);
@@ -1300,8 +1392,6 @@ struct hl_asic_funcs {
	int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
			u32 flags, u32 asid, u64 va, u64 size);
	int (*send_heartbeat)(struct hl_device *hdev);
	void (*set_clock_gating)(struct hl_device *hdev);
	void (*disable_clock_gating)(struct hl_device *hdev);
	int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
	bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
				u8 mask_len, struct seq_file *s);
@@ -1320,7 +1410,6 @@ struct hl_asic_funcs {
	void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx);
	int (*ctx_init)(struct hl_ctx *ctx);
	void (*ctx_fini)(struct hl_ctx *ctx);
	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
	int (*load_firmware_to_device)(struct hl_device *hdev);
	int (*load_boot_fit_to_device)(struct hl_device *hdev);
@@ -1355,6 +1444,7 @@ struct hl_asic_funcs {
	u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
	void (*set_pci_memory_regions)(struct hl_device *hdev);
	u32* (*get_stream_master_qid_arr)(void);
	bool (*is_valid_dram_page_size)(u32 page_size);
};

@@ -1742,6 +1832,8 @@ struct hl_vm_hw_block_list_node {
 * @pages: the physical page array.
 * @npages: num physical pages in the pack.
 * @total_size: total size of all the pages in this list.
 * @node: used to attach to deletion list that is used when all the allocations are cleared
 *        at the teardown of the context.
 * @mapping_cnt: number of shared mappings.
 * @exporting_cnt: number of dma-buf exporting.
 * @asid: the context related to this list.
@@ -1757,6 +1849,7 @@ struct hl_vm_phys_pg_pack {
	u64 *pages;
	u64 npages;
	u64 total_size;
	struct list_head node;
	atomic_t mapping_cnt;
	u32 exporting_cnt;
	u32 asid;
@@ -1834,6 +1927,7 @@ struct hl_debug_params {
 * @ctx: current executing context. TODO: remove for multiple ctx per process
 * @ctx_mgr: context manager to handle multiple context for this FD.
 * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
 * @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers.
 * @debugfs_list: list of relevant ASIC debugfs.
 * @dev_node: node in the device list of file private data
 * @refcount: number of related contexts.
@@ -1846,6 +1940,7 @@ struct hl_fpriv {
	struct hl_ctx *ctx;
	struct hl_ctx_mgr ctx_mgr;
	struct hl_cb_mgr cb_mgr;
	struct hl_ts_mgr ts_mem_mgr;
	struct list_head debugfs_list;
	struct list_head dev_node;
	struct kref refcount;
@@ -2518,7 +2613,7 @@ struct hl_reset_info {
 * @cq_wq: work queues of completion queues for executing work in process
 *         context.
 * @eq_wq: work queue of event queue for executing work in process context.
 * @sob_reset_wq: work queue for sob reset executions.
 * @ts_free_obj_wq: work queue for timestamp registration objects release.
 * @kernel_ctx: Kernel driver context structure.
 * @kernel_queues: array of hl_hw_queue.
 * @cs_mirror_list: CS mirror list for TDR.
@@ -2569,9 +2664,6 @@ struct hl_reset_info {
 * @max_power: the max power of the device, as configured by the sysadmin. This
 *             value is saved so in case of hard-reset, the driver will restore
 *             this value and update the F/W after the re-initialization
 * @clock_gating_mask: is clock gating enabled. bitmask that represents the
 *                     different engines. See debugfs-driver-habanalabs for
 *                     details.
 * @boot_error_status_mask: contains a mask of the device boot error status.
 *                          Each bit represents a different error, according to
 *                          the defines in hl_boot_if.h. If the bit is cleared,
@@ -2611,8 +2703,6 @@ struct hl_reset_info {
 * @in_debug: whether the device is in a state where the profiling/tracing infrastructure
 *            can be used. This indication is needed because in some ASICs we need to do
 *            specific operations to enable that infrastructure.
 * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
 *                           only to POWER9 machines.
 * @cdev_sysfs_created: were char devices and sysfs nodes created.
 * @stop_on_err: true if engines should stop on error.
 * @supports_sync_stream: is sync stream supported.
@@ -2651,7 +2741,7 @@ struct hl_device {
	struct hl_user_interrupt common_user_interrupt;
	struct workqueue_struct **cq_wq;
	struct workqueue_struct *eq_wq;
	struct workqueue_struct *sob_reset_wq;
	struct workqueue_struct *ts_free_obj_wq;
	struct hl_ctx *kernel_ctx;
	struct hl_hw_queue *kernel_queues;
	struct list_head cs_mirror_list;
@@ -2710,7 +2800,6 @@ struct hl_device {
	atomic64_t dram_used_mem;
	u64 timeout_jiffies;
	u64 max_power;
	u64 clock_gating_mask;
	u64 boot_error_status_mask;
	u64 dram_pci_bar_start;
	u64 last_successful_open_jif;
@@ -2736,7 +2825,6 @@ struct hl_device {
	u8 device_cpu_disabled;
	u8 dma_mask;
	u8 in_debug;
	u8 power9_64bit_dma_enable;
	u8 cdev_sysfs_created;
	u8 stop_on_err;
	u8 supports_sync_stream;
@@ -2970,7 +3058,7 @@ int hl_cb_pool_fini(struct hl_device *hdev);
int hl_cb_va_pool_init(struct hl_ctx *ctx);
void hl_cb_va_pool_fini(struct hl_ctx *ctx);

void hl_cs_rollback_all(struct hl_device *hdev);
void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
@@ -3024,6 +3112,9 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags);
int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
					u32 flags, u32 asid, u64 va, u64 size);
u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte);
u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
					u8 hop_idx, u64 hop_addr, u64 virt_addr);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
@@ -3094,39 +3185,26 @@ enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr);
int hl_pci_init(struct hl_device *hdev);
void hl_pci_fini(struct hl_device *hdev);

long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
		bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
		u64 freq);
int hl_get_temperature(struct hl_device *hdev,
		int sensor_index, u32 attr, long *value);
int hl_set_temperature(struct hl_device *hdev,
		int sensor_index, u32 attr, long value);
int hl_get_voltage(struct hl_device *hdev,
		int sensor_index, u32 attr, long *value);
int hl_get_current(struct hl_device *hdev,
		int sensor_index, u32 attr, long *value);
int hl_get_fan_speed(struct hl_device *hdev,
		int sensor_index, u32 attr, long *value);
int hl_get_pwm_info(struct hl_device *hdev,
		int sensor_index, u32 attr, long *value);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
		long value);
u64 hl_get_max_power(struct hl_device *hdev);
void hl_set_max_power(struct hl_device *hdev);
int hl_set_voltage(struct hl_device *hdev,
		int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,
		int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev,
		int sensor_index, u32 attr, long value);
int hl_get_power(struct hl_device *hdev,
		int sensor_index, u32 attr, long *value);
int hl_get_clk_rate(struct hl_device *hdev,
		u32 *cur_clk, u32 *max_clk);
void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void hl_add_device_attr(struct hl_device *hdev,
		struct attribute_group *dev_attr_grp);
long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_set_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value);
long hl_fw_get_max_power(struct hl_device *hdev);
void hl_fw_set_max_power(struct hl_device *hdev);
int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_get_power(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
void hl_fw_set_pll_profile(struct hl_device *hdev);
void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp);
void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp);

void hw_sob_get(struct hl_hw_sob *hw_sob);
void hw_sob_put(struct hl_hw_sob *hw_sob);
void hl_encaps_handle_do_release(struct kref *ref);
@@ -3146,6 +3224,11 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
					const char *format, ...);
char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
void hl_ts_mgr_init(struct hl_ts_mgr *mgr);
void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr);
int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle);
void hl_ts_put(struct hl_ts_buff *buff);

#ifdef CONFIG_DEBUG_FS

@@ -140,6 +140,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
||||
|
||||
hl_cb_mgr_init(&hpriv->cb_mgr);
|
||||
hl_ctx_mgr_init(&hpriv->ctx_mgr);
|
||||
hl_ts_mgr_init(&hpriv->ts_mem_mgr);
|
||||
|
||||
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
|
||||
|
||||
@@ -184,6 +185,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
||||
out_err:
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
|
||||
hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
|
||||
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
|
||||
filp->private_data = NULL;
|
||||
mutex_destroy(&hpriv->restore_phase_mutex);
|
||||
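The open path now initializes the timestamp manager alongside the CB and context managers and tears it down on the error path in reverse order. Below is a minimal, compilable C sketch of that init/unwind ordering; the *_mgr_init/_fini names are stand-ins for illustration, not the driver's real helpers.

#include <stdio.h>

/* Hypothetical stand-ins for the per-fd managers; real names differ. */
static int cb_mgr_init(void)  { return 0; }
static void cb_mgr_fini(void) { puts("cb_mgr_fini"); }
static int ts_mgr_init(void)  { return 0; }
static void ts_mgr_fini(void) { puts("ts_mgr_fini"); }
static int ctx_mgr_init(void) { return -1; /* force the error path */ }

static int device_open(void)
{
	int rc;

	rc = cb_mgr_init();
	if (rc)
		return rc;
	rc = ts_mgr_init();
	if (rc)
		goto fini_cb;
	rc = ctx_mgr_init();
	if (rc)
		goto fini_ts;
	return 0;

fini_ts:
	ts_mgr_fini();	/* tear down in reverse order of init */
fini_cb:
	cb_mgr_fini();
	return rc;
}

int main(void)
{
	printf("open rc = %d\n", device_open());
	return 0;
}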
@@ -256,7 +258,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
|
||||
hdev->cpu_queues_enable = 1;
|
||||
hdev->heartbeat = 1;
|
||||
hdev->mmu_enable = 1;
|
||||
hdev->clock_gating_mask = ULONG_MAX;
|
||||
hdev->sram_scrambler_enable = 1;
|
||||
hdev->dram_scrambler_enable = 1;
|
||||
hdev->bmc_enable = 1;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||
* Copyright 2016-2022 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
@@ -92,8 +92,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
|
||||
hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
|
||||
hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
|
||||
|
||||
hw_ip.first_available_interrupt_id =
|
||||
prop->first_available_user_msix_interrupt;
|
||||
hw_ip.first_available_interrupt_id = prop->first_available_user_msix_interrupt;
|
||||
hw_ip.number_of_user_interrupts = prop->user_interrupt_count;
|
||||
hw_ip.server_type = prop->server_type;
|
||||
|
||||
return copy_to_user(out, &hw_ip,
|
||||
@@ -251,13 +251,12 @@ static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
|
||||
if ((!max_size) || (!out))
|
||||
return -EINVAL;
|
||||
|
||||
rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
|
||||
&clk_rate.max_clk_rate_mhz);
|
||||
rc = hl_fw_get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, &clk_rate.max_clk_rate_mhz);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
return copy_to_user(out, &clk_rate,
|
||||
min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
|
||||
return copy_to_user(out, &clk_rate, min_t(size_t, max_size, sizeof(clk_rate)))
|
||||
? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
|
||||
|
||||
@@ -1,117 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2019-2021 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "habanalabs.h"
|
||||
|
||||
void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
|
||||
{
|
||||
hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
|
||||
hdev->asic_prop.max_freq_value);
|
||||
}
|
||||
|
||||
int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
|
||||
{
|
||||
long value;
|
||||
|
||||
if (!hl_device_operational(hdev, NULL))
|
||||
return -ENODEV;
|
||||
|
||||
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
|
||||
|
||||
if (value < 0) {
|
||||
dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
|
||||
value);
|
||||
return value;
|
||||
}
|
||||
|
||||
*max_clk = (value / 1000 / 1000);
|
||||
|
||||
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
|
||||
|
||||
if (value < 0) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to retrieve device current clock %ld\n",
|
||||
value);
|
||||
return value;
|
||||
}
|
||||
|
||||
*cur_clk = (value / 1000 / 1000);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t clk_max_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
long value;
|
||||
|
||||
if (!hl_device_operational(hdev, NULL))
|
||||
return -ENODEV;
|
||||
|
||||
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
|
||||
|
||||
hdev->asic_prop.max_freq_value = value;
|
||||
|
||||
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
|
||||
}
|
||||
|
||||
static ssize_t clk_max_freq_mhz_store(struct device *dev,
|
||||
struct device_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
int rc;
|
||||
u64 value;
|
||||
|
||||
if (!hl_device_operational(hdev, NULL)) {
|
||||
count = -ENODEV;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
rc = kstrtoull(buf, 0, &value);
|
||||
if (rc) {
|
||||
count = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
hdev->asic_prop.max_freq_value = value * 1000 * 1000;
|
||||
|
||||
hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
|
||||
hdev->asic_prop.max_freq_value);
|
||||
|
||||
fail:
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t clk_cur_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
long value;
|
||||
|
||||
if (!hl_device_operational(hdev, NULL))
|
||||
return -ENODEV;
|
||||
|
||||
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
|
||||
|
||||
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_RW(clk_max_freq_mhz);
|
||||
static DEVICE_ATTR_RO(clk_cur_freq_mhz);
|
||||
|
||||
static struct attribute *hl_dev_attrs[] = {
|
||||
&dev_attr_clk_max_freq_mhz.attr,
|
||||
&dev_attr_clk_cur_freq_mhz.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
void hl_add_device_attr(struct hl_device *hdev,
|
||||
struct attribute_group *dev_attr_grp)
|
||||
{
|
||||
dev_attr_grp->attrs = hl_dev_attrs;
|
||||
}
|
||||
@@ -137,22 +137,137 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/*
 * hl_ts_free_objects - handler of the free objects workqueue.
 * This function puts the refcounts that the registration node took on its
 * objects.
 * @work: workqueue object pointer
 */
static void hl_ts_free_objects(struct work_struct *work)
|
||||
{
|
||||
struct timestamp_reg_work_obj *job =
|
||||
container_of(work, struct timestamp_reg_work_obj, free_obj);
|
||||
struct timestamp_reg_free_node *free_obj, *temp_free_obj;
|
||||
struct list_head *free_list_head = job->free_obj_head;
|
||||
struct hl_device *hdev = job->hdev;
|
||||
|
||||
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
|
||||
dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n",
|
||||
free_obj->ts_buff,
|
||||
free_obj->cq_cb);
|
||||
|
||||
hl_ts_put(free_obj->ts_buff);
|
||||
hl_cb_put(free_obj->cq_cb);
|
||||
kfree(free_obj);
|
||||
}
|
||||
|
||||
kfree(free_list_head);
|
||||
kfree(job);
|
||||
}
|
||||
|
||||
/*
 * This function is called with the wait_list_lock spinlock held.
 * It sets the timestamp and deletes the registration node from the wait list.
 * Because we hold the spinlock, we cannot put the refcounts on the objects
 * here: the release functions may be long (and may sleep), which cannot be
 * handled in IRQ context. Instead we fill a list of "put" jobs and send it
 * to a dedicated workqueue that performs the actual puts.
 */
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
|
||||
struct list_head **free_list)
|
||||
{
|
||||
struct timestamp_reg_free_node *free_node;
|
||||
u64 timestamp;
|
||||
|
||||
if (!(*free_list)) {
|
||||
/* Alloc/Init the timestamp registration free objects list */
|
||||
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
|
||||
if (!(*free_list))
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(*free_list);
|
||||
}
|
||||
|
||||
free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
|
||||
if (!free_node)
|
||||
return -ENOMEM;
|
||||
|
||||
timestamp = ktime_get_ns();
|
||||
|
||||
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
|
||||
|
||||
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
|
||||
pend->ts_reg_info.timestamp_kernel_addr,
|
||||
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
|
||||
|
||||
list_del(&pend->wait_list_node);
|
||||
|
||||
/* Mark kernel CB node as free */
|
||||
pend->ts_reg_info.in_use = 0;
|
||||
|
||||
/* Putting the refcount for ts_buff and cq_cb objects will be handled
|
||||
* in workqueue context, just add job to free_list.
|
||||
*/
|
||||
free_node->ts_buff = pend->ts_reg_info.ts_buff;
|
||||
free_node->cq_cb = pend->ts_reg_info.cq_cb;
|
||||
list_add(&free_node->free_objects_node, *free_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void handle_user_cq(struct hl_device *hdev,
|
||||
struct hl_user_interrupt *user_cq)
|
||||
{
|
||||
struct hl_user_pending_interrupt *pend;
|
||||
struct hl_user_pending_interrupt *pend, *temp_pend;
|
||||
struct list_head *ts_reg_free_list_head = NULL;
|
||||
struct timestamp_reg_work_obj *job;
|
||||
bool reg_node_handle_fail = false;
|
||||
ktime_t now = ktime_get();
|
||||
int rc;
|
||||
|
||||
/* For registration nodes:
 * As part of handling the registration nodes, we need to put refcounts on
 * some objects. The problem is that we cannot do that under the spinlock,
 * or in IRQ-handler context at all (the release functions are long and
 * might sleep), so that part must be handled in workqueue context.
 * To avoid dealing with a kmalloc failure, which would force us to roll
 * back and move nodes from the free list back to the interrupt wait list,
 * we always allocate the workqueue job at the beginning.
 */
job = kmalloc(sizeof(*job), GFP_ATOMIC);
|
||||
if (!job)
|
||||
return;
|
||||
|
||||
spin_lock(&user_cq->wait_list_lock);
|
||||
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
|
||||
if ((pend->cq_kernel_addr &&
|
||||
*(pend->cq_kernel_addr) >= pend->cq_target_value) ||
|
||||
list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
|
||||
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
|
||||
!pend->cq_kernel_addr) {
|
||||
pend->fence.timestamp = now;
|
||||
complete_all(&pend->fence.completion);
|
||||
if (pend->ts_reg_info.ts_buff) {
|
||||
if (!reg_node_handle_fail) {
|
||||
rc = handle_registration_node(hdev, pend,
|
||||
&ts_reg_free_list_head);
|
||||
if (rc)
|
||||
reg_node_handle_fail = true;
|
||||
}
|
||||
} else {
|
||||
/* Handle wait target value node */
|
||||
pend->fence.timestamp = now;
|
||||
complete_all(&pend->fence.completion);
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock(&user_cq->wait_list_lock);
|
||||
|
||||
if (ts_reg_free_list_head) {
|
||||
INIT_WORK(&job->free_obj, hl_ts_free_objects);
|
||||
job->free_obj_head = ts_reg_free_list_head;
|
||||
job->hdev = hdev;
|
||||
queue_work(hdev->ts_free_obj_wq, &job->free_obj);
|
||||
} else {
|
||||
kfree(job);
|
||||
}
|
||||
}
|
||||
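The two functions above implement a common deferral pattern: refcounts cannot be dropped under the wait-list spinlock (the release paths are long and may sleep), so "put" jobs are collected on a list and handed to a dedicated workqueue. A generic, hedged sketch of the same idea follows; all demo_* names are invented for illustration and are not part of this patch.

#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/kref.h>

/* Hypothetical object whose release path may sleep. */
struct demo_obj {
	struct kref refcount;
	struct list_head node;
};

struct demo_free_work {
	struct work_struct work;
	struct list_head free_list;	/* demo_obj.node entries, filled by the caller */
};

static void demo_obj_release(struct kref *kref)
{
	kfree(container_of(kref, struct demo_obj, refcount));
}

/* Runs in process context; safe to drop potentially-sleeping references here. */
static void demo_free_worker(struct work_struct *work)
{
	struct demo_free_work *job = container_of(work, struct demo_free_work, work);
	struct demo_obj *obj, *tmp;

	list_for_each_entry_safe(obj, tmp, &job->free_list, node) {
		list_del(&obj->node);
		kref_put(&obj->refcount, demo_obj_release);
	}
	kfree(job);
}

/*
 * Called from atomic context (e.g. under a spinlock in an IRQ handler):
 * only queue the work, never put the last reference here.
 */
static void demo_defer_put(struct workqueue_struct *wq, struct demo_free_work *job)
{
	INIT_WORK(&job->work, demo_free_worker);
	queue_work(wq, &job->work);
}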
|
||||
/**
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/pci-p2pdma.h>
|
||||
|
||||
MODULE_IMPORT_NS(DMA_BUF);
|
||||
@@ -20,6 +21,34 @@ MODULE_IMPORT_NS(DMA_BUF);
|
||||
/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
|
||||
#define DRAM_POOL_PAGE_SIZE SZ_8M
|
||||
|
||||
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
|
||||
struct hl_mem_in *args, u64 *handle);
|
||||
|
||||
static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u32 psize;
|
||||
|
||||
/*
|
||||
* for ASIC that supports setting the allocation page size by user we will address
|
||||
* user's choice only if it is not 0 (as 0 means taking the default page size)
|
||||
*/
|
||||
if (prop->supports_user_set_page_size && args->alloc.page_size) {
|
||||
psize = args->alloc.page_size;
|
||||
|
||||
if (!hdev->asic_funcs->is_valid_dram_page_size(psize)) {
|
||||
dev_err(hdev->dev, "user page size (%#x) is not valid\n", psize);
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
psize = hdev->asic_prop.dram_page_size;
|
||||
}
|
||||
|
||||
*page_size = psize;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The va ranges in context object contain a list with the available chunks of
|
||||
* device virtual memory.
|
||||
@@ -61,11 +90,15 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
|
||||
struct hl_vm_phys_pg_pack *phys_pg_pack;
|
||||
u64 paddr = 0, total_size, num_pgs, i;
|
||||
u32 num_curr_pgs, page_size;
|
||||
int handle, rc;
|
||||
bool contiguous;
|
||||
int handle, rc;
|
||||
|
||||
num_curr_pgs = 0;
|
||||
page_size = hdev->asic_prop.dram_page_size;
|
||||
|
||||
rc = set_alloc_page_size(hdev, args, &page_size);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
|
||||
total_size = num_pgs * page_size;
|
||||
|
||||
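Allocation sizes are rounded up to whole DRAM pages: DIV_ROUND_UP_ULL() yields the page count and the real reservation is num_pgs * page_size, using either the user-supplied page size validated by set_alloc_page_size() or the ASIC default. A small standalone C example of that arithmetic, with made-up sizes, is below.

#include <stdio.h>
#include <stdint.h>

/* Same rounding as the kernel's DIV_ROUND_UP_ULL() macro. */
static uint64_t div_round_up(uint64_t n, uint64_t d)
{
	return (n + d - 1) / d;
}

int main(void)
{
	uint64_t mem_size = 100ULL << 20;	/* 100 MiB requested (example) */
	uint64_t page_size = 32ULL << 20;	/* 32 MiB DRAM pages (example) */
	uint64_t num_pgs = div_round_up(mem_size, page_size);
	uint64_t total_size = num_pgs * page_size;

	/* 100 MiB over 32 MiB pages -> 4 pages, 128 MiB actually reserved */
	printf("pages=%llu total=%llu MiB\n",
	       (unsigned long long)num_pgs,
	       (unsigned long long)(total_size >> 20));
	return 0;
}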
@@ -77,7 +110,11 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
|
||||
contiguous = args->flags & HL_MEM_CONTIGUOUS;
|
||||
|
||||
if (contiguous) {
|
||||
paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
|
||||
if (is_power_of_2(page_size))
|
||||
paddr = (u64) (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool,
|
||||
total_size, NULL, page_size);
|
||||
else
|
||||
paddr = (u64) (uintptr_t) gen_pool_alloc(vm->dram_pg_pool, total_size);
|
||||
if (!paddr) {
|
||||
dev_err(hdev->dev,
|
||||
"failed to allocate %llu contiguous pages with total size of %llu\n",
|
||||
@@ -111,9 +148,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
|
||||
phys_pg_pack->pages[i] = paddr + i * page_size;
|
||||
} else {
|
||||
for (i = 0 ; i < num_pgs ; i++) {
|
||||
phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
|
||||
vm->dram_pg_pool,
|
||||
page_size);
|
||||
if (is_power_of_2(page_size))
|
||||
phys_pg_pack->pages[i] =
|
||||
(u64) gen_pool_dma_alloc_align(vm->dram_pg_pool,
|
||||
page_size, NULL,
|
||||
page_size);
|
||||
else
|
||||
phys_pg_pack->pages[i] = (u64) gen_pool_alloc(vm->dram_pg_pool,
|
||||
page_size);
|
||||
if (!phys_pg_pack->pages[i]) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate device memory (out of memory)\n");
|
||||
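For power-of-2 page sizes the patch switches the DRAM pool allocations to gen_pool_dma_alloc_align(), so every chunk is aligned to its own size; non-power-of-2 sizes keep the plain gen_pool_alloc() path. A minimal sketch of that choice follows, assuming a pool already created with gen_pool_create() and seeded with gen_pool_add() (dram_pg_pool plays that role in the driver); demo_alloc_page is an invented helper name.

#include <linux/genalloc.h>
#include <linux/log2.h>
#include <linux/types.h>

/*
 * Sketch only: allocate one device-memory chunk of 'page_size' bytes from a
 * gen_pool, aligning power-of-2 sizes to themselves as the patch does.
 */
static u64 demo_alloc_page(struct gen_pool *pool, u32 page_size)
{
	if (is_power_of_2(page_size))
		return (u64)(uintptr_t)gen_pool_dma_alloc_align(pool, page_size,
								NULL, page_size);

	return (u64)gen_pool_alloc(pool, page_size);
}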
@@ -652,7 +694,7 @@ static u64 get_va_block(struct hl_device *hdev,
|
||||
continue;
|
||||
|
||||
/*
|
||||
* In case hint address is 0, and arc_hints_range_reservation
|
||||
* In case hint address is 0, and hints_range_reservation
|
||||
* property enabled, then avoid allocating va blocks from the
|
||||
* range reserved for hint addresses
|
||||
*/
|
||||
@@ -1967,16 +2009,15 @@ err_dec_exporting_cnt:
|
||||
static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_ctx *ctx = hpriv->ctx;
|
||||
u64 block_handle, device_addr = 0;
|
||||
struct hl_ctx *ctx = hpriv->ctx;
|
||||
u32 handle = 0, block_size;
|
||||
int rc, dmabuf_fd = -EBADF;
|
||||
int rc;
|
||||
|
||||
switch (args->in.op) {
|
||||
case HL_MEM_OP_ALLOC:
|
||||
if (args->in.alloc.mem_size == 0) {
|
||||
dev_err(hdev->dev,
|
||||
"alloc size must be larger than 0\n");
|
||||
dev_err(hdev->dev, "alloc size must be larger than 0\n");
|
||||
rc = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@@ -1997,15 +2038,14 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
|
||||
|
||||
case HL_MEM_OP_MAP:
|
||||
if (args->in.flags & HL_MEM_USERPTR) {
|
||||
device_addr = args->in.map_host.host_virt_addr;
|
||||
rc = 0;
|
||||
dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n");
|
||||
rc = -EPERM;
|
||||
} else {
|
||||
rc = get_paddr_from_handle(ctx, &args->in,
|
||||
&device_addr);
|
||||
rc = get_paddr_from_handle(ctx, &args->in, &device_addr);
|
||||
memset(args, 0, sizeof(*args));
|
||||
args->out.device_virt_addr = device_addr;
|
||||
}
|
||||
|
||||
memset(args, 0, sizeof(*args));
|
||||
args->out.device_virt_addr = device_addr;
|
||||
break;
|
||||
|
||||
case HL_MEM_OP_UNMAP:
|
||||
@@ -2013,22 +2053,19 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
|
||||
break;
|
||||
|
||||
case HL_MEM_OP_MAP_BLOCK:
|
||||
rc = map_block(hdev, args->in.map_block.block_addr,
|
||||
&block_handle, &block_size);
|
||||
rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size);
|
||||
args->out.block_handle = block_handle;
|
||||
args->out.block_size = block_size;
|
||||
break;
|
||||
|
||||
case HL_MEM_OP_EXPORT_DMABUF_FD:
|
||||
rc = export_dmabuf_from_addr(ctx,
|
||||
args->in.export_dmabuf_fd.handle,
|
||||
args->in.export_dmabuf_fd.mem_size,
|
||||
args->in.flags,
|
||||
&dmabuf_fd);
|
||||
memset(args, 0, sizeof(*args));
|
||||
args->out.fd = dmabuf_fd;
|
||||
dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n");
|
||||
rc = -EPERM;
|
||||
break;
|
||||
|
||||
case HL_MEM_OP_TS_ALLOC:
|
||||
rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
|
||||
rc = -EINVAL;
|
||||
@@ -2039,6 +2076,258 @@ out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void ts_buff_release(struct kref *ref)
|
||||
{
|
||||
struct hl_ts_buff *buff;
|
||||
|
||||
buff = container_of(ref, struct hl_ts_buff, refcount);
|
||||
|
||||
vfree(buff->kernel_buff_address);
|
||||
vfree(buff->user_buff_address);
|
||||
kfree(buff);
|
||||
}
|
||||
|
||||
struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr,
|
||||
u32 handle)
|
||||
{
|
||||
struct hl_ts_buff *buff;
|
||||
|
||||
spin_lock(&mgr->ts_lock);
|
||||
buff = idr_find(&mgr->ts_handles, handle);
|
||||
if (!buff) {
|
||||
spin_unlock(&mgr->ts_lock);
|
||||
dev_warn(hdev->dev,
|
||||
"TS buff get failed, no match to handle 0x%x\n", handle);
|
||||
return NULL;
|
||||
}
|
||||
kref_get(&buff->refcount);
|
||||
spin_unlock(&mgr->ts_lock);
|
||||
|
||||
return buff;
|
||||
}
|
||||
|
||||
void hl_ts_put(struct hl_ts_buff *buff)
|
||||
{
|
||||
kref_put(&buff->refcount, ts_buff_release);
|
||||
}
|
||||
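hl_ts_get()/hl_ts_put() follow the usual IDR-plus-kref lookup pattern: find the object by handle and take a reference while still holding the lock that protects the IDR, then drop references through kref_put() with a release callback. A generic sketch with invented demo_* names is below; it is not the driver's code.

#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

/* Hypothetical refcounted object tracked by handle in an IDR. */
struct demo_buf {
	struct kref refcount;
};

static void demo_buf_release(struct kref *kref)
{
	kfree(container_of(kref, struct demo_buf, refcount));
}

/*
 * Look up a handle and take a reference while still holding the lock that
 * protects the IDR, so the object cannot be released under our feet.
 */
static struct demo_buf *demo_get(struct idr *handles, spinlock_t *lock, u32 handle)
{
	struct demo_buf *buf;

	spin_lock(lock);
	buf = idr_find(handles, handle);
	if (buf)
		kref_get(&buf->refcount);
	spin_unlock(lock);

	return buf;
}

static void demo_put(struct demo_buf *buf)
{
	kref_put(&buf->refcount, demo_buf_release);
}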
|
||||
static void buff_vm_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data;
|
||||
long new_mmap_size;
|
||||
|
||||
new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start);
|
||||
|
||||
if (new_mmap_size > 0) {
|
||||
buff->mmap_size = new_mmap_size;
|
||||
return;
|
||||
}
|
||||
|
||||
atomic_set(&buff->mmap, 0);
|
||||
hl_ts_put(buff);
|
||||
vma->vm_private_data = NULL;
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct ts_buff_vm_ops = {
|
||||
.close = buff_vm_close
|
||||
};
|
||||
|
||||
int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_ts_buff *buff;
|
||||
u32 handle, user_buff_size;
|
||||
int rc;
|
||||
|
||||
/* We use the page offset to hold the IDR handle, so we need to clear
 * it before doing the mmap itself
 */
handle = vma->vm_pgoff;
|
||||
vma->vm_pgoff = 0;
|
||||
|
||||
buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle);
|
||||
if (!buff) {
|
||||
dev_err(hdev->dev,
|
||||
"TS buff mmap failed, no match to handle 0x%x\n", handle);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Validation check */
|
||||
user_buff_size = vma->vm_end - vma->vm_start;
|
||||
if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) {
|
||||
dev_err(hdev->dev,
|
||||
"TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n",
|
||||
user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE));
|
||||
rc = -EINVAL;
|
||||
goto put_buff;
|
||||
}
|
||||
|
||||
#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
|
||||
if (!access_ok(VERIFY_WRITE,
|
||||
(void __user *) (uintptr_t) vma->vm_start, user_buff_size)) {
|
||||
#else
|
||||
if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
|
||||
user_buff_size)) {
|
||||
#endif
|
||||
dev_err(hdev->dev,
|
||||
"user pointer is invalid - 0x%lx\n",
|
||||
vma->vm_start);
|
||||
|
||||
rc = -EINVAL;
|
||||
goto put_buff;
|
||||
}
|
||||
|
||||
if (atomic_cmpxchg(&buff->mmap, 0, 1)) {
|
||||
dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n");
|
||||
rc = -EINVAL;
|
||||
goto put_buff;
|
||||
}
|
||||
|
||||
vma->vm_ops = &ts_buff_vm_ops;
|
||||
vma->vm_private_data = buff;
|
||||
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
|
||||
rc = remap_vmalloc_range(vma, buff->user_buff_address, 0);
|
||||
if (rc) {
|
||||
atomic_set(&buff->mmap, 0);
|
||||
goto put_buff;
|
||||
}
|
||||
|
||||
buff->mmap_size = buff->user_buff_size;
|
||||
vma->vm_pgoff = handle;
|
||||
|
||||
return 0;
|
||||
|
||||
put_buff:
|
||||
hl_ts_put(buff);
|
||||
return rc;
|
||||
}
|
||||
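hl_ts_mmap() maps a vmalloc_user() buffer into the caller's VMA with remap_vmalloc_range(), after validating the size, rejecting a second mapping via atomic_cmpxchg() and installing a .close callback that drops the reference. The sketch below shows only the core mapping step under those assumptions and is not the driver's full implementation; demo_mmap_vmalloc_buf is an invented name.

#include <linux/vmalloc.h>
#include <linux/mm.h>

/*
 * Sketch: map a buffer that was allocated with vmalloc_user() into the VMA
 * handed to an .mmap file operation.  Size validation, handle decoding and
 * the vm_operations .close hook are omitted here.
 */
static int demo_mmap_vmalloc_buf(struct vm_area_struct *vma, void *kbuf)
{
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;

	/* pgoff 0: map the buffer from its start */
	return remap_vmalloc_range(vma, kbuf, 0);
}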
|
||||
void hl_ts_mgr_init(struct hl_ts_mgr *mgr)
|
||||
{
|
||||
spin_lock_init(&mgr->ts_lock);
|
||||
idr_init(&mgr->ts_handles);
|
||||
}
|
||||
|
||||
void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr)
|
||||
{
|
||||
struct hl_ts_buff *buff;
|
||||
struct idr *idp;
|
||||
u32 id;
|
||||
|
||||
idp = &mgr->ts_handles;
|
||||
|
||||
idr_for_each_entry(idp, buff, id) {
|
||||
if (kref_put(&buff->refcount, ts_buff_release) != 1)
|
||||
dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n",
|
||||
id);
|
||||
}
|
||||
|
||||
idr_destroy(&mgr->ts_handles);
|
||||
}
|
||||
|
||||
static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements)
|
||||
{
|
||||
struct hl_ts_buff *ts_buff = NULL;
|
||||
u32 size;
|
||||
void *p;
|
||||
|
||||
ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
|
||||
if (!ts_buff)
|
||||
return NULL;
|
||||
|
||||
/* Allocate the user buffer */
|
||||
size = num_elements * sizeof(u64);
|
||||
p = vmalloc_user(size);
|
||||
if (!p)
|
||||
goto free_mem;
|
||||
|
||||
ts_buff->user_buff_address = p;
|
||||
ts_buff->user_buff_size = size;
|
||||
|
||||
/* Allocate the internal kernel buffer */
|
||||
size = num_elements * sizeof(struct hl_user_pending_interrupt);
|
||||
p = vmalloc(size);
|
||||
if (!p)
|
||||
goto free_user_buff;
|
||||
|
||||
ts_buff->kernel_buff_address = p;
|
||||
ts_buff->kernel_buff_size = size;
|
||||
|
||||
return ts_buff;
|
||||
|
||||
free_user_buff:
|
||||
vfree(ts_buff->user_buff_address);
|
||||
free_mem:
|
||||
kfree(ts_buff);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * allocate_timestamps_buffers() - allocate timestamps buffers
 * This function allocates a timestamp buffer that will later be mapped to
 * the user so it can read the timestamps. In addition, it allocates an
 * extra buffer for registration management: since registration must not
 * fail due to an out-of-memory situation, we prepare a pool that serves as
 * the user interrupt nodes, and instead of dynamically allocating nodes
 * during registration we pick them from this pool. It also adds a node to
 * the mapping hash, which is used to map the user timestamp buffer to the
 * internal kernel timestamp buffer.
 * @hpriv: pointer to the private data of the fd
 * @args: ioctl input
 * @handle: user timestamp buffer handle as an output
 */
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
|
||||
{
|
||||
struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_ts_buff *ts_buff;
|
||||
int rc = 0;
|
||||
|
||||
if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
|
||||
dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
|
||||
args->num_of_elements, TS_MAX_ELEMENTS_NUM);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Allocate the ts buffer object.
 * This object contains two buffers: one that will be mapped to the user,
 * and an internal buffer for driver use only, which won't be mapped to
 * the user.
 */
ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements);
|
||||
if (!ts_buff) {
|
||||
rc = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
spin_lock(&ts_mgr->ts_lock);
|
||||
rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC);
|
||||
spin_unlock(&ts_mgr->ts_lock);
|
||||
if (rc < 0) {
|
||||
dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n");
|
||||
goto release_ts_buff;
|
||||
}
|
||||
|
||||
ts_buff->id = rc;
|
||||
ts_buff->hdev = hdev;
|
||||
|
||||
kref_init(&ts_buff->refcount);
|
||||
|
||||
/* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */
|
||||
*handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF;
|
||||
*handle <<= PAGE_SHIFT;
|
||||
|
||||
dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id);
|
||||
|
||||
return 0;
|
||||
|
||||
release_ts_buff:
|
||||
kref_put(&ts_buff->refcount, ts_buff_release);
|
||||
out_err:
|
||||
*handle = 0;
|
||||
return rc;
|
||||
}
|
||||
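The returned handle packs the 32-bit IDR id together with the HL_MMAP_TYPE_TS_BUFF bit and is shifted left by PAGE_SHIFT, so userspace can hand it back unchanged as the mmap offset. The standalone example below demonstrates the packing arithmetic only; the type-bit value, the page shift and the decode step are illustrative, not the driver's real constants or code.

#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SHIFT    12			/* assume 4 KiB pages */
#define DEMO_TYPE_TS_BUFF  (1ULL << 32)		/* illustrative type bit above 32 bits */

int main(void)
{
	uint32_t id = 7;			/* value returned by idr_alloc() */

	/* encode: the id is 32-bit, so OR-ing a bit above 32 bits cannot clash */
	uint64_t handle = ((uint64_t)id | DEMO_TYPE_TS_BUFF) << DEMO_PAGE_SHIFT;

	/* decode, as mmap() sees it: vm_pgoff == handle >> PAGE_SHIFT */
	uint64_t pgoff = handle >> DEMO_PAGE_SHIFT;
	uint32_t decoded_id = (uint32_t)pgoff;	/* low 32 bits are the id */

	printf("handle=%#llx pgoff=%#llx id=%u\n",
	       (unsigned long long)handle, (unsigned long long)pgoff, decoded_id);
	return 0;
}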
|
||||
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
{
|
||||
enum hl_device_status status;
|
||||
@@ -2154,6 +2443,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
args->out.fd = dmabuf_fd;
|
||||
break;
|
||||
|
||||
case HL_MEM_OP_TS_ALLOC:
|
||||
rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
|
||||
rc = -EINVAL;
|
||||
@@ -2607,11 +2899,12 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
|
||||
*/
|
||||
void hl_vm_ctx_fini(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node;
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct hl_vm *vm = &hdev->vm;
|
||||
struct hl_vm_phys_pg_pack *phys_pg_list;
|
||||
struct hl_vm_hash_node *hnode;
|
||||
struct hl_vm *vm = &hdev->vm;
|
||||
struct hlist_node *tmp_node;
|
||||
struct list_head free_list;
|
||||
struct hl_mem_in args;
|
||||
int i;
|
||||
|
||||
@@ -2644,19 +2937,24 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
|
||||
|
||||
mutex_unlock(&ctx->mmu_lock);
|
||||
|
||||
INIT_LIST_HEAD(&free_list);
|
||||
|
||||
spin_lock(&vm->idr_lock);
|
||||
idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
|
||||
if (phys_pg_list->asid == ctx->asid) {
|
||||
dev_dbg(hdev->dev,
|
||||
"page list 0x%px of asid %d is still alive\n",
|
||||
phys_pg_list, ctx->asid);
|
||||
atomic64_sub(phys_pg_list->total_size,
|
||||
&hdev->dram_used_mem);
|
||||
free_phys_pg_pack(hdev, phys_pg_list);
|
||||
|
||||
atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem);
|
||||
idr_remove(&vm->phys_pg_pack_handles, i);
|
||||
list_add(&phys_pg_list->node, &free_list);
|
||||
}
|
||||
spin_unlock(&vm->idr_lock);
|
||||
|
||||
list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node)
|
||||
free_phys_pg_pack(hdev, phys_pg_list);
|
||||
|
||||
va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
|
||||
va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);
|
||||
|
||||
|
||||
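hl_vm_ctx_fini() now detaches the surviving page packs onto a local free_list while holding the IDR spinlock and calls free_phys_pg_pack() only after the lock is released, since the freeing work can be slow. A generic sketch of that detach-then-free pattern follows; the demo_* types are invented and stand in for the driver's structures.

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

/* Hypothetical tracked object; 'node' links it either into the manager's
 * list or into a local teardown list. */
struct demo_entry {
	struct list_head node;
};

struct demo_mgr {
	spinlock_t lock;
	struct list_head entries;
};

/* Detach everything under the lock, then do the (possibly slow) cleanup
 * with the lock already released. */
static void demo_mgr_fini(struct demo_mgr *mgr)
{
	struct demo_entry *e, *tmp;
	LIST_HEAD(free_list);

	spin_lock(&mgr->lock);
	list_splice_init(&mgr->entries, &free_list);
	spin_unlock(&mgr->lock);

	list_for_each_entry_safe(e, tmp, &free_list, node) {
		list_del(&e->node);
		kfree(e);
	}
}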
@@ -662,3 +662,58 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
|
||||
return rc;
|
||||
}
|
||||
|
||||
u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
|
||||
{
|
||||
return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX;
|
||||
}
|
||||
|
||||
/**
 * hl_mmu_get_hop_pte_phys_addr() - extract PTE address from HOP
 * @ctx: pointer to the context structure.
 * @mmu_prop: MMU properties.
 * @hop_idx: HOP index.
 * @hop_addr: HOP address.
 * @virt_addr: virtual address for the translation.
 *
 * @return the matching PTE physical address on success, otherwise U64_MAX.
 */
u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
|
||||
u8 hop_idx, u64 hop_addr, u64 virt_addr)
|
||||
{
|
||||
u64 mask, shift;
|
||||
|
||||
if (hop_idx >= mmu_prop->num_hops) {
|
||||
dev_err_ratelimited(ctx->hdev->dev, "Invalid hop index %d\n", hop_idx);
|
||||
return U64_MAX;
|
||||
}
|
||||
|
||||
/* currently max number of HOPs is 6 */
|
||||
switch (hop_idx) {
|
||||
case 0:
|
||||
mask = mmu_prop->hop0_mask;
|
||||
shift = mmu_prop->hop0_shift;
|
||||
break;
|
||||
case 1:
|
||||
mask = mmu_prop->hop1_mask;
|
||||
shift = mmu_prop->hop1_shift;
|
||||
break;
|
||||
case 2:
|
||||
mask = mmu_prop->hop2_mask;
|
||||
shift = mmu_prop->hop2_shift;
|
||||
break;
|
||||
case 3:
|
||||
mask = mmu_prop->hop3_mask;
|
||||
shift = mmu_prop->hop3_shift;
|
||||
break;
|
||||
case 4:
|
||||
mask = mmu_prop->hop4_mask;
|
||||
shift = mmu_prop->hop4_shift;
|
||||
break;
|
||||
default:
|
||||
mask = mmu_prop->hop5_mask;
|
||||
shift = mmu_prop->hop5_shift;
|
||||
break;
|
||||
}
|
||||
|
||||
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
|
||||
}
|
||||
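The helper above computes the PTE location as hop_addr + pte_size * index, where the index is carved out of the virtual address by the per-hop mask and shift (the switch could equally be expressed as a small {mask, shift} table). A standalone example of the arithmetic, with made-up mask/shift/PTE-size values rather than the ASIC-specific ones from hl_mmu_properties, is below.

#include <stdio.h>
#include <stdint.h>

/* Illustrative per-hop parameters; the real values are ASIC-specific. */
#define DEMO_HOP_SHIFT   21
#define DEMO_HOP_MASK    (0x1FFULL << DEMO_HOP_SHIFT)	/* 9-bit index */
#define DEMO_PTE_SIZE    8				/* bytes per PTE */

static uint64_t hop_pte_phys_addr(uint64_t hop_addr, uint64_t virt_addr)
{
	uint64_t idx = (virt_addr & DEMO_HOP_MASK) >> DEMO_HOP_SHIFT;

	return hop_addr + DEMO_PTE_SIZE * idx;
}

int main(void)
{
	uint64_t hop_addr = 0x80000000ULL;	/* example hop table base */
	uint64_t virt_addr = 0x12345678ULL;

	printf("pte addr = %#llx\n",
	       (unsigned long long)hop_pte_phys_addr(hop_addr, virt_addr));
	return 0;
}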
|
||||
|
||||
@@ -217,18 +217,10 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
|
||||
mmu_prop->hop4_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
|
||||
{
|
||||
if (curr_pte & PAGE_PRESENT_MASK)
|
||||
return curr_pte & HOP_PHYS_ADDR_MASK;
|
||||
else
|
||||
return ULLONG_MAX;
|
||||
}
|
||||
|
||||
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
|
||||
bool *is_new_hop)
|
||||
{
|
||||
u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop_addr == ULLONG_MAX) {
|
||||
hop_addr = alloc_hop(ctx);
|
||||
@@ -467,7 +459,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
|
||||
{
|
||||
/* MMU H/W fini was already done in device hw_fini() */
|
||||
|
||||
if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
|
||||
if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
|
||||
kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
|
||||
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
|
||||
|
||||
@@ -546,7 +538,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
|
||||
|
||||
hop1_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop1_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
@@ -555,7 +547,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
|
||||
|
||||
hop2_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop2_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
@@ -564,7 +556,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
|
||||
|
||||
hop3_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop3_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
@@ -582,7 +574,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
|
||||
}
|
||||
|
||||
if (!is_huge) {
|
||||
hop4_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop4_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
@@ -845,27 +837,6 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
|
||||
|
||||
}
|
||||
|
||||
static inline u64 get_hop_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
int hop_num, u64 hop_addr, u64 virt_addr)
|
||||
{
|
||||
switch (hop_num) {
|
||||
case 0:
|
||||
return get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
|
||||
case 1:
|
||||
return get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
|
||||
case 2:
|
||||
return get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
|
||||
case 3:
|
||||
return get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
|
||||
case 4:
|
||||
return get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return U64_MAX;
|
||||
}
|
||||
|
||||
static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
|
||||
struct hl_mmu_hop_info *hops)
|
||||
{
|
||||
@@ -906,7 +877,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
|
||||
|
||||
hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
|
||||
hops->hop_info[0].hop_pte_addr =
|
||||
get_hop_pte_addr(ctx, mmu_prop, 0,
|
||||
hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
|
||||
hops->hop_info[0].hop_addr, virt_addr);
|
||||
hops->hop_info[0].hop_pte_val =
|
||||
hdev->asic_funcs->read_pte(hdev,
|
||||
@@ -914,13 +885,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
|
||||
|
||||
for (i = 1 ; i < used_hops ; i++) {
|
||||
hops->hop_info[i].hop_addr =
|
||||
get_next_hop_addr(ctx,
|
||||
hl_mmu_get_next_hop_addr(ctx,
|
||||
hops->hop_info[i - 1].hop_pte_val);
|
||||
if (hops->hop_info[i].hop_addr == ULLONG_MAX)
|
||||
return -EFAULT;
|
||||
|
||||
hops->hop_info[i].hop_pte_addr =
|
||||
get_hop_pte_addr(ctx, mmu_prop, i,
|
||||
hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
|
||||
hops->hop_info[i].hop_addr,
|
||||
virt_addr);
|
||||
hops->hop_info[i].hop_pte_val =
|
||||
|
||||
@@ -338,10 +338,7 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
|
||||
lower_32_bits(outbound_region_end_address));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x014, 0);
|
||||
|
||||
if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
|
||||
rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000);
|
||||
else
|
||||
rc |= hl_pci_iatu_write(hdev, 0x018, 0);
|
||||
rc |= hl_pci_iatu_write(hdev, 0x018, 0);
|
||||
|
||||
rc |= hl_pci_iatu_write(hdev, 0x020,
|
||||
upper_32_bits(outbound_region_end_address));
|
||||
@@ -411,13 +408,13 @@ int hl_pci_init(struct hl_device *hdev)
|
||||
|
||||
rc = hdev->asic_funcs->pci_bars_map(hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
|
||||
dev_err(hdev->dev, "Failed to map PCI BAR addresses\n");
|
||||
goto disable_device;
|
||||
}
|
||||
|
||||
rc = hdev->asic_funcs->init_iatu(hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to initialize iATU\n");
|
||||
dev_err(hdev->dev, "PCI controller was not initialized successfully\n");
|
||||
goto unmap_pci_bars;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||
* Copyright 2016-2022 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
@@ -9,105 +9,91 @@
|
||||
|
||||
#include <linux/pci.h>
|
||||
|
||||
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
|
||||
static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct cpucp_packet pkt;
|
||||
u32 used_pll_idx;
|
||||
u64 result;
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
long value;
|
||||
|
||||
if (!hl_device_operational(hdev, NULL))
|
||||
return -ENODEV;
|
||||
|
||||
value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
|
||||
if (value < 0)
|
||||
return value;
|
||||
|
||||
hdev->asic_prop.max_freq_value = value;
|
||||
|
||||
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
|
||||
}
|
||||
|
||||
static ssize_t clk_max_freq_mhz_store(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
int rc;
|
||||
u64 value;
|
||||
|
||||
rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
|
||||
if (rc)
|
||||
return rc;
|
||||
if (!hl_device_operational(hdev, NULL)) {
|
||||
count = -ENODEV;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
rc = kstrtoull(buf, 0, &value);
|
||||
if (rc) {
|
||||
count = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (curr)
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
hdev->asic_prop.max_freq_value = value * 1000 * 1000;
|
||||
|
||||
hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index, hdev->asic_prop.max_freq_value);
|
||||
|
||||
fail:
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t clk_cur_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
long value;
|
||||
|
||||
if (!hl_device_operational(hdev, NULL))
|
||||
return -ENODEV;
|
||||
|
||||
value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
|
||||
if (value < 0)
|
||||
return value;
|
||||
|
||||
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_RW(clk_max_freq_mhz);
|
||||
static DEVICE_ATTR_RO(clk_cur_freq_mhz);
|
||||
|
||||
static struct attribute *hl_dev_clk_attrs[] = {
|
||||
&dev_attr_clk_max_freq_mhz.attr,
|
||||
&dev_attr_clk_cur_freq_mhz.attr,
|
||||
};
|
||||
|
||||
static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
struct cpucp_info *cpucp_info;
|
||||
|
||||
cpucp_info = &hdev->asic_prop.cpucp_info;
|
||||
|
||||
if (cpucp_info->infineon_second_stage_version)
|
||||
return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version),
|
||||
le32_to_cpu(cpucp_info->infineon_second_stage_version));
|
||||
else
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
0, &result);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to get frequency of PLL %d, error %d\n",
|
||||
used_pll_idx, rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
return (long) result;
|
||||
return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
|
||||
}
|
||||
|
||||
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
|
||||
{
|
||||
struct cpucp_packet pkt;
|
||||
u32 used_pll_idx;
|
||||
int rc;
|
||||
static DEVICE_ATTR_RO(vrm_ver);
|
||||
|
||||
rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
|
||||
if (rc)
|
||||
return;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
|
||||
pkt.value = cpu_to_le64(freq);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
0, NULL);
|
||||
|
||||
if (rc)
|
||||
dev_err(hdev->dev,
|
||||
"Failed to set frequency to PLL %d, error %d\n",
|
||||
used_pll_idx, rc);
|
||||
}
|
||||
|
||||
u64 hl_get_max_power(struct hl_device *hdev)
|
||||
{
|
||||
struct cpucp_packet pkt;
|
||||
u64 result;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
0, &result);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
|
||||
return (u64) rc;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void hl_set_max_power(struct hl_device *hdev)
|
||||
{
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.value = cpu_to_le64(hdev->max_power);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
0, NULL);
|
||||
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
|
||||
}
|
||||
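The firmware helpers removed here (and their hl_fw_* replacements) all build requests the same way: zero a cpucp_packet, shift the opcode into the ctl field, store the payload in little-endian and send it through the ASIC's send_cpu_message() callback. The sketch below abbreviates that pattern; the struct shown is trimmed to the fields used above and is not the real cpucp_packet layout, and DEMO_OPCODE_SHIFT merely stands in for CPUCP_PKT_CTL_OPCODE_SHIFT.

#include <linux/types.h>
#include <linux/string.h>
#include <asm/byteorder.h>

/* Trimmed-down view of the firmware packet; the real struct has more fields. */
struct demo_cpucp_packet {
	__le32 ctl;
	__le32 pll_index;
	__le64 value;
};

#define DEMO_OPCODE_SHIFT	16	/* illustrative shift value */

/* Build a "set frequency"-style request: opcode in ctl, payload in LE. */
static void demo_build_freq_set(struct demo_cpucp_packet *pkt,
				u32 opcode, u32 pll_index, u64 freq)
{
	memset(pkt, 0, sizeof(*pkt));
	pkt->ctl = cpu_to_le32(opcode << DEMO_OPCODE_SHIFT);
	pkt->pll_index = cpu_to_le32(pll_index);
	pkt->value = cpu_to_le64(freq);
}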
static struct attribute *hl_dev_vrm_attrs[] = {
|
||||
&dev_attr_vrm_ver.attr,
|
||||
};
|
||||
|
||||
static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
@@ -158,20 +144,6 @@ static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
|
||||
}
|
||||
|
||||
static ssize_t infineon_ver_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
if (hdev->asic_prop.cpucp_info.infineon_second_stage_version)
|
||||
return sprintf(buf, "%#04x %#04x\n",
|
||||
le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version),
|
||||
le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_second_stage_version));
|
||||
else
|
||||
return sprintf(buf, "%#04x\n",
|
||||
le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version));
|
||||
}
|
||||
|
||||
static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
@@ -188,6 +160,14 @@ static ssize_t thermal_ver_show(struct device *dev,
|
||||
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version);
|
||||
}
|
||||
|
||||
static ssize_t fw_os_ver_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.fw_os_version);
|
||||
}
|
||||
|
||||
static ssize_t preboot_btl_ver_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
@@ -323,7 +303,9 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
|
||||
if (!hl_device_operational(hdev, NULL))
|
||||
return -ENODEV;
|
||||
|
||||
val = hl_get_max_power(hdev);
|
||||
val = hl_fw_get_max_power(hdev);
|
||||
if (val < 0)
|
||||
return val;
|
||||
|
||||
return sprintf(buf, "%lu\n", val);
|
||||
}
|
||||
@@ -348,7 +330,7 @@ static ssize_t max_power_store(struct device *dev,
|
||||
}
|
||||
|
||||
hdev->max_power = value;
|
||||
hl_set_max_power(hdev);
|
||||
hl_fw_set_max_power(hdev);
|
||||
|
||||
out:
|
||||
return count;
|
||||
@@ -394,7 +376,6 @@ static DEVICE_ATTR_RO(device_type);
|
||||
static DEVICE_ATTR_RO(fuse_ver);
|
||||
static DEVICE_ATTR_WO(hard_reset);
|
||||
static DEVICE_ATTR_RO(hard_reset_cnt);
|
||||
static DEVICE_ATTR_RO(infineon_ver);
|
||||
static DEVICE_ATTR_RW(max_power);
|
||||
static DEVICE_ATTR_RO(pci_addr);
|
||||
static DEVICE_ATTR_RO(preboot_btl_ver);
|
||||
@@ -403,6 +384,7 @@ static DEVICE_ATTR_RO(soft_reset_cnt);
|
||||
static DEVICE_ATTR_RO(status);
|
||||
static DEVICE_ATTR_RO(thermal_ver);
|
||||
static DEVICE_ATTR_RO(uboot_ver);
|
||||
static DEVICE_ATTR_RO(fw_os_ver);
|
||||
|
||||
static struct bin_attribute bin_attr_eeprom = {
|
||||
.attr = {.name = "eeprom", .mode = (0444)},
|
||||
@@ -420,13 +402,13 @@ static struct attribute *hl_dev_attrs[] = {
|
||||
&dev_attr_fuse_ver.attr,
|
||||
&dev_attr_hard_reset.attr,
|
||||
&dev_attr_hard_reset_cnt.attr,
|
||||
&dev_attr_infineon_ver.attr,
|
||||
&dev_attr_max_power.attr,
|
||||
&dev_attr_pci_addr.attr,
|
||||
&dev_attr_preboot_btl_ver.attr,
|
||||
&dev_attr_status.attr,
|
||||
&dev_attr_thermal_ver.attr,
|
||||
&dev_attr_uboot_ver.attr,
|
||||
&dev_attr_fw_os_ver.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -441,10 +423,12 @@ static struct attribute_group hl_dev_attr_group = {
|
||||
};
|
||||
|
||||
static struct attribute_group hl_dev_clks_attr_group;
|
||||
static struct attribute_group hl_dev_vrm_attr_group;
|
||||
|
||||
static const struct attribute_group *hl_dev_attr_groups[] = {
|
||||
&hl_dev_attr_group,
|
||||
&hl_dev_clks_attr_group,
|
||||
&hl_dev_vrm_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -463,13 +447,23 @@ static const struct attribute_group *hl_dev_inference_attr_groups[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp)
|
||||
{
|
||||
dev_clk_attr_grp->attrs = hl_dev_clk_attrs;
|
||||
}
|
||||
|
||||
void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp)
|
||||
{
|
||||
dev_vrm_attr_grp->attrs = hl_dev_vrm_attrs;
|
||||
}
|
||||
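The new helpers above just point a caller-supplied attribute_group at a static array of device attributes declared with DEVICE_ATTR_RW()/DEVICE_ATTR_RO(). A minimal sketch of that wiring follows; demo_val and its attribute are invented for the example and are not part of this driver.

#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/sysfs.h>

static int demo_val;

static ssize_t demo_val_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	return sysfs_emit(buf, "%d\n", demo_val);
}

static ssize_t demo_val_store(struct device *dev, struct device_attribute *attr,
			      const char *buf, size_t count)
{
	int rc = kstrtoint(buf, 0, &demo_val);

	return rc ? rc : count;
}

static DEVICE_ATTR_RW(demo_val);

static struct attribute *demo_attrs[] = {
	&dev_attr_demo_val.attr,
	NULL,
};

/* Mirrors hl_sysfs_add_dev_clk_attr(): hand the array to a group that the
 * caller later registers with device_add_groups(). */
static void demo_add_attrs(struct attribute_group *grp)
{
	grp->attrs = demo_attrs;
}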
|
||||
int hl_sysfs_init(struct hl_device *hdev)
|
||||
{
|
||||
int rc;
|
||||
|
||||
hdev->max_power = hdev->asic_prop.max_power_default;
|
||||
|
||||
hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
|
||||
hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group, &hl_dev_vrm_attr_group);
|
||||
|
||||
rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
|
||||
if (rc) {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2016-2021 HabanaLabs, Ltd.
|
||||
* Copyright 2016-2022 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
@@ -458,7 +458,6 @@ struct ecc_info_extract_params {
|
||||
u64 block_address;
|
||||
u32 num_memories;
|
||||
bool derr;
|
||||
bool disable_clock_gating;
|
||||
};
|
||||
|
||||
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
|
||||
@@ -614,6 +613,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
|
||||
prop->pmmu.page_size = PAGE_SIZE_4KB;
|
||||
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
|
||||
prop->pmmu.last_mask = LAST_MASK;
|
||||
/* TODO: will be duplicated until implementing per-MMU props */
|
||||
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
|
||||
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
|
||||
|
||||
/* PMMU and HPMMU are the same except of page size */
|
||||
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
|
||||
@@ -667,6 +669,10 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
|
||||
|
||||
prop->use_get_power_for_reset_history = true;
|
||||
|
||||
prop->configurable_stop_on_err = true;
|
||||
|
||||
prop->set_max_power_on_device_init = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1636,7 +1642,7 @@ static int gaudi_late_init(struct hl_device *hdev)
|
||||
*/
|
||||
gaudi_mmu_prepare(hdev, 1);
|
||||
|
||||
hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
|
||||
hl_fw_set_pll_profile(hdev);
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -1896,7 +1902,6 @@ static int gaudi_sw_init(struct hl_device *hdev)
|
||||
goto free_cpu_accessible_dma_pool;
|
||||
|
||||
spin_lock_init(&gaudi->hw_queues_lock);
|
||||
mutex_init(&gaudi->clk_gate_mutex);
|
||||
|
||||
hdev->supports_sync_stream = true;
|
||||
hdev->supports_coresight = true;
|
||||
@@ -1946,8 +1951,6 @@ static int gaudi_sw_fini(struct hl_device *hdev)
|
||||
|
||||
dma_pool_destroy(hdev->dma_pool);
|
||||
|
||||
mutex_destroy(&gaudi->clk_gate_mutex);
|
||||
|
||||
kfree(gaudi);
|
||||
|
||||
return 0;
|
||||
@@ -3738,76 +3741,8 @@ static void gaudi_tpc_stall(struct hl_device *hdev)
|
||||
WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
|
||||
}
|
||||
|
||||
static void gaudi_set_clock_gating(struct hl_device *hdev)
|
||||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
u32 qman_offset;
|
||||
bool enable;
|
||||
int i;
|
||||
|
||||
/* In case we are during debug session, don't enable the clock gate
|
||||
* as it may interfere
|
||||
*/
|
||||
if (hdev->in_debug)
|
||||
return;
|
||||
|
||||
if (hdev->asic_prop.fw_security_enabled)
|
||||
return;
|
||||
|
||||
for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
|
||||
enable = !!(hdev->clock_gating_mask &
|
||||
(BIT_ULL(gaudi_dma_assignment[i])));
|
||||
|
||||
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
|
||||
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
|
||||
enable ? QMAN_CGM1_PWR_GATE_EN : 0);
|
||||
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
|
||||
enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
|
||||
}
|
||||
|
||||
for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
|
||||
enable = !!(hdev->clock_gating_mask &
|
||||
(BIT_ULL(gaudi_dma_assignment[i])));
|
||||
|
||||
/* GC sends work to DMA engine through Upper CP in DMA5 so
|
||||
* we need to not enable clock gating in that DMA
|
||||
*/
|
||||
if (i == GAUDI_HBM_DMA_4)
|
||||
enable = 0;
|
||||
|
||||
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
|
||||
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
|
||||
enable ? QMAN_CGM1_PWR_GATE_EN : 0);
|
||||
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
|
||||
enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
|
||||
}
|
||||
|
||||
enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
|
||||
WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
|
||||
WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
|
||||
|
||||
enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
|
||||
WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
|
||||
WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
|
||||
|
||||
for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
|
||||
enable = !!(hdev->clock_gating_mask &
|
||||
(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
|
||||
|
||||
WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
|
||||
enable ? QMAN_CGM1_PWR_GATE_EN : 0);
|
||||
WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
|
||||
enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
|
||||
|
||||
qman_offset += TPC_QMAN_OFFSET;
|
||||
}
|
||||
|
||||
gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
|
||||
}
|
||||
|
||||
static void gaudi_disable_clock_gating(struct hl_device *hdev)
|
||||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
u32 qman_offset;
|
||||
int i;
|
||||
|
||||
@@ -3832,8 +3767,6 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev)
|
||||
|
||||
qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
|
||||
}
|
||||
|
||||
gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
|
||||
}
|
||||
|
||||
static void gaudi_enable_timestamp(struct hl_device *hdev)
|
||||
@@ -3876,8 +3809,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_
|
||||
gaudi_stop_hbm_dma_qmans(hdev);
|
||||
gaudi_stop_pci_dma_qmans(hdev);
|
||||
|
||||
hdev->asic_funcs->disable_clock_gating(hdev);
|
||||
|
||||
msleep(wait_timeout_ms);
|
||||
|
||||
gaudi_pci_dma_stall(hdev);
|
||||
@@ -3931,7 +3862,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
|
||||
/* mem cache invalidation */
|
||||
WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
|
||||
|
||||
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
|
||||
hl_mmu_invalidate_cache(hdev, true, 0);
|
||||
|
||||
WREG32(mmMMU_UP_MMU_ENABLE, 1);
|
||||
WREG32(mmMMU_UP_SPI_MASK, 0xF);
|
||||
@@ -4203,10 +4134,8 @@ static int gaudi_hw_init(struct hl_device *hdev)
|
||||
|
||||
/* In case the clock gating was enabled in preboot we need to disable
* it here before touching the MME/TPC registers.
* There is no need to take clk gating mutex because when this function
* runs, no other relevant code can run
*/
hdev->asic_funcs->disable_clock_gating(hdev);
gaudi_disable_clock_gating(hdev);

/* SRAM scrambler must be initialized after CPU is running from HBM */
gaudi_init_scrambler_sram(hdev);
@@ -4232,8 +4161,6 @@ static int gaudi_hw_init(struct hl_device *hdev)

gaudi_init_nic_qmans(hdev);

hdev->asic_funcs->set_clock_gating(hdev);

gaudi_enable_timestamp(hdev);

/* MSI must be enabled before CPU queues and NIC are initialized */
@@ -4400,14 +4327,11 @@ skip_reset:
status);

if (gaudi) {
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
HW_CAP_HBM | HW_CAP_PCI_DMA |
HW_CAP_MME | HW_CAP_TPC_MASK |
HW_CAP_HBM_DMA | HW_CAP_PLL |
HW_CAP_NIC_MASK | HW_CAP_MMU |
HW_CAP_SRAM_SCRAMBLER |
HW_CAP_HBM_SCRAMBLER |
HW_CAP_CLK_GATE);
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
HW_CAP_HBM_SCRAMBLER);

memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));

@@ -4884,7 +4808,6 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
int rc = 0;
u64 val = 0;

@@ -4919,17 +4842,11 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
return rc;
}

mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);

/* Scrub HBM using all DMA channels in parallel */
rc = gaudi_hbm_scrubbing(hdev);
if (rc)
dev_err(hdev->dev,
"Failed to clear HBM in mem scrub all\n");

hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
}

return rc;
@@ -6188,7 +6105,6 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
bool user_address, u32 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
u64 hbm_bar_addr, host_phys_end;
int rc = 0;

@@ -6196,38 +6112,31 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,

if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {

if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
*val = RREG32(addr - CFG_BASE);

dev_err_ratelimited(hdev->dev,
"Can't read register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
*val = RREG32(addr - CFG_BASE);
}
} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {

*val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));

} else if ((addr >= SRAM_BASE_ADDR) &&
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));

u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));

hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
if (hbm_bar_addr != U64_MAX) {
*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
(addr - bar_base_addr));

hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
hbm_bar_addr);
if (hbm_bar_addr != U64_MAX) {
*val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
}

if (hbm_bar_addr == U64_MAX)
rc = -EIO;

} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
user_address && !iommu_present(&pci_bus_type)) {

*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);

} else {
rc = -EFAULT;
}
@@ -6239,7 +6148,6 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
bool user_address, u32 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
u64 hbm_bar_addr, host_phys_end;
int rc = 0;

@@ -6247,38 +6155,31 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,

if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {

if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
WREG32(addr - CFG_BASE, val);

dev_err_ratelimited(hdev->dev,
"Can't write register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
WREG32(addr - CFG_BASE, val);
}
} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {

writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));

} else if ((addr >= SRAM_BASE_ADDR) &&
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));

u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));

hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
if (hbm_bar_addr != U64_MAX) {
writel(val, hdev->pcie_bar[HBM_BAR_ID] +
(addr - bar_base_addr));

hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
hbm_bar_addr);
if (hbm_bar_addr != U64_MAX) {
writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
}

if (hbm_bar_addr == U64_MAX)
rc = -EIO;

} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
user_address && !iommu_present(&pci_bus_type)) {

*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

} else {
rc = -EFAULT;
}
@@ -6290,7 +6191,6 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
bool user_address, u64 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
u64 hbm_bar_addr, host_phys_end;
int rc = 0;

@@ -6298,42 +6198,35 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,

if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {

if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
u32 val_l = RREG32(addr - CFG_BASE);
u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

dev_err_ratelimited(hdev->dev,
"Can't read register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
u32 val_l = RREG32(addr - CFG_BASE);
u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

*val = (((u64) val_h) << 32) | val_l;
}
*val = (((u64) val_h) << 32) | val_l;

} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if (addr <=
DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {

*val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));

} else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {

u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));

hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
if (hbm_bar_addr != U64_MAX) {
*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
(addr - bar_base_addr));

hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
hbm_bar_addr);
if (hbm_bar_addr != U64_MAX) {
*val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
}

if (hbm_bar_addr == U64_MAX)
rc = -EIO;

} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
user_address && !iommu_present(&pci_bus_type)) {

*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);

} else {
rc = -EFAULT;
}
@@ -6345,7 +6238,6 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
bool user_address, u64 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
u64 hbm_bar_addr, host_phys_end;
int rc = 0;

@@ -6353,41 +6245,33 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,

if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {

if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {

dev_err_ratelimited(hdev->dev,
"Can't write register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
WREG32(addr - CFG_BASE, lower_32_bits(val));
WREG32(addr + sizeof(u32) - CFG_BASE,
upper_32_bits(val));
}
WREG32(addr - CFG_BASE, lower_32_bits(val));
WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));

} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if (addr <=
DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {

writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));

} else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {

u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));

hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
if (hbm_bar_addr != U64_MAX) {
writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
(addr - bar_base_addr));

hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
hbm_bar_addr);
if (hbm_bar_addr != U64_MAX) {
writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
}

if (hbm_bar_addr == U64_MAX)
rc = -EIO;

} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
user_address && !iommu_present(&pci_bus_type)) {

*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

} else {
rc = -EFAULT;
}
@@ -6446,7 +6330,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
void *blob_addr)
{
u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
struct gaudi_device *gaudi = hdev->asic_specific;
u32 qm_glbl_sts0, qm_cgm_sts;
u64 dma_offset, qm_offset;
dma_addr_t dma_addr;
@@ -6462,10 +6345,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
if (!kernel_addr)
return -ENOMEM;

mutex_lock(&gaudi->clk_gate_mutex);

hdev->asic_funcs->disable_clock_gating(hdev);

hdev->asic_funcs->hw_queues_lock(hdev);

dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
@@ -6550,10 +6429,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
out:
hdev->asic_funcs->hw_queues_unlock(hdev);

hdev->asic_funcs->set_clock_gating(hdev);

mutex_unlock(&gaudi->clk_gate_mutex);

hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
dma_addr);

@@ -6601,10 +6476,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
return;
}

mutex_lock(&gaudi->clk_gate_mutex);

hdev->asic_funcs->disable_clock_gating(hdev);

gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
@@ -6882,10 +6753,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)

gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);

hdev->asic_funcs->set_clock_gating(hdev);

mutex_unlock(&gaudi->clk_gate_mutex);
}

static int gaudi_send_job_on_qman0(struct hl_device *hdev,
@@ -7266,10 +7133,8 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
struct ecc_info_extract_params *params, u64 *ecc_address,
u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 i, num_mem_regs, reg, err_bit;
u64 err_addr, err_word = 0;
int rc = 0;

num_mem_regs = params->num_memories / 32 +
((params->num_memories % 32) ? 1 : 0);
@@ -7282,11 +7147,6 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
else
err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

if (params->disable_clock_gating) {
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
}

/* Set invalid wrapper index */
*memory_wrapper_idx = 0xFF;

@@ -7303,8 +7163,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,

if (*memory_wrapper_idx == 0xFF) {
dev_err(hdev->dev, "ECC error information cannot be found\n");
rc = -EINVAL;
goto enable_clk_gate;
return -EINVAL;
}

WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
@@ -7324,14 +7183,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,

WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

enable_clk_gate:
if (params->disable_clock_gating) {
hdev->asic_funcs->set_clock_gating(hdev);

mutex_unlock(&gaudi->clk_gate_mutex);
}

return rc;
return 0;
}

/*
@@ -7589,7 +7441,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
params.num_memories = 90;
params.derr = false;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
@@ -7598,7 +7449,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
params.num_memories = 90;
params.derr = true;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_ACC_SERR:
@@ -7609,7 +7459,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
params.num_memories = 128;
params.derr = false;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_ACC_DERR:
@@ -7620,7 +7469,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
params.num_memories = 128;
params.derr = true;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_SBAB_SERR:
@@ -7632,7 +7480,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
params.num_memories = 33;
params.derr = false;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_SBAB_DERR:
@@ -7644,7 +7491,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
params.num_memories = 33;
params.derr = true;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
default:
@@ -7819,6 +7665,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev,
fw_alive->thread_id, fw_alive->uptime_seconds);
}

static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
void *data)
{
char desc[64] = "", *type;
struct eq_nic_sei_event *eq_nic_sei = data;
u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;

switch (eq_nic_sei->axi_error_cause) {
case RXB:
type = "RXB";
break;
case RXE:
type = "RXE";
break;
case TXS:
type = "TXS";
break;
case TXE:
type = "TXE";
break;
case QPC_RESP:
type = "QPC_RESP";
break;
case NON_AXI_ERR:
type = "NON_AXI_ERR";
break;
case TMR:
type = "TMR";
break;
default:
dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
eq_nic_sei->axi_error_cause);
type = "N/A";
break;
}

snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
eq_nic_sei->id);
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc);
}

static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
{
/* GAUDI doesn't support any reset except hard-reset */
@@ -7966,19 +7854,9 @@ static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
char *interrupt_name)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
bool soft_reset_required = false;

/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
* gating, and thus cannot be done in CPU-CP and should be done instead
* by the driver.
*/

mutex_lock(&gaudi->clk_gate_mutex);

hdev->asic_funcs->disable_clock_gating(hdev);

tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

@@ -7996,10 +7874,6 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
/* Clear interrupts */
WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

hdev->asic_funcs->set_clock_gating(hdev);

mutex_unlock(&gaudi->clk_gate_mutex);

return soft_reset_required;
}

@@ -8066,6 +7940,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
struct hl_eq_entry *eq_entry)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u64 data = le64_to_cpu(eq_entry->data[0]);
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
u32 fw_fatal_err_flag = 0;
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
@@ -8102,6 +7977,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_PSOC_MEM_DERR:
case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
case GAUDI_EVENT_MMU_DERR:
@@ -8202,6 +8078,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_PSOC_MEM_SERR:
case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
fallthrough;
@@ -8263,6 +8140,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
hl_fw_unmask_irq(hdev, event_type);
break;

case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
hl_fw_unmask_irq(hdev, event_type);
break;

case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
gaudi_print_irq_info(hdev, event_type, false);
gaudi_print_sm_sei_info(hdev, event_type,
@@ -8274,6 +8156,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
hl_fw_unmask_irq(hdev, event_type);
break;

case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
break;

case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
gaudi_print_clk_change_info(hdev, event_type);
hl_fw_unmask_irq(hdev, event_type);
@@ -8314,7 +8199,7 @@ reset_device:
| HL_DRV_RESET_BYPASS_REQ_TO_FW
| fw_fatal_err_flag);
else if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag);
else
hl_fw_unmask_irq(hdev, event_type);
}
@@ -8461,10 +8346,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u64 offset;
int i, dma_id, port;

mutex_lock(&gaudi->clk_gate_mutex);

hdev->asic_funcs->disable_clock_gating(hdev);

if (s)
seq_puts(s,
"\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
@@ -8585,10 +8466,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
if (s)
seq_puts(s, "\n");

hdev->asic_funcs->set_clock_gating(hdev);

mutex_unlock(&gaudi->clk_gate_mutex);

return is_idle;
}

@@ -8628,10 +8505,8 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
* this function should be used only during initialization and/or after reset,
* when there are no active users.
*/
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
u32 tpc_id)
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u64 kernel_timeout;
u32 status, offset;
int rc;
@@ -8643,10 +8518,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
else
kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

mutex_lock(&gaudi->clk_gate_mutex);

hdev->asic_funcs->disable_clock_gating(hdev);

WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
lower_32_bits(tpc_kernel));
WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
@@ -8686,8 +8557,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
dev_err(hdev->dev,
"Timeout while waiting for TPC%d icache prefetch\n",
tpc_id);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
return -EIO;
}

@@ -8711,8 +8580,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
dev_err(hdev->dev,
"Timeout while waiting for TPC%d vector pipe\n",
tpc_id);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
return -EIO;
}

@@ -8724,9 +8591,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
1000,
kernel_timeout);

hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);

if (rc) {
dev_err(hdev->dev,
"Timeout while waiting for TPC%d kernel to execute\n",
@@ -8791,7 +8655,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
hdev->internal_cb_pool_dma_addr,
HOST_SPACE_INTERNAL_CB_SZ);

hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
mutex_unlock(&ctx->mmu_lock);

if (rc)
@@ -8826,7 +8690,7 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
HOST_SPACE_INTERNAL_CB_SZ);
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
HOST_SPACE_INTERNAL_CB_SZ);
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
mutex_unlock(&ctx->mmu_lock);

gen_pool_destroy(hdev->internal_cb_pool);
@@ -9204,14 +9068,7 @@ static void gaudi_reset_sob(struct hl_device *hdev, void *data)

static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
HL_POWER9_HOST_MAGIC) {
hdev->power9_64bit_dma_enable = 1;
hdev->dma_mask = 64;
} else {
hdev->power9_64bit_dma_enable = 0;
hdev->dma_mask = 48;
}
hdev->dma_mask = 48;
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
@@ -9293,23 +9150,15 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
struct hl_sync_to_engine_map *map)
{
struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
struct gaudi_device *gaudi = hdev->asic_specific;
int i, j, rc;
u32 reg_value;

/* Iterate over TPC engines */
for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
/* TPC registered must be accessed with clock gating disabled */
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);

reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
sds->props[SP_NEXT_TPC] * i);

/* We can reenable clock_gating */
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);

rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
ENGINE_TPC, i);
if (rc)
@@ -9319,20 +9168,11 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
/* Iterate over MME engines */
for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
/* MME registered must be accessed with clock gating
* disabled
*/
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);

reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
sds->props[SP_NEXT_MME] * i +
j * sizeof(u32));

/* We can reenable clock_gating */
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);

rc = gaudi_add_sync_to_engine_map_entry(
map, reg_value, ENGINE_MME,
i * sds->props[SP_SUB_MME_ENG_NUM] + j);
@@ -9537,6 +9377,29 @@ static u32 *gaudi_get_stream_master_qid_arr(void)
return gaudi_stream_master;
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
struct cpucp_info *cpucp_info;

cpucp_info = &hdev->asic_prop.cpucp_info;

return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
&dev_attr_infineon_ver.attr,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
struct attribute_group *dev_vrm_attr_grp)
{
hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static const struct hl_asic_funcs gaudi_funcs = {
.early_init = gaudi_early_init,
.early_fini = gaudi_early_fini,
@@ -9574,17 +9437,14 @@ static const struct hl_asic_funcs gaudi_funcs = {
.debugfs_read64 = gaudi_debugfs_read64,
.debugfs_write64 = gaudi_debugfs_write64,
.debugfs_read_dma = gaudi_debugfs_read_dma,
.add_device_attr = hl_add_device_attr,
.add_device_attr = gaudi_add_device_attr,
.handle_eqe = gaudi_handle_eqe,
.set_pll_profile = hl_set_pll_profile,
.get_events_stat = gaudi_get_events_stat,
.read_pte = gaudi_read_pte,
.write_pte = gaudi_write_pte,
.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
.send_heartbeat = gaudi_send_heartbeat,
.set_clock_gating = gaudi_set_clock_gating,
.disable_clock_gating = gaudi_disable_clock_gating,
.debug_coresight = gaudi_debug_coresight,
.is_device_idle = gaudi_is_device_idle,
.non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
@@ -9600,7 +9460,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
.halt_coresight = gaudi_halt_coresight,
.ctx_init = gaudi_ctx_init,
.ctx_fini = gaudi_ctx_fini,
.get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
.load_firmware_to_device = gaudi_load_firmware_to_device,
.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
@@ -9626,7 +9485,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
.state_dump_init = gaudi_state_dump_init,
.get_sob_addr = gaudi_get_sob_addr,
.set_pci_memory_regions = gaudi_set_pci_memory_regions,
.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
.is_valid_dram_page_size = NULL
};

/**

@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2019-2020 HabanaLabs, Ltd.
* Copyright 2019-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -177,7 +177,6 @@
#define HW_CAP_MSI BIT(6)
#define HW_CAP_CPU_Q BIT(7)
#define HW_CAP_HBM_DMA BIT(8)
#define HW_CAP_CLK_GATE BIT(9)
#define HW_CAP_SRAM_SCRAMBLER BIT(10)
#define HW_CAP_HBM_SCRAMBLER BIT(11)

@@ -313,8 +312,6 @@ struct gaudi_internal_qman_info {
* struct gaudi_device - ASIC specific manage structure.
* @cpucp_info_get: get information on device from CPU-CP
* @hw_queues_lock: protects the H/W queues from concurrent access.
* @clk_gate_mutex: protects code areas that require clock gating to be disabled
* temporarily
* @internal_qmans: Internal QMANs information. The array size is larger than
* the actual number of internal queues because they are not in
* consecutive order.
@@ -337,7 +334,6 @@ struct gaudi_device {

/* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock;
struct mutex clk_gate_mutex;

struct gaudi_internal_qman_info internal_qmans[GAUDI_QUEUE_ID_SIZE];

@@ -355,8 +351,6 @@ struct gaudi_device {

void gaudi_init_security(struct hl_device *hdev);
void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
void gaudi_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0

/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/

@@ -430,6 +430,9 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
prop->dmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

/* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
@@ -438,6 +441,9 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@@ -477,6 +483,10 @@ int goya_set_fixed_properties(struct hl_device *hdev)

prop->use_get_power_for_reset_history = true;

prop->configurable_stop_on_err = true;

prop->set_max_power_on_device_init = true;

return 0;
}

@@ -893,7 +903,7 @@ int goya_late_init(struct hl_device *hdev)

goya->pm_mng_profile = PM_AUTO;

hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
goya_set_pll_profile(hdev, PLL_LOW);

schedule_delayed_work(&goya->goya_work->work_freq,
usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
@@ -2700,8 +2710,7 @@ int goya_mmu_init(struct hl_device *hdev)
WREG32_AND(mmSTLB_STLB_FEATURE_EN,
(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));

hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK);

WREG32(mmMMU_MMU_ENABLE, 1);
WREG32(mmMMU_SPI_MASK, 0xF);
@@ -5341,7 +5350,7 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
/* Treat as invalidate all because there is no range invalidation
* in Goya
*/
return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
return hl_mmu_invalidate_cache(hdev, is_hard, flags);
}

int goya_send_heartbeat(struct hl_device *hdev)
@@ -5391,16 +5400,6 @@ int goya_cpucp_info_get(struct hl_device *hdev)
return 0;
}

static void goya_set_clock_gating(struct hl_device *hdev)
{
/* clock gating not supported in Goya */
}

static void goya_disable_clock_gating(struct hl_device *hdev)
{
/* clock gating not supported in Goya */
}

static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u8 mask_len, struct seq_file *s)
{
@@ -5564,16 +5563,7 @@ static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)

static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
{
if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
HL_POWER9_HOST_MAGIC) {
dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
hdev->power9_64bit_dma_enable = 1;
hdev->dma_mask = 64;
} else {
dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
hdev->power9_64bit_dma_enable = 0;
hdev->dma_mask = 48;
}
hdev->dma_mask = 48;
}

u64 goya_get_device_time(struct hl_device *hdev)
@@ -5727,15 +5717,12 @@ static const struct hl_asic_funcs goya_funcs = {
.debugfs_read_dma = goya_debugfs_read_dma,
.add_device_attr = goya_add_device_attr,
.handle_eqe = goya_handle_eqe,
.set_pll_profile = goya_set_pll_profile,
.get_events_stat = goya_get_events_stat,
.read_pte = goya_read_pte,
.write_pte = goya_write_pte,
.mmu_invalidate_cache = goya_mmu_invalidate_cache,
.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
.send_heartbeat = goya_send_heartbeat,
.set_clock_gating = goya_set_clock_gating,
.disable_clock_gating = goya_disable_clock_gating,
.debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
.non_hard_reset_late_init = goya_non_hard_reset_late_init,
@@ -5751,7 +5738,6 @@ static const struct hl_asic_funcs goya_funcs = {
.halt_coresight = goya_halt_coresight,
.ctx_init = goya_ctx_init,
.ctx_fini = goya_ctx_fini,
.get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq,
.load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device,
@@ -5778,6 +5764,7 @@ static const struct hl_asic_funcs goya_funcs = {
.get_sob_addr = &goya_get_sob_addr,
.set_pci_memory_regions = goya_set_pci_memory_regions,
.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
.is_valid_dram_page_size = NULL
};

/*

@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -217,8 +217,8 @@ u64 goya_get_max_power(struct hl_device *hdev);
void goya_set_max_power(struct hl_device *hdev, u64 value);

void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
struct attribute_group *dev_vrm_attr_grp);
int goya_cpucp_info_get(struct hl_device *hdev);
int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0

/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/

@@ -11,21 +11,24 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
{
struct goya_device *goya = hdev->asic_specific;

if (!hdev->pdev)
return;

switch (freq) {
case PLL_HIGH:
hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
break;
case PLL_LOW:
hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
break;
case PLL_LAST:
hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
break;
default:
dev_err(hdev->dev, "unknown frequency setting\n");
@@ -41,7 +44,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;

value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, false);

if (value < 0)
return value;
@@ -74,7 +77,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}

hl_set_frequency(hdev, HL_GOYA_MME_PLL, value);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, value);
goya->mme_clk = value;

fail:
@@ -90,7 +93,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;

value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false);
value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, false);

if (value < 0)
return value;
@@ -123,7 +126,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}

hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
goya->tpc_clk = value;

fail:
@@ -139,7 +142,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;

value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false);
value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, false);

if (value < 0)
return value;
@@ -172,7 +175,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}

hl_set_frequency(hdev, HL_GOYA_IC_PLL, value);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, value);
goya->ic_clk = value;

fail:
@@ -188,7 +191,7 @@ static ssize_t mme_clk_curr_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;

value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, true);

if (value < 0)
return value;
@@ -205,7 +208,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;

value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true);
value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, true);

if (value < 0)
return value;
@@ -222,7 +225,7 @@ static ssize_t ic_clk_curr_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;

value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true);
value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, true);

if (value < 0)
return value;
@@ -347,7 +350,7 @@ static DEVICE_ATTR_RW(pm_mng_profile);
static DEVICE_ATTR_RW(tpc_clk);
static DEVICE_ATTR_RO(tpc_clk_curr);

static struct attribute *goya_dev_attrs[] = {
static struct attribute *goya_clk_dev_attrs[] = {
&dev_attr_high_pll.attr,
&dev_attr_ic_clk.attr,
&dev_attr_ic_clk_curr.attr,
@@ -356,11 +359,27 @@ static struct attribute *goya_dev_attrs[] = {
&dev_attr_pm_mng_profile.attr,
&dev_attr_tpc_clk.attr,
&dev_attr_tpc_clk_curr.attr,
NULL,
};

void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp)
static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
dev_attr_grp->attrs = goya_dev_attrs;
struct hl_device *hdev = dev_get_drvdata(dev);
struct cpucp_info *cpucp_info;

cpucp_info = &hdev->asic_prop.cpucp_info;

return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *goya_vrm_dev_attrs[] = {
&dev_attr_infineon_ver.attr,
};

void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
struct attribute_group *dev_vrm_attr_grp)
{
dev_clk_attr_grp->attrs = goya_clk_dev_attrs;
dev_vrm_attr_grp->attrs = goya_vrm_dev_attrs;
}

@@ -780,6 +780,7 @@ struct cpucp_security_info {
* (0 = functional 1 = binned)
* @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance
* (0 = functional 1 = binned)
* @fw_os_version: Firmware OS Version
*/
struct cpucp_info {
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
@@ -807,6 +808,7 @@ struct cpucp_info {
__le32 reserved6;
__u8 pll_map[PLL_MAP_LEN];
__le64 mme_binning_mask;
__u8 fw_os_version[VERSION_MAX_LEN];
};

struct cpucp_mac_addr {

@@ -33,6 +33,7 @@ enum cpu_boot_err {
CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
CPU_BOOT_ERR_BINNING_FAIL = 19,
CPU_BOOT_ERR_TPM_FAIL = 20,
CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
CPU_BOOT_ERR_ENABLED = 31,
CPU_BOOT_ERR_SCND_EN = 63,
CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
@@ -111,6 +112,9 @@ enum cpu_boot_err {
*
* CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed.
*
* CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature
* sensor.
*
* CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the
* running FW populates the error
@@ -134,6 +138,7 @@ enum cpu_boot_err {
#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL)
#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL)
#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED)


@@ -311,6 +311,16 @@ enum gaudi_async_event_id {
GAUDI_EVENT_FW_ALIVE_S = 645,
GAUDI_EVENT_DEV_RESET_REQ = 646,
GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647,
GAUDI_EVENT_STATUS_NIC0_ENG0 = 648,
GAUDI_EVENT_STATUS_NIC0_ENG1 = 649,
GAUDI_EVENT_STATUS_NIC1_ENG0 = 650,
GAUDI_EVENT_STATUS_NIC1_ENG1 = 651,
GAUDI_EVENT_STATUS_NIC2_ENG0 = 652,
GAUDI_EVENT_STATUS_NIC2_ENG1 = 653,
GAUDI_EVENT_STATUS_NIC3_ENG0 = 654,
GAUDI_EVENT_STATUS_NIC3_ENG1 = 655,
GAUDI_EVENT_STATUS_NIC4_ENG0 = 656,
GAUDI_EVENT_STATUS_NIC4_ENG1 = 657,
GAUDI_EVENT_FIX_POWER_ENV_S = 658,
GAUDI_EVENT_FIX_POWER_ENV_E = 659,
GAUDI_EVENT_FIX_THERMAL_ENV_S = 660,

@@ -1070,10 +1070,10 @@ static int kgdbts_option_setup(char *opt)
{
if (strlen(opt) >= MAX_CONFIG_LEN) {
printk(KERN_ERR "kgdbts: config string too long\n");
return -ENOSPC;
return 1;
}
strcpy(config, opt);
return 0;
return 1;
}

__setup("kgdbts=", kgdbts_option_setup);

@@ -44,14 +44,14 @@ void lkdtm_FORTIFIED_SUBOBJECT(void)
strscpy(src, "over ten bytes", size);
size = strlen(src) + 1;

pr_info("trying to strcpy past the end of a member of a struct\n");
pr_info("trying to strncpy past the end of a member of a struct\n");

/*
* memcpy(target.a, src, 20); will hit a compile error because the
* strncpy(target.a, src, 20); will hit a compile error because the
* compiler knows at build time that target.a < 20 bytes. Use a
* volatile to force a runtime error.
*/
memcpy(target.a, src, size);
strncpy(target.a, src, size);

/* Store result to global to prevent the code from being eliminated */
fortify_scratch_space = target.a[3];

@@ -2148,6 +2148,7 @@ void mei_cl_all_disconnect(struct mei_device *dev)
list_for_each_entry(cl, &dev->file_list, link)
mei_cl_set_disconnected(cl);
}
EXPORT_SYMBOL_GPL(mei_cl_all_disconnect);

static struct mei_cl *mei_cl_dma_map_find(struct mei_device *dev, u8 buffer_id)
{

@@ -107,6 +107,7 @@
#define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */
#define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */
#define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */
#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */

/*
* MEI HW Section
@@ -120,6 +121,7 @@
#define PCI_CFG_HFS_2 0x48
#define PCI_CFG_HFS_3 0x60
# define PCI_CFG_HFS_3_FW_SKU_MSK 0x00000070
# define PCI_CFG_HFS_3_FW_SKU_IGN 0x00000000
# define PCI_CFG_HFS_3_FW_SKU_SPS 0x00000060
#define PCI_CFG_HFS_4 0x64
#define PCI_CFG_HFS_5 0x68

@@ -1257,7 +1257,11 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
/* check if ME wants a reset */
if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) {
dev_warn(dev->dev, "FW not ready: resetting.\n");
schedule_work(&dev->reset_work);
if (dev->dev_state == MEI_DEV_POWERING_DOWN ||
dev->dev_state == MEI_DEV_POWER_DOWN)
mei_cl_all_disconnect(dev);
else if (dev->dev_state != MEI_DEV_DISABLED)
schedule_work(&dev->reset_work);
goto end;
}

@@ -1289,12 +1293,14 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
if (rets == -ENODATA)
break;

if (rets &&
(dev->dev_state != MEI_DEV_RESETTING &&
dev->dev_state != MEI_DEV_POWER_DOWN)) {
dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n",
rets);
schedule_work(&dev->reset_work);
if (rets) {
dev_err(dev->dev, "mei_irq_read_handler ret = %d, state = %d.\n",
rets, dev->dev_state);
if (dev->dev_state != MEI_DEV_RESETTING &&
dev->dev_state != MEI_DEV_DISABLED &&
dev->dev_state != MEI_DEV_POWERING_DOWN &&
dev->dev_state != MEI_DEV_POWER_DOWN)
schedule_work(&dev->reset_work);
goto end;
}
}
@@ -1405,16 +1411,16 @@ static bool mei_me_fw_type_sps_4(const struct pci_dev *pdev)
.quirk_probe = mei_me_fw_type_sps_4

/**
* mei_me_fw_type_sps() - check for sps sku
* mei_me_fw_type_sps_ign() - check for sps or ign sku
*
* Read ME FW Status register to check for SPS Firmware.
* The SPS FW is only signaled in pci function 0
* Read ME FW Status register to check for SPS or IGN Firmware.
* The SPS/IGN FW is only signaled in pci function 0
*
* @pdev: pci device
*
* Return: true in case of SPS firmware
* Return: true in case of SPS/IGN firmware
*/
static bool mei_me_fw_type_sps(const struct pci_dev *pdev)
static bool mei_me_fw_type_sps_ign(const struct pci_dev *pdev)
{
u32 reg;
u32 fw_type;
@@ -1427,14 +1433,15 @@ static bool mei_me_fw_type_sps(const struct pci_dev *pdev)

dev_dbg(&pdev->dev, "fw type is %d\n", fw_type);

return fw_type == PCI_CFG_HFS_3_FW_SKU_SPS;
return fw_type == PCI_CFG_HFS_3_FW_SKU_IGN ||
fw_type == PCI_CFG_HFS_3_FW_SKU_SPS;
}

#define MEI_CFG_KIND_ITOUCH \
.kind = "itouch"

#define MEI_CFG_FW_SPS \
.quirk_probe = mei_me_fw_type_sps
#define MEI_CFG_FW_SPS_IGN \
.quirk_probe = mei_me_fw_type_sps_ign

#define MEI_CFG_FW_VER_SUPP \
.fw_ver_supported = 1
@@ -1535,7 +1542,7 @@ static const struct mei_cfg mei_me_pch12_sps_cfg = {
MEI_CFG_PCH8_HFS,
MEI_CFG_FW_VER_SUPP,
MEI_CFG_DMA_128,
MEI_CFG_FW_SPS,
MEI_CFG_FW_SPS_IGN,
};

/* Cannon Lake itouch with quirk for SPS 5.0 and newer Firmware exclusion
@@ -1545,7 +1552,7 @@ static const struct mei_cfg mei_me_pch12_itouch_sps_cfg = {
MEI_CFG_KIND_ITOUCH,
MEI_CFG_PCH8_HFS,
MEI_CFG_FW_VER_SUPP,
MEI_CFG_FW_SPS,
MEI_CFG_FW_SPS_IGN,
};

/* Tiger Lake and newer devices */
@@ -1562,7 +1569,7 @@ static const struct mei_cfg mei_me_pch15_sps_cfg = {
MEI_CFG_FW_VER_SUPP,
MEI_CFG_DMA_128,
MEI_CFG_TRC,
MEI_CFG_FW_SPS,
MEI_CFG_FW_SPS_IGN,
};

/*

@@ -161,6 +161,11 @@ int mei_reset(struct mei_device *dev)
return ret;
}

if (dev->dev_state != MEI_DEV_RESETTING) {
dev_dbg(dev->dev, "wrong state = %d on link start\n", dev->dev_state);
return 0;
}

dev_dbg(dev->dev, "link is established start sending messages.\n");

mei_set_devstate(dev, MEI_DEV_INIT_CLIENTS);

@@ -424,31 +424,26 @@ int mei_irq_read_handler(struct mei_device *dev,
list_for_each_entry(cl, &dev->file_list, link) {
if (mei_cl_hbm_equal(cl, mei_hdr)) {
cl_dbg(dev, cl, "got a message\n");
break;
ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list);
goto reset_slots;
}
}

/* if no recipient cl was found we assume corrupted header */
if (&cl->link == &dev->file_list) {
/* A message for not connected fixed address clients
* should be silently discarded
* On power down client may be force cleaned,
* silently discard such messages
*/
if (hdr_is_fixed(mei_hdr) ||
dev->dev_state == MEI_DEV_POWER_DOWN) {
mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length);
ret = 0;
goto reset_slots;
}
dev_err(dev->dev, "no destination client found 0x%08X\n",
dev->rd_msg_hdr[0]);
ret = -EBADMSG;
goto end;
/* A message for not connected fixed address clients
* should be silently discarded
* On power down client may be force cleaned,
* silently discard such messages
*/
if (hdr_is_fixed(mei_hdr) ||
dev->dev_state == MEI_DEV_POWER_DOWN) {
mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length);
ret = 0;
goto reset_slots;
}

ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list);

dev_err(dev->dev, "no destination client found 0x%08X\n", dev->rd_msg_hdr[0]);
ret = -EBADMSG;
goto end;

reset_slots:
/* reset the number of slots and header */

@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <linux/interrupt.h>

@@ -113,6 +114,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)},

/* required last entry */
{0, }
@@ -192,14 +194,7 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto end;
}

if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) ||
dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {

err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err)
err = dma_set_coherent_mask(&pdev->dev,
DMA_BIT_MASK(32));
}
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
goto end;

@@ -94,7 +94,7 @@ struct ocxl_link {
struct spa *spa;
void *platform_data;
};
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static LIST_HEAD(links_list);
static DEFINE_MUTEX(links_list_lock);

enum xsl_response {

208
drivers/misc/open-dice.c
Normal file
@@ -0,0 +1,208 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2021 - Google LLC
* Author: David Brazdil <dbrazdil@google.com>
*
* Driver for Open Profile for DICE.
*
* This driver takes ownership of a reserved memory region containing data
* generated by the Open Profile for DICE measured boot protocol. The memory
* contents are not interpreted by the kernel but can be mapped into a userspace
* process via a misc device. Userspace can also request a wipe of the memory.
*
* Userspace can access the data with (w/o error handling):
*
* fd = open("/dev/open-dice0", O_RDWR);
* read(fd, &size, sizeof(unsigned long));
* data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
* write(fd, NULL, 0); // wipe
* close(fd);
*/

#include <linux/io.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/of_reserved_mem.h>
#include <linux/platform_device.h>

#define DRIVER_NAME "open-dice"

struct open_dice_drvdata {
struct mutex lock;
char name[16];
struct reserved_mem *rmem;
struct miscdevice misc;
};

static inline struct open_dice_drvdata *to_open_dice_drvdata(struct file *filp)
{
return container_of(filp->private_data, struct open_dice_drvdata, misc);
}

static int open_dice_wipe(struct open_dice_drvdata *drvdata)
{
void *kaddr;

mutex_lock(&drvdata->lock);
kaddr = devm_memremap(drvdata->misc.this_device, drvdata->rmem->base,
drvdata->rmem->size, MEMREMAP_WC);
if (IS_ERR(kaddr)) {
mutex_unlock(&drvdata->lock);
return PTR_ERR(kaddr);
}

memset(kaddr, 0, drvdata->rmem->size);
devm_memunmap(drvdata->misc.this_device, kaddr);
mutex_unlock(&drvdata->lock);
return 0;
}

/*
* Copies the size of the reserved memory region to the user-provided buffer.
*/
static ssize_t open_dice_read(struct file *filp, char __user *ptr, size_t len,
loff_t *off)
{
unsigned long val = to_open_dice_drvdata(filp)->rmem->size;

return simple_read_from_buffer(ptr, len, off, &val, sizeof(val));
}

/*
* Triggers a wipe of the reserved memory region. The user-provided pointer
* is never dereferenced.
*/
static ssize_t open_dice_write(struct file *filp, const char __user *ptr,
size_t len, loff_t *off)
{
if (open_dice_wipe(to_open_dice_drvdata(filp)))
return -EIO;

/* Consume the input buffer. */
return len;
}

/*
* Creates a mapping of the reserved memory region in user address space.
*/
static int open_dice_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct open_dice_drvdata *drvdata = to_open_dice_drvdata(filp);

/* Do not allow userspace to modify the underlying data. */
if ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))
return -EPERM;

/* Ensure userspace cannot acquire VM_WRITE + VM_SHARED later. */
if (vma->vm_flags & VM_WRITE)
vma->vm_flags &= ~VM_MAYSHARE;
else if (vma->vm_flags & VM_SHARED)
vma->vm_flags &= ~VM_MAYWRITE;

/* Create write-combine mapping so all clients observe a wipe. */
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP;
return vm_iomap_memory(vma, drvdata->rmem->base, drvdata->rmem->size);
}

static const struct file_operations open_dice_fops = {
.owner = THIS_MODULE,
.read = open_dice_read,
.write = open_dice_write,
.mmap = open_dice_mmap,
};

static int __init open_dice_probe(struct platform_device *pdev)
{
static unsigned int dev_idx;
struct device *dev = &pdev->dev;
struct reserved_mem *rmem;
struct open_dice_drvdata *drvdata;
int ret;

rmem = of_reserved_mem_lookup(dev->of_node);
if (!rmem) {
dev_err(dev, "failed to lookup reserved memory\n");
return -EINVAL;
}

if (!rmem->size || (rmem->size > ULONG_MAX)) {
dev_err(dev, "invalid memory region size\n");
return -EINVAL;
}

if (!PAGE_ALIGNED(rmem->base) || !PAGE_ALIGNED(rmem->size)) {
dev_err(dev, "memory region must be page-aligned\n");
return -EINVAL;
}

drvdata = devm_kmalloc(dev, sizeof(*drvdata), GFP_KERNEL);
if (!drvdata)
return -ENOMEM;

*drvdata = (struct open_dice_drvdata){
.lock = __MUTEX_INITIALIZER(drvdata->lock),
.rmem = rmem,
.misc = (struct miscdevice){
.parent = dev,
.name = drvdata->name,
.minor = MISC_DYNAMIC_MINOR,
.fops = &open_dice_fops,
.mode = 0600,
},
};

/* Index overflow check not needed, misc_register() will fail. */
snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++);

ret = misc_register(&drvdata->misc);
if (ret) {
dev_err(dev, "failed to register misc device '%s': %d\n",
drvdata->name, ret);
return ret;
}

platform_set_drvdata(pdev, drvdata);
return 0;
}

static int open_dice_remove(struct platform_device *pdev)
{
struct open_dice_drvdata *drvdata = platform_get_drvdata(pdev);

misc_deregister(&drvdata->misc);
return 0;
}

static const struct of_device_id open_dice_of_match[] = {
{ .compatible = "google,open-dice" },
{},
};

static struct platform_driver open_dice_driver = {
.remove = open_dice_remove,
.driver = {
.name = DRIVER_NAME,
.of_match_table = open_dice_of_match,
},
};

static int __init open_dice_init(void)
{
int ret = platform_driver_probe(&open_dice_driver, open_dice_probe);

/* DICE regions are optional. Succeed even with zero instances. */
return (ret == -ENODEV) ? 0 : ret;
}

static void __exit open_dice_exit(void)
{
platform_driver_unregister(&open_dice_driver);
}

module_init(open_dice_init);
module_exit(open_dice_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("David Brazdil <dbrazdil@google.com>");
@@ -1016,7 +1016,7 @@ static int quicktest1(unsigned long arg)
break;
}
if (ret != MQE_QUEUE_FULL || i != 4) {
printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n",
printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
smp_processor_id(), ret, i);
goto done;
}

@@ -530,12 +530,6 @@ struct gru_blade_state {
for ((i) = (k)*GRU_CBR_AU_SIZE; \
(i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)

/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
#define for_each_dsr_in_allocation_map(i, map, k) \
for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \
for ((i) = (k) * GRU_DSR_AU_CL; \
(i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)

#define gseg_physical_address(gru, ctxnum) \
((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE)
#define gseg_virtual_address(gru, ctxnum) \

@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
@@ -31,6 +32,12 @@

#define VMCI_UTIL_NUM_RESOURCES 1

/*
* Datagram buffers for DMA send/receive must accommodate at least
* a maximum sized datagram and the header.
*/
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
@@ -45,13 +52,18 @@ static u32 vm_context_id = VMCI_INVALID_ID;
struct vmci_guest_device {
struct device *dev; /* PCI device we are attached to */
void __iomem *iobase;
void __iomem *mmio_base;

bool exclusive_vectors;

struct tasklet_struct datagram_tasklet;
struct tasklet_struct bm_tasklet;
struct wait_queue_head inout_wq;

void *data_buffer;
dma_addr_t data_buffer_base;
void *tx_buffer;
dma_addr_t tx_buffer_base;
void *notification_bitmap;
dma_addr_t notification_base;
};
@@ -89,6 +101,92 @@ u32 vmci_get_vm_context_id(void)
return vm_context_id;
}

static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
if (dev->mmio_base != NULL)
return readl(dev->mmio_base + reg);
return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
if (dev->mmio_base != NULL)
writel(val, dev->mmio_base + reg);
else
iowrite32(val, dev->iobase + reg);
}

static void vmci_read_data(struct vmci_guest_device *vmci_dev,
void *dest, size_t size)
{
if (vmci_dev->mmio_base == NULL)
ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
dest, size);
else {
/*
* For DMA datagrams, the data_buffer will contain the header on the
* first page, followed by the incoming datagram(s) on the following
* pages. The header uses an S/G element immediately following the
* header on the first page to point to the data area.
*/
struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
size_t buffer_offset = dest - vmci_dev->data_buffer;

buffer_header->opcode = 1;
buffer_header->size = 1;
buffer_header->busy = 0;
sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
sg_array[0].size = size;

vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
VMCI_DATA_IN_LOW_ADDR);

wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
}
}

static int vmci_write_data(struct vmci_guest_device *dev,
struct vmci_datagram *dg)
{
int result;

if (dev->mmio_base != NULL) {
struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
u8 *dg_out_buffer = (u8 *)(buffer_header + 1);

if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
return VMCI_ERROR_INVALID_ARGS;

/*
* Initialize send buffer with outgoing datagram
* and set up header for inline data. Device will
* not access buffer asynchronously - only after
* the write to VMCI_DATA_OUT_LOW_ADDR.
*/
memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
buffer_header->opcode = 0;
buffer_header->size = VMCI_DG_SIZE(dg);
buffer_header->busy = 1;

vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
VMCI_DATA_OUT_LOW_ADDR);

/* Caller holds a spinlock, so cannot block. */
spin_until_cond(buffer_header->busy == 0);

result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
if (result == VMCI_SUCCESS)
result = (int)buffer_header->result;
} else {
iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
dg, VMCI_DG_SIZE(dg));
result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
}

return result;
}

/*
* VM to hypervisor call mechanism. We use the standard VMware naming
* convention since shared code is calling this function as well.
@@ -114,9 +212,8 @@ int vmci_send_datagram(struct vmci_datagram *dg)
spin_lock_irqsave(&vmci_dev_spinlock, flags);

if (vmci_dev_g) {
iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
dg, VMCI_DG_SIZE(dg));
result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
vmci_write_data(vmci_dev_g, dg);
result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
} else {
result = VMCI_ERROR_UNAVAILABLE;
}
@@ -156,9 +253,9 @@ static void vmci_guest_cid_update(u32 sub_id,

/*
* Verify that the host supports the hypercalls we need. If it does not,
* try to find fallback hypercalls and use those instead. Returns
* true if required hypercalls (or fallback hypercalls) are
* supported by the host, false otherwise.
* try to find fallback hypercalls and use those instead. Returns 0 if
* required hypercalls (or fallback hypercalls) are supported by the host,
* an error code otherwise.
*/
static int vmci_check_host_caps(struct pci_dev *pdev)
{
@@ -195,15 +292,17 @@ static int vmci_check_host_caps(struct pci_dev *pdev)
}

/*
* Reads datagrams from the data in port and dispatches them. We
* always start reading datagrams into only the first page of the
* datagram buffer. If the datagrams don't fit into one page, we
* use the maximum datagram buffer size for the remainder of the
* invocation. This is a simple heuristic for not penalizing
* small datagrams.
* Reads datagrams from the device and dispatches them. For IO port
* based access to the device, we always start reading datagrams into
* only the first page of the datagram buffer. If the datagrams don't
* fit into one page, we use the maximum datagram buffer size for the
* remainder of the invocation. This is a simple heuristic for not
* penalizing small datagrams. For DMA-based datagrams, we always
* use the maximum datagram buffer size, since there is no performance
* penalty for doing so.
*
* This function assumes that it has exclusive access to the data
* in port for the duration of the call.
* in register(s) for the duration of the call.
*/
static void vmci_dispatch_dgs(unsigned long data)
{
@@ -211,23 +310,41 @@ static void vmci_dispatch_dgs(unsigned long data)
u8 *dg_in_buffer = vmci_dev->data_buffer;
struct vmci_datagram *dg;
size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
size_t current_dg_in_buffer_size = PAGE_SIZE;
size_t current_dg_in_buffer_size;
size_t remaining_bytes;
bool is_io_port = vmci_dev->mmio_base == NULL;

BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
vmci_dev->data_buffer, current_dg_in_buffer_size);
if (!is_io_port) {
/* For mmio, the first page is used for the header. */
dg_in_buffer += PAGE_SIZE;

/*
* For DMA-based datagram operations, there is no performance
* penalty for reading the maximum buffer size.
*/
current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
} else {
current_dg_in_buffer_size = PAGE_SIZE;
}
vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
dg = (struct vmci_datagram *)dg_in_buffer;
remaining_bytes = current_dg_in_buffer_size;

/*
* Read through the buffer until an invalid datagram header is
* encountered. The exit condition for datagrams read through
* VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
* can start on any page boundary in the buffer.
*/
while (dg->dst.resource != VMCI_INVALID_ID ||
remaining_bytes > PAGE_SIZE) {
(is_io_port && remaining_bytes > PAGE_SIZE)) {
unsigned dg_in_size;

/*
* When the input buffer spans multiple pages, a datagram can
* start on any page boundary in the buffer.
* If using VMCI_DATA_IN_ADDR, skip to the next page
* as a datagram can start on any page boundary.
*/
if (dg->dst.resource == VMCI_INVALID_ID) {
dg = (struct vmci_datagram *)roundup(
@@ -277,11 +394,10 @@ static void vmci_dispatch_dgs(unsigned long data)
current_dg_in_buffer_size =
dg_in_buffer_size;

ioread8_rep(vmci_dev->iobase +
VMCI_DATA_IN_ADDR,
vmci_dev->data_buffer +
vmci_read_data(vmci_dev,
dg_in_buffer +
remaining_bytes,
current_dg_in_buffer_size -
current_dg_in_buffer_size -
remaining_bytes);
}

@@ -319,10 +435,8 @@ static void vmci_dispatch_dgs(unsigned long data)
current_dg_in_buffer_size = dg_in_buffer_size;

for (;;) {
ioread8_rep(vmci_dev->iobase +
VMCI_DATA_IN_ADDR,
vmci_dev->data_buffer,
current_dg_in_buffer_size);
vmci_read_data(vmci_dev, dg_in_buffer,
current_dg_in_buffer_size);
if (bytes_to_skip <= current_dg_in_buffer_size)
break;

@@ -339,8 +453,7 @@ static void vmci_dispatch_dgs(unsigned long data)
if (remaining_bytes < VMCI_DG_HEADERSIZE) {
/* Get the next batch of datagrams. */

ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
vmci_dev->data_buffer,
vmci_read_data(vmci_dev, dg_in_buffer,
current_dg_in_buffer_size);
dg = (struct vmci_datagram *)dg_in_buffer;
remaining_bytes = current_dg_in_buffer_size;
@@ -384,7 +497,7 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
unsigned int icr;

/* Acknowledge interrupt and determine what needs doing. */
icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
if (icr == 0 || icr == ~0)
return IRQ_NONE;

@@ -398,6 +511,12 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
icr &= ~VMCI_ICR_NOTIFICATION;
}

if (icr & VMCI_ICR_DMA_DATAGRAM) {
wake_up_all(&dev->inout_wq);
icr &= ~VMCI_ICR_DMA_DATAGRAM;
}

if (icr != 0)
dev_warn(dev->dev,
"Ignoring unknown interrupt cause (%d)\n",
@@ -422,6 +541,38 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
return IRQ_HANDLED;
}

/*
* Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
* which is for the completion of a DMA datagram send or receive operation.
* Will only get called if we are using MSI-X with exclusive vectors.
*/
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
struct vmci_guest_device *dev = _dev;

wake_up_all(&dev->inout_wq);

return IRQ_HANDLED;
}

static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
if (vmci_dev->mmio_base != NULL) {
if (vmci_dev->tx_buffer != NULL)
dma_free_coherent(vmci_dev->dev,
VMCI_DMA_DG_BUFFER_SIZE,
vmci_dev->tx_buffer,
vmci_dev->tx_buffer_base);
if (vmci_dev->data_buffer != NULL)
dma_free_coherent(vmci_dev->dev,
VMCI_DMA_DG_BUFFER_SIZE,
vmci_dev->data_buffer,
vmci_dev->data_buffer_base);
} else {
vfree(vmci_dev->data_buffer);
}
}

/*
* Most of the initialization at module load time is done here.
*/
@@ -429,7 +580,9 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct vmci_guest_device *vmci_dev;
void __iomem *iobase;
void __iomem *iobase = NULL;
void __iomem *mmio_base = NULL;
unsigned int num_irq_vectors;
unsigned int capabilities;
unsigned int caps_in_use;
unsigned long cmd;
@@ -445,16 +598,29 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
return error;
}

error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
if (error) {
dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
return error;
/*
* The VMCI device with mmio access to registers requests 256KB
* for BAR1. If present, driver will use new VMCI device
* functionality for register access and datagram send/recv.
*/

if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
dev_info(&pdev->dev, "MMIO register access is available\n");
mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
VMCI_MMIO_ACCESS_SIZE);
/* If the map fails, we fall back to IOIO access. */
if (!mmio_base)
dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
}

iobase = pcim_iomap_table(pdev)[0];

dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n",
(unsigned long)iobase, pdev->irq);
if (!mmio_base) {
error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
if (error) {
dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
return error;
}
iobase = pcim_iomap_table(pdev)[0];
}

vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
if (!vmci_dev) {
@@ -466,17 +632,35 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_dev->dev = &pdev->dev;
vmci_dev->exclusive_vectors = false;
vmci_dev->iobase = iobase;
vmci_dev->mmio_base = mmio_base;

tasklet_init(&vmci_dev->datagram_tasklet,
vmci_dispatch_dgs, (unsigned long)vmci_dev);
tasklet_init(&vmci_dev->bm_tasklet,
vmci_process_bitmap, (unsigned long)vmci_dev);
init_waitqueue_head(&vmci_dev->inout_wq);

vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
if (mmio_base != NULL) {
vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
&vmci_dev->tx_buffer_base,
GFP_KERNEL);
if (!vmci_dev->tx_buffer) {
dev_err(&pdev->dev,
"Can't allocate memory for datagram tx buffer\n");
return -ENOMEM;
}

vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
&vmci_dev->data_buffer_base,
GFP_KERNEL);
} else {
vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
}
if (!vmci_dev->data_buffer) {
dev_err(&pdev->dev,
"Can't allocate memory for datagram buffer\n");
return -ENOMEM;
error = -ENOMEM;
goto err_free_data_buffers;
}

pci_set_master(pdev); /* To enable queue_pair functionality. */
@@ -490,11 +674,11 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
*
* Right now, we need datagrams. There are no fallbacks.
*/
capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR);
capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
dev_err(&pdev->dev, "Device does not support datagrams\n");
error = -ENXIO;
goto err_free_data_buffer;
goto err_free_data_buffers;
}
caps_in_use = VMCI_CAPS_DATAGRAM;

@@ -522,19 +706,39 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_dev->notification_bitmap = dma_alloc_coherent(
&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
GFP_KERNEL);
if (!vmci_dev->notification_bitmap) {
if (!vmci_dev->notification_bitmap)
dev_warn(&pdev->dev,
"Unable to allocate notification bitmap\n");
} else {
memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
else
caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
}

if (mmio_base != NULL) {
if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
} else {
dev_err(&pdev->dev,
"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
error = -ENXIO;
goto err_free_notification_bitmap;
}
}

dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

/* Let the host know which capabilities we intend to use. */
iowrite32(caps_in_use, vmci_dev->iobase + VMCI_CAPS_ADDR);
vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
/* Let the device know the size for pages passed down. */
vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);

/* Configure the high order parts of the data in/out buffers. */
vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
VMCI_DATA_IN_HIGH_ADDR);
vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
VMCI_DATA_OUT_HIGH_ADDR);
}

/* Set up global device so that we can start sending datagrams */
spin_lock_irq(&vmci_dev_spinlock);
@@ -561,7 +765,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
/* Check host capabilities. */
error = vmci_check_host_caps(pdev);
if (error)
goto err_remove_bitmap;
goto err_remove_vmci_dev_g;

/* Enable device. */

@@ -581,13 +785,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
* Enable interrupts. Try MSI-X first, then MSI, and then fallback on
* legacy interrupts.
*/
error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
PCI_IRQ_MSIX);
if (vmci_dev->mmio_base != NULL)
num_irq_vectors = VMCI_MAX_INTRS;
else
num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
PCI_IRQ_MSIX);
if (error < 0) {
error = pci_alloc_irq_vectors(pdev, 1, 1,
PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
if (error < 0)
goto err_remove_bitmap;
goto err_unsubscribe_event;
} else {
vmci_dev->exclusive_vectors = true;
}
@@ -620,6 +828,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
pci_irq_vector(pdev, 1), error);
goto err_free_irq;
}
if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
error = request_irq(pci_irq_vector(pdev, 2),
vmci_interrupt_dma_datagram,
0, KBUILD_MODNAME, vmci_dev);
if (error) {
dev_err(&pdev->dev,
"Failed to allocate irq %u: %d\n",
pci_irq_vector(pdev, 2), error);
goto err_free_bm_irq;
}
}
}

dev_dbg(&pdev->dev, "Registered device\n");
@@ -630,17 +849,22 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
cmd = VMCI_IMR_DATAGRAM;
if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
cmd |= VMCI_IMR_NOTIFICATION;
iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR);
if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
cmd |= VMCI_IMR_DMA_DATAGRAM;
vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);

/* Enable interrupts. */
iowrite32(VMCI_CONTROL_INT_ENABLE,
vmci_dev->iobase + VMCI_CONTROL_ADDR);
vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);

pci_set_drvdata(pdev, vmci_dev);

vmci_call_vsock_callback(false);
return 0;

err_free_bm_irq:
if (vmci_dev->exclusive_vectors)
free_irq(pci_irq_vector(pdev, 1), vmci_dev);

err_free_irq:
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
tasklet_kill(&vmci_dev->datagram_tasklet);
@@ -649,29 +873,29 @@ err_free_irq:
err_disable_msi:
pci_free_irq_vectors(pdev);

err_unsubscribe_event:
vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
if (vmci_err < VMCI_SUCCESS)
dev_warn(&pdev->dev,
"Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_bitmap:
if (vmci_dev->notification_bitmap) {
iowrite32(VMCI_CONTROL_RESET,
vmci_dev->iobase + VMCI_CONTROL_ADDR);
dma_free_coherent(&pdev->dev, PAGE_SIZE,
vmci_dev->notification_bitmap,
vmci_dev->notification_base);
}

err_remove_vmci_dev_g:
spin_lock_irq(&vmci_dev_spinlock);
vmci_pdev = NULL;
vmci_dev_g = NULL;
spin_unlock_irq(&vmci_dev_spinlock);

err_free_data_buffer:
vfree(vmci_dev->data_buffer);
err_free_notification_bitmap:
if (vmci_dev->notification_bitmap) {
vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
dma_free_coherent(&pdev->dev, PAGE_SIZE,
vmci_dev->notification_bitmap,
vmci_dev->notification_base);
}

err_free_data_buffers:
vmci_free_dg_buffers(vmci_dev);

/* The rest are managed resources and will be freed by PCI core */
return error;
@@ -700,15 +924,18 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
spin_unlock_irq(&vmci_dev_spinlock);

dev_dbg(&pdev->dev, "Resetting vmci device\n");
iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR);
vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

/*
* Free IRQ and then disable MSI/MSI-X as appropriate. For
* MSI-X, we might have multiple vectors, each with their own
* IRQ, which we must free too.
*/
if (vmci_dev->exclusive_vectors)
if (vmci_dev->exclusive_vectors) {
free_irq(pci_irq_vector(pdev, 1), vmci_dev);
if (vmci_dev->mmio_base != NULL)
free_irq(pci_irq_vector(pdev, 2), vmci_dev);
}
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
pci_free_irq_vectors(pdev);

@@ -726,7 +953,10 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
vmci_dev->notification_base);
}

vfree(vmci_dev->data_buffer);
vmci_free_dg_buffers(vmci_dev);

if (vmci_dev->mmio_base != NULL)
pci_iounmap(pdev, vmci_dev->mmio_base);

/* The rest are managed resources and will be freed by PCI core */
}