Merge tag 'char-misc-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc

Pull char/misc and other driver updates from Greg KH:
 "Here is the big set of char/misc and other small driver subsystem
  updates for 5.18-rc1.

  Included in here are merges from driver subsystems which contain:

   - iio driver updates and new drivers

   - fsi driver updates

   - fpga driver updates

   - habanalabs driver updates and support for new hardware

   - soundwire driver updates and new drivers

   - phy driver updates and new drivers

   - coresight driver updates

   - icc driver updates

  Individual changes include:

   - mei driver updates

   - interconnect driver updates

   - new PECI driver subsystem added

   - vmci driver updates

   - lots of tiny misc/char driver updates

  All of these have been in linux-next for a while with no reported
  problems"

* tag 'char-misc-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (556 commits)
  firmware: google: Properly state IOMEM dependency
  kgdbts: fix return value of __setup handler
  firmware: sysfb: fix platform-device leak in error path
  firmware: stratix10-svc: add missing callback parameter on RSU
  arm64: dts: qcom: add non-secure domain property to fastrpc nodes
  misc: fastrpc: Add dma handle implementation
  misc: fastrpc: Add fdlist implementation
  misc: fastrpc: Add helper function to get list and page
  misc: fastrpc: Add support to secure memory map
  dt-bindings: misc: add fastrpc domain vmid property
  misc: fastrpc: check before loading process to the DSP
  misc: fastrpc: add secure domain support
  dt-bindings: misc: add property to support non-secure DSP
  misc: fastrpc: Add support to get DSP capabilities
  misc: fastrpc: add support for FASTRPC_IOCTL_MEM_MAP/UNMAP
  misc: fastrpc: separate fastrpc device from channel context
  dt-bindings: nvmem: brcm,nvram: add basic NVMEM cells
  dt-bindings: nvmem: make "reg" property optional
  nvmem: brcm_nvram: parse NVRAM content into NVMEM cells
  nvmem: dt-bindings: Fix the error of dt-bindings check
  ...
This commit is contained in:
Linus Torvalds
2022-03-28 12:27:35 -07:00
567 changed files with 27052 additions and 6728 deletions

View File

@@ -259,6 +259,7 @@ config QCOM_FASTRPC
depends on ARCH_QCOM || COMPILE_TEST
depends on RPMSG
select DMA_SHARED_BUFFER
select QCOM_SCM
help
Provides a communication mechanism that allows for clients to
make remote method invocations across processor boundary to
@@ -470,6 +471,18 @@ config HISI_HIKEY_USB
switching between the dual-role USB-C port and the USB-A host ports
using only one USB controller.
config OPEN_DICE
tristate "Open Profile for DICE driver"
depends on OF_RESERVED_MEM
help
This driver exposes a DICE reserved memory region to userspace via
a character device. The memory region contains Compound Device
Identifiers (CDIs) generated by firmware as an output of DICE
measured boot flow. Userspace can use CDIs for remote attestation
and sealing.
If unsure, say N.
source "drivers/misc/c2port/Kconfig"
source "drivers/misc/eeprom/Kconfig"
source "drivers/misc/cb710/Kconfig"

View File

@@ -59,3 +59,4 @@ obj-$(CONFIG_UACCE) += uacce/
obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
obj-$(CONFIG_HI6421V600_IRQ) += hi6421v600-irq.o
obj-$(CONFIG_OPEN_DICE) += open-dice.o

View File

@@ -1633,7 +1633,6 @@ static void bcm_vk_shutdown(struct pci_dev *pdev)
static const struct pci_device_id bcm_vk_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VALKYRIE), },
{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VIPER), },
{ }
};
MODULE_DEVICE_TABLE(pci, bcm_vk_ids);

View File

@@ -266,7 +266,7 @@ static int alcor_pci_probe(struct pci_dev *pdev,
if (!priv)
return -ENOMEM;
ret = ida_simple_get(&alcor_pci_idr, 0, 0, GFP_KERNEL);
ret = ida_alloc(&alcor_pci_idr, GFP_KERNEL);
if (ret < 0)
return ret;
priv->id = ret;
@@ -280,7 +280,8 @@ static int alcor_pci_probe(struct pci_dev *pdev,
ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI);
if (ret) {
dev_err(&pdev->dev, "Cannot request region\n");
return -ENOMEM;
ret = -ENOMEM;
goto error_free_ida;
}
if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
@@ -324,6 +325,8 @@ static int alcor_pci_probe(struct pci_dev *pdev,
error_release_regions:
pci_release_regions(pdev);
error_free_ida:
ida_free(&alcor_pci_idr, priv->id);
return ret;
}
@@ -337,7 +340,7 @@ static void alcor_pci_remove(struct pci_dev *pdev)
mfd_remove_devices(&pdev->dev);
ida_simple_remove(&alcor_pci_idr, priv->id);
ida_free(&alcor_pci_idr, priv->id);
pci_release_regions(pdev);
pci_set_drvdata(pdev, NULL);

View File

@@ -76,7 +76,7 @@ static void rtl8411b_fetch_vendor_settings(struct rtsx_pcr *pcr)
map_sd_drive(rtl8411b_reg_to_sd30_drive_sel_3v3(reg));
}
static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07);
}

View File

@@ -47,7 +47,7 @@ static void rts5209_fetch_vendor_settings(struct rtsx_pcr *pcr)
}
}
static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07);
}

View File

@@ -72,6 +72,8 @@ static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr)
pci_read_config_dword(pdev, PCR_SETTING_REG2, &reg);
pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
if (CHK_PCI_PID(pcr, 0x522A))
pcr->rtd3_en = rtsx_reg_to_rtd3(reg);
if (rtsx_check_mmc_support(reg))
pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg);
@@ -171,6 +173,28 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
else
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x00);
if (CHK_PCI_PID(pcr, 0x522A))
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
if (pcr->rtd3_en) {
if (CHK_PCI_PID(pcr, 0x522A)) {
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PM_CTRL3, 0x01, 0x01);
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PME_FORCE_CTL, 0x30, 0x30);
} else {
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CTRL3, 0x01, 0x01);
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PME_FORCE_CTL, 0xFF, 0x33);
}
} else {
if (CHK_PCI_PID(pcr, 0x522A)) {
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PM_CTRL3, 0x01, 0x00);
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PME_FORCE_CTL, 0x30, 0x20);
} else {
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PME_FORCE_CTL, 0xFF, 0x30);
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CTRL3, 0x01, 0x00);
}
}
if (option->force_clkreq_0)
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
@@ -438,6 +462,28 @@ static int rts522a_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
return rtsx_pci_send_cmd(pcr, 100);
}
static void rts522a_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0);
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3,
RELINK_TIME_MASK, 0);
rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3,
D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);
if (!runtime) {
rtsx_pci_write_register(pcr, RTS522A_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, 0);
rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS522A_PME_FORCE_CTL, 0x30, 0x20);
}
rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
}
static void rts522a_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active)
{
struct rtsx_cr_option *option = &pcr->option;
@@ -473,6 +519,7 @@ static const struct pcr_ops rts522a_pcr_ops = {
.card_power_on = rts5227_card_power_on,
.card_power_off = rts5227_card_power_off,
.switch_output_voltage = rts522a_switch_output_voltage,
.force_power_down = rts522a_force_power_down,
.cd_deglitch = NULL,
.conv_clk_and_div_n = NULL,
.set_l1off_cfg_sub_d0 = rts522a_set_l1off_cfg_sub_d0,

View File

@@ -91,7 +91,7 @@ static int rts5228_optimize_phy(struct rtsx_pcr *pcr)
return rtsx_pci_write_phy_register(pcr, 0x07, 0x8F40);
}
static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
@@ -102,6 +102,14 @@ static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3,
D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);
if (!runtime) {
rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, 0);
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
}
rtsx_pci_write_register(pcr, FPDCTL,
SSC_POWER_DOWN, SSC_POWER_DOWN);
}
@@ -480,9 +488,18 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
if (pcr->rtd3_en) {
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x01);
rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE,
FORCE_PM_CONTROL | FORCE_PM_VALUE);
} else {
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
}
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, D3_DELINK_MODE_EN, 0x00);
return 0;
}

View File

@@ -44,7 +44,7 @@ static void rts5229_fetch_vendor_settings(struct rtsx_pcr *pcr)
map_sd_drive(rtsx_reg_to_sd30_drive_sel_3v3(reg));
}
static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03);
}

View File

@@ -74,7 +74,8 @@ static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr)
pci_read_config_dword(pdev, PCR_SETTING_REG2, &reg);
pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
pcr->rtd3_en = rtsx_reg_to_rtd3_uhsii(reg);
if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A))
pcr->rtd3_en = rtsx_reg_to_rtd3_uhsii(reg);
if (rtsx_check_mmc_support(reg))
pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
@@ -143,6 +144,27 @@ static int rts5249_init_from_hw(struct rtsx_pcr *pcr)
return 0;
}
static void rts52xa_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0);
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3,
RELINK_TIME_MASK, 0);
rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3,
D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);
if (!runtime) {
rtsx_pci_write_register(pcr, RTS524A_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, 0);
rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, 0x30, 0x20);
}
rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
}
static void rts52xa_save_content_from_efuse(struct rtsx_pcr *pcr)
{
u8 cnt, sv;
@@ -281,8 +303,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr)
rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF);
if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A))
if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) {
rtsx_pci_write_register(pcr, REG_VREF, PWD_SUSPND_EN, PWD_SUSPND_EN);
rtsx_pci_write_register(pcr, RTS524A_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
}
if (pcr->rtd3_en) {
if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) {
@@ -724,6 +749,7 @@ static const struct pcr_ops rts524a_pcr_ops = {
.card_power_on = rtsx_base_card_power_on,
.card_power_off = rtsx_base_card_power_off,
.switch_output_voltage = rtsx_base_switch_output_voltage,
.force_power_down = rts52xa_force_power_down,
.set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0,
};
@@ -841,6 +867,7 @@ static const struct pcr_ops rts525a_pcr_ops = {
.card_power_on = rts525a_card_power_on,
.card_power_off = rtsx_base_card_power_off,
.switch_output_voltage = rts525a_switch_output_voltage,
.force_power_down = rts52xa_force_power_down,
.set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0,
};

View File

@@ -91,7 +91,7 @@ static void rtsx5261_fetch_vendor_settings(struct rtsx_pcr *pcr)
pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(reg);
}
static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
@@ -103,6 +103,24 @@ static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3,
D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);
if (!runtime) {
rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, 0);
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
} else {
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, 0);
rtsx_pci_write_register(pcr, RTS5261_FW_CTL,
RTS5261_INFORM_RTD3_COLD, RTS5261_INFORM_RTD3_COLD);
rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4,
RTS5261_FORCE_PRSNT_LOW, RTS5261_FORCE_PRSNT_LOW);
}
rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL,
SSC_POWER_DOWN, SSC_POWER_DOWN);
}
@@ -536,9 +554,18 @@ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
if (pcr->rtd3_en) {
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x01);
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE,
FORCE_PM_CONTROL | FORCE_PM_VALUE);
} else {
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
}
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, D3_DELINK_MODE_EN, 0x00);
/* Clear Enter RTD3_cold Information*/
rtsx_pci_write_register(pcr, RTS5261_FW_CTL,

View File

@@ -152,20 +152,12 @@ void rtsx_pci_start_run(struct rtsx_pcr *pcr)
if (pcr->remove_pci)
return;
if (pcr->rtd3_en)
if (pcr->is_runtime_suspended) {
pm_runtime_get(&(pcr->pci->dev));
pcr->is_runtime_suspended = false;
}
if (pcr->state != PDEV_STAT_RUN) {
pcr->state = PDEV_STAT_RUN;
if (pcr->ops->enable_auto_blink)
pcr->ops->enable_auto_blink(pcr);
rtsx_pm_full_on(pcr);
}
mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200));
}
EXPORT_SYMBOL_GPL(rtsx_pci_start_run);
@@ -1062,73 +1054,7 @@ static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr)
return 0;
}
static void rtsx_enable_aspm(struct rtsx_pcr *pcr)
{
if (pcr->ops->set_aspm)
pcr->ops->set_aspm(pcr, true);
else
rtsx_comm_set_aspm(pcr, true);
}
static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr)
{
struct rtsx_cr_option *option = &pcr->option;
if (option->ltr_enabled) {
u32 latency = option->ltr_l1off_latency;
if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN))
mdelay(option->l1_snooze_delay);
rtsx_set_ltr_latency(pcr, latency);
}
if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN))
rtsx_set_l1off_sub_cfg_d0(pcr, 0);
rtsx_enable_aspm(pcr);
}
static void rtsx_pm_power_saving(struct rtsx_pcr *pcr)
{
rtsx_comm_pm_power_saving(pcr);
}
static void rtsx_pci_rtd3_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, rtd3_work);
pcr_dbg(pcr, "--> %s\n", __func__);
if (!pcr->is_runtime_suspended)
pm_runtime_put(&(pcr->pci->dev));
}
static void rtsx_pci_idle_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work);
pcr_dbg(pcr, "--> %s\n", __func__);
mutex_lock(&pcr->pcr_mutex);
pcr->state = PDEV_STAT_IDLE;
if (pcr->ops->disable_auto_blink)
pcr->ops->disable_auto_blink(pcr);
if (pcr->ops->turn_off_led)
pcr->ops->turn_off_led(pcr);
rtsx_pm_power_saving(pcr);
mutex_unlock(&pcr->pcr_mutex);
if (pcr->rtd3_en)
mod_delayed_work(system_wq, &pcr->rtd3_work, msecs_to_jiffies(10000));
}
static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
static void rtsx_base_force_power_down(struct rtsx_pcr *pcr)
{
/* Set relink_time to 0 */
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
@@ -1142,7 +1068,7 @@ static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
}
static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state)
static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
if (pcr->ops->turn_off_led)
pcr->ops->turn_off_led(pcr);
@@ -1154,9 +1080,9 @@ static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state)
rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, pm_state);
if (pcr->ops->force_power_down)
pcr->ops->force_power_down(pcr, pm_state);
pcr->ops->force_power_down(pcr, pm_state, runtime);
else
rtsx_base_force_power_down(pcr, pm_state);
rtsx_base_force_power_down(pcr);
}
void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr)
@@ -1598,7 +1524,6 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
pcr->card_inserted = 0;
pcr->card_removed = 0;
INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect);
INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work);
pcr->msi_en = msi_en;
if (pcr->msi_en) {
@@ -1623,20 +1548,14 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
rtsx_pcr_cells[i].pdata_size = sizeof(*handle);
}
if (pcr->rtd3_en) {
INIT_DELAYED_WORK(&pcr->rtd3_work, rtsx_pci_rtd3_work);
pm_runtime_allow(&pcidev->dev);
pm_runtime_enable(&pcidev->dev);
pcr->is_runtime_suspended = false;
}
ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells,
ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL);
if (ret < 0)
goto free_slots;
schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
pm_runtime_allow(&pcidev->dev);
pm_runtime_put(&pcidev->dev);
return 0;
@@ -1668,11 +1587,11 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;
if (pcr->rtd3_en)
pm_runtime_get_noresume(&pcr->pci->dev);
pcr->remove_pci = true;
pm_runtime_get_sync(&pcidev->dev);
pm_runtime_forbid(&pcidev->dev);
/* Disable interrupts at the pcr level */
spin_lock_irq(&pcr->lock);
rtsx_pci_writel(pcr, RTSX_BIER, 0);
@@ -1680,9 +1599,6 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
spin_unlock_irq(&pcr->lock);
cancel_delayed_work_sync(&pcr->carddet_work);
cancel_delayed_work_sync(&pcr->idle_work);
if (pcr->rtd3_en)
cancel_delayed_work_sync(&pcr->rtd3_work);
mfd_remove_devices(&pcidev->dev);
@@ -1700,11 +1616,6 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
idr_remove(&rtsx_pci_idr, pcr->id);
spin_unlock(&rtsx_pci_lock);
if (pcr->rtd3_en) {
pm_runtime_disable(&pcr->pci->dev);
pm_runtime_put_noidle(&pcr->pci->dev);
}
kfree(pcr->slots);
kfree(pcr);
kfree(handle);
@@ -1717,22 +1628,16 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
static int __maybe_unused rtsx_pci_suspend(struct device *dev_d)
{
struct pci_dev *pcidev = to_pci_dev(dev_d);
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;
dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;
cancel_delayed_work(&pcr->carddet_work);
cancel_delayed_work(&pcr->idle_work);
cancel_delayed_work_sync(&pcr->carddet_work);
mutex_lock(&pcr->pcr_mutex);
rtsx_pci_power_off(pcr, HOST_ENTER_S3);
device_wakeup_disable(dev_d);
rtsx_pci_power_off(pcr, HOST_ENTER_S3, false);
mutex_unlock(&pcr->pcr_mutex);
return 0;
@@ -1741,15 +1646,12 @@ static int __maybe_unused rtsx_pci_suspend(struct device *dev_d)
static int __maybe_unused rtsx_pci_resume(struct device *dev_d)
{
struct pci_dev *pcidev = to_pci_dev(dev_d);
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;
int ret = 0;
dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;
mutex_lock(&pcr->pcr_mutex);
ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);
@@ -1760,8 +1662,6 @@ static int __maybe_unused rtsx_pci_resume(struct device *dev_d)
if (ret)
goto out;
schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
out:
mutex_unlock(&pcr->pcr_mutex);
return ret;
@@ -1769,16 +1669,46 @@ out:
#ifdef CONFIG_PM
static void rtsx_enable_aspm(struct rtsx_pcr *pcr)
{
if (pcr->ops->set_aspm)
pcr->ops->set_aspm(pcr, true);
else
rtsx_comm_set_aspm(pcr, true);
}
static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr)
{
struct rtsx_cr_option *option = &pcr->option;
if (option->ltr_enabled) {
u32 latency = option->ltr_l1off_latency;
if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN))
mdelay(option->l1_snooze_delay);
rtsx_set_ltr_latency(pcr, latency);
}
if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN))
rtsx_set_l1off_sub_cfg_d0(pcr, 0);
rtsx_enable_aspm(pcr);
}
static void rtsx_pm_power_saving(struct rtsx_pcr *pcr)
{
rtsx_comm_pm_power_saving(pcr);
}
static void rtsx_pci_shutdown(struct pci_dev *pcidev)
{
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;
dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;
rtsx_pci_power_off(pcr, HOST_ENTER_S1);
rtsx_pci_power_off(pcr, HOST_ENTER_S1, false);
pci_disable_device(pcidev);
free_irq(pcr->irq, (void *)pcr);
@@ -1786,26 +1716,47 @@ static void rtsx_pci_shutdown(struct pci_dev *pcidev)
pci_disable_msi(pcr->pci);
}
static int rtsx_pci_runtime_suspend(struct device *device)
static int rtsx_pci_runtime_idle(struct device *device)
{
struct pci_dev *pcidev = to_pci_dev(device);
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;
handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;
dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
cancel_delayed_work(&pcr->carddet_work);
cancel_delayed_work(&pcr->rtd3_work);
cancel_delayed_work(&pcr->idle_work);
dev_dbg(device, "--> %s\n", __func__);
mutex_lock(&pcr->pcr_mutex);
rtsx_pci_power_off(pcr, HOST_ENTER_S3);
pcr->state = PDEV_STAT_IDLE;
if (pcr->ops->disable_auto_blink)
pcr->ops->disable_auto_blink(pcr);
if (pcr->ops->turn_off_led)
pcr->ops->turn_off_led(pcr);
rtsx_pm_power_saving(pcr);
mutex_unlock(&pcr->pcr_mutex);
pcr->is_runtime_suspended = true;
if (pcr->rtd3_en)
pm_schedule_suspend(device, 10000);
return -EBUSY;
}
static int rtsx_pci_runtime_suspend(struct device *device)
{
struct pci_dev *pcidev = to_pci_dev(device);
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;
dev_dbg(device, "--> %s\n", __func__);
cancel_delayed_work_sync(&pcr->carddet_work);
mutex_lock(&pcr->pcr_mutex);
rtsx_pci_power_off(pcr, HOST_ENTER_S3, true);
mutex_unlock(&pcr->pcr_mutex);
return 0;
}
@@ -1813,20 +1764,15 @@ static int rtsx_pci_runtime_suspend(struct device *device)
static int rtsx_pci_runtime_resume(struct device *device)
{
struct pci_dev *pcidev = to_pci_dev(device);
struct pcr_handle *handle;
struct rtsx_pcr *pcr;
struct pcr_handle *handle = pci_get_drvdata(pcidev);
struct rtsx_pcr *pcr = handle->pcr;
handle = pci_get_drvdata(pcidev);
pcr = handle->pcr;
dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
dev_dbg(device, "--> %s\n", __func__);
mutex_lock(&pcr->pcr_mutex);
rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);
if (pcr->ops->fetch_vendor_settings)
pcr->ops->fetch_vendor_settings(pcr);
rtsx_pci_init_hw(pcr);
if (pcr->slots[RTSX_SD_CARD].p_dev != NULL) {
@@ -1834,8 +1780,6 @@ static int rtsx_pci_runtime_resume(struct device *device)
pcr->slots[RTSX_SD_CARD].p_dev);
}
schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
mutex_unlock(&pcr->pcr_mutex);
return 0;
}
@@ -1850,7 +1794,7 @@ static int rtsx_pci_runtime_resume(struct device *device)
static const struct dev_pm_ops rtsx_pci_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(rtsx_pci_suspend, rtsx_pci_resume)
SET_RUNTIME_PM_OPS(rtsx_pci_runtime_suspend, rtsx_pci_runtime_resume, NULL)
SET_RUNTIME_PM_OPS(rtsx_pci_runtime_suspend, rtsx_pci_runtime_resume, rtsx_pci_runtime_idle)
};
static struct pci_driver rtsx_pci_driver = {

View File

@@ -15,6 +15,8 @@
#define MIN_DIV_N_PCR 80
#define MAX_DIV_N_PCR 208
#define RTS522A_PME_FORCE_CTL 0xFF78
#define RTS522A_AUTOLOAD_CFG1 0xFF7C
#define RTS522A_PM_CTRL3 0xFF7E
#define RTS524A_PME_FORCE_CTL 0xFF78
@@ -25,6 +27,7 @@
#define REG_EFUSE_POWEROFF 0x00
#define RTS5250_CLK_CFG3 0xFF79
#define RTS525A_CFG_MEM_PD 0xF0
#define RTS524A_AUTOLOAD_CFG1 0xFF7C
#define RTS524A_PM_CTRL3 0xFF7E
#define RTS525A_BIOS_CFG 0xFF2D
#define RTS525A_LOAD_BIOS_FLAG 0x01

View File

@@ -309,7 +309,7 @@ static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip)
u32 val;
int err;
strncpy(chip->name, "at25", sizeof(chip->name));
strscpy(chip->name, "at25", sizeof(chip->name));
err = device_property_read_u32(dev, "size", &val);
if (err)
@@ -370,7 +370,7 @@ static int at25_fram_to_chip(struct device *dev, struct spi_eeprom *chip)
u8 id[FM25_ID_LEN];
int i;
strncpy(chip->name, "fm25", sizeof(chip->name));
strscpy(chip->name, "fm25", sizeof(chip->name));
/* Get ID of chip */
fm25_aux_read(at25, id, FM25_RDID, FM25_ID_LEN);

View File

@@ -17,6 +17,7 @@
#include <linux/rpmsg.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/qcom_scm.h>
#include <uapi/misc/fastrpc.h>
#define ADSP_DOMAIN_ID (0)
@@ -25,16 +26,22 @@
#define CDSP_DOMAIN_ID (3)
#define FASTRPC_DEV_MAX 4 /* adsp, mdsp, slpi, cdsp*/
#define FASTRPC_MAX_SESSIONS 13 /*12 compute, 1 cpz*/
#define FASTRPC_MAX_VMIDS 16
#define FASTRPC_ALIGN 128
#define FASTRPC_MAX_FDLIST 16
#define FASTRPC_MAX_CRCLIST 64
#define FASTRPC_PHYS(p) ((p) & 0xffffffff)
#define FASTRPC_CTX_MAX (256)
#define FASTRPC_INIT_HANDLE 1
#define FASTRPC_DSP_UTILITIES_HANDLE 2
#define FASTRPC_CTXID_MASK (0xFF0)
#define INIT_FILELEN_MAX (2 * 1024 * 1024)
#define FASTRPC_DEVICE_NAME "fastrpc"
#define ADSP_MMAP_ADD_PAGES 0x1000
#define DSP_UNSUPPORTED_API (0x80000414)
/* MAX NUMBER of DSP ATTRIBUTES SUPPORTED */
#define FASTRPC_MAX_DSP_ATTRIBUTES (256)
#define FASTRPC_MAX_DSP_ATTRIBUTES_LEN (sizeof(u32) * FASTRPC_MAX_DSP_ATTRIBUTES)
/* Retrives number of input buffers from the scalars parameter */
#define REMOTE_SCALARS_INBUFS(sc) (((sc) >> 16) & 0x0ff)
@@ -72,13 +79,15 @@
#define FASTRPC_RMID_INIT_CREATE 6
#define FASTRPC_RMID_INIT_CREATE_ATTR 7
#define FASTRPC_RMID_INIT_CREATE_STATIC 8
#define FASTRPC_RMID_INIT_MEM_MAP 10
#define FASTRPC_RMID_INIT_MEM_UNMAP 11
/* Protection Domain(PD) ids */
#define AUDIO_PD (0) /* also GUEST_OS PD? */
#define USER_PD (1)
#define SENSORS_PD (2)
#define miscdev_to_cctx(d) container_of(d, struct fastrpc_channel_ctx, miscdev)
#define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
"sdsp", "cdsp"};
@@ -92,9 +101,20 @@ struct fastrpc_invoke_buf {
u32 pgidx; /* index to start of contiguous region */
};
struct fastrpc_remote_arg {
u64 pv;
u64 len;
struct fastrpc_remote_dmahandle {
s32 fd; /* dma handle fd */
u32 offset; /* dma handle offset */
u32 len; /* dma handle length */
};
struct fastrpc_remote_buf {
u64 pv; /* buffer pointer */
u64 len; /* length of buffer */
};
union fastrpc_remote_arg {
struct fastrpc_remote_buf buf;
struct fastrpc_remote_dmahandle dma;
};
struct fastrpc_mmap_rsp_msg {
@@ -108,12 +128,29 @@ struct fastrpc_mmap_req_msg {
s32 num;
};
struct fastrpc_mem_map_req_msg {
s32 pgid;
s32 fd;
s32 offset;
u32 flags;
u64 vaddrin;
s32 num;
s32 data_len;
};
struct fastrpc_munmap_req_msg {
s32 pgid;
u64 vaddr;
u64 size;
};
struct fastrpc_mem_unmap_req_msg {
s32 pgid;
s32 fd;
u64 vaddrin;
u64 len;
};
struct fastrpc_msg {
int pid; /* process group id */
int tid; /* thread id */
@@ -170,6 +207,8 @@ struct fastrpc_map {
u64 size;
void *va;
u64 len;
u64 raddr;
u32 attr;
struct kref refcount;
};
@@ -189,7 +228,7 @@ struct fastrpc_invoke_ctx {
struct work_struct put_work;
struct fastrpc_msg msg;
struct fastrpc_user *fl;
struct fastrpc_remote_arg *rpra;
union fastrpc_remote_arg *rpra;
struct fastrpc_map **maps;
struct fastrpc_buf *buf;
struct fastrpc_invoke_args *args;
@@ -207,13 +246,28 @@ struct fastrpc_session_ctx {
struct fastrpc_channel_ctx {
int domain_id;
int sesscount;
int vmcount;
u32 perms;
struct qcom_scm_vmperm vmperms[FASTRPC_MAX_VMIDS];
struct rpmsg_device *rpdev;
struct fastrpc_session_ctx session[FASTRPC_MAX_SESSIONS];
spinlock_t lock;
struct idr ctx_idr;
struct list_head users;
struct miscdevice miscdev;
struct kref refcount;
/* Flag if dsp attributes are cached */
bool valid_attributes;
u32 dsp_attributes[FASTRPC_MAX_DSP_ATTRIBUTES];
struct fastrpc_device *secure_fdevice;
struct fastrpc_device *fdevice;
bool secure;
bool unsigned_support;
};
struct fastrpc_device {
struct fastrpc_channel_ctx *cctx;
struct miscdevice miscdev;
bool secure;
};
struct fastrpc_user {
@@ -228,6 +282,7 @@ struct fastrpc_user {
int tgid;
int pd;
bool is_secure_dev;
/* Lock for lists */
spinlock_t lock;
/* lock for allocations */
@@ -241,6 +296,20 @@ static void fastrpc_free_map(struct kref *ref)
map = container_of(ref, struct fastrpc_map, refcount);
if (map->table) {
if (map->attr & FASTRPC_ATTR_SECUREMAP) {
struct qcom_scm_vmperm perm;
int err = 0;
perm.vmid = QCOM_SCM_VMID_HLOS;
perm.perm = QCOM_SCM_PERM_RWX;
err = qcom_scm_assign_mem(map->phys, map->size,
&(map->fl->cctx->vmperms[0].vmid), &perm, 1);
if (err) {
dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
map->phys, map->size, err);
return;
}
}
dma_buf_unmap_attachment(map->attach, map->table,
DMA_BIDIRECTIONAL);
dma_buf_detach(map->buf, map->attach);
@@ -262,7 +331,8 @@ static void fastrpc_map_get(struct fastrpc_map *map)
kref_get(&map->refcount);
}
static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd,
struct fastrpc_map **ppmap)
{
struct fastrpc_map *map = NULL;
@@ -270,7 +340,6 @@ static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
mutex_lock(&fl->mutex);
list_for_each_entry(map, &fl->maps, node) {
if (map->fd == fd) {
fastrpc_map_get(map);
*ppmap = map;
mutex_unlock(&fl->mutex);
return 0;
@@ -281,6 +350,17 @@ static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
return -ENOENT;
}
static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
struct fastrpc_map **ppmap)
{
int ret = fastrpc_map_lookup(fl, fd, ppmap);
if (!ret)
fastrpc_map_get(*ppmap);
return ret;
}
static void fastrpc_buf_free(struct fastrpc_buf *buf)
{
dma_free_coherent(buf->dev, buf->size, buf->virt,
@@ -353,7 +433,7 @@ static void fastrpc_context_free(struct kref *ref)
ctx = container_of(ref, struct fastrpc_invoke_ctx, refcount);
cctx = ctx->cctx;
for (i = 0; i < ctx->nscalars; i++)
for (i = 0; i < ctx->nbufs; i++)
fastrpc_map_put(ctx->maps[i]);
if (ctx->buf)
@@ -617,7 +697,7 @@ static const struct dma_buf_ops fastrpc_dma_buf_ops = {
};
static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
u64 len, struct fastrpc_map **ppmap)
u64 len, u32 attr, struct fastrpc_map **ppmap)
{
struct fastrpc_session_ctx *sess = fl->sctx;
struct fastrpc_map *map = NULL;
@@ -659,6 +739,22 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
map->len = len;
kref_init(&map->refcount);
if (attr & FASTRPC_ATTR_SECUREMAP) {
/*
* If subsystem VMIDs are defined in DTSI, then do
* hyp_assign from HLOS to those VM(s)
*/
unsigned int perms = BIT(QCOM_SCM_VMID_HLOS);
map->attr = attr;
err = qcom_scm_assign_mem(map->phys, (u64)map->size, &perms,
fl->cctx->vmperms, fl->cctx->vmcount);
if (err) {
dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d",
map->phys, map->size, err);
goto map_err;
}
}
spin_lock(&fl->lock);
list_add_tail(&map->node, &fl->maps);
spin_unlock(&fl->lock);
@@ -682,7 +778,7 @@ get_err:
* >>>>>> START of METADATA <<<<<<<<<
* +---------------------------------+
* | Arguments |
* | type:(struct fastrpc_remote_arg)|
* | type:(union fastrpc_remote_arg)|
* | (0 - N) |
* +---------------------------------+
* | Invoke Buffer list |
@@ -707,7 +803,7 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
{
int size = 0;
size = (sizeof(struct fastrpc_remote_arg) +
size = (sizeof(struct fastrpc_remote_buf) +
sizeof(struct fastrpc_invoke_buf) +
sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
sizeof(u64) * FASTRPC_MAX_FDLIST +
@@ -743,16 +839,13 @@ static int fastrpc_create_maps(struct fastrpc_invoke_ctx *ctx)
int i, err;
for (i = 0; i < ctx->nscalars; ++i) {
/* Make sure reserved field is set to 0 */
if (ctx->args[i].reserved)
return -EINVAL;
if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1 ||
ctx->args[i].length == 0)
continue;
err = fastrpc_map_create(ctx->fl, ctx->args[i].fd,
ctx->args[i].length, &ctx->maps[i]);
ctx->args[i].length, ctx->args[i].attr, &ctx->maps[i]);
if (err) {
dev_err(dev, "Error Creating map %d\n", err);
return -EINVAL;
@@ -762,10 +855,20 @@ static int fastrpc_create_maps(struct fastrpc_invoke_ctx *ctx)
return 0;
}
static struct fastrpc_invoke_buf *fastrpc_invoke_buf_start(union fastrpc_remote_arg *pra, int len)
{
return (struct fastrpc_invoke_buf *)(&pra[len]);
}
static struct fastrpc_phy_page *fastrpc_phy_page_start(struct fastrpc_invoke_buf *buf, int len)
{
return (struct fastrpc_phy_page *)(&buf[len]);
}
static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
{
struct device *dev = ctx->fl->sctx->dev;
struct fastrpc_remote_arg *rpra;
union fastrpc_remote_arg *rpra;
struct fastrpc_invoke_buf *list;
struct fastrpc_phy_page *pages;
int inbufs, i, oix, err = 0;
@@ -789,9 +892,8 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
return err;
rpra = ctx->buf->virt;
list = ctx->buf->virt + ctx->nscalars * sizeof(*rpra);
pages = ctx->buf->virt + ctx->nscalars * (sizeof(*list) +
sizeof(*rpra));
list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);
pages = fastrpc_phy_page_start(list, ctx->nscalars);
args = (uintptr_t)ctx->buf->virt + metalen;
rlen = pkt_size - metalen;
ctx->rpra = rpra;
@@ -802,8 +904,8 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
i = ctx->olaps[oix].raix;
len = ctx->args[i].length;
rpra[i].pv = 0;
rpra[i].len = len;
rpra[i].buf.pv = 0;
rpra[i].buf.len = len;
list[i].num = len ? 1 : 0;
list[i].pgidx = i;
@@ -813,7 +915,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
if (ctx->maps[i]) {
struct vm_area_struct *vma = NULL;
rpra[i].pv = (u64) ctx->args[i].ptr;
rpra[i].buf.pv = (u64) ctx->args[i].ptr;
pages[i].addr = ctx->maps[i]->phys;
mmap_read_lock(current->mm);
@@ -840,7 +942,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
if (rlen < mlen)
goto bail;
rpra[i].pv = args - ctx->olaps[oix].offset;
rpra[i].buf.pv = args - ctx->olaps[oix].offset;
pages[i].addr = ctx->buf->phys -
ctx->olaps[oix].offset +
(pkt_size - rlen);
@@ -854,7 +956,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
}
if (i < inbufs && !ctx->maps[i]) {
void *dst = (void *)(uintptr_t)rpra[i].pv;
void *dst = (void *)(uintptr_t)rpra[i].buf.pv;
void *src = (void *)(uintptr_t)ctx->args[i].ptr;
if (!kernel) {
@@ -870,12 +972,15 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
}
for (i = ctx->nbufs; i < ctx->nscalars; ++i) {
rpra[i].pv = (u64) ctx->args[i].ptr;
rpra[i].len = ctx->args[i].length;
list[i].num = ctx->args[i].length ? 1 : 0;
list[i].pgidx = i;
pages[i].addr = ctx->maps[i]->phys;
pages[i].size = ctx->maps[i]->size;
if (ctx->maps[i]) {
pages[i].addr = ctx->maps[i]->phys;
pages[i].size = ctx->maps[i]->size;
}
rpra[i].dma.fd = ctx->args[i].fd;
rpra[i].dma.len = ctx->args[i].length;
rpra[i].dma.offset = (u64) ctx->args[i].ptr;
}
bail:
@@ -888,16 +993,26 @@ bail:
static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
u32 kernel)
{
struct fastrpc_remote_arg *rpra = ctx->rpra;
int i, inbufs;
union fastrpc_remote_arg *rpra = ctx->rpra;
struct fastrpc_user *fl = ctx->fl;
struct fastrpc_map *mmap = NULL;
struct fastrpc_invoke_buf *list;
struct fastrpc_phy_page *pages;
u64 *fdlist;
int i, inbufs, outbufs, handles;
inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc);
handles = REMOTE_SCALARS_INHANDLES(ctx->sc) + REMOTE_SCALARS_OUTHANDLES(ctx->sc);
list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);
pages = fastrpc_phy_page_start(list, ctx->nscalars);
fdlist = (uint64_t *)(pages + inbufs + outbufs + handles);
for (i = inbufs; i < ctx->nbufs; ++i) {
if (!ctx->maps[i]) {
void *src = (void *)(uintptr_t)rpra[i].pv;
void *src = (void *)(uintptr_t)rpra[i].buf.pv;
void *dst = (void *)(uintptr_t)ctx->args[i].ptr;
u64 len = rpra[i].len;
u64 len = rpra[i].buf.len;
if (!kernel) {
if (copy_to_user((void __user *)dst, src, len))
@@ -908,6 +1023,13 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
}
}
for (i = 0; i < FASTRPC_MAX_FDLIST; i++) {
if (!fdlist[i])
break;
if (!fastrpc_map_lookup(fl, (int)fdlist[i], &mmap))
fastrpc_map_put(mmap);
}
return 0;
}
@@ -1016,6 +1138,24 @@ bail:
return err;
}
static bool is_session_rejected(struct fastrpc_user *fl, bool unsigned_pd_request)
{
/* Check if the device node is non-secure and channel is secure*/
if (!fl->is_secure_dev && fl->cctx->secure) {
/*
* Allow untrusted applications to offload only to Unsigned PD when
* channel is configured as secure and block untrusted apps on channel
* that does not support unsigned PD offload
*/
if (!fl->cctx->unsigned_support || !unsigned_pd_request) {
dev_err(&fl->cctx->rpdev->dev, "Error: Untrusted application trying to offload to signed PD");
return true;
}
}
return false;
}
static int fastrpc_init_create_process(struct fastrpc_user *fl,
char __user *argp)
{
@@ -1035,6 +1175,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl,
u32 siglen;
} inbuf;
u32 sc;
bool unsigned_module = false;
args = kcalloc(FASTRPC_CREATE_PROCESS_NARGS, sizeof(*args), GFP_KERNEL);
if (!args)
@@ -1045,6 +1186,14 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl,
goto err;
}
if (init.attrs & FASTRPC_MODE_UNSIGNED_MODULE)
unsigned_module = true;
if (is_session_rejected(fl, unsigned_module)) {
err = -ECONNREFUSED;
goto err;
}
if (init.filelen > INIT_FILELEN_MAX) {
err = -EINVAL;
goto err;
@@ -1059,7 +1208,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl,
fl->pd = USER_PD;
if (init.filelen && init.filefd) {
err = fastrpc_map_create(fl, init.filefd, init.filelen, &map);
err = fastrpc_map_create(fl, init.filefd, init.filelen, 0, &map);
if (err)
goto err;
}
@@ -1168,7 +1317,6 @@ static int fastrpc_release_current_dsp_process(struct fastrpc_user *fl)
args[0].ptr = (u64)(uintptr_t) &tgid;
args[0].length = sizeof(tgid);
args[0].fd = -1;
args[0].reserved = 0;
sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_RELEASE, 1, 0);
return fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE,
@@ -1220,10 +1368,14 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
static int fastrpc_device_open(struct inode *inode, struct file *filp)
{
struct fastrpc_channel_ctx *cctx = miscdev_to_cctx(filp->private_data);
struct fastrpc_channel_ctx *cctx;
struct fastrpc_device *fdevice;
struct fastrpc_user *fl = NULL;
unsigned long flags;
fdevice = miscdev_to_fdevice(filp->private_data);
cctx = fdevice->cctx;
fl = kzalloc(sizeof(*fl), GFP_KERNEL);
if (!fl)
return -ENOMEM;
@@ -1240,6 +1392,7 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp)
INIT_LIST_HEAD(&fl->user);
fl->tgid = current->tgid;
fl->cctx = cctx;
fl->is_secure_dev = fdevice->secure;
fl->sctx = fastrpc_session_alloc(cctx);
if (!fl->sctx) {
@@ -1311,7 +1464,6 @@ static int fastrpc_init_attach(struct fastrpc_user *fl, int pd)
args[0].ptr = (u64)(uintptr_t) &tgid;
args[0].length = sizeof(tgid);
args[0].fd = -1;
args[0].reserved = 0;
sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0);
fl->pd = pd;
@@ -1349,6 +1501,107 @@ static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp)
return err;
}
static int fastrpc_get_info_from_dsp(struct fastrpc_user *fl, uint32_t *dsp_attr_buf,
uint32_t dsp_attr_buf_len)
{
struct fastrpc_invoke_args args[2] = { 0 };
/* Capability filled in userspace */
dsp_attr_buf[0] = 0;
args[0].ptr = (u64)(uintptr_t)&dsp_attr_buf_len;
args[0].length = sizeof(dsp_attr_buf_len);
args[0].fd = -1;
args[1].ptr = (u64)(uintptr_t)&dsp_attr_buf[1];
args[1].length = dsp_attr_buf_len;
args[1].fd = -1;
fl->pd = 1;
return fastrpc_internal_invoke(fl, true, FASTRPC_DSP_UTILITIES_HANDLE,
FASTRPC_SCALARS(0, 1, 1), args);
}
static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
struct fastrpc_user *fl)
{
struct fastrpc_channel_ctx *cctx = fl->cctx;
uint32_t attribute_id = cap->attribute_id;
uint32_t *dsp_attributes;
unsigned long flags;
uint32_t domain = cap->domain;
int err;
spin_lock_irqsave(&cctx->lock, flags);
/* check if we already have queried dsp for attributes */
if (cctx->valid_attributes) {
spin_unlock_irqrestore(&cctx->lock, flags);
goto done;
}
spin_unlock_irqrestore(&cctx->lock, flags);
dsp_attributes = kzalloc(FASTRPC_MAX_DSP_ATTRIBUTES_LEN, GFP_KERNEL);
if (!dsp_attributes)
return -ENOMEM;
err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN);
if (err == DSP_UNSUPPORTED_API) {
dev_info(&cctx->rpdev->dev,
"Warning: DSP capabilities not supported on domain: %d\n", domain);
kfree(dsp_attributes);
return -EOPNOTSUPP;
} else if (err) {
dev_err(&cctx->rpdev->dev, "Error: dsp information is incorrect err: %d\n", err);
kfree(dsp_attributes);
return err;
}
spin_lock_irqsave(&cctx->lock, flags);
memcpy(cctx->dsp_attributes, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN);
cctx->valid_attributes = true;
spin_unlock_irqrestore(&cctx->lock, flags);
kfree(dsp_attributes);
done:
cap->capability = cctx->dsp_attributes[attribute_id];
return 0;
}
static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
{
struct fastrpc_ioctl_capability cap = {0};
int err = 0;
if (copy_from_user(&cap, argp, sizeof(cap)))
return -EFAULT;
cap.capability = 0;
if (cap.domain >= FASTRPC_DEV_MAX) {
dev_err(&fl->cctx->rpdev->dev, "Error: Invalid domain id:%d, err:%d\n",
cap.domain, err);
return -ECHRNG;
}
/* Fastrpc Capablities does not support modem domain */
if (cap.domain == MDSP_DOMAIN_ID) {
dev_err(&fl->cctx->rpdev->dev, "Error: modem not supported %d\n", err);
return -ECHRNG;
}
if (cap.attribute_id >= FASTRPC_MAX_DSP_ATTRIBUTES) {
dev_err(&fl->cctx->rpdev->dev, "Error: invalid attribute: %d, err: %d\n",
cap.attribute_id, err);
return -EOVERFLOW;
}
err = fastrpc_get_info_from_kernel(&cap, fl);
if (err)
return err;
if (copy_to_user(argp, &cap.capability, sizeof(cap.capability)))
return -EFAULT;
return 0;
}
static int fastrpc_req_munmap_impl(struct fastrpc_user *fl,
struct fastrpc_req_munmap *req)
{
@@ -1491,6 +1744,134 @@ err_invoke:
return err;
}
static int fastrpc_req_mem_unmap_impl(struct fastrpc_user *fl, struct fastrpc_mem_unmap *req)
{
struct fastrpc_invoke_args args[1] = { [0] = { 0 } };
struct fastrpc_map *map = NULL, *m;
struct fastrpc_mem_unmap_req_msg req_msg = { 0 };
int err = 0;
u32 sc;
struct device *dev = fl->sctx->dev;
spin_lock(&fl->lock);
list_for_each_entry_safe(map, m, &fl->maps, node) {
if ((req->fd < 0 || map->fd == req->fd) && (map->raddr == req->vaddr))
break;
map = NULL;
}
spin_unlock(&fl->lock);
if (!map) {
dev_err(dev, "map not in list\n");
return -EINVAL;
}
req_msg.pgid = fl->tgid;
req_msg.len = map->len;
req_msg.vaddrin = map->raddr;
req_msg.fd = map->fd;
args[0].ptr = (u64) (uintptr_t) &req_msg;
args[0].length = sizeof(req_msg);
sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MEM_UNMAP, 1, 0);
err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc,
&args[0]);
fastrpc_map_put(map);
if (err)
dev_err(dev, "unmmap\tpt fd = %d, 0x%09llx error\n", map->fd, map->raddr);
return err;
}
static int fastrpc_req_mem_unmap(struct fastrpc_user *fl, char __user *argp)
{
struct fastrpc_mem_unmap req;
if (copy_from_user(&req, argp, sizeof(req)))
return -EFAULT;
return fastrpc_req_mem_unmap_impl(fl, &req);
}
static int fastrpc_req_mem_map(struct fastrpc_user *fl, char __user *argp)
{
struct fastrpc_invoke_args args[4] = { [0 ... 3] = { 0 } };
struct fastrpc_mem_map_req_msg req_msg = { 0 };
struct fastrpc_mmap_rsp_msg rsp_msg = { 0 };
struct fastrpc_mem_unmap req_unmap = { 0 };
struct fastrpc_phy_page pages = { 0 };
struct fastrpc_mem_map req;
struct device *dev = fl->sctx->dev;
struct fastrpc_map *map = NULL;
int err;
u32 sc;
if (copy_from_user(&req, argp, sizeof(req)))
return -EFAULT;
/* create SMMU mapping */
err = fastrpc_map_create(fl, req.fd, req.length, 0, &map);
if (err) {
dev_err(dev, "failed to map buffer, fd = %d\n", req.fd);
return err;
}
req_msg.pgid = fl->tgid;
req_msg.fd = req.fd;
req_msg.offset = req.offset;
req_msg.vaddrin = req.vaddrin;
map->va = (void *) (uintptr_t) req.vaddrin;
req_msg.flags = req.flags;
req_msg.num = sizeof(pages);
req_msg.data_len = 0;
args[0].ptr = (u64) (uintptr_t) &req_msg;
args[0].length = sizeof(req_msg);
pages.addr = map->phys;
pages.size = map->size;
args[1].ptr = (u64) (uintptr_t) &pages;
args[1].length = sizeof(pages);
args[2].ptr = (u64) (uintptr_t) &pages;
args[2].length = 0;
args[3].ptr = (u64) (uintptr_t) &rsp_msg;
args[3].length = sizeof(rsp_msg);
sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MEM_MAP, 3, 1);
err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, &args[0]);
if (err) {
dev_err(dev, "mem mmap error, fd %d, vaddr %llx, size %lld\n",
req.fd, req.vaddrin, map->size);
goto err_invoke;
}
/* update the buffer to be able to deallocate the memory on the DSP */
map->raddr = rsp_msg.vaddr;
/* let the client know the address to use */
req.vaddrout = rsp_msg.vaddr;
if (copy_to_user((void __user *)argp, &req, sizeof(req))) {
/* unmap the memory and release the buffer */
req_unmap.vaddr = (uintptr_t) rsp_msg.vaddr;
req_unmap.length = map->size;
fastrpc_req_mem_unmap_impl(fl, &req_unmap);
return -EFAULT;
}
return 0;
err_invoke:
fastrpc_map_put(map);
return err;
}
static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1520,6 +1901,15 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
case FASTRPC_IOCTL_MUNMAP:
err = fastrpc_req_munmap(fl, argp);
break;
case FASTRPC_IOCTL_MEM_MAP:
err = fastrpc_req_mem_map(fl, argp);
break;
case FASTRPC_IOCTL_MEM_UNMAP:
err = fastrpc_req_mem_unmap(fl, argp);
break;
case FASTRPC_IOCTL_GET_DSP_INFO:
err = fastrpc_get_dsp_info(fl, argp);
break;
default:
err = -ENOTTY;
break;
@@ -1615,12 +2005,41 @@ static struct platform_driver fastrpc_cb_driver = {
},
};
static int fastrpc_device_register(struct device *dev, struct fastrpc_channel_ctx *cctx,
bool is_secured, const char *domain)
{
struct fastrpc_device *fdev;
int err;
fdev = devm_kzalloc(dev, sizeof(*fdev), GFP_KERNEL);
if (!fdev)
return -ENOMEM;
fdev->secure = is_secured;
fdev->cctx = cctx;
fdev->miscdev.minor = MISC_DYNAMIC_MINOR;
fdev->miscdev.fops = &fastrpc_fops;
fdev->miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "fastrpc-%s%s",
domain, is_secured ? "-secure" : "");
err = misc_register(&fdev->miscdev);
if (!err) {
if (is_secured)
cctx->secure_fdevice = fdev;
else
cctx->fdevice = fdev;
}
return err;
}
static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
{
struct device *rdev = &rpdev->dev;
struct fastrpc_channel_ctx *data;
int i, err, domain_id = -1;
int i, err, domain_id = -1, vmcount;
const char *domain;
bool secure_dsp;
unsigned int vmids[FASTRPC_MAX_VMIDS];
err = of_property_read_string(rdev->of_node, "label", &domain);
if (err) {
@@ -1640,18 +2059,53 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
return -EINVAL;
}
vmcount = of_property_read_variable_u32_array(rdev->of_node,
"qcom,vmids", &vmids[0], 0, FASTRPC_MAX_VMIDS);
if (vmcount < 0)
vmcount = 0;
else if (!qcom_scm_is_available())
return -EPROBE_DEFER;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->miscdev.minor = MISC_DYNAMIC_MINOR;
data->miscdev.name = devm_kasprintf(rdev, GFP_KERNEL, "fastrpc-%s",
domains[domain_id]);
data->miscdev.fops = &fastrpc_fops;
err = misc_register(&data->miscdev);
if (err) {
kfree(data);
return err;
if (vmcount) {
data->vmcount = vmcount;
data->perms = BIT(QCOM_SCM_VMID_HLOS);
for (i = 0; i < data->vmcount; i++) {
data->vmperms[i].vmid = vmids[i];
data->vmperms[i].perm = QCOM_SCM_PERM_RWX;
}
}
secure_dsp = !(of_property_read_bool(rdev->of_node, "qcom,non-secure-domain"));
data->secure = secure_dsp;
switch (domain_id) {
case ADSP_DOMAIN_ID:
case MDSP_DOMAIN_ID:
case SDSP_DOMAIN_ID:
/* Unsigned PD offloading is only supported on CDSP*/
data->unsigned_support = false;
err = fastrpc_device_register(rdev, data, secure_dsp, domains[domain_id]);
if (err)
goto fdev_error;
break;
case CDSP_DOMAIN_ID:
data->unsigned_support = true;
/* Create both device nodes so that we can allow both Signed and Unsigned PD */
err = fastrpc_device_register(rdev, data, true, domains[domain_id]);
if (err)
goto fdev_error;
err = fastrpc_device_register(rdev, data, false, domains[domain_id]);
if (err)
goto fdev_error;
break;
default:
err = -EINVAL;
goto fdev_error;
}
kref_init(&data->refcount);
@@ -1665,6 +2119,9 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
data->rpdev = rpdev;
return of_platform_populate(rdev->of_node, NULL, NULL, rdev);
fdev_error:
kfree(data);
return err;
}
static void fastrpc_notify_users(struct fastrpc_user *user)
@@ -1688,7 +2145,12 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
fastrpc_notify_users(user);
spin_unlock_irqrestore(&cctx->lock, flags);
misc_deregister(&cctx->miscdev);
if (cctx->fdevice)
misc_deregister(&cctx->fdevice->miscdev);
if (cctx->secure_fdevice)
misc_deregister(&cctx->secure_fdevice->miscdev);
of_platform_depopulate(&rpdev->dev);
cctx->rpdev = NULL;

View File

@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/firmware_if.o \
common/state_dump.o common/hwmgr.o
common/state_dump.o

View File

@@ -424,8 +424,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
union hl_cb_args *args = data;
struct hl_device *hdev = hpriv->hdev;
u64 handle = 0, device_va = 0;
enum hl_device_status status;
u64 handle = 0, device_va;
u32 usage_cnt = 0;
int rc;
@@ -464,6 +464,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
args->in.flags,
&usage_cnt,
&device_va);
if (rc)
break;
memset(&args->out, 0, sizeof(args->out));

View File

@@ -14,6 +14,8 @@
#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
HL_CS_FLAGS_COLLECTIVE_WAIT)
#define MAX_TS_ITER_NUM 10
/**
* enum hl_cs_wait_status - cs wait status
* @CS_WAIT_STATUS_BUSY: cs was not completed yet
@@ -919,18 +921,21 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
complete_job(hdev, job);
}
void hl_cs_rollback_all(struct hl_device *hdev)
void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
{
int i;
struct hl_cs *cs, *tmp;
flush_workqueue(hdev->sob_reset_wq);
if (!skip_wq_flush) {
flush_workqueue(hdev->ts_free_obj_wq);
/* flush all completions before iterating over the CS mirror list in
* order to avoid a race with the release functions
*/
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
flush_workqueue(hdev->cq_wq[i]);
/* flush all completions before iterating over the CS mirror list in
* order to avoid a race with the release functions
*/
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
flush_workqueue(hdev->cq_wq[i]);
}
/* Make sure we don't have leftovers in the CS mirror list */
list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
@@ -948,13 +953,19 @@ void hl_cs_rollback_all(struct hl_device *hdev)
static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
struct hl_user_pending_interrupt *pend;
struct hl_user_pending_interrupt *pend, *temp;
unsigned long flags;
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
pend->fence.error = -EIO;
complete_all(&pend->fence.completion);
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
if (pend->ts_reg_info.ts_buff) {
list_del(&pend->wait_list_node);
hl_ts_put(pend->ts_reg_info.ts_buff);
hl_cb_put(pend->ts_reg_info.cq_cb);
} else {
pend->fence.error = -EIO;
complete_all(&pend->fence.completion);
}
}
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
}
@@ -2063,13 +2074,16 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
idp = &ctx->sig_mgr.handles;
idr_for_each_entry(idp, encaps_sig_hdl, id) {
if (encaps_sig_hdl->cs_seq == signal_seq) {
handle_found = true;
/* get refcount to protect removing
* this handle from idr, needed when
* multiple wait cs are used with offset
/* get refcount to protect removing this handle from idr,
* needed when multiple wait cs are used with offset
* to wait on reserved encaps signals.
* Since kref_put of this handle is executed outside the
* current lock, it is possible that the handle refcount
* is 0 but it yet to be removed from the list. In this
* case need to consider the handle as not valid.
*/
kref_get(&encaps_sig_hdl->refcount);
if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
handle_found = true;
break;
}
}
@@ -2739,7 +2753,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
mcs_data.update_ts = false;
rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
if (mcs_data.completion_bitmap)
if (rc || mcs_data.completion_bitmap)
break;
/*
@@ -2854,64 +2868,174 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct hl_cb_mgr *cb_mgr, u64 timeout_us,
u64 cq_counters_handle, u64 cq_counters_offset,
u64 target_value, struct hl_user_interrupt *interrupt,
u32 *status,
u64 *timestamp)
static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
struct hl_cb *cq_cb,
u64 ts_offset, u64 cq_offset, u64 target_value,
spinlock_t *wait_list_lock,
struct hl_user_pending_interrupt **pend)
{
struct hl_user_pending_interrupt *requested_offset_record =
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
ts_offset;
struct hl_user_pending_interrupt *cb_last =
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
unsigned long flags, iter_counter = 0;
u64 current_cq_counter;
/* Validate ts_offset not exceeding last max */
if (requested_offset_record > cb_last) {
dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
(u64)(uintptr_t)cb_last);
return -EINVAL;
}
start_over:
spin_lock_irqsave(wait_list_lock, flags);
/* Unregister only if we didn't reach the target value
* since in this case there will be no handling in irq context
* and then it's safe to delete the node out of the interrupt list
* then re-use it on other interrupt
*/
if (requested_offset_record->ts_reg_info.in_use) {
current_cq_counter = *requested_offset_record->cq_kernel_addr;
if (current_cq_counter < requested_offset_record->cq_target_value) {
list_del(&requested_offset_record->wait_list_node);
spin_unlock_irqrestore(wait_list_lock, flags);
hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
} else {
dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");
/* irq handling in the middle give it time to finish */
spin_unlock_irqrestore(wait_list_lock, flags);
usleep_range(1, 10);
if (++iter_counter == MAX_TS_ITER_NUM) {
dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n");
return -EINVAL;
}
goto start_over;
}
} else {
spin_unlock_irqrestore(wait_list_lock, flags);
}
/* Fill up the new registration node info */
requested_offset_record->ts_reg_info.in_use = 1;
requested_offset_record->ts_reg_info.ts_buff = ts_buff;
requested_offset_record->ts_reg_info.cq_cb = cq_cb;
requested_offset_record->ts_reg_info.timestamp_kernel_addr =
(u64 *) ts_buff->user_buff_address + ts_offset;
requested_offset_record->cq_kernel_addr =
(u64 *) cq_cb->kernel_address + cq_offset;
requested_offset_record->cq_target_value = target_value;
*pend = requested_offset_record;
dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
(u64)(uintptr_t)requested_offset_record);
return 0;
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr,
u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
u64 target_value, struct hl_user_interrupt *interrupt,
bool register_ts_record, u64 ts_handle, u64 ts_offset,
u32 *status, u64 *timestamp)
{
u32 cq_patched_handle, ts_patched_handle;
struct hl_user_pending_interrupt *pend;
struct hl_ts_buff *ts_buff;
struct hl_cb *cq_cb;
unsigned long timeout, flags;
long completion_rc;
struct hl_cb *cb;
int rc = 0;
u32 handle;
timeout = hl_usecs64_to_jiffies(timeout_us);
hl_ctx_get(hdev, ctx);
cq_counters_handle >>= PAGE_SHIFT;
handle = (u32) cq_counters_handle;
cb = hl_cb_get(hdev, cb_mgr, handle);
if (!cb) {
hl_ctx_put(ctx);
return -EINVAL;
cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT);
cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle);
if (!cq_cb) {
rc = -EINVAL;
goto put_ctx;
}
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
hl_cb_put(cb);
hl_ctx_put(ctx);
return -ENOMEM;
if (register_ts_record) {
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
interrupt->interrupt_id, ts_offset, cq_counters_offset);
ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
if (!ts_buff) {
rc = -EINVAL;
goto put_cq_cb;
}
/* Find first available record */
rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
cq_counters_offset, target_value,
&interrupt->wait_list_lock, &pend);
if (rc)
goto put_ts_buff;
} else {
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
rc = -ENOMEM;
goto put_cq_cb;
}
hl_fence_init(&pend->fence, ULONG_MAX);
pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
pend->cq_target_value = target_value;
}
hl_fence_init(&pend->fence, ULONG_MAX);
pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
pend->cq_target_value = target_value;
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
*/
if (*pend->cq_kernel_addr >= target_value) {
if (register_ts_record)
pend->ts_reg_info.in_use = 0;
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
*status = HL_WAIT_CS_STATUS_COMPLETED;
/* There was no interrupt, we assume the completion is now. */
if (register_ts_record) {
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
goto put_ts_buff;
} else {
pend->fence.timestamp = ktime_get();
goto set_timestamp;
}
} else if (!timeout_us) {
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
*status = HL_WAIT_CS_STATUS_BUSY;
pend->fence.timestamp = ktime_get();
goto set_timestamp;
}
if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
goto set_timestamp;
/* Add pending user interrupt to relevant list for the interrupt
* handler to monitor
* handler to monitor.
* Note that we cannot have sorted list by target value,
* in order to shorten the list pass loop, since
* same list could have nodes for different cq counter handle.
*/
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
if (register_ts_record) {
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
goto ts_registration_exit;
}
/* Wait for interrupt handler to signal completion */
completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
timeout);
@@ -2932,23 +3056,41 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
rc = -EIO;
*status = HL_WAIT_CS_STATUS_ABORTED;
} else {
dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
interrupt->interrupt_id);
rc = -ETIMEDOUT;
/* The wait has timed-out. We don't know anything beyond that
* because the workload wasn't submitted through the driver.
* Therefore, from driver's perspective, the workload is still
* executing.
*/
rc = 0;
*status = HL_WAIT_CS_STATUS_BUSY;
}
*status = HL_WAIT_CS_STATUS_BUSY;
}
}
/*
* We keep removing the node from list here, and not at the irq handler
* for completion timeout case. and if it's a registration
* for ts record, the node will be deleted in the irq handler after
* we reach the target value.
*/
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_del(&pend->wait_list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
set_timestamp:
*timestamp = ktime_to_ns(pend->fence.timestamp);
kfree(pend);
hl_cb_put(cb);
hl_cb_put(cq_cb);
ts_registration_exit:
hl_ctx_put(ctx);
return rc;
put_ts_buff:
hl_ts_put(ts_buff);
put_cq_cb:
hl_cb_put(cq_cb);
put_ctx:
hl_ctx_put(ctx);
return rc;
@@ -3049,6 +3191,12 @@ wait_again:
interrupt->interrupt_id);
rc = -EINTR;
} else {
/* The wait has timed-out. We don't know anything beyond that
* because the workload wasn't submitted through the driver.
* Therefore, from driver's perspective, the workload is still
* executing.
*/
rc = 0;
*status = HL_WAIT_CS_STATUS_BUSY;
}
@@ -3101,23 +3249,20 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
args->in.interrupt_timeout_us, args->in.cq_counters_handle,
args->in.cq_counters_offset,
args->in.target, interrupt, &status,
&timestamp);
args->in.target, interrupt,
!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
args->in.timestamp_handle, args->in.timestamp_offset,
&status, &timestamp);
else
rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
args->in.target, interrupt, &status,
&timestamp);
if (rc) {
if (rc != -EINTR)
dev_err_ratelimited(hdev->dev,
"interrupt_wait_ioctl failed (%d)\n", rc);
if (rc)
return rc;
}
memset(args, 0, sizeof(*args));
args->out.status = status;

View File

@@ -890,6 +890,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
pci_set_power_state(hdev->pdev, PCI_D0);
pci_restore_state(hdev->pdev);
rc = pci_enable_device(hdev->pdev);
if (rc < 0)
return rc;
} else if (value == 2) {
pci_save_state(hdev->pdev);
pci_disable_device(hdev->pdev);
@@ -1054,42 +1056,12 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf,
static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
char tmp_buf[200];
ssize_t rc;
if (*ppos)
return 0;
sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask);
rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
strlen(tmp_buf) + 1);
return rc;
return 0;
}
static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u64 value;
ssize_t rc;
if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev,
"Can't change clock gating during reset\n");
return 0;
}
rc = kstrtoull_from_user(buf, count, 16, &value);
if (rc)
return rc;
hdev->clock_gating_mask = value;
hdev->asic_funcs->set_clock_gating(hdev);
return count;
}
@@ -1101,6 +1073,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
char tmp_buf[200];
ssize_t rc;
if (!hdev->asic_prop.configurable_stop_on_err)
return -EOPNOTSUPP;
if (*ppos)
return 0;
@@ -1119,6 +1094,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
u32 value;
ssize_t rc;
if (!hdev->asic_prop.configurable_stop_on_err)
return -EOPNOTSUPP;
if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev,
"Can't change stop on error during reset\n");

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -13,6 +13,8 @@
#include <linux/pci.h>
#include <linux/hwmon.h>
#define HL_RESET_DELAY_USEC 10000 /* 10ms */
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
@@ -145,6 +147,7 @@ static int hl_device_release(struct inode *inode, struct file *filp)
hl_release_pending_user_interrupts(hpriv->hdev);
hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
if (!hl_hpriv_put(hpriv))
@@ -209,6 +212,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
case HL_MMAP_TYPE_BLOCK:
return hl_hw_block_mmap(hpriv, vma);
case HL_MMAP_TYPE_TS_BUFF:
return hl_ts_mmap(hpriv, vma);
}
return -EINVAL;
@@ -410,10 +416,10 @@ static int device_early_init(struct hl_device *hdev)
goto free_cq_wq;
}
hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
if (!hdev->sob_reset_wq) {
hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
if (!hdev->ts_free_obj_wq) {
dev_err(hdev->dev,
"Failed to allocate SOB reset workqueue\n");
"Failed to allocate Timestamp registration free workqueue\n");
rc = -ENOMEM;
goto free_eq_wq;
}
@@ -422,7 +428,7 @@ static int device_early_init(struct hl_device *hdev)
GFP_KERNEL);
if (!hdev->hl_chip_info) {
rc = -ENOMEM;
goto free_sob_reset_wq;
goto free_ts_free_wq;
}
rc = hl_mmu_if_set_funcs(hdev);
@@ -461,8 +467,8 @@ free_cb_mgr:
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_chip_info:
kfree(hdev->hl_chip_info);
free_sob_reset_wq:
destroy_workqueue(hdev->sob_reset_wq);
free_ts_free_wq:
destroy_workqueue(hdev->ts_free_obj_wq);
free_eq_wq:
destroy_workqueue(hdev->eq_wq);
free_cq_wq:
@@ -501,7 +507,7 @@ static void device_early_fini(struct hl_device *hdev)
kfree(hdev->hl_chip_info);
destroy_workqueue(hdev->sob_reset_wq);
destroy_workqueue(hdev->ts_free_obj_wq);
destroy_workqueue(hdev->eq_wq);
destroy_workqueue(hdev->device_reset_work.wq);
@@ -610,7 +616,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
u64 max_power, curr_power, dc_power, dividend;
int rc;
max_power = hdev->asic_prop.max_power_default;
max_power = hdev->max_power;
dc_power = hdev->asic_prop.dc_power_default;
rc = hl_fw_cpucp_power_get(hdev, &curr_power);
@@ -644,9 +650,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en
hdev->in_debug = 0;
if (!hdev->reset_info.hard_reset_pending)
hdev->asic_funcs->set_clock_gating(hdev);
goto out;
}
@@ -657,7 +660,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en
goto out;
}
hdev->asic_funcs->disable_clock_gating(hdev);
hdev->in_debug = 1;
out:
@@ -685,7 +687,8 @@ static void take_release_locks(struct hl_device *hdev)
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
bool skip_wq_flush)
{
if (hard_reset)
device_late_fini(hdev);
@@ -698,7 +701,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r
hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
/* Go over all the queues, release all CS and their jobs */
hl_cs_rollback_all(hdev);
hl_cs_rollback_all(hdev, skip_wq_flush);
/* Release all pending user interrupts, each pending user interrupt
* holds a reference to user context
@@ -978,7 +981,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
reset_upon_device_release = false, schedule_hard_reset = false;
reset_upon_device_release = false, schedule_hard_reset = false,
skip_wq_flush, delay_reset;
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
struct hl_ctx *ctx;
int i, rc;
@@ -991,6 +995,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hard_reset = !!(flags & HL_DRV_RESET_HARD);
from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
delay_reset = !!(flags & HL_DRV_RESET_DELAY);
if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
hard_instead_soft = true;
@@ -1040,6 +1046,9 @@ do_reset:
hdev->reset_info.in_reset = 1;
spin_unlock(&hdev->reset_info.lock);
if (delay_reset)
usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
handle_reset_trigger(hdev, flags);
/* This still allows the completion of some KDMA ops */
@@ -1076,7 +1085,7 @@ again:
return 0;
}
cleanup_resources(hdev, hard_reset, fw_reset);
cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);
kill_processes:
if (hard_reset) {
@@ -1232,7 +1241,7 @@ kill_processes:
goto out_err;
}
hl_set_max_power(hdev);
hl_fw_set_max_power(hdev);
} else {
rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
if (rc) {
@@ -1297,11 +1306,14 @@ out_err:
hdev->reset_info.hard_reset_cnt++;
} else if (reset_upon_device_release) {
dev_err(hdev->dev, "Failed to reset device after user release\n");
flags |= HL_DRV_RESET_HARD;
flags &= ~HL_DRV_RESET_DEV_RELEASE;
hard_reset = true;
goto again;
} else {
dev_err(hdev->dev, "Failed to do soft-reset\n");
hdev->reset_info.soft_reset_cnt++;
flags |= HL_DRV_RESET_HARD;
hard_reset = true;
goto again;
}
@@ -1538,7 +1550,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
/* Need to call this again because the max power might change,
* depending on card type for certain ASICs
*/
hl_set_max_power(hdev);
if (hdev->asic_prop.set_max_power_on_device_init)
hl_fw_set_max_power(hdev);
/*
* hl_hwmon_init() must be called after device_late_init(), because only
@@ -1682,7 +1695,7 @@ void hl_device_fini(struct hl_device *hdev)
hl_hwmon_fini(hdev);
cleanup_resources(hdev, true, false);
cleanup_resources(hdev, true, false, false);
/* Kill processes here after CS rollback. This is because the process
* can't really exit until all its CSs are done, which is what we

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -214,7 +214,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
dma_addr_t pkt_dma_addr;
struct hl_bd *sent_bd;
u32 tmp, expected_ack_val, pi;
int rc = 0;
int rc;
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
&pkt_dma_addr);
@@ -228,8 +228,11 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
mutex_lock(&hdev->send_cpu_message_lock);
if (hdev->disabled)
/* CPU-CP messages can be sent during soft-reset */
if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
rc = 0;
goto out;
}
if (hdev->device_cpu_disabled) {
rc = -EIO;
@@ -958,15 +961,17 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
if (rc)
if (rc) {
dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
return rc;
}
pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
return rc;
return 0;
}
int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
@@ -1202,8 +1207,6 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
hdev,
cpu_boot_status_reg,
status,
(status == CPU_BOOT_STATUS_IN_UBOOT) ||
(status == CPU_BOOT_STATUS_DRAM_RDY) ||
(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
@@ -2682,3 +2685,138 @@ int hl_fw_init_cpu(struct hl_device *hdev)
hl_fw_dynamic_init_cpu(hdev, fw_loader) :
hl_fw_static_init_cpu(hdev, fw_loader);
}
void hl_fw_set_pll_profile(struct hl_device *hdev)
{
hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
hdev->asic_prop.max_freq_value);
}
int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
if (!hdev->pdev) {
*cur_clk = 0;
*max_clk = 0;
return 0;
}
value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
if (value < 0) {
dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", value);
return value;
}
*max_clk = (value / 1000 / 1000);
value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
if (value < 0) {
dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", value);
return value;
}
*cur_clk = (value / 1000 / 1000);
return 0;
}
long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
{
struct cpucp_packet pkt;
u32 used_pll_idx;
u64 result;
int rc;
rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
if (rc)
return rc;
memset(&pkt, 0, sizeof(pkt));
if (curr)
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
else
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
if (rc) {
dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n",
used_pll_idx, rc);
return rc;
}
return (long) result;
}
void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
{
struct cpucp_packet pkt;
u32 used_pll_idx;
int rc;
rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
if (rc)
return;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
pkt.value = cpu_to_le64(freq);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
if (rc)
dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n",
used_pll_idx, rc);
}
long hl_fw_get_max_power(struct hl_device *hdev)
{
struct cpucp_packet pkt;
u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
if (rc) {
dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
return rc;
}
return result;
}
void hl_fw_set_max_power(struct hl_device *hdev)
{
struct cpucp_packet pkt;
int rc;
/* TODO: remove this after simulator supports this packet */
if (!hdev->pdev)
return;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = cpu_to_le64(hdev->max_power);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
if (rc)
dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
}

View File

@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -31,14 +31,15 @@
#define HL_NAME "habanalabs"
/* Use upper bits of mmap offset to store habana driver specific information.
* bits[63:61] - Encode mmap type
* bits[63:59] - Encode mmap type
* bits[45:0] - mmap offset value
*
* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
* defines are w.r.t to PAGE_SIZE
*/
#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT)
#define HL_MMAP_TYPE_MASK (0x7ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_SHIFT (59 - PAGE_SHIFT)
#define HL_MMAP_TYPE_MASK (0x1full << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_TS_BUFF (0x10ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT)
@@ -141,6 +142,9 @@ enum hl_mmu_page_table_location {
*
* - HL_DRV_RESET_FW_FATAL_ERR
* Set if reset is due to a fatal error from FW
*
* - HL_DRV_RESET_DELAY
* Set if a delay should be added before the reset
*/
#define HL_DRV_RESET_HARD (1 << 0)
@@ -150,6 +154,7 @@ enum hl_mmu_page_table_location {
#define HL_DRV_RESET_DEV_RELEASE (1 << 4)
#define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5)
#define HL_DRV_RESET_FW_FATAL_ERR (1 << 6)
#define HL_DRV_RESET_DELAY (1 << 7)
#define HL_MAX_SOBS_PER_MONITOR 8
@@ -402,8 +407,11 @@ enum hl_device_hw_state {
* @hop4_mask: mask to get the PTE address in hop 4.
* @hop5_mask: mask to get the PTE address in hop 5.
* @last_mask: mask to get the bit indicating this is the last hop.
* @pgt_size: size for page tables.
* @page_size: default page size used to allocate memory.
* @num_hops: The amount of hops supported by the translation table.
* @hop_table_size: HOP table size.
* @hop0_tables_total_size: total size for all HOP0 tables.
* @host_resident: Should the MMU page table reside in host memory or in the
* device DRAM.
*/
@@ -423,8 +431,11 @@ struct hl_mmu_properties {
u64 hop4_mask;
u64 hop5_mask;
u64 last_mask;
u64 pgt_size;
u32 page_size;
u32 num_hops;
u32 hop_table_size;
u32 hop0_tables_total_size;
u8 host_resident;
};
@@ -554,6 +565,9 @@ struct hl_hints_range {
* use-case of doing soft-reset in training (due
* to the fact that training runs on multiple
* devices)
* @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
* @set_max_power_on_device_init: true if need to set max power in F/W on device init.
* @supports_user_set_page_size: true if user can set the allocation page size.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -637,6 +651,9 @@ struct asic_fixed_properties {
u8 use_get_power_for_reset_history;
u8 supports_soft_reset;
u8 allow_inference_soft_reset;
u8 configurable_stop_on_err;
u8 set_max_power_on_device_init;
u8 supports_user_set_page_size;
};
/**
@@ -703,6 +720,40 @@ struct hl_cb_mgr {
struct idr cb_handles; /* protected by cb_lock */
};
/**
* struct hl_ts_mgr - describes the timestamp registration memory manager.
* @ts_lock: protects ts_handles.
* @ts_handles: an idr to hold all ts bufferes handles.
*/
struct hl_ts_mgr {
spinlock_t ts_lock;
struct idr ts_handles;
};
/**
* struct hl_ts_buff - describes a timestamp buffer.
* @refcount: reference counter for usage of the buffer.
* @hdev: pointer to device this buffer belongs to.
* @mmap: true if the buff is currently mapped to user.
* @kernel_buff_address: Holds the internal buffer's kernel virtual address.
* @user_buff_address: Holds the user buffer's kernel virtual address.
* @id: the buffer ID.
* @mmap_size: Holds the buffer size that was mmaped.
* @kernel_buff_size: Holds the internal kernel buffer size.
* @user_buff_size: Holds the user buffer size.
*/
struct hl_ts_buff {
struct kref refcount;
struct hl_device *hdev;
atomic_t mmap;
void *kernel_buff_address;
void *user_buff_address;
u32 id;
u32 mmap_size;
u32 kernel_buff_size;
u32 user_buff_size;
};
/**
* struct hl_cb - describes a Command Buffer.
* @refcount: reference counter for usage of the CB.
@@ -880,9 +931,54 @@ struct hl_user_interrupt {
u32 interrupt_id;
};
/**
* struct timestamp_reg_free_node - holds the timestamp registration free objects node
* @free_objects_node: node in the list free_obj_jobs
* @cq_cb: pointer to cq command buffer to be freed
* @ts_buff: pointer to timestamp buffer to be freed
*/
struct timestamp_reg_free_node {
struct list_head free_objects_node;
struct hl_cb *cq_cb;
struct hl_ts_buff *ts_buff;
};
/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
* the job will be to pass over the free_obj_jobs list and put refcount to objects
* in each node of the list
* @free_obj: workqueue object to free timestamp registration node objects
* @hdev: pointer to the device structure
* @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
*/
struct timestamp_reg_work_obj {
struct work_struct free_obj;
struct hl_device *hdev;
struct list_head *free_obj_head;
};
/* struct timestamp_reg_info - holds the timestamp registration related data.
* @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers.
* relevant only when doing timestamps records registration.
* @cq_cb: pointer to CQ counter CB.
* @timestamp_kernel_addr: timestamp handle address, where to set timestamp
* relevant only when doing timestamps records
* registration.
* @in_use: indicates if the node already in use. relevant only when doing
* timestamps records registration, since in this case the driver
* will have it's own buffer which serve as a records pool instead of
* allocating records dynamically.
*/
struct timestamp_reg_info {
struct hl_ts_buff *ts_buff;
struct hl_cb *cq_cb;
u64 *timestamp_kernel_addr;
u8 in_use;
};
/**
* struct hl_user_pending_interrupt - holds a context to a user thread
* pending on an interrupt
* @ts_reg_info: holds the timestamps registration nodes info
* @wait_list_node: node in the list of user threads pending on an interrupt
* @fence: hl fence object for interrupt completion
* @cq_target_value: CQ target value
@@ -890,10 +986,11 @@ struct hl_user_interrupt {
* handler for taget value comparison
*/
struct hl_user_pending_interrupt {
struct list_head wait_list_node;
struct hl_fence fence;
u64 cq_target_value;
u64 *cq_kernel_addr;
struct timestamp_reg_info ts_reg_info;
struct list_head wait_list_node;
struct hl_fence fence;
u64 cq_target_value;
u64 *cq_kernel_addr;
};
/**
@@ -1155,7 +1252,6 @@ struct fw_load_mgr {
* internal memory via DMA engine.
* @add_device_attr: add ASIC specific device attributes.
* @handle_eqe: handle event queue entry (IRQ) from CPU-CP.
* @set_pll_profile: change PLL profile (manual/automatic).
* @get_events_stat: retrieve event queue entries histogram.
* @read_pte: read MMU page table entry from DRAM.
* @write_pte: write MMU page table entry to DRAM.
@@ -1164,9 +1260,6 @@ struct fw_load_mgr {
* @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
* ASID-VA-size mask.
* @send_heartbeat: send is-alive packet to CPU-CP and verify response.
* @set_clock_gating: enable/disable clock gating per engine according to
* clock gating mask in hdev
* @disable_clock_gating: disable clock gating completely
* @debug_coresight: perform certain actions on Coresight for debugging.
* @is_device_idle: return true if device is idle, false otherwise.
* @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset
@@ -1187,7 +1280,6 @@ struct fw_load_mgr {
* @halt_coresight: stop the ETF and ETR traces.
* @ctx_init: context dependent initialization.
* @ctx_fini: context dependent cleanup.
* @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
* @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
* @load_firmware_to_device: load the firmware to the device's memory
* @load_boot_fit_to_device: load boot fit to device's memory
@@ -1225,6 +1317,8 @@ struct fw_load_mgr {
* @get_sob_addr: get SOB base address offset.
* @set_pci_memory_regions: setting properties of PCI memory regions
* @get_stream_master_qid_arr: get pointer to stream masters QID array
* @is_valid_dram_page_size: return true if page size is supported in device
* memory allocation, otherwise false.
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -1285,12 +1379,10 @@ struct hl_asic_funcs {
bool user_address, u64 val);
int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size,
void *blob_addr);
void (*add_device_attr)(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
struct attribute_group *dev_vrm_attr_grp);
void (*handle_eqe)(struct hl_device *hdev,
struct hl_eq_entry *eq_entry);
void (*set_pll_profile)(struct hl_device *hdev,
enum hl_pll_frequency freq);
void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
u32 *size);
u64 (*read_pte)(struct hl_device *hdev, u64 addr);
@@ -1300,8 +1392,6 @@ struct hl_asic_funcs {
int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
u32 flags, u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
void (*set_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
u8 mask_len, struct seq_file *s);
@@ -1320,7 +1410,6 @@ struct hl_asic_funcs {
void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx);
int (*ctx_init)(struct hl_ctx *ctx);
void (*ctx_fini)(struct hl_ctx *ctx);
int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
int (*load_firmware_to_device)(struct hl_device *hdev);
int (*load_boot_fit_to_device)(struct hl_device *hdev);
@@ -1355,6 +1444,7 @@ struct hl_asic_funcs {
u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
void (*set_pci_memory_regions)(struct hl_device *hdev);
u32* (*get_stream_master_qid_arr)(void);
bool (*is_valid_dram_page_size)(u32 page_size);
};
@@ -1742,6 +1832,8 @@ struct hl_vm_hw_block_list_node {
* @pages: the physical page array.
* @npages: num physical pages in the pack.
* @total_size: total size of all the pages in this list.
* @node: used to attach to deletion list that is used when all the allocations are cleared
* at the teardown of the context.
* @mapping_cnt: number of shared mappings.
* @exporting_cnt: number of dma-buf exporting.
* @asid: the context related to this list.
@@ -1757,6 +1849,7 @@ struct hl_vm_phys_pg_pack {
u64 *pages;
u64 npages;
u64 total_size;
struct list_head node;
atomic_t mapping_cnt;
u32 exporting_cnt;
u32 asid;
@@ -1834,6 +1927,7 @@ struct hl_debug_params {
* @ctx: current executing context. TODO: remove for multiple ctx per process
* @ctx_mgr: context manager to handle multiple context for this FD.
* @cb_mgr: command buffer manager to handle multiple buffers for this FD.
* @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers.
* @debugfs_list: list of relevant ASIC debugfs.
* @dev_node: node in the device list of file private data
* @refcount: number of related contexts.
@@ -1846,6 +1940,7 @@ struct hl_fpriv {
struct hl_ctx *ctx;
struct hl_ctx_mgr ctx_mgr;
struct hl_cb_mgr cb_mgr;
struct hl_ts_mgr ts_mem_mgr;
struct list_head debugfs_list;
struct list_head dev_node;
struct kref refcount;
@@ -2518,7 +2613,7 @@ struct hl_reset_info {
* @cq_wq: work queues of completion queues for executing work in process
* context.
* @eq_wq: work queue of event queue for executing work in process context.
* @sob_reset_wq: work queue for sob reset executions.
* @ts_free_obj_wq: work queue for timestamp registration objects release.
* @kernel_ctx: Kernel driver context structure.
* @kernel_queues: array of hl_hw_queue.
* @cs_mirror_list: CS mirror list for TDR.
@@ -2569,9 +2664,6 @@ struct hl_reset_info {
* @max_power: the max power of the device, as configured by the sysadmin. This
* value is saved so in case of hard-reset, the driver will restore
* this value and update the F/W after the re-initialization
* @clock_gating_mask: is clock gating enabled. bitmask that represents the
* different engines. See debugfs-driver-habanalabs for
* details.
* @boot_error_status_mask: contains a mask of the device boot error status.
* Each bit represents a different error, according to
* the defines in hl_boot_if.h. If the bit is cleared,
@@ -2611,8 +2703,6 @@ struct hl_reset_info {
* @in_debug: whether the device is in a state where the profiling/tracing infrastructure
* can be used. This indication is needed because in some ASICs we need to do
* specific operations to enable that infrastructure.
* @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
* only to POWER9 machines.
* @cdev_sysfs_created: were char devices and sysfs nodes created.
* @stop_on_err: true if engines should stop on error.
* @supports_sync_stream: is sync stream supported.
@@ -2651,7 +2741,7 @@ struct hl_device {
struct hl_user_interrupt common_user_interrupt;
struct workqueue_struct **cq_wq;
struct workqueue_struct *eq_wq;
struct workqueue_struct *sob_reset_wq;
struct workqueue_struct *ts_free_obj_wq;
struct hl_ctx *kernel_ctx;
struct hl_hw_queue *kernel_queues;
struct list_head cs_mirror_list;
@@ -2710,7 +2800,6 @@ struct hl_device {
atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
u64 clock_gating_mask;
u64 boot_error_status_mask;
u64 dram_pci_bar_start;
u64 last_successful_open_jif;
@@ -2736,7 +2825,6 @@ struct hl_device {
u8 device_cpu_disabled;
u8 dma_mask;
u8 in_debug;
u8 power9_64bit_dma_enable;
u8 cdev_sysfs_created;
u8 stop_on_err;
u8 supports_sync_stream;
@@ -2970,7 +3058,7 @@ int hl_cb_pool_fini(struct hl_device *hdev);
int hl_cb_va_pool_init(struct hl_ctx *ctx);
void hl_cb_va_pool_fini(struct hl_ctx *ctx);
void hl_cs_rollback_all(struct hl_device *hdev);
void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
@@ -3024,6 +3112,9 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags);
int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
u32 flags, u32 asid, u64 va, u64 size);
u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte);
u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
u8 hop_idx, u64 hop_addr, u64 virt_addr);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
@@ -3094,39 +3185,26 @@ enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr);
int hl_pci_init(struct hl_device *hdev);
void hl_pci_fini(struct hl_device *hdev);
long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
u64 freq);
int hl_get_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_set_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_get_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_current(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_fan_speed(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_pwm_info(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value);
u64 hl_get_max_power(struct hl_device *hdev);
void hl_set_max_power(struct hl_device *hdev);
int hl_set_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_get_power(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_clk_rate(struct hl_device *hdev,
u32 *cur_clk, u32 *max_clk);
void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void hl_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_set_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value);
long hl_fw_get_max_power(struct hl_device *hdev);
void hl_fw_set_max_power(struct hl_device *hdev);
int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_get_power(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
void hl_fw_set_pll_profile(struct hl_device *hdev);
void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp);
void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp);
void hw_sob_get(struct hl_hw_sob *hw_sob);
void hw_sob_put(struct hl_hw_sob *hw_sob);
void hl_encaps_handle_do_release(struct kref *ref);
@@ -3146,6 +3224,11 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
const char *format, ...);
char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
void hl_ts_mgr_init(struct hl_ts_mgr *mgr);
void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr);
int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle);
void hl_ts_put(struct hl_ts_buff *buff);
#ifdef CONFIG_DEBUG_FS

View File

@@ -140,6 +140,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_cb_mgr_init(&hpriv->cb_mgr);
hl_ctx_mgr_init(&hpriv->ctx_mgr);
hl_ts_mgr_init(&hpriv->ts_mem_mgr);
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
@@ -184,6 +185,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
out_err:
mutex_unlock(&hdev->fpriv_list_lock);
hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->restore_phase_mutex);
@@ -256,7 +258,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
hdev->cpu_queues_enable = 1;
hdev->heartbeat = 1;
hdev->mmu_enable = 1;
hdev->clock_gating_mask = ULONG_MAX;
hdev->sram_scrambler_enable = 1;
hdev->dram_scrambler_enable = 1;
hdev->bmc_enable = 1;

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -92,8 +92,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
hw_ip.first_available_interrupt_id =
prop->first_available_user_msix_interrupt;
hw_ip.first_available_interrupt_id = prop->first_available_user_msix_interrupt;
hw_ip.number_of_user_interrupts = prop->user_interrupt_count;
hw_ip.server_type = prop->server_type;
return copy_to_user(out, &hw_ip,
@@ -251,13 +251,12 @@ static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
&clk_rate.max_clk_rate_mhz);
rc = hl_fw_get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, &clk_rate.max_clk_rate_mhz);
if (rc)
return rc;
return copy_to_user(out, &clk_rate,
min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
return copy_to_user(out, &clk_rate, min_t(size_t, max_size, sizeof(clk_rate)))
? -EFAULT : 0;
}
static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)

View File

@@ -1,117 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2019-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "habanalabs.h"
void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
{
hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
hdev->asic_prop.max_freq_value);
}
int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
if (value < 0) {
dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
value);
return value;
}
*max_clk = (value / 1000 / 1000);
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
if (value < 0) {
dev_err(hdev->dev,
"Failed to retrieve device current clock %ld\n",
value);
return value;
}
*cur_clk = (value / 1000 / 1000);
return 0;
}
static ssize_t clk_max_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
hdev->asic_prop.max_freq_value = value;
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
static ssize_t clk_max_freq_mhz_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct hl_device *hdev = dev_get_drvdata(dev);
int rc;
u64 value;
if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV;
goto fail;
}
rc = kstrtoull(buf, 0, &value);
if (rc) {
count = -EINVAL;
goto fail;
}
hdev->asic_prop.max_freq_value = value * 1000 * 1000;
hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
hdev->asic_prop.max_freq_value);
fail:
return count;
}
static ssize_t clk_cur_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
static DEVICE_ATTR_RW(clk_max_freq_mhz);
static DEVICE_ATTR_RO(clk_cur_freq_mhz);
static struct attribute *hl_dev_attrs[] = {
&dev_attr_clk_max_freq_mhz.attr,
&dev_attr_clk_cur_freq_mhz.attr,
NULL,
};
void hl_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp)
{
dev_attr_grp->attrs = hl_dev_attrs;
}

View File

@@ -137,22 +137,137 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
return IRQ_HANDLED;
}
/*
* hl_ts_free_objects - handler of the free objects workqueue.
* This function should put refcount to objects that the registration node
* took refcount to them.
* @work: workqueue object pointer
*/
static void hl_ts_free_objects(struct work_struct *work)
{
struct timestamp_reg_work_obj *job =
container_of(work, struct timestamp_reg_work_obj, free_obj);
struct timestamp_reg_free_node *free_obj, *temp_free_obj;
struct list_head *free_list_head = job->free_obj_head;
struct hl_device *hdev = job->hdev;
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n",
free_obj->ts_buff,
free_obj->cq_cb);
hl_ts_put(free_obj->ts_buff);
hl_cb_put(free_obj->cq_cb);
kfree(free_obj);
}
kfree(free_list_head);
kfree(job);
}
/*
* This function called with spin_lock of wait_list_lock taken
* This function will set timestamp and delete the registration node from the
* wait_list_lock.
* and since we're protected with spin_lock here, so we cannot just put the refcount
* for the objects here, since the release function may be called and it's also a long
* logic (which might sleep also) that cannot be handled in irq context.
* so here we'll be filling a list with nodes of "put" jobs and then will send this
* list to a dedicated workqueue to do the actual put.
*/
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
struct list_head **free_list)
{
struct timestamp_reg_free_node *free_node;
u64 timestamp;
if (!(*free_list)) {
/* Alloc/Init the timestamp registration free objects list */
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
if (!(*free_list))
return -ENOMEM;
INIT_LIST_HEAD(*free_list);
}
free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
if (!free_node)
return -ENOMEM;
timestamp = ktime_get_ns();
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
pend->ts_reg_info.timestamp_kernel_addr,
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
list_del(&pend->wait_list_node);
/* Mark kernel CB node as free */
pend->ts_reg_info.in_use = 0;
/* Putting the refcount for ts_buff and cq_cb objects will be handled
* in workqueue context, just add job to free_list.
*/
free_node->ts_buff = pend->ts_reg_info.ts_buff;
free_node->cq_cb = pend->ts_reg_info.cq_cb;
list_add(&free_node->free_objects_node, *free_list);
return 0;
}
static void handle_user_cq(struct hl_device *hdev,
struct hl_user_interrupt *user_cq)
{
struct hl_user_pending_interrupt *pend;
struct hl_user_pending_interrupt *pend, *temp_pend;
struct list_head *ts_reg_free_list_head = NULL;
struct timestamp_reg_work_obj *job;
bool reg_node_handle_fail = false;
ktime_t now = ktime_get();
int rc;
/* For registration nodes:
* As part of handling the registration nodes, we should put refcount to
* some objects. the problem is that we cannot do that under spinlock
* or in irq handler context at all (since release functions are long and
* might sleep), so we will need to handle that part in workqueue context.
* To avoid handling kmalloc failure which compels us rolling back actions
* and move nodes hanged on the free list back to the interrupt wait list
* we always alloc the job of the WQ at the beginning.
*/
job = kmalloc(sizeof(*job), GFP_ATOMIC);
if (!job)
return;
spin_lock(&user_cq->wait_list_lock);
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
if ((pend->cq_kernel_addr &&
*(pend->cq_kernel_addr) >= pend->cq_target_value) ||
list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
pend->fence.timestamp = now;
complete_all(&pend->fence.completion);
if (pend->ts_reg_info.ts_buff) {
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head);
if (rc)
reg_node_handle_fail = true;
}
} else {
/* Handle wait target value node */
pend->fence.timestamp = now;
complete_all(&pend->fence.completion);
}
}
}
spin_unlock(&user_cq->wait_list_lock);
if (ts_reg_free_list_head) {
INIT_WORK(&job->free_obj, hl_ts_free_objects);
job->free_obj_head = ts_reg_free_list_head;
job->hdev = hdev;
queue_work(hdev->ts_free_obj_wq, &job->free_obj);
} else {
kfree(job);
}
}
/**

View File

@@ -11,6 +11,7 @@
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/pci-p2pdma.h>
MODULE_IMPORT_NS(DMA_BUF);
@@ -20,6 +21,34 @@ MODULE_IMPORT_NS(DMA_BUF);
/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
#define DRAM_POOL_PAGE_SIZE SZ_8M
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
struct hl_mem_in *args, u64 *handle);
static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 psize;
/*
* for ASIC that supports setting the allocation page size by user we will address
* user's choice only if it is not 0 (as 0 means taking the default page size)
*/
if (prop->supports_user_set_page_size && args->alloc.page_size) {
psize = args->alloc.page_size;
if (!hdev->asic_funcs->is_valid_dram_page_size(psize)) {
dev_err(hdev->dev, "user page size (%#x) is not valid\n", psize);
return -EINVAL;
}
} else {
psize = hdev->asic_prop.dram_page_size;
}
*page_size = psize;
return 0;
}
/*
* The va ranges in context object contain a list with the available chunks of
* device virtual memory.
@@ -61,11 +90,15 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
struct hl_vm_phys_pg_pack *phys_pg_pack;
u64 paddr = 0, total_size, num_pgs, i;
u32 num_curr_pgs, page_size;
int handle, rc;
bool contiguous;
int handle, rc;
num_curr_pgs = 0;
page_size = hdev->asic_prop.dram_page_size;
rc = set_alloc_page_size(hdev, args, &page_size);
if (rc)
return rc;
num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
total_size = num_pgs * page_size;
@@ -77,7 +110,11 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
contiguous = args->flags & HL_MEM_CONTIGUOUS;
if (contiguous) {
paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
if (is_power_of_2(page_size))
paddr = (u64) (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool,
total_size, NULL, page_size);
else
paddr = (u64) (uintptr_t) gen_pool_alloc(vm->dram_pg_pool, total_size);
if (!paddr) {
dev_err(hdev->dev,
"failed to allocate %llu contiguous pages with total size of %llu\n",
@@ -111,9 +148,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
phys_pg_pack->pages[i] = paddr + i * page_size;
} else {
for (i = 0 ; i < num_pgs ; i++) {
phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
vm->dram_pg_pool,
page_size);
if (is_power_of_2(page_size))
phys_pg_pack->pages[i] =
(u64) gen_pool_dma_alloc_align(vm->dram_pg_pool,
page_size, NULL,
page_size);
else
phys_pg_pack->pages[i] = (u64) gen_pool_alloc(vm->dram_pg_pool,
page_size);
if (!phys_pg_pack->pages[i]) {
dev_err(hdev->dev,
"Failed to allocate device memory (out of memory)\n");
@@ -652,7 +694,7 @@ static u64 get_va_block(struct hl_device *hdev,
continue;
/*
* In case hint address is 0, and arc_hints_range_reservation
* In case hint address is 0, and hints_range_reservation
* property enabled, then avoid allocating va blocks from the
* range reserved for hint addresses
*/
@@ -1967,16 +2009,15 @@ err_dec_exporting_cnt:
static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0;
struct hl_ctx *ctx = hpriv->ctx;
u32 handle = 0, block_size;
int rc, dmabuf_fd = -EBADF;
int rc;
switch (args->in.op) {
case HL_MEM_OP_ALLOC:
if (args->in.alloc.mem_size == 0) {
dev_err(hdev->dev,
"alloc size must be larger than 0\n");
dev_err(hdev->dev, "alloc size must be larger than 0\n");
rc = -EINVAL;
goto out;
}
@@ -1997,15 +2038,14 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
case HL_MEM_OP_MAP:
if (args->in.flags & HL_MEM_USERPTR) {
device_addr = args->in.map_host.host_virt_addr;
rc = 0;
dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n");
rc = -EPERM;
} else {
rc = get_paddr_from_handle(ctx, &args->in,
&device_addr);
rc = get_paddr_from_handle(ctx, &args->in, &device_addr);
memset(args, 0, sizeof(*args));
args->out.device_virt_addr = device_addr;
}
memset(args, 0, sizeof(*args));
args->out.device_virt_addr = device_addr;
break;
case HL_MEM_OP_UNMAP:
@@ -2013,22 +2053,19 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
break;
case HL_MEM_OP_MAP_BLOCK:
rc = map_block(hdev, args->in.map_block.block_addr,
&block_handle, &block_size);
rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size);
args->out.block_handle = block_handle;
args->out.block_size = block_size;
break;
case HL_MEM_OP_EXPORT_DMABUF_FD:
rc = export_dmabuf_from_addr(ctx,
args->in.export_dmabuf_fd.handle,
args->in.export_dmabuf_fd.mem_size,
args->in.flags,
&dmabuf_fd);
memset(args, 0, sizeof(*args));
args->out.fd = dmabuf_fd;
dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n");
rc = -EPERM;
break;
case HL_MEM_OP_TS_ALLOC:
rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
break;
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -EINVAL;
@@ -2039,6 +2076,258 @@ out:
return rc;
}
static void ts_buff_release(struct kref *ref)
{
struct hl_ts_buff *buff;
buff = container_of(ref, struct hl_ts_buff, refcount);
vfree(buff->kernel_buff_address);
vfree(buff->user_buff_address);
kfree(buff);
}
struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr,
u32 handle)
{
struct hl_ts_buff *buff;
spin_lock(&mgr->ts_lock);
buff = idr_find(&mgr->ts_handles, handle);
if (!buff) {
spin_unlock(&mgr->ts_lock);
dev_warn(hdev->dev,
"TS buff get failed, no match to handle 0x%x\n", handle);
return NULL;
}
kref_get(&buff->refcount);
spin_unlock(&mgr->ts_lock);
return buff;
}
void hl_ts_put(struct hl_ts_buff *buff)
{
kref_put(&buff->refcount, ts_buff_release);
}
static void buff_vm_close(struct vm_area_struct *vma)
{
struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data;
long new_mmap_size;
new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start);
if (new_mmap_size > 0) {
buff->mmap_size = new_mmap_size;
return;
}
atomic_set(&buff->mmap, 0);
hl_ts_put(buff);
vma->vm_private_data = NULL;
}
static const struct vm_operations_struct ts_buff_vm_ops = {
.close = buff_vm_close
};
int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_ts_buff *buff;
u32 handle, user_buff_size;
int rc;
/* We use the page offset to hold the idr and thus we need to clear
* it before doing the mmap itself
*/
handle = vma->vm_pgoff;
vma->vm_pgoff = 0;
buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle);
if (!buff) {
dev_err(hdev->dev,
"TS buff mmap failed, no match to handle 0x%x\n", handle);
return -EINVAL;
}
/* Validation check */
user_buff_size = vma->vm_end - vma->vm_start;
if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) {
dev_err(hdev->dev,
"TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n",
user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE));
rc = -EINVAL;
goto put_buff;
}
#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
if (!access_ok(VERIFY_WRITE,
(void __user *) (uintptr_t) vma->vm_start, user_buff_size)) {
#else
if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
user_buff_size)) {
#endif
dev_err(hdev->dev,
"user pointer is invalid - 0x%lx\n",
vma->vm_start);
rc = -EINVAL;
goto put_buff;
}
if (atomic_cmpxchg(&buff->mmap, 0, 1)) {
dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n");
rc = -EINVAL;
goto put_buff;
}
vma->vm_ops = &ts_buff_vm_ops;
vma->vm_private_data = buff;
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
rc = remap_vmalloc_range(vma, buff->user_buff_address, 0);
if (rc) {
atomic_set(&buff->mmap, 0);
goto put_buff;
}
buff->mmap_size = buff->user_buff_size;
vma->vm_pgoff = handle;
return 0;
put_buff:
hl_ts_put(buff);
return rc;
}
void hl_ts_mgr_init(struct hl_ts_mgr *mgr)
{
spin_lock_init(&mgr->ts_lock);
idr_init(&mgr->ts_handles);
}
void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr)
{
struct hl_ts_buff *buff;
struct idr *idp;
u32 id;
idp = &mgr->ts_handles;
idr_for_each_entry(idp, buff, id) {
if (kref_put(&buff->refcount, ts_buff_release) != 1)
dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n",
id);
}
idr_destroy(&mgr->ts_handles);
}
static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements)
{
struct hl_ts_buff *ts_buff = NULL;
u32 size;
void *p;
ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
if (!ts_buff)
return NULL;
/* Allocate the user buffer */
size = num_elements * sizeof(u64);
p = vmalloc_user(size);
if (!p)
goto free_mem;
ts_buff->user_buff_address = p;
ts_buff->user_buff_size = size;
/* Allocate the internal kernel buffer */
size = num_elements * sizeof(struct hl_user_pending_interrupt);
p = vmalloc(size);
if (!p)
goto free_user_buff;
ts_buff->kernel_buff_address = p;
ts_buff->kernel_buff_size = size;
return ts_buff;
free_user_buff:
vfree(ts_buff->user_buff_address);
free_mem:
kfree(ts_buff);
return NULL;
}
/**
* allocate_timestamps_buffers() - allocate timestamps buffers
* This function will allocate ts buffer that will later on be mapped to the user
* in order to be able to read the timestamp.
* in additon it'll allocate an extra buffer for registration management.
* since we cannot fail during registration for out-of-memory situation, so
* we'll prepare a pool which will be used as user interrupt nodes and instead
* of dynamically allocating nodes while registration we'll pick the node from
* this pool. in addtion it'll add node to the mapping hash which will be used
* to map user ts buffer to the internal kernel ts buffer.
* @hpriv: pointer to the private data of the fd
* @args: ioctl input
* @handle: user timestamp buffer handle as an output
*/
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
{
struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr;
struct hl_device *hdev = hpriv->hdev;
struct hl_ts_buff *ts_buff;
int rc = 0;
if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
args->num_of_elements, TS_MAX_ELEMENTS_NUM);
return -EINVAL;
}
/* Allocate ts buffer object
* This object will contain two buffers one that will be mapped to the user
* and another internal buffer for the driver use only, which won't be mapped
* to the user.
*/
ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements);
if (!ts_buff) {
rc = -ENOMEM;
goto out_err;
}
spin_lock(&ts_mgr->ts_lock);
rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC);
spin_unlock(&ts_mgr->ts_lock);
if (rc < 0) {
dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n");
goto release_ts_buff;
}
ts_buff->id = rc;
ts_buff->hdev = hdev;
kref_init(&ts_buff->refcount);
/* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */
*handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF;
*handle <<= PAGE_SHIFT;
dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id);
return 0;
release_ts_buff:
kref_put(&ts_buff->refcount, ts_buff_release);
out_err:
*handle = 0;
return rc;
}
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
{
enum hl_device_status status;
@@ -2154,6 +2443,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
args->out.fd = dmabuf_fd;
break;
case HL_MEM_OP_TS_ALLOC:
rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
break;
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -EINVAL;
@@ -2607,11 +2899,12 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
*/
void hl_vm_ctx_fini(struct hl_ctx *ctx)
{
struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node;
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_list;
struct hl_vm_hash_node *hnode;
struct hl_vm *vm = &hdev->vm;
struct hlist_node *tmp_node;
struct list_head free_list;
struct hl_mem_in args;
int i;
@@ -2644,19 +2937,24 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
mutex_unlock(&ctx->mmu_lock);
INIT_LIST_HEAD(&free_list);
spin_lock(&vm->idr_lock);
idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
if (phys_pg_list->asid == ctx->asid) {
dev_dbg(hdev->dev,
"page list 0x%px of asid %d is still alive\n",
phys_pg_list, ctx->asid);
atomic64_sub(phys_pg_list->total_size,
&hdev->dram_used_mem);
free_phys_pg_pack(hdev, phys_pg_list);
atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem);
idr_remove(&vm->phys_pg_pack_handles, i);
list_add(&phys_pg_list->node, &free_list);
}
spin_unlock(&vm->idr_lock);
list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node)
free_phys_pg_pack(hdev, phys_pg_list);
va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);

View File

@@ -662,3 +662,58 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
return rc;
}
u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX;
}
/**
* hl_mmu_get_hop_pte_phys_addr() - extract PTE address from HOP
* @ctx: pointer to the context structure to initialize.
* @hop_idx: HOP index.
* @hop_addr: HOP address.
* @virt_addr: virtual address fro the translation.
*
* @return the matching PTE value on success, otherwise U64_MAX.
*/
u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
u8 hop_idx, u64 hop_addr, u64 virt_addr)
{
u64 mask, shift;
if (hop_idx >= mmu_prop->num_hops) {
dev_err_ratelimited(ctx->hdev->dev, "Invalid hop index %d\n", hop_idx);
return U64_MAX;
}
/* currently max number of HOPs is 6 */
switch (hop_idx) {
case 0:
mask = mmu_prop->hop0_mask;
shift = mmu_prop->hop0_shift;
break;
case 1:
mask = mmu_prop->hop1_mask;
shift = mmu_prop->hop1_shift;
break;
case 2:
mask = mmu_prop->hop2_mask;
shift = mmu_prop->hop2_shift;
break;
case 3:
mask = mmu_prop->hop3_mask;
shift = mmu_prop->hop3_shift;
break;
case 4:
mask = mmu_prop->hop4_mask;
shift = mmu_prop->hop4_shift;
break;
default:
mask = mmu_prop->hop5_mask;
shift = mmu_prop->hop5_shift;
break;
}
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
}

View File

@@ -217,18 +217,10 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
mmu_prop->hop4_shift);
}
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
if (curr_pte & PAGE_PRESENT_MASK)
return curr_pte & HOP_PHYS_ADDR_MASK;
else
return ULLONG_MAX;
}
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
bool *is_new_hop)
{
u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop_addr == ULLONG_MAX) {
hop_addr = alloc_hop(ctx);
@@ -467,7 +459,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
{
/* MMU H/W fini was already done in device hw_fini() */
if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
@@ -546,7 +538,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
hop1_addr = get_next_hop_addr(ctx, curr_pte);
hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop1_addr == ULLONG_MAX)
goto not_mapped;
@@ -555,7 +547,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
hop2_addr = get_next_hop_addr(ctx, curr_pte);
hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop2_addr == ULLONG_MAX)
goto not_mapped;
@@ -564,7 +556,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
hop3_addr = get_next_hop_addr(ctx, curr_pte);
hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop3_addr == ULLONG_MAX)
goto not_mapped;
@@ -582,7 +574,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
}
if (!is_huge) {
hop4_addr = get_next_hop_addr(ctx, curr_pte);
hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop4_addr == ULLONG_MAX)
goto not_mapped;
@@ -845,27 +837,6 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
}
static inline u64 get_hop_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
int hop_num, u64 hop_addr, u64 virt_addr)
{
switch (hop_num) {
case 0:
return get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
case 1:
return get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
case 2:
return get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
case 3:
return get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
case 4:
return get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
default:
break;
}
return U64_MAX;
}
static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops)
{
@@ -906,7 +877,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
hops->hop_info[0].hop_pte_addr =
get_hop_pte_addr(ctx, mmu_prop, 0,
hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
hops->hop_info[0].hop_addr, virt_addr);
hops->hop_info[0].hop_pte_val =
hdev->asic_funcs->read_pte(hdev,
@@ -914,13 +885,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
for (i = 1 ; i < used_hops ; i++) {
hops->hop_info[i].hop_addr =
get_next_hop_addr(ctx,
hl_mmu_get_next_hop_addr(ctx,
hops->hop_info[i - 1].hop_pte_val);
if (hops->hop_info[i].hop_addr == ULLONG_MAX)
return -EFAULT;
hops->hop_info[i].hop_pte_addr =
get_hop_pte_addr(ctx, mmu_prop, i,
hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
hops->hop_info[i].hop_addr,
virt_addr);
hops->hop_info[i].hop_pte_val =

View File

@@ -338,10 +338,7 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
lower_32_bits(outbound_region_end_address));
rc |= hl_pci_iatu_write(hdev, 0x014, 0);
if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000);
else
rc |= hl_pci_iatu_write(hdev, 0x018, 0);
rc |= hl_pci_iatu_write(hdev, 0x018, 0);
rc |= hl_pci_iatu_write(hdev, 0x020,
upper_32_bits(outbound_region_end_address));
@@ -411,13 +408,13 @@ int hl_pci_init(struct hl_device *hdev)
rc = hdev->asic_funcs->pci_bars_map(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
dev_err(hdev->dev, "Failed to map PCI BAR addresses\n");
goto disable_device;
}
rc = hdev->asic_funcs->init_iatu(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize iATU\n");
dev_err(hdev->dev, "PCI controller was not initialized successfully\n");
goto unmap_pci_bars;
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -9,105 +9,91 @@
#include <linux/pci.h>
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct cpucp_packet pkt;
u32 used_pll_idx;
u64 result;
struct hl_device *hdev = dev_get_drvdata(dev);
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
if (value < 0)
return value;
hdev->asic_prop.max_freq_value = value;
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
static ssize_t clk_max_freq_mhz_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct hl_device *hdev = dev_get_drvdata(dev);
int rc;
u64 value;
rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
if (rc)
return rc;
if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV;
goto fail;
}
memset(&pkt, 0, sizeof(pkt));
rc = kstrtoull(buf, 0, &value);
if (rc) {
count = -EINVAL;
goto fail;
}
if (curr)
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
hdev->asic_prop.max_freq_value = value * 1000 * 1000;
hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index, hdev->asic_prop.max_freq_value);
fail:
return count;
}
static ssize_t clk_cur_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
if (value < 0)
return value;
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
static DEVICE_ATTR_RW(clk_max_freq_mhz);
static DEVICE_ATTR_RO(clk_cur_freq_mhz);
static struct attribute *hl_dev_clk_attrs[] = {
&dev_attr_clk_max_freq_mhz.attr,
&dev_attr_clk_cur_freq_mhz.attr,
};
static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
struct cpucp_info *cpucp_info;
cpucp_info = &hdev->asic_prop.cpucp_info;
if (cpucp_info->infineon_second_stage_version)
return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version),
le32_to_cpu(cpucp_info->infineon_second_stage_version));
else
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);
if (rc) {
dev_err(hdev->dev,
"Failed to get frequency of PLL %d, error %d\n",
used_pll_idx, rc);
return rc;
}
return (long) result;
return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
{
struct cpucp_packet pkt;
u32 used_pll_idx;
int rc;
static DEVICE_ATTR_RO(vrm_ver);
rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
if (rc)
return;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
pkt.value = cpu_to_le64(freq);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
if (rc)
dev_err(hdev->dev,
"Failed to set frequency to PLL %d, error %d\n",
used_pll_idx, rc);
}
u64 hl_get_max_power(struct hl_device *hdev)
{
struct cpucp_packet pkt;
u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);
if (rc) {
dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
return (u64) rc;
}
return result;
}
void hl_set_max_power(struct hl_device *hdev)
{
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = cpu_to_le64(hdev->max_power);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
if (rc)
dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
}
static struct attribute *hl_dev_vrm_attrs[] = {
&dev_attr_vrm_ver.attr,
};
static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -158,20 +144,6 @@ static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
}
static ssize_t infineon_ver_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
if (hdev->asic_prop.cpucp_info.infineon_second_stage_version)
return sprintf(buf, "%#04x %#04x\n",
le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version),
le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_second_stage_version));
else
return sprintf(buf, "%#04x\n",
le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version));
}
static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -188,6 +160,14 @@ static ssize_t thermal_ver_show(struct device *dev,
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version);
}
static ssize_t fw_os_ver_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.fw_os_version);
}
static ssize_t preboot_btl_ver_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -323,7 +303,9 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
val = hl_get_max_power(hdev);
val = hl_fw_get_max_power(hdev);
if (val < 0)
return val;
return sprintf(buf, "%lu\n", val);
}
@@ -348,7 +330,7 @@ static ssize_t max_power_store(struct device *dev,
}
hdev->max_power = value;
hl_set_max_power(hdev);
hl_fw_set_max_power(hdev);
out:
return count;
@@ -394,7 +376,6 @@ static DEVICE_ATTR_RO(device_type);
static DEVICE_ATTR_RO(fuse_ver);
static DEVICE_ATTR_WO(hard_reset);
static DEVICE_ATTR_RO(hard_reset_cnt);
static DEVICE_ATTR_RO(infineon_ver);
static DEVICE_ATTR_RW(max_power);
static DEVICE_ATTR_RO(pci_addr);
static DEVICE_ATTR_RO(preboot_btl_ver);
@@ -403,6 +384,7 @@ static DEVICE_ATTR_RO(soft_reset_cnt);
static DEVICE_ATTR_RO(status);
static DEVICE_ATTR_RO(thermal_ver);
static DEVICE_ATTR_RO(uboot_ver);
static DEVICE_ATTR_RO(fw_os_ver);
static struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)},
@@ -420,13 +402,13 @@ static struct attribute *hl_dev_attrs[] = {
&dev_attr_fuse_ver.attr,
&dev_attr_hard_reset.attr,
&dev_attr_hard_reset_cnt.attr,
&dev_attr_infineon_ver.attr,
&dev_attr_max_power.attr,
&dev_attr_pci_addr.attr,
&dev_attr_preboot_btl_ver.attr,
&dev_attr_status.attr,
&dev_attr_thermal_ver.attr,
&dev_attr_uboot_ver.attr,
&dev_attr_fw_os_ver.attr,
NULL,
};
@@ -441,10 +423,12 @@ static struct attribute_group hl_dev_attr_group = {
};
static struct attribute_group hl_dev_clks_attr_group;
static struct attribute_group hl_dev_vrm_attr_group;
static const struct attribute_group *hl_dev_attr_groups[] = {
&hl_dev_attr_group,
&hl_dev_clks_attr_group,
&hl_dev_vrm_attr_group,
NULL,
};
@@ -463,13 +447,23 @@ static const struct attribute_group *hl_dev_inference_attr_groups[] = {
NULL,
};
void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp)
{
dev_clk_attr_grp->attrs = hl_dev_clk_attrs;
}
void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp)
{
dev_vrm_attr_grp->attrs = hl_dev_vrm_attrs;
}
int hl_sysfs_init(struct hl_device *hdev)
{
int rc;
hdev->max_power = hdev->asic_prop.max_power_default;
hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group, &hl_dev_vrm_attr_group);
rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
if (rc) {

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -458,7 +458,6 @@ struct ecc_info_extract_params {
u64 block_address;
u32 num_memories;
bool derr;
bool disable_clock_gating;
};
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
@@ -614,6 +613,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@@ -667,6 +669,10 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->use_get_power_for_reset_history = true;
prop->configurable_stop_on_err = true;
prop->set_max_power_on_device_init = true;
return 0;
}
@@ -1636,7 +1642,7 @@ static int gaudi_late_init(struct hl_device *hdev)
*/
gaudi_mmu_prepare(hdev, 1);
hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
hl_fw_set_pll_profile(hdev);
return 0;
@@ -1896,7 +1902,6 @@ static int gaudi_sw_init(struct hl_device *hdev)
goto free_cpu_accessible_dma_pool;
spin_lock_init(&gaudi->hw_queues_lock);
mutex_init(&gaudi->clk_gate_mutex);
hdev->supports_sync_stream = true;
hdev->supports_coresight = true;
@@ -1946,8 +1951,6 @@ static int gaudi_sw_fini(struct hl_device *hdev)
dma_pool_destroy(hdev->dma_pool);
mutex_destroy(&gaudi->clk_gate_mutex);
kfree(gaudi);
return 0;
@@ -3738,76 +3741,8 @@ static void gaudi_tpc_stall(struct hl_device *hdev)
WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
}
static void gaudi_set_clock_gating(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 qman_offset;
bool enable;
int i;
/* In case we are during debug session, don't enable the clock gate
* as it may interfere
*/
if (hdev->in_debug)
return;
if (hdev->asic_prop.fw_security_enabled)
return;
for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
enable = !!(hdev->clock_gating_mask &
(BIT_ULL(gaudi_dma_assignment[i])));
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
enable ? QMAN_CGM1_PWR_GATE_EN : 0);
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
}
for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
enable = !!(hdev->clock_gating_mask &
(BIT_ULL(gaudi_dma_assignment[i])));
/* GC sends work to DMA engine through Upper CP in DMA5 so
* we need to not enable clock gating in that DMA
*/
if (i == GAUDI_HBM_DMA_4)
enable = 0;
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
enable ? QMAN_CGM1_PWR_GATE_EN : 0);
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
}
enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
enable = !!(hdev->clock_gating_mask &
(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
enable ? QMAN_CGM1_PWR_GATE_EN : 0);
WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
qman_offset += TPC_QMAN_OFFSET;
}
gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
}
static void gaudi_disable_clock_gating(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 qman_offset;
int i;
@@ -3832,8 +3767,6 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev)
qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
}
gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
}
static void gaudi_enable_timestamp(struct hl_device *hdev)
@@ -3876,8 +3809,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_
gaudi_stop_hbm_dma_qmans(hdev);
gaudi_stop_pci_dma_qmans(hdev);
hdev->asic_funcs->disable_clock_gating(hdev);
msleep(wait_timeout_ms);
gaudi_pci_dma_stall(hdev);
@@ -3931,7 +3862,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
/* mem cache invalidation */
WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
hl_mmu_invalidate_cache(hdev, true, 0);
WREG32(mmMMU_UP_MMU_ENABLE, 1);
WREG32(mmMMU_UP_SPI_MASK, 0xF);
@@ -4203,10 +4134,8 @@ static int gaudi_hw_init(struct hl_device *hdev)
/* In case the clock gating was enabled in preboot we need to disable
* it here before touching the MME/TPC registers.
* There is no need to take clk gating mutex because when this function
* runs, no other relevant code can run
*/
hdev->asic_funcs->disable_clock_gating(hdev);
gaudi_disable_clock_gating(hdev);
/* SRAM scrambler must be initialized after CPU is running from HBM */
gaudi_init_scrambler_sram(hdev);
@@ -4232,8 +4161,6 @@ static int gaudi_hw_init(struct hl_device *hdev)
gaudi_init_nic_qmans(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
gaudi_enable_timestamp(hdev);
/* MSI must be enabled before CPU queues and NIC are initialized */
@@ -4400,14 +4327,11 @@ skip_reset:
status);
if (gaudi) {
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
HW_CAP_HBM | HW_CAP_PCI_DMA |
HW_CAP_MME | HW_CAP_TPC_MASK |
HW_CAP_HBM_DMA | HW_CAP_PLL |
HW_CAP_NIC_MASK | HW_CAP_MMU |
HW_CAP_SRAM_SCRAMBLER |
HW_CAP_HBM_SCRAMBLER |
HW_CAP_CLK_GATE);
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
HW_CAP_HBM_SCRAMBLER);
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
@@ -4884,7 +4808,6 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
int rc = 0;
u64 val = 0;
@@ -4919,17 +4842,11 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
return rc;
}
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
/* Scrub HBM using all DMA channels in parallel */
rc = gaudi_hbm_scrubbing(hdev);
if (rc)
dev_err(hdev->dev,
"Failed to clear HBM in mem scrub all\n");
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
}
return rc;
@@ -6188,7 +6105,6 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
bool user_address, u32 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
u64 hbm_bar_addr, host_phys_end;
int rc = 0;
@@ -6196,38 +6112,31 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
*val = RREG32(addr - CFG_BASE);
dev_err_ratelimited(hdev->dev,
"Can't read register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
*val = RREG32(addr - CFG_BASE);
}
} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
*val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
if (hbm_bar_addr != U64_MAX) {
*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
(addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
hbm_bar_addr);
if (hbm_bar_addr != U64_MAX) {
*val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
}
if (hbm_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
user_address && !iommu_present(&pci_bus_type)) {
*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
} else {
rc = -EFAULT;
}
@@ -6239,7 +6148,6 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
bool user_address, u32 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
u64 hbm_bar_addr, host_phys_end;
int rc = 0;
@@ -6247,38 +6155,31 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
WREG32(addr - CFG_BASE, val);
dev_err_ratelimited(hdev->dev,
"Can't write register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
WREG32(addr - CFG_BASE, val);
}
} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
if (hbm_bar_addr != U64_MAX) {
writel(val, hdev->pcie_bar[HBM_BAR_ID] +
(addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
hbm_bar_addr);
if (hbm_bar_addr != U64_MAX) {
writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
}
if (hbm_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
user_address && !iommu_present(&pci_bus_type)) {
*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
} else {
rc = -EFAULT;
}
@@ -6290,7 +6191,6 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
bool user_address, u64 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
u64 hbm_bar_addr, host_phys_end;
int rc = 0;
@@ -6298,42 +6198,35 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
u32 val_l = RREG32(addr - CFG_BASE);
u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
dev_err_ratelimited(hdev->dev,
"Can't read register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
u32 val_l = RREG32(addr - CFG_BASE);
u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
*val = (((u64) val_h) << 32) | val_l;
}
*val = (((u64) val_h) << 32) | val_l;
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if (addr <=
DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
*val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
} else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
if (hbm_bar_addr != U64_MAX) {
*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
(addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
hbm_bar_addr);
if (hbm_bar_addr != U64_MAX) {
*val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
}
if (hbm_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
user_address && !iommu_present(&pci_bus_type)) {
*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
} else {
rc = -EFAULT;
}
@@ -6345,7 +6238,6 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
bool user_address, u64 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
u64 hbm_bar_addr, host_phys_end;
int rc = 0;
@@ -6353,41 +6245,33 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
dev_err_ratelimited(hdev->dev,
"Can't write register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
WREG32(addr - CFG_BASE, lower_32_bits(val));
WREG32(addr + sizeof(u32) - CFG_BASE,
upper_32_bits(val));
}
WREG32(addr - CFG_BASE, lower_32_bits(val));
WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
(addr - SRAM_BASE_ADDR));
} else if (addr <=
DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
} else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
if (hbm_bar_addr != U64_MAX) {
writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
(addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
hbm_bar_addr);
if (hbm_bar_addr != U64_MAX) {
writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
}
if (hbm_bar_addr == U64_MAX)
rc = -EIO;
} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
user_address && !iommu_present(&pci_bus_type)) {
*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
} else {
rc = -EFAULT;
}
@@ -6446,7 +6330,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
void *blob_addr)
{
u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
struct gaudi_device *gaudi = hdev->asic_specific;
u32 qm_glbl_sts0, qm_cgm_sts;
u64 dma_offset, qm_offset;
dma_addr_t dma_addr;
@@ -6462,10 +6345,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
if (!kernel_addr)
return -ENOMEM;
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
hdev->asic_funcs->hw_queues_lock(hdev);
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
@@ -6550,10 +6429,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
out:
hdev->asic_funcs->hw_queues_unlock(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
dma_addr);
@@ -6601,10 +6476,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
return;
}
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
@@ -6882,10 +6753,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
}
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
@@ -7266,10 +7133,8 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
struct ecc_info_extract_params *params, u64 *ecc_address,
u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 i, num_mem_regs, reg, err_bit;
u64 err_addr, err_word = 0;
int rc = 0;
num_mem_regs = params->num_memories / 32 +
((params->num_memories % 32) ? 1 : 0);
@@ -7282,11 +7147,6 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
else
err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
if (params->disable_clock_gating) {
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
}
/* Set invalid wrapper index */
*memory_wrapper_idx = 0xFF;
@@ -7303,8 +7163,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
if (*memory_wrapper_idx == 0xFF) {
dev_err(hdev->dev, "ECC error information cannot be found\n");
rc = -EINVAL;
goto enable_clk_gate;
return -EINVAL;
}
WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
@@ -7324,14 +7183,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
enable_clk_gate:
if (params->disable_clock_gating) {
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
}
return rc;
return 0;
}
/*
@@ -7589,7 +7441,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
params.num_memories = 90;
params.derr = false;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
@@ -7598,7 +7449,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
params.num_memories = 90;
params.derr = true;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_ACC_SERR:
@@ -7609,7 +7459,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
params.num_memories = 128;
params.derr = false;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_ACC_DERR:
@@ -7620,7 +7469,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
params.num_memories = 128;
params.derr = true;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_SBAB_SERR:
@@ -7632,7 +7480,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
params.num_memories = 33;
params.derr = false;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_SBAB_DERR:
@@ -7644,7 +7491,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
params.num_memories = 33;
params.derr = true;
params.disable_clock_gating = true;
extract_info_from_fw = false;
break;
default:
@@ -7819,6 +7665,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev,
fw_alive->thread_id, fw_alive->uptime_seconds);
}
static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
void *data)
{
char desc[64] = "", *type;
struct eq_nic_sei_event *eq_nic_sei = data;
u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
switch (eq_nic_sei->axi_error_cause) {
case RXB:
type = "RXB";
break;
case RXE:
type = "RXE";
break;
case TXS:
type = "TXS";
break;
case TXE:
type = "TXE";
break;
case QPC_RESP:
type = "QPC_RESP";
break;
case NON_AXI_ERR:
type = "NON_AXI_ERR";
break;
case TMR:
type = "TMR";
break;
default:
dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
eq_nic_sei->axi_error_cause);
type = "N/A";
break;
}
snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
eq_nic_sei->id);
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc);
}
static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
{
/* GAUDI doesn't support any reset except hard-reset */
@@ -7966,19 +7854,9 @@ static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
char *interrupt_name)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
bool soft_reset_required = false;
/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
* gating, and thus cannot be done in CPU-CP and should be done instead
* by the driver.
*/
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
@@ -7996,10 +7874,6 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
/* Clear interrupts */
WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
return soft_reset_required;
}
@@ -8066,6 +7940,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
struct hl_eq_entry *eq_entry)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u64 data = le64_to_cpu(eq_entry->data[0]);
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
u32 fw_fatal_err_flag = 0;
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
@@ -8102,6 +7977,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_PSOC_MEM_DERR:
case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
case GAUDI_EVENT_MMU_DERR:
@@ -8202,6 +8078,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_PSOC_MEM_SERR:
case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
fallthrough;
@@ -8263,6 +8140,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
hl_fw_unmask_irq(hdev, event_type);
break;
case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
hl_fw_unmask_irq(hdev, event_type);
break;
case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
gaudi_print_irq_info(hdev, event_type, false);
gaudi_print_sm_sei_info(hdev, event_type,
@@ -8274,6 +8156,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
hl_fw_unmask_irq(hdev, event_type);
break;
case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
break;
case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
gaudi_print_clk_change_info(hdev, event_type);
hl_fw_unmask_irq(hdev, event_type);
@@ -8314,7 +8199,7 @@ reset_device:
| HL_DRV_RESET_BYPASS_REQ_TO_FW
| fw_fatal_err_flag);
else if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag);
else
hl_fw_unmask_irq(hdev, event_type);
}
@@ -8461,10 +8346,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u64 offset;
int i, dma_id, port;
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
if (s)
seq_puts(s,
"\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
@@ -8585,10 +8466,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
if (s)
seq_puts(s, "\n");
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
return is_idle;
}
@@ -8628,10 +8505,8 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
* this function should be used only during initialization and/or after reset,
* when there are no active users.
*/
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
u32 tpc_id)
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u64 kernel_timeout;
u32 status, offset;
int rc;
@@ -8643,10 +8518,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
else
kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
lower_32_bits(tpc_kernel));
WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
@@ -8686,8 +8557,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
dev_err(hdev->dev,
"Timeout while waiting for TPC%d icache prefetch\n",
tpc_id);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
return -EIO;
}
@@ -8711,8 +8580,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
dev_err(hdev->dev,
"Timeout while waiting for TPC%d vector pipe\n",
tpc_id);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
return -EIO;
}
@@ -8724,9 +8591,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
1000,
kernel_timeout);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
if (rc) {
dev_err(hdev->dev,
"Timeout while waiting for TPC%d kernel to execute\n",
@@ -8791,7 +8655,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
hdev->internal_cb_pool_dma_addr,
HOST_SPACE_INTERNAL_CB_SZ);
hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
mutex_unlock(&ctx->mmu_lock);
if (rc)
@@ -8826,7 +8690,7 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
HOST_SPACE_INTERNAL_CB_SZ);
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
HOST_SPACE_INTERNAL_CB_SZ);
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
mutex_unlock(&ctx->mmu_lock);
gen_pool_destroy(hdev->internal_cb_pool);
@@ -9204,14 +9068,7 @@ static void gaudi_reset_sob(struct hl_device *hdev, void *data)
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
HL_POWER9_HOST_MAGIC) {
hdev->power9_64bit_dma_enable = 1;
hdev->dma_mask = 64;
} else {
hdev->power9_64bit_dma_enable = 0;
hdev->dma_mask = 48;
}
hdev->dma_mask = 48;
}
static u64 gaudi_get_device_time(struct hl_device *hdev)
@@ -9293,23 +9150,15 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
struct hl_sync_to_engine_map *map)
{
struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
struct gaudi_device *gaudi = hdev->asic_specific;
int i, j, rc;
u32 reg_value;
/* Iterate over TPC engines */
for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
/* TPC registered must be accessed with clock gating disabled */
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
sds->props[SP_NEXT_TPC] * i);
/* We can reenable clock_gating */
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
ENGINE_TPC, i);
if (rc)
@@ -9319,20 +9168,11 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
/* Iterate over MME engines */
for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
/* MME registered must be accessed with clock gating
* disabled
*/
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
sds->props[SP_NEXT_MME] * i +
j * sizeof(u32));
/* We can reenable clock_gating */
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
rc = gaudi_add_sync_to_engine_map_entry(
map, reg_value, ENGINE_MME,
i * sds->props[SP_SUB_MME_ENG_NUM] + j);
@@ -9537,6 +9377,29 @@ static u32 *gaudi_get_stream_master_qid_arr(void)
return gaudi_stream_master;
}
static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
struct cpucp_info *cpucp_info;
cpucp_info = &hdev->asic_prop.cpucp_info;
return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}
static DEVICE_ATTR_RO(infineon_ver);
static struct attribute *gaudi_vrm_dev_attrs[] = {
&dev_attr_infineon_ver.attr,
};
static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
struct attribute_group *dev_vrm_attr_grp)
{
hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}
static const struct hl_asic_funcs gaudi_funcs = {
.early_init = gaudi_early_init,
.early_fini = gaudi_early_fini,
@@ -9574,17 +9437,14 @@ static const struct hl_asic_funcs gaudi_funcs = {
.debugfs_read64 = gaudi_debugfs_read64,
.debugfs_write64 = gaudi_debugfs_write64,
.debugfs_read_dma = gaudi_debugfs_read_dma,
.add_device_attr = hl_add_device_attr,
.add_device_attr = gaudi_add_device_attr,
.handle_eqe = gaudi_handle_eqe,
.set_pll_profile = hl_set_pll_profile,
.get_events_stat = gaudi_get_events_stat,
.read_pte = gaudi_read_pte,
.write_pte = gaudi_write_pte,
.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
.send_heartbeat = gaudi_send_heartbeat,
.set_clock_gating = gaudi_set_clock_gating,
.disable_clock_gating = gaudi_disable_clock_gating,
.debug_coresight = gaudi_debug_coresight,
.is_device_idle = gaudi_is_device_idle,
.non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
@@ -9600,7 +9460,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
.halt_coresight = gaudi_halt_coresight,
.ctx_init = gaudi_ctx_init,
.ctx_fini = gaudi_ctx_fini,
.get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
.load_firmware_to_device = gaudi_load_firmware_to_device,
.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
@@ -9626,7 +9485,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
.state_dump_init = gaudi_state_dump_init,
.get_sob_addr = gaudi_get_sob_addr,
.set_pci_memory_regions = gaudi_set_pci_memory_regions,
.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
.is_valid_dram_page_size = NULL
};
/**

View File

@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2019-2020 HabanaLabs, Ltd.
* Copyright 2019-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -177,7 +177,6 @@
#define HW_CAP_MSI BIT(6)
#define HW_CAP_CPU_Q BIT(7)
#define HW_CAP_HBM_DMA BIT(8)
#define HW_CAP_CLK_GATE BIT(9)
#define HW_CAP_SRAM_SCRAMBLER BIT(10)
#define HW_CAP_HBM_SCRAMBLER BIT(11)
@@ -313,8 +312,6 @@ struct gaudi_internal_qman_info {
* struct gaudi_device - ASIC specific manage structure.
* @cpucp_info_get: get information on device from CPU-CP
* @hw_queues_lock: protects the H/W queues from concurrent access.
* @clk_gate_mutex: protects code areas that require clock gating to be disabled
* temporarily
* @internal_qmans: Internal QMANs information. The array size is larger than
* the actual number of internal queues because they are not in
* consecutive order.
@@ -337,7 +334,6 @@ struct gaudi_device {
/* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock;
struct mutex clk_gate_mutex;
struct gaudi_internal_qman_info internal_qmans[GAUDI_QUEUE_ID_SIZE];
@@ -355,8 +351,6 @@ struct gaudi_device {
void gaudi_init_security(struct hl_device *hdev);
void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
void gaudi_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -430,6 +430,9 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
prop->dmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
/* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
@@ -438,6 +441,9 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@@ -477,6 +483,10 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->use_get_power_for_reset_history = true;
prop->configurable_stop_on_err = true;
prop->set_max_power_on_device_init = true;
return 0;
}
@@ -893,7 +903,7 @@ int goya_late_init(struct hl_device *hdev)
goya->pm_mng_profile = PM_AUTO;
hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
goya_set_pll_profile(hdev, PLL_LOW);
schedule_delayed_work(&goya->goya_work->work_freq,
usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
@@ -2700,8 +2710,7 @@ int goya_mmu_init(struct hl_device *hdev)
WREG32_AND(mmSTLB_STLB_FEATURE_EN,
(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
WREG32(mmMMU_MMU_ENABLE, 1);
WREG32(mmMMU_SPI_MASK, 0xF);
@@ -5341,7 +5350,7 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
/* Treat as invalidate all because there is no range invalidation
* in Goya
*/
return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
return hl_mmu_invalidate_cache(hdev, is_hard, flags);
}
int goya_send_heartbeat(struct hl_device *hdev)
@@ -5391,16 +5400,6 @@ int goya_cpucp_info_get(struct hl_device *hdev)
return 0;
}
static void goya_set_clock_gating(struct hl_device *hdev)
{
/* clock gating not supported in Goya */
}
static void goya_disable_clock_gating(struct hl_device *hdev)
{
/* clock gating not supported in Goya */
}
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u8 mask_len, struct seq_file *s)
{
@@ -5564,16 +5563,7 @@ static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
{
if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
HL_POWER9_HOST_MAGIC) {
dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
hdev->power9_64bit_dma_enable = 1;
hdev->dma_mask = 64;
} else {
dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
hdev->power9_64bit_dma_enable = 0;
hdev->dma_mask = 48;
}
hdev->dma_mask = 48;
}
u64 goya_get_device_time(struct hl_device *hdev)
@@ -5727,15 +5717,12 @@ static const struct hl_asic_funcs goya_funcs = {
.debugfs_read_dma = goya_debugfs_read_dma,
.add_device_attr = goya_add_device_attr,
.handle_eqe = goya_handle_eqe,
.set_pll_profile = goya_set_pll_profile,
.get_events_stat = goya_get_events_stat,
.read_pte = goya_read_pte,
.write_pte = goya_write_pte,
.mmu_invalidate_cache = goya_mmu_invalidate_cache,
.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
.send_heartbeat = goya_send_heartbeat,
.set_clock_gating = goya_set_clock_gating,
.disable_clock_gating = goya_disable_clock_gating,
.debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
.non_hard_reset_late_init = goya_non_hard_reset_late_init,
@@ -5751,7 +5738,6 @@ static const struct hl_asic_funcs goya_funcs = {
.halt_coresight = goya_halt_coresight,
.ctx_init = goya_ctx_init,
.ctx_fini = goya_ctx_fini,
.get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq,
.load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device,
@@ -5778,6 +5764,7 @@ static const struct hl_asic_funcs goya_funcs = {
.get_sob_addr = &goya_get_sob_addr,
.set_pci_memory_regions = goya_set_pci_memory_regions,
.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
.is_valid_dram_page_size = NULL
};
/*

View File

@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -217,8 +217,8 @@ u64 goya_get_max_power(struct hl_device *hdev);
void goya_set_max_power(struct hl_device *hdev, u64 value);
void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
struct attribute_group *dev_vrm_attr_grp);
int goya_cpucp_info_get(struct hl_device *hdev);
int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -11,21 +11,24 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
{
struct goya_device *goya = hdev->asic_specific;
if (!hdev->pdev)
return;
switch (freq) {
case PLL_HIGH:
hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
break;
case PLL_LOW:
hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
break;
case PLL_LAST:
hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
break;
default:
dev_err(hdev->dev, "unknown frequency setting\n");
@@ -41,7 +44,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, false);
if (value < 0)
return value;
@@ -74,7 +77,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}
hl_set_frequency(hdev, HL_GOYA_MME_PLL, value);
hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, value);
goya->mme_clk = value;
fail:
@@ -90,7 +93,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false);
value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, false);
if (value < 0)
return value;
@@ -123,7 +126,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}
hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
goya->tpc_clk = value;
fail:
@@ -139,7 +142,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false);
value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, false);
if (value < 0)
return value;
@@ -172,7 +175,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}
hl_set_frequency(hdev, HL_GOYA_IC_PLL, value);
hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, value);
goya->ic_clk = value;
fail:
@@ -188,7 +191,7 @@ static ssize_t mme_clk_curr_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, true);
if (value < 0)
return value;
@@ -205,7 +208,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true);
value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, true);
if (value < 0)
return value;
@@ -222,7 +225,7 @@ static ssize_t ic_clk_curr_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true);
value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, true);
if (value < 0)
return value;
@@ -347,7 +350,7 @@ static DEVICE_ATTR_RW(pm_mng_profile);
static DEVICE_ATTR_RW(tpc_clk);
static DEVICE_ATTR_RO(tpc_clk_curr);
static struct attribute *goya_dev_attrs[] = {
static struct attribute *goya_clk_dev_attrs[] = {
&dev_attr_high_pll.attr,
&dev_attr_ic_clk.attr,
&dev_attr_ic_clk_curr.attr,
@@ -356,11 +359,27 @@ static struct attribute *goya_dev_attrs[] = {
&dev_attr_pm_mng_profile.attr,
&dev_attr_tpc_clk.attr,
&dev_attr_tpc_clk_curr.attr,
NULL,
};
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp)
static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
dev_attr_grp->attrs = goya_dev_attrs;
struct hl_device *hdev = dev_get_drvdata(dev);
struct cpucp_info *cpucp_info;
cpucp_info = &hdev->asic_prop.cpucp_info;
return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}
static DEVICE_ATTR_RO(infineon_ver);
static struct attribute *goya_vrm_dev_attrs[] = {
&dev_attr_infineon_ver.attr,
};
void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
struct attribute_group *dev_vrm_attr_grp)
{
dev_clk_attr_grp->attrs = goya_clk_dev_attrs;
dev_vrm_attr_grp->attrs = goya_vrm_dev_attrs;
}

View File

@@ -780,6 +780,7 @@ struct cpucp_security_info {
* (0 = functional 1 = binned)
* @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance
* (0 = functional 1 = binned)
* @fw_os_version: Firmware OS Version
*/
struct cpucp_info {
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
@@ -807,6 +808,7 @@ struct cpucp_info {
__le32 reserved6;
__u8 pll_map[PLL_MAP_LEN];
__le64 mme_binning_mask;
__u8 fw_os_version[VERSION_MAX_LEN];
};
struct cpucp_mac_addr {

View File

@@ -33,6 +33,7 @@ enum cpu_boot_err {
CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
CPU_BOOT_ERR_BINNING_FAIL = 19,
CPU_BOOT_ERR_TPM_FAIL = 20,
CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
CPU_BOOT_ERR_ENABLED = 31,
CPU_BOOT_ERR_SCND_EN = 63,
CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
@@ -111,6 +112,9 @@ enum cpu_boot_err {
*
* CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed.
*
* CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature
* sensor.
*
* CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the
* running FW populates the error
@@ -134,6 +138,7 @@ enum cpu_boot_err {
#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL)
#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL)
#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED)

View File

@@ -311,6 +311,16 @@ enum gaudi_async_event_id {
GAUDI_EVENT_FW_ALIVE_S = 645,
GAUDI_EVENT_DEV_RESET_REQ = 646,
GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647,
GAUDI_EVENT_STATUS_NIC0_ENG0 = 648,
GAUDI_EVENT_STATUS_NIC0_ENG1 = 649,
GAUDI_EVENT_STATUS_NIC1_ENG0 = 650,
GAUDI_EVENT_STATUS_NIC1_ENG1 = 651,
GAUDI_EVENT_STATUS_NIC2_ENG0 = 652,
GAUDI_EVENT_STATUS_NIC2_ENG1 = 653,
GAUDI_EVENT_STATUS_NIC3_ENG0 = 654,
GAUDI_EVENT_STATUS_NIC3_ENG1 = 655,
GAUDI_EVENT_STATUS_NIC4_ENG0 = 656,
GAUDI_EVENT_STATUS_NIC4_ENG1 = 657,
GAUDI_EVENT_FIX_POWER_ENV_S = 658,
GAUDI_EVENT_FIX_POWER_ENV_E = 659,
GAUDI_EVENT_FIX_THERMAL_ENV_S = 660,

View File

@@ -1070,10 +1070,10 @@ static int kgdbts_option_setup(char *opt)
{
if (strlen(opt) >= MAX_CONFIG_LEN) {
printk(KERN_ERR "kgdbts: config string too long\n");
return -ENOSPC;
return 1;
}
strcpy(config, opt);
return 0;
return 1;
}
__setup("kgdbts=", kgdbts_option_setup);

View File

@@ -44,14 +44,14 @@ void lkdtm_FORTIFIED_SUBOBJECT(void)
strscpy(src, "over ten bytes", size);
size = strlen(src) + 1;
pr_info("trying to strcpy past the end of a member of a struct\n");
pr_info("trying to strncpy past the end of a member of a struct\n");
/*
* memcpy(target.a, src, 20); will hit a compile error because the
* strncpy(target.a, src, 20); will hit a compile error because the
* compiler knows at build time that target.a < 20 bytes. Use a
* volatile to force a runtime error.
*/
memcpy(target.a, src, size);
strncpy(target.a, src, size);
/* Store result to global to prevent the code from being eliminated */
fortify_scratch_space = target.a[3];

View File

@@ -2148,6 +2148,7 @@ void mei_cl_all_disconnect(struct mei_device *dev)
list_for_each_entry(cl, &dev->file_list, link)
mei_cl_set_disconnected(cl);
}
EXPORT_SYMBOL_GPL(mei_cl_all_disconnect);
static struct mei_cl *mei_cl_dma_map_find(struct mei_device *dev, u8 buffer_id)
{

View File

@@ -107,6 +107,7 @@
#define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */
#define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */
#define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */
#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */
/*
* MEI HW Section
@@ -120,6 +121,7 @@
#define PCI_CFG_HFS_2 0x48
#define PCI_CFG_HFS_3 0x60
# define PCI_CFG_HFS_3_FW_SKU_MSK 0x00000070
# define PCI_CFG_HFS_3_FW_SKU_IGN 0x00000000
# define PCI_CFG_HFS_3_FW_SKU_SPS 0x00000060
#define PCI_CFG_HFS_4 0x64
#define PCI_CFG_HFS_5 0x68

View File

@@ -1257,7 +1257,11 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
/* check if ME wants a reset */
if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) {
dev_warn(dev->dev, "FW not ready: resetting.\n");
schedule_work(&dev->reset_work);
if (dev->dev_state == MEI_DEV_POWERING_DOWN ||
dev->dev_state == MEI_DEV_POWER_DOWN)
mei_cl_all_disconnect(dev);
else if (dev->dev_state != MEI_DEV_DISABLED)
schedule_work(&dev->reset_work);
goto end;
}
@@ -1289,12 +1293,14 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
if (rets == -ENODATA)
break;
if (rets &&
(dev->dev_state != MEI_DEV_RESETTING &&
dev->dev_state != MEI_DEV_POWER_DOWN)) {
dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n",
rets);
schedule_work(&dev->reset_work);
if (rets) {
dev_err(dev->dev, "mei_irq_read_handler ret = %d, state = %d.\n",
rets, dev->dev_state);
if (dev->dev_state != MEI_DEV_RESETTING &&
dev->dev_state != MEI_DEV_DISABLED &&
dev->dev_state != MEI_DEV_POWERING_DOWN &&
dev->dev_state != MEI_DEV_POWER_DOWN)
schedule_work(&dev->reset_work);
goto end;
}
}
@@ -1405,16 +1411,16 @@ static bool mei_me_fw_type_sps_4(const struct pci_dev *pdev)
.quirk_probe = mei_me_fw_type_sps_4
/**
* mei_me_fw_type_sps() - check for sps sku
* mei_me_fw_type_sps_ign() - check for sps or ign sku
*
* Read ME FW Status register to check for SPS Firmware.
* The SPS FW is only signaled in pci function 0
* Read ME FW Status register to check for SPS or IGN Firmware.
* The SPS/IGN FW is only signaled in pci function 0
*
* @pdev: pci device
*
* Return: true in case of SPS firmware
* Return: true in case of SPS/IGN firmware
*/
static bool mei_me_fw_type_sps(const struct pci_dev *pdev)
static bool mei_me_fw_type_sps_ign(const struct pci_dev *pdev)
{
u32 reg;
u32 fw_type;
@@ -1427,14 +1433,15 @@ static bool mei_me_fw_type_sps(const struct pci_dev *pdev)
dev_dbg(&pdev->dev, "fw type is %d\n", fw_type);
return fw_type == PCI_CFG_HFS_3_FW_SKU_SPS;
return fw_type == PCI_CFG_HFS_3_FW_SKU_IGN ||
fw_type == PCI_CFG_HFS_3_FW_SKU_SPS;
}
#define MEI_CFG_KIND_ITOUCH \
.kind = "itouch"
#define MEI_CFG_FW_SPS \
.quirk_probe = mei_me_fw_type_sps
#define MEI_CFG_FW_SPS_IGN \
.quirk_probe = mei_me_fw_type_sps_ign
#define MEI_CFG_FW_VER_SUPP \
.fw_ver_supported = 1
@@ -1535,7 +1542,7 @@ static const struct mei_cfg mei_me_pch12_sps_cfg = {
MEI_CFG_PCH8_HFS,
MEI_CFG_FW_VER_SUPP,
MEI_CFG_DMA_128,
MEI_CFG_FW_SPS,
MEI_CFG_FW_SPS_IGN,
};
/* Cannon Lake itouch with quirk for SPS 5.0 and newer Firmware exclusion
@@ -1545,7 +1552,7 @@ static const struct mei_cfg mei_me_pch12_itouch_sps_cfg = {
MEI_CFG_KIND_ITOUCH,
MEI_CFG_PCH8_HFS,
MEI_CFG_FW_VER_SUPP,
MEI_CFG_FW_SPS,
MEI_CFG_FW_SPS_IGN,
};
/* Tiger Lake and newer devices */
@@ -1562,7 +1569,7 @@ static const struct mei_cfg mei_me_pch15_sps_cfg = {
MEI_CFG_FW_VER_SUPP,
MEI_CFG_DMA_128,
MEI_CFG_TRC,
MEI_CFG_FW_SPS,
MEI_CFG_FW_SPS_IGN,
};
/*

View File

@@ -161,6 +161,11 @@ int mei_reset(struct mei_device *dev)
return ret;
}
if (dev->dev_state != MEI_DEV_RESETTING) {
dev_dbg(dev->dev, "wrong state = %d on link start\n", dev->dev_state);
return 0;
}
dev_dbg(dev->dev, "link is established start sending messages.\n");
mei_set_devstate(dev, MEI_DEV_INIT_CLIENTS);

View File

@@ -424,31 +424,26 @@ int mei_irq_read_handler(struct mei_device *dev,
list_for_each_entry(cl, &dev->file_list, link) {
if (mei_cl_hbm_equal(cl, mei_hdr)) {
cl_dbg(dev, cl, "got a message\n");
break;
ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list);
goto reset_slots;
}
}
/* if no recipient cl was found we assume corrupted header */
if (&cl->link == &dev->file_list) {
/* A message for not connected fixed address clients
* should be silently discarded
* On power down client may be force cleaned,
* silently discard such messages
*/
if (hdr_is_fixed(mei_hdr) ||
dev->dev_state == MEI_DEV_POWER_DOWN) {
mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length);
ret = 0;
goto reset_slots;
}
dev_err(dev->dev, "no destination client found 0x%08X\n",
dev->rd_msg_hdr[0]);
ret = -EBADMSG;
goto end;
/* A message for not connected fixed address clients
* should be silently discarded
* On power down client may be force cleaned,
* silently discard such messages
*/
if (hdr_is_fixed(mei_hdr) ||
dev->dev_state == MEI_DEV_POWER_DOWN) {
mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length);
ret = 0;
goto reset_slots;
}
ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list);
dev_err(dev->dev, "no destination client found 0x%08X\n", dev->rd_msg_hdr[0]);
ret = -EBADMSG;
goto end;
reset_slots:
/* reset the number of slots and header */

View File

@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
@@ -113,6 +114,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)},
/* required last entry */
{0, }
@@ -192,14 +194,7 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto end;
}
if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) ||
dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err)
err = dma_set_coherent_mask(&pdev->dev,
DMA_BIT_MASK(32));
}
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
goto end;

View File

@@ -94,7 +94,7 @@ struct ocxl_link {
struct spa *spa;
void *platform_data;
};
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static LIST_HEAD(links_list);
static DEFINE_MUTEX(links_list_lock);
enum xsl_response {

208
drivers/misc/open-dice.c Normal file
View File

@@ -0,0 +1,208 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2021 - Google LLC
* Author: David Brazdil <dbrazdil@google.com>
*
* Driver for Open Profile for DICE.
*
* This driver takes ownership of a reserved memory region containing data
* generated by the Open Profile for DICE measured boot protocol. The memory
* contents are not interpreted by the kernel but can be mapped into a userspace
* process via a misc device. Userspace can also request a wipe of the memory.
*
* Userspace can access the data with (w/o error handling):
*
* fd = open("/dev/open-dice0", O_RDWR);
* read(fd, &size, sizeof(unsigned long));
* data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
* write(fd, NULL, 0); // wipe
* close(fd);
*/
#include <linux/io.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/of_reserved_mem.h>
#include <linux/platform_device.h>
#define DRIVER_NAME "open-dice"
struct open_dice_drvdata {
struct mutex lock;
char name[16];
struct reserved_mem *rmem;
struct miscdevice misc;
};
static inline struct open_dice_drvdata *to_open_dice_drvdata(struct file *filp)
{
return container_of(filp->private_data, struct open_dice_drvdata, misc);
}
static int open_dice_wipe(struct open_dice_drvdata *drvdata)
{
void *kaddr;
mutex_lock(&drvdata->lock);
kaddr = devm_memremap(drvdata->misc.this_device, drvdata->rmem->base,
drvdata->rmem->size, MEMREMAP_WC);
if (IS_ERR(kaddr)) {
mutex_unlock(&drvdata->lock);
return PTR_ERR(kaddr);
}
memset(kaddr, 0, drvdata->rmem->size);
devm_memunmap(drvdata->misc.this_device, kaddr);
mutex_unlock(&drvdata->lock);
return 0;
}
/*
* Copies the size of the reserved memory region to the user-provided buffer.
*/
static ssize_t open_dice_read(struct file *filp, char __user *ptr, size_t len,
loff_t *off)
{
unsigned long val = to_open_dice_drvdata(filp)->rmem->size;
return simple_read_from_buffer(ptr, len, off, &val, sizeof(val));
}
/*
* Triggers a wipe of the reserved memory region. The user-provided pointer
* is never dereferenced.
*/
static ssize_t open_dice_write(struct file *filp, const char __user *ptr,
size_t len, loff_t *off)
{
if (open_dice_wipe(to_open_dice_drvdata(filp)))
return -EIO;
/* Consume the input buffer. */
return len;
}
/*
* Creates a mapping of the reserved memory region in user address space.
*/
static int open_dice_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct open_dice_drvdata *drvdata = to_open_dice_drvdata(filp);
/* Do not allow userspace to modify the underlying data. */
if ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))
return -EPERM;
/* Ensure userspace cannot acquire VM_WRITE + VM_SHARED later. */
if (vma->vm_flags & VM_WRITE)
vma->vm_flags &= ~VM_MAYSHARE;
else if (vma->vm_flags & VM_SHARED)
vma->vm_flags &= ~VM_MAYWRITE;
/* Create write-combine mapping so all clients observe a wipe. */
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP;
return vm_iomap_memory(vma, drvdata->rmem->base, drvdata->rmem->size);
}
static const struct file_operations open_dice_fops = {
.owner = THIS_MODULE,
.read = open_dice_read,
.write = open_dice_write,
.mmap = open_dice_mmap,
};
static int __init open_dice_probe(struct platform_device *pdev)
{
static unsigned int dev_idx;
struct device *dev = &pdev->dev;
struct reserved_mem *rmem;
struct open_dice_drvdata *drvdata;
int ret;
rmem = of_reserved_mem_lookup(dev->of_node);
if (!rmem) {
dev_err(dev, "failed to lookup reserved memory\n");
return -EINVAL;
}
if (!rmem->size || (rmem->size > ULONG_MAX)) {
dev_err(dev, "invalid memory region size\n");
return -EINVAL;
}
if (!PAGE_ALIGNED(rmem->base) || !PAGE_ALIGNED(rmem->size)) {
dev_err(dev, "memory region must be page-aligned\n");
return -EINVAL;
}
drvdata = devm_kmalloc(dev, sizeof(*drvdata), GFP_KERNEL);
if (!drvdata)
return -ENOMEM;
*drvdata = (struct open_dice_drvdata){
.lock = __MUTEX_INITIALIZER(drvdata->lock),
.rmem = rmem,
.misc = (struct miscdevice){
.parent = dev,
.name = drvdata->name,
.minor = MISC_DYNAMIC_MINOR,
.fops = &open_dice_fops,
.mode = 0600,
},
};
/* Index overflow check not needed, misc_register() will fail. */
snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++);
ret = misc_register(&drvdata->misc);
if (ret) {
dev_err(dev, "failed to register misc device '%s': %d\n",
drvdata->name, ret);
return ret;
}
platform_set_drvdata(pdev, drvdata);
return 0;
}
static int open_dice_remove(struct platform_device *pdev)
{
struct open_dice_drvdata *drvdata = platform_get_drvdata(pdev);
misc_deregister(&drvdata->misc);
return 0;
}
static const struct of_device_id open_dice_of_match[] = {
{ .compatible = "google,open-dice" },
{},
};
static struct platform_driver open_dice_driver = {
.remove = open_dice_remove,
.driver = {
.name = DRIVER_NAME,
.of_match_table = open_dice_of_match,
},
};
static int __init open_dice_init(void)
{
int ret = platform_driver_probe(&open_dice_driver, open_dice_probe);
/* DICE regions are optional. Succeed even with zero instances. */
return (ret == -ENODEV) ? 0 : ret;
}
static void __exit open_dice_exit(void)
{
platform_driver_unregister(&open_dice_driver);
}
module_init(open_dice_init);
module_exit(open_dice_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("David Brazdil <dbrazdil@google.com>");

View File

@@ -1016,7 +1016,7 @@ static int quicktest1(unsigned long arg)
break;
}
if (ret != MQE_QUEUE_FULL || i != 4) {
printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n",
printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
smp_processor_id(), ret, i);
goto done;
}

View File

@@ -530,12 +530,6 @@ struct gru_blade_state {
for ((i) = (k)*GRU_CBR_AU_SIZE; \
(i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
#define for_each_dsr_in_allocation_map(i, map, k) \
for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \
for ((i) = (k) * GRU_DSR_AU_CL; \
(i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
#define gseg_physical_address(gru, ctxnum) \
((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE)
#define gseg_virtual_address(gru, ctxnum) \

View File

@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
@@ -31,6 +32,12 @@
#define VMCI_UTIL_NUM_RESOURCES 1
/*
* Datagram buffers for DMA send/receive must accommodate at least
* a maximum sized datagram and the header.
*/
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)
static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
@@ -45,13 +52,18 @@ static u32 vm_context_id = VMCI_INVALID_ID;
struct vmci_guest_device {
struct device *dev; /* PCI device we are attached to */
void __iomem *iobase;
void __iomem *mmio_base;
bool exclusive_vectors;
struct tasklet_struct datagram_tasklet;
struct tasklet_struct bm_tasklet;
struct wait_queue_head inout_wq;
void *data_buffer;
dma_addr_t data_buffer_base;
void *tx_buffer;
dma_addr_t tx_buffer_base;
void *notification_bitmap;
dma_addr_t notification_base;
};
@@ -89,6 +101,92 @@ u32 vmci_get_vm_context_id(void)
return vm_context_id;
}
static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
if (dev->mmio_base != NULL)
return readl(dev->mmio_base + reg);
return ioread32(dev->iobase + reg);
}
static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
if (dev->mmio_base != NULL)
writel(val, dev->mmio_base + reg);
else
iowrite32(val, dev->iobase + reg);
}
static void vmci_read_data(struct vmci_guest_device *vmci_dev,
void *dest, size_t size)
{
if (vmci_dev->mmio_base == NULL)
ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
dest, size);
else {
/*
* For DMA datagrams, the data_buffer will contain the header on the
* first page, followed by the incoming datagram(s) on the following
* pages. The header uses an S/G element immediately following the
* header on the first page to point to the data area.
*/
struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
size_t buffer_offset = dest - vmci_dev->data_buffer;
buffer_header->opcode = 1;
buffer_header->size = 1;
buffer_header->busy = 0;
sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
sg_array[0].size = size;
vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
VMCI_DATA_IN_LOW_ADDR);
wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
}
}
static int vmci_write_data(struct vmci_guest_device *dev,
struct vmci_datagram *dg)
{
int result;
if (dev->mmio_base != NULL) {
struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
u8 *dg_out_buffer = (u8 *)(buffer_header + 1);
if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
return VMCI_ERROR_INVALID_ARGS;
/*
* Initialize send buffer with outgoing datagram
* and set up header for inline data. Device will
* not access buffer asynchronously - only after
* the write to VMCI_DATA_OUT_LOW_ADDR.
*/
memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
buffer_header->opcode = 0;
buffer_header->size = VMCI_DG_SIZE(dg);
buffer_header->busy = 1;
vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
VMCI_DATA_OUT_LOW_ADDR);
/* Caller holds a spinlock, so cannot block. */
spin_until_cond(buffer_header->busy == 0);
result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
if (result == VMCI_SUCCESS)
result = (int)buffer_header->result;
} else {
iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
dg, VMCI_DG_SIZE(dg));
result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
}
return result;
}
/*
* VM to hypervisor call mechanism. We use the standard VMware naming
* convention since shared code is calling this function as well.
@@ -114,9 +212,8 @@ int vmci_send_datagram(struct vmci_datagram *dg)
spin_lock_irqsave(&vmci_dev_spinlock, flags);
if (vmci_dev_g) {
iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
dg, VMCI_DG_SIZE(dg));
result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
vmci_write_data(vmci_dev_g, dg);
result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
} else {
result = VMCI_ERROR_UNAVAILABLE;
}
@@ -156,9 +253,9 @@ static void vmci_guest_cid_update(u32 sub_id,
/*
* Verify that the host supports the hypercalls we need. If it does not,
* try to find fallback hypercalls and use those instead. Returns
* true if required hypercalls (or fallback hypercalls) are
* supported by the host, false otherwise.
* try to find fallback hypercalls and use those instead. Returns 0 if
* required hypercalls (or fallback hypercalls) are supported by the host,
* an error code otherwise.
*/
static int vmci_check_host_caps(struct pci_dev *pdev)
{
@@ -195,15 +292,17 @@ static int vmci_check_host_caps(struct pci_dev *pdev)
}
/*
* Reads datagrams from the data in port and dispatches them. We
* always start reading datagrams into only the first page of the
* datagram buffer. If the datagrams don't fit into one page, we
* use the maximum datagram buffer size for the remainder of the
* invocation. This is a simple heuristic for not penalizing
* small datagrams.
* Reads datagrams from the device and dispatches them. For IO port
* based access to the device, we always start reading datagrams into
* only the first page of the datagram buffer. If the datagrams don't
* fit into one page, we use the maximum datagram buffer size for the
* remainder of the invocation. This is a simple heuristic for not
* penalizing small datagrams. For DMA-based datagrams, we always
* use the maximum datagram buffer size, since there is no performance
* penalty for doing so.
*
* This function assumes that it has exclusive access to the data
* in port for the duration of the call.
* in register(s) for the duration of the call.
*/
static void vmci_dispatch_dgs(unsigned long data)
{
@@ -211,23 +310,41 @@ static void vmci_dispatch_dgs(unsigned long data)
u8 *dg_in_buffer = vmci_dev->data_buffer;
struct vmci_datagram *dg;
size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
size_t current_dg_in_buffer_size = PAGE_SIZE;
size_t current_dg_in_buffer_size;
size_t remaining_bytes;
bool is_io_port = vmci_dev->mmio_base == NULL;
BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
vmci_dev->data_buffer, current_dg_in_buffer_size);
if (!is_io_port) {
/* For mmio, the first page is used for the header. */
dg_in_buffer += PAGE_SIZE;
/*
* For DMA-based datagram operations, there is no performance
* penalty for reading the maximum buffer size.
*/
current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
} else {
current_dg_in_buffer_size = PAGE_SIZE;
}
vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
dg = (struct vmci_datagram *)dg_in_buffer;
remaining_bytes = current_dg_in_buffer_size;
/*
* Read through the buffer until an invalid datagram header is
* encountered. The exit condition for datagrams read through
* VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
* can start on any page boundary in the buffer.
*/
while (dg->dst.resource != VMCI_INVALID_ID ||
remaining_bytes > PAGE_SIZE) {
(is_io_port && remaining_bytes > PAGE_SIZE)) {
unsigned dg_in_size;
/*
* When the input buffer spans multiple pages, a datagram can
* start on any page boundary in the buffer.
* If using VMCI_DATA_IN_ADDR, skip to the next page
* as a datagram can start on any page boundary.
*/
if (dg->dst.resource == VMCI_INVALID_ID) {
dg = (struct vmci_datagram *)roundup(
@@ -277,11 +394,10 @@ static void vmci_dispatch_dgs(unsigned long data)
current_dg_in_buffer_size =
dg_in_buffer_size;
ioread8_rep(vmci_dev->iobase +
VMCI_DATA_IN_ADDR,
vmci_dev->data_buffer +
vmci_read_data(vmci_dev,
dg_in_buffer +
remaining_bytes,
current_dg_in_buffer_size -
current_dg_in_buffer_size -
remaining_bytes);
}
@@ -319,10 +435,8 @@ static void vmci_dispatch_dgs(unsigned long data)
current_dg_in_buffer_size = dg_in_buffer_size;
for (;;) {
ioread8_rep(vmci_dev->iobase +
VMCI_DATA_IN_ADDR,
vmci_dev->data_buffer,
current_dg_in_buffer_size);
vmci_read_data(vmci_dev, dg_in_buffer,
current_dg_in_buffer_size);
if (bytes_to_skip <= current_dg_in_buffer_size)
break;
@@ -339,8 +453,7 @@ static void vmci_dispatch_dgs(unsigned long data)
if (remaining_bytes < VMCI_DG_HEADERSIZE) {
/* Get the next batch of datagrams. */
ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
vmci_dev->data_buffer,
vmci_read_data(vmci_dev, dg_in_buffer,
current_dg_in_buffer_size);
dg = (struct vmci_datagram *)dg_in_buffer;
remaining_bytes = current_dg_in_buffer_size;
@@ -384,7 +497,7 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
unsigned int icr;
/* Acknowledge interrupt and determine what needs doing. */
icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
if (icr == 0 || icr == ~0)
return IRQ_NONE;
@@ -398,6 +511,12 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
icr &= ~VMCI_ICR_NOTIFICATION;
}
if (icr & VMCI_ICR_DMA_DATAGRAM) {
wake_up_all(&dev->inout_wq);
icr &= ~VMCI_ICR_DMA_DATAGRAM;
}
if (icr != 0)
dev_warn(dev->dev,
"Ignoring unknown interrupt cause (%d)\n",
@@ -422,6 +541,38 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
return IRQ_HANDLED;
}
/*
* Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
* which is for the completion of a DMA datagram send or receive operation.
* Will only get called if we are using MSI-X with exclusive vectors.
*/
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
struct vmci_guest_device *dev = _dev;
wake_up_all(&dev->inout_wq);
return IRQ_HANDLED;
}
static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
if (vmci_dev->mmio_base != NULL) {
if (vmci_dev->tx_buffer != NULL)
dma_free_coherent(vmci_dev->dev,
VMCI_DMA_DG_BUFFER_SIZE,
vmci_dev->tx_buffer,
vmci_dev->tx_buffer_base);
if (vmci_dev->data_buffer != NULL)
dma_free_coherent(vmci_dev->dev,
VMCI_DMA_DG_BUFFER_SIZE,
vmci_dev->data_buffer,
vmci_dev->data_buffer_base);
} else {
vfree(vmci_dev->data_buffer);
}
}
/*
* Most of the initialization at module load time is done here.
*/
@@ -429,7 +580,9 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct vmci_guest_device *vmci_dev;
void __iomem *iobase;
void __iomem *iobase = NULL;
void __iomem *mmio_base = NULL;
unsigned int num_irq_vectors;
unsigned int capabilities;
unsigned int caps_in_use;
unsigned long cmd;
@@ -445,16 +598,29 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
return error;
}
error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
if (error) {
dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
return error;
/*
* The VMCI device with mmio access to registers requests 256KB
* for BAR1. If present, driver will use new VMCI device
* functionality for register access and datagram send/recv.
*/
if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
dev_info(&pdev->dev, "MMIO register access is available\n");
mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
VMCI_MMIO_ACCESS_SIZE);
/* If the map fails, we fall back to IOIO access. */
if (!mmio_base)
dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
}
iobase = pcim_iomap_table(pdev)[0];
dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n",
(unsigned long)iobase, pdev->irq);
if (!mmio_base) {
error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
if (error) {
dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
return error;
}
iobase = pcim_iomap_table(pdev)[0];
}
vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
if (!vmci_dev) {
@@ -466,17 +632,35 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_dev->dev = &pdev->dev;
vmci_dev->exclusive_vectors = false;
vmci_dev->iobase = iobase;
vmci_dev->mmio_base = mmio_base;
tasklet_init(&vmci_dev->datagram_tasklet,
vmci_dispatch_dgs, (unsigned long)vmci_dev);
tasklet_init(&vmci_dev->bm_tasklet,
vmci_process_bitmap, (unsigned long)vmci_dev);
init_waitqueue_head(&vmci_dev->inout_wq);
vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
if (mmio_base != NULL) {
vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
&vmci_dev->tx_buffer_base,
GFP_KERNEL);
if (!vmci_dev->tx_buffer) {
dev_err(&pdev->dev,
"Can't allocate memory for datagram tx buffer\n");
return -ENOMEM;
}
vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
&vmci_dev->data_buffer_base,
GFP_KERNEL);
} else {
vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
}
if (!vmci_dev->data_buffer) {
dev_err(&pdev->dev,
"Can't allocate memory for datagram buffer\n");
return -ENOMEM;
error = -ENOMEM;
goto err_free_data_buffers;
}
pci_set_master(pdev); /* To enable queue_pair functionality. */
@@ -490,11 +674,11 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
*
* Right now, we need datagrams. There are no fallbacks.
*/
capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR);
capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
dev_err(&pdev->dev, "Device does not support datagrams\n");
error = -ENXIO;
goto err_free_data_buffer;
goto err_free_data_buffers;
}
caps_in_use = VMCI_CAPS_DATAGRAM;
@@ -522,19 +706,39 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_dev->notification_bitmap = dma_alloc_coherent(
&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
GFP_KERNEL);
if (!vmci_dev->notification_bitmap) {
if (!vmci_dev->notification_bitmap)
dev_warn(&pdev->dev,
"Unable to allocate notification bitmap\n");
} else {
memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
else
caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
}
if (mmio_base != NULL) {
if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
} else {
dev_err(&pdev->dev,
"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
error = -ENXIO;
goto err_free_notification_bitmap;
}
}
dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);
/* Let the host know which capabilities we intend to use. */
iowrite32(caps_in_use, vmci_dev->iobase + VMCI_CAPS_ADDR);
vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
/* Let the device know the size for pages passed down. */
vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
/* Configure the high order parts of the data in/out buffers. */
vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
VMCI_DATA_IN_HIGH_ADDR);
vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
VMCI_DATA_OUT_HIGH_ADDR);
}
/* Set up global device so that we can start sending datagrams */
spin_lock_irq(&vmci_dev_spinlock);
@@ -561,7 +765,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
/* Check host capabilities. */
error = vmci_check_host_caps(pdev);
if (error)
goto err_remove_bitmap;
goto err_remove_vmci_dev_g;
/* Enable device. */
@@ -581,13 +785,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
* Enable interrupts. Try MSI-X first, then MSI, and then fallback on
* legacy interrupts.
*/
error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
PCI_IRQ_MSIX);
if (vmci_dev->mmio_base != NULL)
num_irq_vectors = VMCI_MAX_INTRS;
else
num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
PCI_IRQ_MSIX);
if (error < 0) {
error = pci_alloc_irq_vectors(pdev, 1, 1,
PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
if (error < 0)
goto err_remove_bitmap;
goto err_unsubscribe_event;
} else {
vmci_dev->exclusive_vectors = true;
}
@@ -620,6 +828,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
pci_irq_vector(pdev, 1), error);
goto err_free_irq;
}
if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
error = request_irq(pci_irq_vector(pdev, 2),
vmci_interrupt_dma_datagram,
0, KBUILD_MODNAME, vmci_dev);
if (error) {
dev_err(&pdev->dev,
"Failed to allocate irq %u: %d\n",
pci_irq_vector(pdev, 2), error);
goto err_free_bm_irq;
}
}
}
dev_dbg(&pdev->dev, "Registered device\n");
@@ -630,17 +849,22 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
cmd = VMCI_IMR_DATAGRAM;
if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
cmd |= VMCI_IMR_NOTIFICATION;
iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR);
if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
cmd |= VMCI_IMR_DMA_DATAGRAM;
vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);
/* Enable interrupts. */
iowrite32(VMCI_CONTROL_INT_ENABLE,
vmci_dev->iobase + VMCI_CONTROL_ADDR);
vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);
pci_set_drvdata(pdev, vmci_dev);
vmci_call_vsock_callback(false);
return 0;
err_free_bm_irq:
if (vmci_dev->exclusive_vectors)
free_irq(pci_irq_vector(pdev, 1), vmci_dev);
err_free_irq:
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
tasklet_kill(&vmci_dev->datagram_tasklet);
@@ -649,29 +873,29 @@ err_free_irq:
err_disable_msi:
pci_free_irq_vectors(pdev);
err_unsubscribe_event:
vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
if (vmci_err < VMCI_SUCCESS)
dev_warn(&pdev->dev,
"Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
err_remove_bitmap:
if (vmci_dev->notification_bitmap) {
iowrite32(VMCI_CONTROL_RESET,
vmci_dev->iobase + VMCI_CONTROL_ADDR);
dma_free_coherent(&pdev->dev, PAGE_SIZE,
vmci_dev->notification_bitmap,
vmci_dev->notification_base);
}
err_remove_vmci_dev_g:
spin_lock_irq(&vmci_dev_spinlock);
vmci_pdev = NULL;
vmci_dev_g = NULL;
spin_unlock_irq(&vmci_dev_spinlock);
err_free_data_buffer:
vfree(vmci_dev->data_buffer);
err_free_notification_bitmap:
if (vmci_dev->notification_bitmap) {
vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
dma_free_coherent(&pdev->dev, PAGE_SIZE,
vmci_dev->notification_bitmap,
vmci_dev->notification_base);
}
err_free_data_buffers:
vmci_free_dg_buffers(vmci_dev);
/* The rest are managed resources and will be freed by PCI core */
return error;
@@ -700,15 +924,18 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
spin_unlock_irq(&vmci_dev_spinlock);
dev_dbg(&pdev->dev, "Resetting vmci device\n");
iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR);
vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
/*
* Free IRQ and then disable MSI/MSI-X as appropriate. For
* MSI-X, we might have multiple vectors, each with their own
* IRQ, which we must free too.
*/
if (vmci_dev->exclusive_vectors)
if (vmci_dev->exclusive_vectors) {
free_irq(pci_irq_vector(pdev, 1), vmci_dev);
if (vmci_dev->mmio_base != NULL)
free_irq(pci_irq_vector(pdev, 2), vmci_dev);
}
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
pci_free_irq_vectors(pdev);
@@ -726,7 +953,10 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
vmci_dev->notification_base);
}
vfree(vmci_dev->data_buffer);
vmci_free_dg_buffers(vmci_dev);
if (vmci_dev->mmio_base != NULL)
pci_iounmap(pdev, vmci_dev->mmio_base);
/* The rest are managed resources and will be freed by PCI core */
}