diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 304e4f3b0e7e..cf4118515678 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -13,7 +13,7 @@ #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ - HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) + HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND) #define MAX_TS_ITER_NUM 10 @@ -1244,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) return CS_RESERVE_SIGNALS; else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) return CS_UNRESERVE_SIGNALS; + else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) + return CS_TYPE_ENGINE_CORE; else return CS_TYPE_DEFAULT; } @@ -2355,6 +2357,41 @@ out: return rc; } +static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, + u32 num_engine_cores, u32 core_command) +{ + int rc; + struct hl_device *hdev = hpriv->hdev; + void __user *engine_cores_arr; + u32 *cores; + + if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { + dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); + return -EINVAL; + } + + if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) { + dev_err(hdev->dev, "Engine core command is invalid\n"); + return -EINVAL; + } + + engine_cores_arr = (void __user *) (uintptr_t) engine_cores; + cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL); + if (!cores) + return -ENOMEM; + + if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) { + dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); + kfree(cores); + return -EFAULT; + } + + rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); + kfree(cores); + + return rc; +} + int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_cs_args *args = data; @@ -2407,6 +2444,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) rc = cs_ioctl_unreserve_signals(hpriv, args->in.encaps_sig_handle_id); break; + case CS_TYPE_ENGINE_CORE: + rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, + args->in.num_engine_cores, args->in.core_command); + break; default: rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, args->in.cs_flags, diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 8d9e96c6092a..ae3f5832fe58 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -345,7 +345,8 @@ enum hl_cs_type { CS_TYPE_WAIT, CS_TYPE_COLLECTIVE_WAIT, CS_RESERVE_SIGNALS, - CS_UNRESERVE_SIGNALS + CS_UNRESERVE_SIGNALS, + CS_TYPE_ENGINE_CORE }; /* @@ -617,6 +618,7 @@ struct hl_hints_range { * which the property supports_user_set_page_size is true * (i.e. the DRAM supports multiple page sizes), otherwise * it will shall be equal to dram_page_size. + * @num_engine_cores: number of engine cpu cores * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -737,6 +739,7 @@ struct asic_fixed_properties { u32 faulty_dram_cluster_map; u32 xbar_edge_enabled_mask; u32 device_mem_alloc_default_page_size; + u32 num_engine_cores; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; @@ -1511,6 +1514,7 @@ struct engines_data { * @check_if_razwi_happened: check if there was a razwi due to RR violation. * @access_dev_mem: access device memory * @set_dram_bar_base: set the base of the DRAM BAR + * @set_engine_cores: set a config command to enigne cores */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1645,6 +1649,8 @@ struct hl_asic_funcs { int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); + int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids, + u32 num_cores, u32 core_command); }; diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 9ccde0258eca..676419961f86 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -1989,6 +1989,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; } + prop->num_engine_cores = CPU_ID_MAX; prop->cfg_size = CFG_SIZE; prop->max_asid = MAX_ASID; prop->num_of_events = GAUDI2_EVENT_SIZE; @@ -3751,14 +3752,16 @@ static void gaudi2_stop_dec(struct hl_device *hdev) gaudi2_stop_pcie_dec(hdev); } -static void gaudi2_halt_arc(struct hl_device *hdev, u32 cpu_id) +static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) { u32 reg_base, reg_val; reg_base = gaudi2_arc_blocks_bases[cpu_id]; + if (run_mode == HL_ENGINE_CORE_RUN) + reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1); + else + reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); - /* Halt ARC */ - reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val); } @@ -3768,10 +3771,37 @@ static void gaudi2_halt_arcs(struct hl_device *hdev) for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) { if (gaudi2_is_arc_enabled(hdev, arc_id)) - gaudi2_halt_arc(hdev, arc_id); + gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT); } } +static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) +{ + int rc; + u32 reg_base, val, ack_mask, timeout_usec = 100000; + + if (hdev->pldm) + timeout_usec *= 100; + + reg_base = gaudi2_arc_blocks_bases[cpu_id]; + if (run_mode == HL_ENGINE_CORE_RUN) + ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK; + else + ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK; + + rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET, + val, ((val & ack_mask) == ack_mask), + 1000, timeout_usec); + + if (!rc) { + /* Clear */ + val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0); + WREG32(reg_base + ARC_HALT_REQ_OFFSET, val); + } + + return rc; +} + static void gaudi2_reset_arcs(struct hl_device *hdev) { struct gaudi2_device *gaudi2 = hdev->asic_specific; @@ -3796,8 +3826,39 @@ static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev) queue_id = GAUDI2_QUEUE_ID_NIC_0_0; - for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) + for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { + if (!(hdev->nic_ports_mask & BIT(i))) + continue; + gaudi2_qman_manual_flush_common(hdev, queue_id); + } +} + +static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, + u32 num_cores, u32 core_command) +{ + int i, rc; + + + for (i = 0 ; i < num_cores ; i++) { + if (gaudi2_is_arc_enabled(hdev, core_ids[i])) + gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); + } + + for (i = 0 ; i < num_cores ; i++) { + if (gaudi2_is_arc_enabled(hdev, core_ids[i])) { + rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command); + + if (rc) { + dev_err(hdev->dev, "failed to %s arc: %d\n", + (core_command == HL_ENGINE_CORE_HALT) ? + "HALT" : "RUN", core_ids[i]); + return -1; + } + } + } + + return 0; } static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) @@ -9968,6 +10029,7 @@ static const struct hl_asic_funcs gaudi2_funcs = { .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size, .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = gaudi2_set_hbm_bar_base, + .set_engine_cores = gaudi2_set_engine_cores, }; void gaudi2_set_asic_funcs(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h index d0e2c68a639f..bfda4223bdc8 100644 --- a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h +++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h @@ -239,6 +239,7 @@ #define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE) #define ARC_HALT_REQ_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE) +#define ARC_HALT_ACK_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE) #define ARC_REGION_CFG_OFFSET(region) \ (mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 0da8894ab94a..f51c6ae4f94d 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1361,17 +1361,47 @@ struct hl_cs_chunk { #define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 #define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 +/* + * The engine cores CS is merged into the existing CS ioctls. + * Use it to control the engine cores mode. + */ +#define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 + #define HL_CS_STATUS_SUCCESS 0 #define HL_MAX_JOBS_PER_CS 512 +/* HL_ENGINE_CORE_ values + * + * HL_ENGINE_CORE_HALT: engine core halt + * HL_ENGINE_CORE_RUN: engine core run + */ +#define HL_ENGINE_CORE_HALT (1 << 0) +#define HL_ENGINE_CORE_RUN (1 << 1) + struct hl_cs_in { - /* this holds address of array of hl_cs_chunk for restore phase */ - __u64 chunks_restore; + union { + struct { + /* this holds address of array of hl_cs_chunk for restore phase */ + __u64 chunks_restore; - /* holds address of array of hl_cs_chunk for execution phase */ - __u64 chunks_execute; + /* holds address of array of hl_cs_chunk for execution phase */ + __u64 chunks_execute; + }; + + /* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */ + struct { + /* this holds address of array of uint32 for engine_cores */ + __u64 engine_cores; + + /* number of engine cores in engine_cores array */ + __u32 num_engine_cores; + + /* the core command to be sent towards engine cores */ + __u32 core_command; + }; + }; union { /*