diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 0e37aad85930..cd3422bfe6f8 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -568,9 +568,36 @@ static int validate_queue_index(struct hl_device *hdev, return -EINVAL; } - *queue_type = hw_queue_prop->type; - *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb; + /* When hw queue type isn't QUEUE_TYPE_HW, + * USER_ALLOC_CB flag shall be referred as "don't care". + */ + if (hw_queue_prop->type == QUEUE_TYPE_HW) { + if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { + if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { + dev_err(hdev->dev, + "Queue index %d doesn't support user CB\n", + chunk->queue_index); + return -EINVAL; + } + *is_kernel_allocated_cb = false; + } else { + if (!(hw_queue_prop->cb_alloc_flags & + CB_ALLOC_KERNEL)) { + dev_err(hdev->dev, + "Queue index %d doesn't support kernel CB\n", + chunk->queue_index); + return -EINVAL; + } + + *is_kernel_allocated_cb = true; + } + } else { + *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags + & CB_ALLOC_KERNEL); + } + + *queue_type = hw_queue_prop->type; return 0; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index b5a34936e22d..0823798f292e 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -206,6 +206,17 @@ struct hl_outbound_pci_region { u64 size; }; +/* + * enum queue_cb_alloc_flags - Indicates queue support for CBs that + * allocated by Kernel or by User + * @CB_ALLOC_KERNEL: support only CBs that allocated by Kernel + * @CB_ALLOC_USER: support only CBs that allocated by User + */ +enum queue_cb_alloc_flags { + CB_ALLOC_KERNEL = 0x1, + CB_ALLOC_USER = 0x2 +}; + /* * struct hl_hw_sob - H/W SOB info. * @hdev: habanalabs device structure. @@ -223,16 +234,18 @@ struct hl_hw_sob { /** * struct hw_queue_properties - queue information. * @type: queue type. + * @queue_cb_alloc_flags: bitmap which indicates if the hw queue supports CB + * that allocated by the Kernel driver and therefore, + * a CB handle can be provided for jobs on this queue. + * Otherwise, a CB address must be provided. * @driver_only: true if only the driver is allowed to send a job to this queue, * false otherwise. - * @requires_kernel_cb: true if a CB handle must be provided for jobs on this - * queue, false otherwise (a CB address must be provided). * @supports_sync_stream: True if queue supports sync stream */ struct hw_queue_properties { enum hl_queue_type type; + enum queue_cb_alloc_flags cb_alloc_flags; u8 driver_only; - u8 requires_kernel_cb; u8 supports_sync_stream; }; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 2dd9b732299a..9393e34b9719 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -381,23 +381,28 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) { prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; prop->hw_queues_props[i].driver_only = 0; - prop->hw_queues_props[i].requires_kernel_cb = 1; prop->hw_queues_props[i].supports_sync_stream = 1; + prop->hw_queues_props[i].cb_alloc_flags = + CB_ALLOC_KERNEL; num_sync_stream_queues++; } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; prop->hw_queues_props[i].driver_only = 1; - prop->hw_queues_props[i].requires_kernel_cb = 0; prop->hw_queues_props[i].supports_sync_stream = 0; + prop->hw_queues_props[i].cb_alloc_flags = + CB_ALLOC_KERNEL; } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) { prop->hw_queues_props[i].type = QUEUE_TYPE_INT; prop->hw_queues_props[i].driver_only = 0; - prop->hw_queues_props[i].requires_kernel_cb = 0; + prop->hw_queues_props[i].supports_sync_stream = 0; + prop->hw_queues_props[i].cb_alloc_flags = + CB_ALLOC_USER; } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) { prop->hw_queues_props[i].type = QUEUE_TYPE_NA; prop->hw_queues_props[i].driver_only = 0; - prop->hw_queues_props[i].requires_kernel_cb = 0; prop->hw_queues_props[i].supports_sync_stream = 0; + prop->hw_queues_props[i].cb_alloc_flags = + CB_ALLOC_USER; } } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index d873f613acb0..74c44278166b 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -373,20 +373,20 @@ int goya_get_fixed_properties(struct hl_device *hdev) for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; prop->hw_queues_props[i].driver_only = 0; - prop->hw_queues_props[i].requires_kernel_cb = 1; + prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; prop->hw_queues_props[i].driver_only = 1; - prop->hw_queues_props[i].requires_kernel_cb = 0; + prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES + NUMBER_OF_INT_HW_QUEUES; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_INT; prop->hw_queues_props[i].driver_only = 0; - prop->hw_queues_props[i].requires_kernel_cb = 0; + prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER; } prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 9705b8adb60c..5753157e71b3 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -490,6 +490,22 @@ union hl_cb_args { struct hl_cb_out out; }; +/* HL_CS_CHUNK_FLAGS_ values + * + * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: + * Indicates if the CB was allocated and mapped by userspace. + * User allocated CB is a command buffer allocated by the user, via malloc + * (or similar). After allocating the CB, the user invokes “memory ioctl” + * to map the user memory into a device virtual address. The user provides + * this address via the cb_handle field. The interface provides the + * ability to create a large CBs, Which aren’t limited to + * “HL_MAX_CB_SIZE”. Therefore, it increases the PCI-DMA queues + * throughput. This CB allocation method also reduces the use of Linux + * DMA-able memory pool. Which are limited and used by other Linux + * sub-systems. + */ +#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1 + /* * This structure size must always be fixed to 64-bytes for backward * compatibility