Skip to content

Commit

Permalink
habanalabs: add h/w queues module
Browse files Browse the repository at this point in the history
This patch adds the H/W queues module and the code to initialize Goya's
various compute and DMA engines and their queues.

Goya has 5 DMA channels, 8 TPC engines and a single MME engine. For each
channel/engine, there is H/W queue logic which is used to pass commands
from the user to the H/W. That logic is called QMAN.

There are two types of QMANs: external and internal. The DMA QMANs are
considered external while the TPC and MME QMANs are considered internal.
For each external queue there is a completion queue, which is located on
the Host memory.

The differences between external and internal QMANs are:

1. The location of the queue's memory. External QMANs are located on the
   Host memory while internal QMANs are located on the on-chip memory.

2. The external QMAN writes an entry to a completion queue and sends an
   MSI-X interrupt upon completion of a command buffer that was given to
   it. The internal QMAN doesn't do that.

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
Oded Gabbay authored and Greg Kroah-Hartman committed Feb 18, 2019
1 parent 839c480 commit 9494a8d
Show file tree
Hide file tree
Showing 13 changed files with 2,915 additions and 116 deletions.
2 changes: 1 addition & 1 deletion drivers/misc/habanalabs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
obj-m := habanalabs.o

habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
command_buffer.o
command_buffer.o hw_queue.o irq.o

include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
75 changes: 73 additions & 2 deletions drivers/misc/habanalabs/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,13 +174,23 @@ static int device_early_init(struct hl_device *hdev)
if (rc)
goto early_fini;

hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
if (hdev->cq_wq == NULL) {
dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
rc = -ENOMEM;
goto asid_fini;
}

hl_cb_mgr_init(&hdev->kernel_cb_mgr);

mutex_init(&hdev->fd_open_cnt_lock);
mutex_init(&hdev->send_cpu_message_lock);
atomic_set(&hdev->fd_open_cnt, 0);

return 0;

asid_fini:
hl_asid_fini(hdev);
early_fini:
if (hdev->asic_funcs->early_fini)
hdev->asic_funcs->early_fini(hdev);
Expand All @@ -196,9 +206,12 @@ static int device_early_init(struct hl_device *hdev)
*/
static void device_early_fini(struct hl_device *hdev)
{
mutex_destroy(&hdev->send_cpu_message_lock);

hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);

destroy_workqueue(hdev->cq_wq);

hl_asid_fini(hdev);

if (hdev->asic_funcs->early_fini)
Expand Down Expand Up @@ -277,7 +290,7 @@ int hl_device_resume(struct hl_device *hdev)
*/
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
int rc;
int i, rc, cq_ready_cnt;

/* Create device */
rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops);
Expand All @@ -298,11 +311,48 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (rc)
goto early_fini;

/*
* Initialize the H/W queues. Must be done before hw_init, because
* there the addresses of the kernel queue are being written to the
* registers of the device
*/
rc = hl_hw_queues_create(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize kernel queues\n");
goto sw_fini;
}

/*
* Initialize the completion queues. Must be done before hw_init,
* because there the addresses of the completion queues are being
* passed as arguments to request_irq
*/
hdev->completion_queue =
kcalloc(hdev->asic_prop.completion_queues_count,
sizeof(*hdev->completion_queue), GFP_KERNEL);

if (!hdev->completion_queue) {
dev_err(hdev->dev, "failed to allocate completion queues\n");
rc = -ENOMEM;
goto hw_queues_destroy;
}

for (i = 0, cq_ready_cnt = 0;
i < hdev->asic_prop.completion_queues_count;
i++, cq_ready_cnt++) {
rc = hl_cq_init(hdev, &hdev->completion_queue[i], i);
if (rc) {
dev_err(hdev->dev,
"failed to initialize completion queue\n");
goto cq_fini;
}
}

/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
if (!hdev->kernel_ctx) {
rc = -ENOMEM;
goto sw_fini;
goto cq_fini;
}

hdev->user_ctx = NULL;
Expand All @@ -328,6 +378,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)

hdev->disabled = false;

/* Check that the communication with the device is working */
rc = hdev->asic_funcs->test_queues(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to detect if device is alive\n");
rc = 0;
goto out_disabled;
}

dev_notice(hdev->dev,
"Successfully added device to habanalabs driver\n");

Expand All @@ -339,6 +397,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
"kernel ctx is still alive on initialization failure\n");
free_ctx:
kfree(hdev->kernel_ctx);
cq_fini:
for (i = 0 ; i < cq_ready_cnt ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);
hw_queues_destroy:
hl_hw_queues_destroy(hdev);
sw_fini:
hdev->asic_funcs->sw_fini(hdev);
early_fini:
Expand Down Expand Up @@ -368,6 +432,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
*/
void hl_device_fini(struct hl_device *hdev)
{
int i;
dev_info(hdev->dev, "Removing device\n");

/* Mark device as disabled */
Expand All @@ -382,6 +447,12 @@ void hl_device_fini(struct hl_device *hdev)
/* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, true);

for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);

hl_hw_queues_destroy(hdev);

/* Call ASIC S/W finalize function */
hdev->asic_funcs->sw_fini(hdev);

Expand Down
Loading

0 comments on commit 9494a8d

Please sign in to comment.