Skip to content

Commit

Permalink
Merge tag 'misc-habanalabs-next-2019-09-05' of git://people.freedeskt…
Browse files Browse the repository at this point in the history
…op.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.4:

- Create an additional char device per PCI device. The new char device
  allows any application to query the device for stats, information, idle
  state and more. This is needed to support system/monitoring
  applications, while also allowing the deep-learning application to send
  work to the ASIC through the main (original) char device.

- Fix possible kernel crash in case user supplies a smaller-than-required
  buffer to the DEBUG IOCTL.

- Expose the device to userspace only after initialization was done, to
  prevent a race between the initialization and user submitting workloads.

- Add uapi, as part of INFO IOCTL, to allow user to query the device
  utilization rate.

- Add uapi, as part of INFO IOCTL, to allow user to retrieve aggregate H/W
  events, i.e. counting H/W events from the loading of the driver.

- Register to the HWMON subsystem with the board's name, to allow the
  user to prepare a custom sensor file per board.

- Use correct macros for endian swapping.

- Improve error printing in multiple places.

- Small bug fixes.

* tag 'misc-habanalabs-next-2019-09-05' of git://people.freedesktop.org/~gabbayo/linux: (30 commits)
  habanalabs: correctly cast variable to __le32
  habanalabs: show correct id in error print
  habanalabs: stop using the acronym KMD
  habanalabs: display card name as sensors header
  habanalabs: add uapi to retrieve aggregate H/W events
  habanalabs: add uapi to retrieve device utilization
  habanalabs: Make the Coresight timestamp perpetual
  habanalabs: explicitly set the queue-id enumerated numbers
  habanalabs: print to kernel log when reset is finished
  habanalabs: replace __le32_to_cpu with le32_to_cpu
  habanalabs: replace __cpu_to_le32/64 with cpu_to_le32/64
  habanalabs: Handle HW_IP_INFO if device disabled or in reset
  habanalabs: Expose devices after initialization is done
  habanalabs: improve security in Debug IOCTL
  habanalabs: use default structure for user input in Debug IOCTL
  habanalabs: Add descriptive name to PSOC app status register
  habanalabs: Add descriptive names to PSOC scratch-pad registers
  habanalabs: create two char devices per ASIC
  habanalabs: change device_setup_cdev() to be more generic
  habanalabs: maintain a list of file private data objects
  ...
  • Loading branch information
Greg Kroah-Hartman committed Sep 6, 2019
2 parents 25ec871 + 6dc66f7 commit 9b4a66f
Show file tree
Hide file tree
Showing 22 changed files with 1,177 additions and 600 deletions.
14 changes: 4 additions & 10 deletions Documentation/ABI/testing/sysfs-driver-habanalabs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Allows the user to set the maximum clock frequency for MME, TPC
and IC when the power management profile is set to "automatic".
This property is valid only for the Goya ASIC family

What: /sys/class/habanalabs/hl<n>/ic_clk
Date: Jan 2019
Expand Down Expand Up @@ -127,8 +128,8 @@ Description: Power management profile. Values are "auto", "manual". In "auto"
the max clock frequency to a low value when there are no user
processes that are opened on the device's file. In "manual"
mode, the user sets the maximum clock frequency by writing to
ic_clk, mme_clk and tpc_clk

ic_clk, mme_clk and tpc_clk. This property is valid only for
the Goya ASIC family

What: /sys/class/habanalabs/hl<n>/preboot_btl_ver
Date: Jan 2019
Expand Down Expand Up @@ -186,11 +187,4 @@ What: /sys/class/habanalabs/hl<n>/uboot_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Version of the u-boot running on the device's CPU

What: /sys/class/habanalabs/hl<n>/write_open_cnt
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Displays the total number of user processes that are currently
opened on the device's file
Description: Version of the u-boot running on the device's CPU
2 changes: 1 addition & 1 deletion drivers/misc/habanalabs/asid.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev)

mutex_init(&hdev->asid_mutex);

/* ASID 0 is reserved for KMD and device CPU */
/* ASID 0 is reserved for the kernel driver and device CPU */
set_bit(0, hdev->asid_bitmap);

return 0;
Expand Down
3 changes: 2 additions & 1 deletion drivers/misc/habanalabs/command_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
HL_KERNEL_ASID_ID);
if (rc) {
dev_err(hdev->dev, "Failed to allocate CB for KMD %d\n", rc);
dev_err(hdev->dev,
"Failed to allocate CB for the kernel driver %d\n", rc);
return NULL;
}

Expand Down
27 changes: 21 additions & 6 deletions drivers/misc/habanalabs/command_submission.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,23 @@ static void cs_do_release(struct kref *ref)

/* We also need to update CI for internal queues */
if (cs->submitted) {
int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
hdev->asic_funcs->hw_queues_lock(hdev);

WARN_ONCE((cs_cnt < 0),
"hl%d: error in CS active cnt %d\n",
hdev->id, cs_cnt);
hdev->cs_active_cnt--;
if (!hdev->cs_active_cnt) {
struct hl_device_idle_busy_ts *ts;

ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
ts->busy_to_idle_ts = ktime_get();

if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
hdev->idle_busy_ts_idx = 0;
} else if (hdev->cs_active_cnt < 0) {
dev_crit(hdev->dev, "CS active cnt %d is negative\n",
hdev->cs_active_cnt);
}

hdev->asic_funcs->hw_queues_unlock(hdev);

hl_int_hw_queue_update_ci(cs);

Expand Down Expand Up @@ -305,6 +317,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
if ((other) && (!dma_fence_is_signaled(other))) {
spin_unlock(&ctx->cs_lock);
dev_dbg(hdev->dev,
"Rejecting CS because of too many in-flights CS\n");
rc = -EAGAIN;
goto free_fence;
}
Expand Down Expand Up @@ -395,8 +409,9 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
return NULL;
}

if (hw_queue_prop->kmd_only) {
dev_err(hdev->dev, "Queue index %d is restricted for KMD\n",
if (hw_queue_prop->driver_only) {
dev_err(hdev->dev,
"Queue index %d is restricted for the kernel driver\n",
chunk->queue_index);
return NULL;
} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
Expand Down
40 changes: 24 additions & 16 deletions drivers/misc/habanalabs/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
dma_fence_put(ctx->cs_pending[i]);

if (ctx->asid != HL_KERNEL_ASID_ID) {
/*
* The engines are stopped as there is no executing CS, but the
/* The engines are stopped as there is no executing CS, but the
* Coresight might be still working by accessing addresses
* related to the stopped engines. Hence stop it explicitly.
* Stop only if this is the compute context, as there can be
* only one compute context
*/
if (hdev->in_debug)
if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
hl_device_set_debug_mode(hdev, false);

hl_vm_ctx_fini(ctx);
Expand Down Expand Up @@ -67,29 +68,36 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
goto out_err;
}

mutex_lock(&mgr->ctx_lock);
rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
mutex_unlock(&mgr->ctx_lock);

if (rc < 0) {
dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
goto free_ctx;
}

ctx->handle = rc;

rc = hl_ctx_init(hdev, ctx, false);
if (rc)
goto free_ctx;
goto remove_from_idr;

hl_hpriv_get(hpriv);
ctx->hpriv = hpriv;

/* TODO: remove for multiple contexts */
/* TODO: remove for multiple contexts per process */
hpriv->ctx = ctx;
hdev->user_ctx = ctx;

mutex_lock(&mgr->ctx_lock);
rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
mutex_unlock(&mgr->ctx_lock);

if (rc < 0) {
dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
hl_ctx_free(hdev, ctx);
goto out_err;
}
/* TODO: remove the following line for multiple process support */
hdev->compute_ctx = ctx;

return 0;

remove_from_idr:
mutex_lock(&mgr->ctx_lock);
idr_remove(&mgr->ctx_handles, ctx->handle);
mutex_unlock(&mgr->ctx_lock);
free_ctx:
kfree(ctx);
out_err:
Expand Down Expand Up @@ -120,7 +128,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
ctx->thread_ctx_switch_wait_token = 0;

if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
rc = hl_mmu_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "Failed to init mmu ctx module\n");
Expand Down
16 changes: 8 additions & 8 deletions drivers/misc/habanalabs/debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,

memset(&pkt, 0, sizeof(pkt));

pkt.ctl = __cpu_to_le32(ARMCP_PACKET_I2C_RD <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.i2c_bus = i2c_bus;
pkt.i2c_addr = i2c_addr;
Expand All @@ -55,12 +55,12 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,

memset(&pkt, 0, sizeof(pkt));

pkt.ctl = __cpu_to_le32(ARMCP_PACKET_I2C_WR <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.i2c_bus = i2c_bus;
pkt.i2c_addr = i2c_addr;
pkt.i2c_reg = i2c_reg;
pkt.value = __cpu_to_le64(val);
pkt.value = cpu_to_le64(val);

rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_DEVICE_TIMEOUT_USEC, NULL);
Expand All @@ -81,10 +81,10 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)

memset(&pkt, 0, sizeof(pkt));

pkt.ctl = __cpu_to_le32(ARMCP_PACKET_LED_SET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.led_index = __cpu_to_le32(led);
pkt.value = __cpu_to_le64(state);
pkt.led_index = cpu_to_le32(led);
pkt.value = cpu_to_le64(state);

rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_DEVICE_TIMEOUT_USEC, NULL);
Expand Down Expand Up @@ -370,7 +370,7 @@ static int mmu_show(struct seq_file *s, void *data)
if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
ctx = hdev->kernel_ctx;
else
ctx = hdev->user_ctx;
ctx = hdev->compute_ctx;

if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
Expand Down Expand Up @@ -533,7 +533,7 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr)
{
struct hl_ctx *ctx = hdev->user_ctx;
struct hl_ctx *ctx = hdev->compute_ctx;
u64 hop_addr, hop_pte_addr, hop_pte;
u64 offset_mask = HOP4_MASK | OFFSET_MASK;
int rc = 0;
Expand Down
Loading

0 comments on commit 9b4a66f

Please sign in to comment.