drm/amdgpu: Move scheduler init to after XGMI is ready
Before we initialize schedulers we must know which reset
domain we are in - for a single device there is a single
domain per device and so a single wq per device. For XGMI
the reset domain spans the entire XGMI hive and so the
reset wq is per hive.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://www.spinics.net/lists/amd-gfx/msg74112.html
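
[Editor's note] To make the move concrete, here is a minimal before/after sketch of the drm_sched_init() call this commit relocates. Both argument lists are taken from the diff below; the framing comments are editorial and not part of the patch.

/* Before: the scheduler was created per ring from
 * amdgpu_fence_driver_init_ring(), with no dedicated timeout workqueue
 * (drm_sched then falls back to a system-wide default workqueue). */
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
                   num_hw_submission, amdgpu_job_hang_limit,
                   timeout, NULL, sched_score, ring->name);

/* After: creation is deferred to amdgpu_device_init_schedulers(), which
 * runs once the reset domain is known, so every ring in that domain
 * (a single device, or a whole XGMI hive) shares the domain's reset
 * workqueue for timeout handling. */
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
                   ring->num_hw_submission, amdgpu_job_hang_limit,
                   timeout, adev->reset_domain.wq,
                   ring->sched_score, ring->name);
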
Andrey Grodzovsky committed Feb 9, 2022
1 parent a4c63ca commit 5fd8518
Showing 4 changed files with 56 additions and 43 deletions.
45 changes: 45 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2287,6 +2287,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
        return r;
}

static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
{
        long timeout;
        int r, i;

        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];

                /* No need to setup the GPU scheduler for rings that don't need it */
                if (!ring || ring->no_scheduler)
                        continue;

                switch (ring->funcs->type) {
                case AMDGPU_RING_TYPE_GFX:
                        timeout = adev->gfx_timeout;
                        break;
                case AMDGPU_RING_TYPE_COMPUTE:
                        timeout = adev->compute_timeout;
                        break;
                case AMDGPU_RING_TYPE_SDMA:
                        timeout = adev->sdma_timeout;
                        break;
                default:
                        timeout = adev->video_timeout;
                        break;
                }

                r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
                                   ring->num_hw_submission, amdgpu_job_hang_limit,
                                   timeout, adev->reset_domain.wq, ring->sched_score, ring->name);
                if (r) {
                        DRM_ERROR("Failed to create scheduler on ring %s.\n",
                                  ring->name);
                        return r;
                }
        }

        return 0;
}


/**
* amdgpu_device_ip_init - run init for hardware IPs
*
@@ -2419,6 +2460,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
                }
        }

        r = amdgpu_device_init_schedulers(adev);
        if (r)
                goto init_failed;

        /* Don't init kfd if whole hive need to be reset during init */
        if (!adev->gmc.xgmi.pending_reset)
                amdgpu_amdkfd_device_init(adev);
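
[Editor's note] For orientation, a condensed, hand-written outline of where the new call sits in amdgpu_device_ip_init(). Everything not shown in the hunk above is elided; this is a sketch, not an excerpt from the tree.

static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
        int r;

        /* ... IP block init; per the commit message, the XGMI hive and
         * hence adev->reset_domain are known by this point ... */

        r = amdgpu_device_init_schedulers(adev);        /* new in this patch */
        if (r)
                goto init_failed;

        /* Don't init kfd if whole hive need to be reset during init */
        if (!adev->gmc.xgmi.pending_reset)
                amdgpu_amdkfd_device_init(adev);

        /* ... */

init_failed:
        /* ... existing error handling ... */
        return r;
}
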
43 changes: 5 additions & 38 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -446,24 +446,18 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* for the requested ring.
*
* @ring: ring to init the fence driver on
* @num_hw_submission: number of entries on the hardware queue
* @sched_score: optional score atomic shared with other schedulers
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
unsigned num_hw_submission,
atomic_t *sched_score)
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
long timeout;
int r;

if (!adev)
return -EINVAL;

if (!is_power_of_2(num_hw_submission))
if (!is_power_of_2(ring->num_hw_submission))
return -EINVAL;

ring->fence_drv.cpu_addr = NULL;
@@ -474,41 +468,14 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,

timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);

ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
spin_lock_init(&ring->fence_drv.lock);
ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
GFP_KERNEL);

if (!ring->fence_drv.fences)
return -ENOMEM;

/* No need to setup the GPU scheduler for rings that don't need it */
if (ring->no_scheduler)
return 0;

switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_GFX:
timeout = adev->gfx_timeout;
break;
case AMDGPU_RING_TYPE_COMPUTE:
timeout = adev->compute_timeout;
break;
case AMDGPU_RING_TYPE_SDMA:
timeout = adev->sdma_timeout;
break;
default:
timeout = adev->video_timeout;
break;
}

r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
num_hw_submission, amdgpu_job_hang_limit,
timeout, NULL, sched_score, ring->name);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
ring->name);
return r;
}

return 0;
}

5 changes: 3 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -191,8 +191,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->adev = adev;
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission,
sched_score);
ring->num_hw_submission = sched_hw_submission;
ring->sched_score = sched_score;
r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;
}
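
[Editor's note] The two assignments added above are the producer side of a hand-off: amdgpu_ring_init() now only records the scheduler parameters, and the new amdgpu_device_init_schedulers() (first file in this diff) reads them back when it finally creates the scheduler. A short sketch, with names taken from this diff and editorial comments:

/* amdgpu_ring_init(): stash the parameters on the ring instead of
 * passing them to the fence driver. */
ring->num_hw_submission = sched_hw_submission;
ring->sched_score = sched_score;
r = amdgpu_fence_driver_init_ring(ring);        /* fence setup only now */

/* amdgpu_device_init_schedulers(), later: use ring->num_hw_submission
 * and ring->sched_score in the drm_sched_init() call, together with
 * the reset domain's workqueue. */
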
6 changes: 3 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -114,9 +114,7 @@ struct amdgpu_fence_driver {
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);

int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
unsigned num_hw_submission,
atomic_t *sched_score);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
unsigned irq_type);
@@ -251,6 +249,8 @@ struct amdgpu_ring {
bool has_compute_vm_bug;
bool no_scheduler;
int hw_prio;
unsigned num_hw_submission;
atomic_t *sched_score;
};

#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
