Skip to content

Commit

Permalink
nouveau: push event block/allowing out of the fence context
Browse files Browse the repository at this point in the history
There is a deadlock between the irq and fctx locks:
the irq handling takes the irq lock, then the fctx lock;
the fence signalling takes the fctx lock, then the irq lock.

This splits the fence signalling path so the code that hits
the irq lock is done in a separate work queue.

This seems to fix crashes/hangs when using nouveau gsp with
an i915 primary GPU.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-11-airlied@gmail.com
  • Loading branch information
Dave Airlie authored and Dave Airlie committed Jan 5, 2024
1 parent 9c9dd22 commit eacabb5
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
28 changes: 23 additions & 5 deletions drivers/gpu/drm/nouveau/nouveau_fence.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence)
if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);

if (!--fctx->notify_ref)
if (atomic_dec_and_test(&fctx->notify_ref))
drop = 1;
}

Expand Down Expand Up @@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
void
nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
{
cancel_work_sync(&fctx->allow_block_work);
nouveau_fence_context_kill(fctx, 0);
nvif_event_dtor(&fctx->event);
fctx->dead = 1;
Expand Down Expand Up @@ -167,6 +168,18 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc
return ret;
}

/*
 * Work handler for fctx->allow_block_work.
 *
 * Recovers the owning nouveau_fence_chan from the embedded work item and
 * brings fctx->event in line with the current notify_ref count: the event
 * is blocked when the count is zero and allowed otherwise.
 *
 * Per the commit description, block/allow is done from a work item so the
 * code that takes the irq lock is kept out of the fence signalling path.
 */
static void
nouveau_fence_work_allow_block(struct work_struct *work)
{
struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
allow_block_work);

/* notify_ref is sampled once here; the decision is based on that snapshot. */
if (atomic_read(&fctx->notify_ref) == 0)
nvif_event_block(&fctx->event);
else
nvif_event_allow(&fctx->event);
}

void
nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
{
Expand All @@ -178,6 +191,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
} args;
int ret;

INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block);
INIT_LIST_HEAD(&fctx->flip);
INIT_LIST_HEAD(&fctx->pending);
spin_lock_init(&fctx->lock);
Expand Down Expand Up @@ -521,15 +535,19 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f)
struct nouveau_fence *fence = from_fence(f);
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
bool ret;
bool do_work;

if (!fctx->notify_ref++)
nvif_event_allow(&fctx->event);
if (atomic_inc_return(&fctx->notify_ref) == 0)
do_work = true;

ret = nouveau_fence_no_signaling(f);
if (ret)
set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags);
else if (!--fctx->notify_ref)
nvif_event_block(&fctx->event);
else if (atomic_dec_and_test(&fctx->notify_ref))
do_work = true;

if (do_work)
schedule_work(&fctx->allow_block_work);

return ret;
}
Expand Down
5 changes: 4 additions & 1 deletion drivers/gpu/drm/nouveau/nouveau_fence.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#define __NOUVEAU_FENCE_H__

#include <linux/dma-fence.h>
#include <linux/workqueue.h>
#include <nvif/event.h>

struct nouveau_drm;
Expand Down Expand Up @@ -45,7 +46,9 @@ struct nouveau_fence_chan {
char name[32];

struct nvif_event event;
int notify_ref, dead, killed;
struct work_struct allow_block_work;
atomic_t notify_ref;
int dead, killed;
};

struct nouveau_fence_priv {
Expand Down

0 comments on commit eacabb5

Please sign in to comment.