drm/vc4: Add support for async pageflips.
An async pageflip stores the modeset to be done and executes it once
the BOs are ready to be displayed.  This gets us about 3x performance
in full screen rendering with pageflipping.

Signed-off-by: Eric Anholt <eric@anholt.net>
Eric Anholt committed Dec 8, 2015
1 parent d5b1a78 commit b501bac
Showing 5 changed files with 342 additions and 2 deletions.
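
For context, userspace opts into this path by setting DRM_MODE_PAGE_FLIP_ASYNC on the page-flip ioctl. A minimal libdrm sketch (everything apart from the drmModePageFlip() call and the flag is illustrative, not part of this commit):

#include <stdint.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Request a non-vblank-synced flip; crtc_id and fb_id are handles the
 * application obtained earlier.  Returns 0 on success. */
static int request_async_flip(int drm_fd, uint32_t crtc_id, uint32_t fb_id)
{
	return drmModePageFlip(drm_fd, crtc_id, fb_id,
			       DRM_MODE_PAGE_FLIP_ASYNC, NULL);
}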
99 changes: 98 additions & 1 deletion drivers/gpu/drm/vc4/vc4_crtc.c
@@ -35,6 +35,7 @@
#include "drm_atomic_helper.h"
#include "drm_crtc_helper.h"
#include "linux/clk.h"
#include "drm_fb_cma_helper.h"
#include "linux/component.h"
#include "linux/of_device.h"
#include "vc4_drv.h"
@@ -475,10 +476,106 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
return ret;
}

struct vc4_async_flip_state {
struct drm_crtc *crtc;
struct drm_framebuffer *fb;
struct drm_pending_vblank_event *event;

struct vc4_seqno_cb cb;
};

/* Called when the V3D execution for the BO being flipped to is done, so that
* we can actually update the plane's address to point to it.
*/
static void
vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
{
struct vc4_async_flip_state *flip_state =
container_of(cb, struct vc4_async_flip_state, cb);
struct drm_crtc *crtc = flip_state->crtc;
struct drm_device *dev = crtc->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_plane *plane = crtc->primary;

vc4_plane_async_set_fb(plane, flip_state->fb);
if (flip_state->event) {
unsigned long flags;

spin_lock_irqsave(&dev->event_lock, flags);
drm_crtc_send_vblank_event(crtc, flip_state->event);
spin_unlock_irqrestore(&dev->event_lock, flags);
}

drm_framebuffer_unreference(flip_state->fb);
kfree(flip_state);

up(&vc4->async_modeset);
}

/* Implements async (non-vblank-synced) page flips.
*
* The page flip ioctl needs to return immediately, so we grab the
* modeset semaphore on the pipe, and queue the address update for
* when V3D is done with the BO being flipped to.
*/
static int vc4_async_page_flip(struct drm_crtc *crtc,
struct drm_framebuffer *fb,
struct drm_pending_vblank_event *event,
uint32_t flags)
{
struct drm_device *dev = crtc->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_plane *plane = crtc->primary;
int ret = 0;
struct vc4_async_flip_state *flip_state;
struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);

flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
if (!flip_state)
return -ENOMEM;

drm_framebuffer_reference(fb);
flip_state->fb = fb;
flip_state->crtc = crtc;
flip_state->event = event;

/* Make sure all other async modesets have landed. */
ret = down_interruptible(&vc4->async_modeset);
if (ret) {
kfree(flip_state);
return ret;
}

/* Immediately update the plane's legacy fb pointer, so that later
* modeset prep sees the state that will be present when the semaphore
* is released.
*/
drm_atomic_set_fb_for_plane(plane->state, fb);
plane->fb = fb;

vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
vc4_async_page_flip_complete);

/* Driver takes ownership of state on successful async commit. */
return 0;
}

static int vc4_page_flip(struct drm_crtc *crtc,
struct drm_framebuffer *fb,
struct drm_pending_vblank_event *event,
uint32_t flags)
{
if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
return vc4_async_page_flip(crtc, fb, event, flags);
else
return drm_atomic_helper_page_flip(crtc, fb, event, flags);
}

static const struct drm_crtc_funcs vc4_crtc_funcs = {
.set_config = drm_atomic_helper_set_config,
.destroy = vc4_crtc_destroy,
-	.page_flip = drm_atomic_helper_page_flip,
+	.page_flip = vc4_page_flip,
.set_property = NULL,
.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
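
The completion path relies on a standard kernel idiom: the vc4_seqno_cb is embedded in the per-flip state, and the callback recovers the enclosing struct with container_of(). A self-contained userspace sketch of that idiom (all names here are illustrative, not part of the commit):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct seqno_cb {
	void (*func)(struct seqno_cb *cb);
};

struct flip_state {
	int crtc_id;          /* stand-in for the drm_crtc/fb/event fields */
	struct seqno_cb cb;   /* embedded, so one allocation covers both */
};

static void flip_complete(struct seqno_cb *cb)
{
	/* Recover the enclosing per-flip state from the member pointer. */
	struct flip_state *state = container_of(cb, struct flip_state, cb);

	printf("flip complete on crtc %d\n", state->crtc_id);
}

int main(void)
{
	struct flip_state state = { .crtc_id = 0, .cb = { flip_complete } };

	state.cb.func(&state.cb);   /* the driver invokes this from a workqueue */
	return 0;
}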
16 changes: 16 additions & 0 deletions drivers/gpu/drm/vc4/vc4_drv.h
@@ -76,6 +76,11 @@ struct vc4_dev {
wait_queue_head_t job_wait_queue;
struct work_struct job_done_work;

/* List of struct vc4_seqno_cb for callbacks to be made from a
* workqueue when the given seqno is passed.
*/
struct list_head seqno_cb_list;

/* The binner overflow memory that's currently set up in
* BPOA/BPOS registers. When overflow occurs and a new one is
* allocated, the previous one will be moved to
@@ -128,6 +133,12 @@ to_vc4_bo(struct drm_gem_object *bo)
return (struct vc4_bo *)bo;
}

struct vc4_seqno_cb {
struct work_struct work;
uint64_t seqno;
void (*func)(struct vc4_seqno_cb *cb);
};

struct vc4_v3d {
struct platform_device *pdev;
void __iomem *regs;
@@ -384,6 +395,9 @@ void vc4_submit_next_job(struct drm_device *dev);
int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
uint64_t timeout_ns, bool interruptible);
void vc4_job_handle_completed(struct vc4_dev *vc4);
int vc4_queue_seqno_cb(struct drm_device *dev,
struct vc4_seqno_cb *cb, uint64_t seqno,
void (*func)(struct vc4_seqno_cb *cb));

/* vc4_hdmi.c */
extern struct platform_driver vc4_hdmi_driver;
@@ -409,6 +423,8 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
enum drm_plane_type type);
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
u32 vc4_plane_dlist_size(struct drm_plane_state *state);
void vc4_plane_async_set_fb(struct drm_plane *plane,
struct drm_framebuffer *fb);

/* vc4_v3d.c */
extern struct platform_driver vc4_v3d_driver;
40 changes: 40 additions & 0 deletions drivers/gpu/drm/vc4/vc4_gem.c
@@ -461,6 +461,7 @@ void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
unsigned long irqflags;
struct vc4_seqno_cb *cb, *cb_temp;

spin_lock_irqsave(&vc4->job_lock, irqflags);
while (!list_empty(&vc4->job_done_list)) {
@@ -473,7 +474,45 @@ vc4_job_handle_completed(struct vc4_dev *vc4)
vc4_complete_exec(vc4->dev, exec);
spin_lock_irqsave(&vc4->job_lock, irqflags);
}

list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
if (cb->seqno <= vc4->finished_seqno) {
list_del_init(&cb->work.entry);
schedule_work(&cb->work);
}
}

spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

cb->func(cb);
}

int vc4_queue_seqno_cb(struct drm_device *dev,
struct vc4_seqno_cb *cb, uint64_t seqno,
void (*func)(struct vc4_seqno_cb *cb))
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
int ret = 0;
unsigned long irqflags;

cb->func = func;
INIT_WORK(&cb->work, vc4_seqno_cb_work);

spin_lock_irqsave(&vc4->job_lock, irqflags);
if (seqno > vc4->finished_seqno) {
cb->seqno = seqno;
list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
} else {
schedule_work(&cb->work);
}
spin_unlock_irqrestore(&vc4->job_lock, irqflags);

return ret;
}

/* Scheduled when any job has been completed, this walks the list of
@@ -610,6 +649,7 @@ vc4_gem_init(struct drm_device *dev)

INIT_LIST_HEAD(&vc4->job_list);
INIT_LIST_HEAD(&vc4->job_done_list);
INIT_LIST_HEAD(&vc4->seqno_cb_list);
spin_lock_init(&vc4->job_lock);

INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
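
Two details of vc4_queue_seqno_cb() are worth noting. First, the seqno check and list insertion happen under job_lock, which closes the race against vc4_job_handle_completed() advancing finished_seqno. Second, the pending list reuses the work_struct's own entry list_head as linkage; that appears safe here because a work item is not on any workqueue list until schedule_work() is called. A userspace sketch of the queue-or-run-immediately logic, with a pthread mutex standing in for the spinlock and a singly linked list for the list_head (all names illustrative):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct seqno_cb {
	uint64_t seqno;
	void (*func)(struct seqno_cb *cb);
	struct seqno_cb *next;   /* stand-in for the list_head linkage */
};

static pthread_mutex_t job_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t finished_seqno;
static struct seqno_cb *pending;

/* Mirrors vc4_queue_seqno_cb(): under the lock, either park the callback
 * until its seqno is reached or run it immediately (the driver defers the
 * actual call to a workqueue rather than running it under the spinlock). */
static void queue_seqno_cb(struct seqno_cb *cb, uint64_t seqno,
			   void (*func)(struct seqno_cb *cb))
{
	cb->seqno = seqno;
	cb->func = func;

	pthread_mutex_lock(&job_lock);
	if (seqno > finished_seqno) {
		cb->next = pending;
		pending = cb;
	} else {
		func(cb);
	}
	pthread_mutex_unlock(&job_lock);
}

/* Mirrors the loop added to vc4_job_handle_completed(): fire every parked
 * callback whose seqno has now been passed. */
static void handle_completed(uint64_t new_finished)
{
	struct seqno_cb **pp;

	pthread_mutex_lock(&job_lock);
	finished_seqno = new_finished;
	for (pp = &pending; *pp; ) {
		struct seqno_cb *cb = *pp;

		if (cb->seqno <= finished_seqno) {
			*pp = cb->next;
			cb->func(cb);
		} else {
			pp = &cb->next;
		}
	}
	pthread_mutex_unlock(&job_lock);
}

static void on_done(struct seqno_cb *cb)
{
	printf("seqno %llu passed\n", (unsigned long long)cb->seqno);
}

int main(void)
{
	struct seqno_cb cb;

	queue_seqno_cb(&cb, 5, on_done);  /* parked: nothing finished yet */
	handle_completed(5);              /* fires the callback */
	return 0;
}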