Skip to content

Commit

Permalink
drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.
Browse files Browse the repository at this point in the history
Instead of doing what we do currently, which will never work with
PROVE_LOCKING, do the same as AMD does, and something similar to
relocation slowpath. When all locks are dropped, we acquire the
pages for pinning. When the locks are taken, we transfer those
pages in .get_pages() to the bo. As a final check before installing
the fences, we ensure that the mmu notifier was not called; if it is,
we return -EAGAIN to userspace to signal it has to start over.

Changes since v1:
- Unbinding is done in submit_init only. submit_begin() removed.
- MMU_NOTFIER -> MMU_NOTIFIER
Changes since v2:
- Make i915->mm.notifier a spinlock.
Changes since v3:
- Add WARN_ON if there are any page references left, should have been 0.
- Return 0 on success in submit_init(), bug from spinlock conversion.
- Release pvec outside of notifier_lock (Thomas).
Changes since v4:
- Mention why we're clearing eb->[i + 1].vma in the code. (Thomas)
- Actually check all invalidations in eb_move_to_gpu. (Thomas)
- Do not wait when process is exiting to fix gem_ctx_persistence.userptr.
Changes since v5:
- Clarify why check on PF_EXITING is (temporarily) required.
Changes since v6:
- Ensure userptr validity is checked in set_domain through a special path.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Dave Airlie <airlied@redhat.com>
[danvet: s/kfree/kvfree/ in i915_gem_object_userptr_drop_ref in the
previous review round, but which got lost. The other open questions
around page refcount are imo better discussed in a separate series,
with amdgpu folks involved].
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210323155059.628690-17-maarten.lankhorst@linux.intel.com
  • Loading branch information
Maarten Lankhorst authored and Daniel Vetter committed Mar 24, 2021
1 parent 20ee27b commit ed29c26
Show file tree
Hide file tree
Showing 8 changed files with 395 additions and 584 deletions.
18 changes: 16 additions & 2 deletions drivers/gpu/drm/i915/gem/i915_gem_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -533,14 +533,28 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if (err)
goto out;

if (i915_gem_object_is_userptr(obj)) {
/*
* Try to grab userptr pages, iris uses set_domain to check
* userptr validity
*/
err = i915_gem_object_userptr_validate(obj);
if (!err)
err = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
goto out;
}

/*
* Proxy objects do not control access to the backing storage, ergo
* they cannot be used as a means to manipulate the cache domain
* tracking for that backing storage. The proxy object is always
* considered to be outside of any cache domain.
*/
if (i915_gem_object_is_proxy(obj) &&
!i915_gem_object_is_userptr(obj)) {
if (i915_gem_object_is_proxy(obj)) {
err = -ENXIO;
goto out;
}
Expand Down
101 changes: 88 additions & 13 deletions drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,16 @@ enum {
/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
#define __EXEC_OBJECT_HAS_PIN BIT(30)
#define __EXEC_OBJECT_HAS_FENCE BIT(29)
#define __EXEC_OBJECT_NEEDS_MAP BIT(28)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(27)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above + */
#define __EXEC_OBJECT_USERPTR_INIT BIT(28)
#define __EXEC_OBJECT_NEEDS_MAP BIT(27)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(26)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)

#define __EXEC_HAS_RELOC BIT(31)
#define __EXEC_ENGINE_PINNED BIT(30)
#define __EXEC_INTERNAL_FLAGS (~0u << 30)
#define __EXEC_USERPTR_USED BIT(29)
#define __EXEC_INTERNAL_FLAGS (~0u << 29)
#define UPDATE PIN_OFFSET_FIXED

#define BATCH_OFFSET_BIAS (256*1024)
Expand Down Expand Up @@ -871,6 +873,26 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
}

eb_add_vma(eb, i, batch, vma);

if (i915_gem_object_is_userptr(vma->obj)) {
err = i915_gem_object_userptr_submit_init(vma->obj);
if (err) {
if (i + 1 < eb->buffer_count) {
/*
* Execbuffer code expects last vma entry to be NULL,
* since we already initialized this entry,
* set the next value to NULL or we mess up
* cleanup handling.
*/
eb->vma[i + 1].vma = NULL;
}

return err;
}

eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
eb->args->flags |= __EXEC_USERPTR_USED;
}
}

if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
Expand Down Expand Up @@ -972,7 +994,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
}
}

static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr)
{
const unsigned int count = eb->buffer_count;
unsigned int i;
Expand All @@ -986,6 +1008,11 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)

eb_unreserve_vma(ev);

if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
i915_gem_object_userptr_submit_fini(vma->obj);
}

if (final)
i915_vma_put(vma);
}
Expand Down Expand Up @@ -1923,6 +1950,31 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
return 0;
}

static int eb_reinit_userptr(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i;
int ret;

if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
return 0;

for (i = 0; i < count; i++) {
struct eb_vma *ev = &eb->vma[i];

if (!i915_gem_object_is_userptr(ev->vma->obj))
continue;

ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
if (ret)
return ret;

ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
}

return 0;
}

static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
struct i915_request *rq)
{
Expand All @@ -1937,7 +1989,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
}

/* We may process another execbuffer during the unlock... */
eb_release_vmas(eb, false);
eb_release_vmas(eb, false, true);
i915_gem_ww_ctx_fini(&eb->ww);

if (rq) {
Expand Down Expand Up @@ -1978,10 +2030,8 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
err = 0;
}

#ifdef CONFIG_MMU_NOTIFIER
if (!err)
flush_workqueue(eb->i915->mm.userptr_wq);
#endif
err = eb_reinit_userptr(eb);

err_relock:
i915_gem_ww_ctx_init(&eb->ww, true);
Expand Down Expand Up @@ -2043,7 +2093,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,

err:
if (err == -EDEADLK) {
eb_release_vmas(eb, false);
eb_release_vmas(eb, false, false);
err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
goto repeat_validate;
Expand Down Expand Up @@ -2140,7 +2190,7 @@ static int eb_relocate_parse(struct i915_execbuffer *eb)

err:
if (err == -EDEADLK) {
eb_release_vmas(eb, false);
eb_release_vmas(eb, false, false);
err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
goto retry;
Expand Down Expand Up @@ -2215,6 +2265,30 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
flags | __EXEC_OBJECT_NO_RESERVE);
}

#ifdef CONFIG_MMU_NOTIFIER
if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
spin_lock(&eb->i915->mm.notifier_lock);

/*
* count is always at least 1, otherwise __EXEC_USERPTR_USED
* could not have been set
*/
for (i = 0; i < count; i++) {
struct eb_vma *ev = &eb->vma[i];
struct drm_i915_gem_object *obj = ev->vma->obj;

if (!i915_gem_object_is_userptr(obj))
continue;

err = i915_gem_object_userptr_submit_done(obj);
if (err)
break;
}

spin_unlock(&eb->i915->mm.notifier_lock);
}
#endif

if (unlikely(err))
goto err_skip;

Expand Down Expand Up @@ -3359,7 +3433,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,

err = eb_lookup_vmas(&eb);
if (err) {
eb_release_vmas(&eb, true);
eb_release_vmas(&eb, true, true);
goto err_engine;
}

Expand Down Expand Up @@ -3431,6 +3505,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,

trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb, batch);

err_request:
i915_request_get(eb.request);
err = eb_request_add(&eb, err);
Expand All @@ -3451,7 +3526,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
i915_request_put(eb.request);

err_vma:
eb_release_vmas(&eb, true);
eb_release_vmas(&eb, true, true);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
WARN_ON(err == -EDEADLK);
Expand Down
38 changes: 22 additions & 16 deletions drivers/gpu/drm/i915/gem/i915_gem_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
const void *data, resource_size_t size);

extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;

void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
bool needs_clflush);
Expand Down Expand Up @@ -252,12 +253,6 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj)
return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_MMAP);
}

static inline bool
i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
{
return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL);
}

static inline bool
i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
{
Expand Down Expand Up @@ -548,16 +543,6 @@ void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
enum fb_op_origin origin);

static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
{
#ifdef CONFIG_MMU_NOTIFIER
return obj->userptr.mm;
#else
return false;
#endif
}

static inline void
i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
enum fb_op_origin origin)
Expand All @@ -578,4 +563,25 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,

bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj);

#ifdef CONFIG_MMU_NOTIFIER
static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
{
return obj->userptr.notifier.mm;
}

int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj);
void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj);
#else
static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; }

static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); }
static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }

#endif

#endif
10 changes: 6 additions & 4 deletions drivers/gpu/drm/i915/gem/i915_gem_object_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#ifndef __I915_GEM_OBJECT_TYPES_H__
#define __I915_GEM_OBJECT_TYPES_H__

#include <linux/mmu_notifier.h>

#include <drm/drm_gem.h>
#include <uapi/drm/i915_drm.h>

Expand Down Expand Up @@ -34,7 +36,6 @@ struct drm_i915_gem_object_ops {
#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2)
#define I915_GEM_OBJECT_IS_PROXY BIT(3)
#define I915_GEM_OBJECT_NO_MMAP BIT(4)
#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5)

/* Interface between the GEM object and its backing storage.
* get_pages() is called once prior to the use of the associated set
Expand Down Expand Up @@ -293,10 +294,11 @@ struct drm_i915_gem_object {
#ifdef CONFIG_MMU_NOTIFIER
struct i915_gem_userptr {
uintptr_t ptr;
unsigned long notifier_seq;

struct i915_mm_struct *mm;
struct i915_mmu_object *mmu_object;
struct work_struct *work;
struct mmu_interval_notifier notifier;
struct page **pvec;
int page_ref;
} userptr;
#endif

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
* get_pages backends we should be better able to handle the
* cancellation of the async task in a more uniform manner.
*/
if (!pages && !i915_gem_object_needs_async_cancel(obj))
if (!pages)
pages = ERR_PTR(-EINVAL);

if (!IS_ERR(pages))
Expand Down
Loading

0 comments on commit ed29c26

Please sign in to comment.