Merge branch 'ttm-transhuge' of git://people.freedesktop.org/~thomash/linux into drm-next

Huge page-table entries for TTM

In order to reduce CPU usage [1] and, in theory, TLB misses, this patchset enables
huge and giant page-table entries for TTM and TTM-enabled graphics drivers.

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Thomas Hellstrom (VMware) <thomas_os@shipmail.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20200325073102.6129-1-thomas_os@shipmail.org
Dave Airlie committed Apr 2, 2020
2 parents 59e7a8c + 9431042 commit 0e7e619
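
For orientation (an editorial note, not part of the commit): "huge" and "giant" entries here mean PMD- and PUD-level page-table entries. On x86-64 with 4 KiB base pages those map 2 MiB and 1 GiB respectively, so one huge fault stands in for 512 or 262144 small-page faults. A minimal C sketch of that arithmetic, using assumed constants rather than the kernel's HPAGE_PMD_SIZE/HPAGE_PUD_SIZE macros:

#include <stdio.h>

int main(void)
{
    const unsigned long page_shift = 12;        /* assumed 4 KiB base pages */
    const unsigned long pmd_size = 2UL << 20;   /* "huge" PMD entry: 2 MiB */
    const unsigned long pud_size = 1UL << 30;   /* "giant" PUD entry: 1 GiB */

    /* base pages covered by a single PMD/PUD fault */
    printf("PMD covers %lu pages, PUD covers %lu pages\n",
           pmd_size >> page_shift, pud_size >> page_shift);   /* 512, 262144 */
    return 0;
}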
Showing 16 changed files with 692 additions and 28 deletions.
141 changes: 141 additions & 0 deletions drivers/gpu/drm/drm_file.c
@@ -48,6 +48,11 @@
#include "drm_internal.h"
#include "drm_legacy.h"

#if defined(CONFIG_MMU) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
#include <uapi/asm/mman.h>
#include <drm/drm_vma_manager.h>
#endif

/* from BKL pushdown */
DEFINE_MUTEX(drm_global_mutex);

@@ -872,3 +877,139 @@ struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags)
return file;
}
EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile);

#ifdef CONFIG_MMU
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* drm_addr_inflate() attempts to construct an aligned area by inflating
* the area size and skipping the unaligned start of the area.
* adapted from shmem_get_unmapped_area()
*/
static unsigned long drm_addr_inflate(unsigned long addr,
unsigned long len,
unsigned long pgoff,
unsigned long flags,
unsigned long huge_size)
{
unsigned long offset, inflated_len;
unsigned long inflated_addr;
unsigned long inflated_offset;

offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
if (offset && offset + len < 2 * huge_size)
return addr;
if ((addr & (huge_size - 1)) == offset)
return addr;

inflated_len = len + huge_size - PAGE_SIZE;
if (inflated_len > TASK_SIZE)
return addr;
if (inflated_len < len)
return addr;

inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len,
0, flags);
if (IS_ERR_VALUE(inflated_addr))
return addr;
if (inflated_addr & ~PAGE_MASK)
return addr;

inflated_offset = inflated_addr & (huge_size - 1);
inflated_addr += offset - inflated_offset;
if (inflated_offset > offset)
inflated_addr += huge_size;

if (inflated_addr > TASK_SIZE - len)
return addr;

return inflated_addr;
}
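
To make the inflate-and-align arithmetic above concrete, here is a stand-alone userspace sketch with invented values; the "allocator" result is simply assumed, and the constants mirror a 2 MiB PMD on a 4 KiB-page system:

#include <stdio.h>

int main(void)
{
    unsigned long huge_size = 2UL << 20;            /* assumed 2 MiB PMD */
    unsigned long page_size = 4096UL;
    unsigned long len = 8UL << 20;                  /* 8 MiB mapping */
    unsigned long offset = 0;                       /* want a huge-page aligned start */
    unsigned long inflated_len = len + huge_size - page_size;
    unsigned long inflated_addr = 0x7f0000123000UL; /* pretend get_unmapped_area() result */
    unsigned long inflated_offset = inflated_addr & (huge_size - 1);

    /* skip the unaligned head of the inflated area, as drm_addr_inflate() does */
    inflated_addr += offset - inflated_offset;
    if (inflated_offset > offset)
        inflated_addr += huge_size;

    printf("inflated len %#lx, aligned start %#lx\n",
           inflated_len, inflated_addr);            /* 0x9ff000, 0x7f0000200000 */
    return 0;
}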

/**
* drm_get_unmapped_area() - Get an unused user-space virtual memory area
* suitable for huge page table entries.
* @file: The struct file representing the address space being mmap()'d.
* @uaddr: Start address suggested by user-space.
* @len: Length of the area.
* @pgoff: The page offset into the address space.
* @flags: mmap flags
* @mgr: The address space manager used by the drm driver. This argument can
* probably be removed at some point when all drivers use the same
* address space manager.
*
* This function attempts to find an unused user-space virtual memory area
* that can accommodate the size we want to map, and that is properly
* aligned to facilitate huge page table entries matching actual
* huge pages or huge page aligned memory in buffer objects. Buffer objects
* are assumed to start at huge page boundary pfns (io memory) or be
* populated by huge pages aligned to the start of the buffer object
* (system- or coherent memory). Adapted from shmem_get_unmapped_area.
*
* Return: aligned user-space address.
*/
unsigned long drm_get_unmapped_area(struct file *file,
unsigned long uaddr, unsigned long len,
unsigned long pgoff, unsigned long flags,
struct drm_vma_offset_manager *mgr)
{
unsigned long addr;
unsigned long inflated_addr;
struct drm_vma_offset_node *node;

if (len > TASK_SIZE)
return -ENOMEM;

/*
* @pgoff is the file page-offset, whose huge page boundaries typically
* align with physical address huge page boundaries.
* That's not true for DRM, however, where physical address huge
* page boundaries instead are aligned with the offset from
* buffer object start. So adjust @pgoff to be the offset from
* buffer object start.
*/
drm_vma_offset_lock_lookup(mgr);
node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
if (node)
pgoff -= node->vm_node.start;
drm_vma_offset_unlock_lookup(mgr);

addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;
if (addr & ~PAGE_MASK)
return addr;
if (addr > TASK_SIZE - len)
return addr;

if (len < HPAGE_PMD_SIZE)
return addr;
if (flags & MAP_FIXED)
return addr;
/*
* Our priority is to support MAP_SHARED mapped hugely;
* and support MAP_PRIVATE mapped hugely too, until it is COWed.
* But if caller specified an address hint, respect that as before.
*/
if (uaddr)
return addr;

inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
HPAGE_PMD_SIZE);

if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
len >= HPAGE_PUD_SIZE)
inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
flags, HPAGE_PUD_SIZE);
return inflated_addr;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
unsigned long drm_get_unmapped_area(struct file *file,
unsigned long uaddr, unsigned long len,
unsigned long pgoff, unsigned long flags,
struct drm_vma_offset_manager *mgr)
{
return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
#endif /* CONFIG_MMU */
161 changes: 158 additions & 3 deletions drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -156,13 +156,97 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
* ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
* @vmf: Fault data
* @bo: The buffer object
* @page_offset: Page offset from bo start
* @fault_page_size: The size of the fault in pages.
* @pgprot: The page protections.
* Does additional checking whether it's possible to insert a PUD or PMD
* pfn and performs the insertion.
*
* Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
* a huge fault was not possible, or on insertion error.
*/
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
struct ttm_buffer_object *bo,
pgoff_t page_offset,
pgoff_t fault_page_size,
pgprot_t pgprot)
{
pgoff_t i;
vm_fault_t ret;
unsigned long pfn;
pfn_t pfnt;
struct ttm_tt *ttm = bo->ttm;
bool write = vmf->flags & FAULT_FLAG_WRITE;

/* Fault should not cross bo boundary. */
page_offset &= ~(fault_page_size - 1);
if (page_offset + fault_page_size > bo->num_pages)
goto out_fallback;

if (bo->mem.bus.is_iomem)
pfn = ttm_bo_io_mem_pfn(bo, page_offset);
else
pfn = page_to_pfn(ttm->pages[page_offset]);

/* pfn must be fault_page_size aligned. */
if ((pfn & (fault_page_size - 1)) != 0)
goto out_fallback;

/* Check that memory is contiguous. */
if (!bo->mem.bus.is_iomem) {
for (i = 1; i < fault_page_size; ++i) {
if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
goto out_fallback;
}
} else if (bo->bdev->driver->io_mem_pfn) {
for (i = 1; i < fault_page_size; ++i) {
if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
goto out_fallback;
}
}

pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
#endif
else
WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);

if (ret != VM_FAULT_NOPAGE)
goto out_fallback;

return VM_FAULT_NOPAGE;
out_fallback:
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
#else
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
struct ttm_buffer_object *bo,
pgoff_t page_offset,
pgoff_t fault_page_size,
pgprot_t pgprot)
{
return VM_FAULT_FALLBACK;
}
#endif
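
As a rough illustration of the two preconditions checked above (the first pfn must be aligned to the fault size, and the backing pfns must be consecutive), here is a small userspace sketch with invented pfn values; it mirrors only the loop structure and is not kernel code:

#include <stdbool.h>
#include <stdio.h>

static bool can_insert_huge(const unsigned long *pfns, unsigned long count,
                            unsigned long fault_page_size)
{
    unsigned long i;

    /* the first pfn must sit on a fault_page_size boundary */
    if (pfns[0] & (fault_page_size - 1))
        return false;

    /* the backing pages must be physically consecutive */
    for (i = 1; i < count; ++i)
        if (pfns[i] != pfns[0] + i)
            return false;

    return true;
}

int main(void)
{
    unsigned long ok[4]  = { 0x1000, 0x1001, 0x1002, 0x1003 };
    unsigned long bad[4] = { 0x1000, 0x1001, 0x1004, 0x1005 };

    /* a fault_page_size of 4 keeps the example short; real PMD faults use 512 */
    printf("%d %d\n", can_insert_huge(ok, 4, 4), can_insert_huge(bad, 4, 4)); /* prints "1 0" */
    return 0;
}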

/**
* ttm_bo_vm_fault_reserved - TTM fault helper
* @vmf: The struct vm_fault given as argument to the fault callback
* @prot: The page protection to be used for this memory area.
* @num_prefault: Maximum number of prefault pages. The caller may want to
* specify this based on madvise settings and the size of the GPU object
* backed by the memory.
* @fault_page_size: The size of the fault in pages.
*
* This function inserts one or more page table entries pointing to the
* memory backing the buffer object, and then returns a return code
@@ -176,7 +260,8 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve);
*/
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pgprot_t prot,
pgoff_t num_prefault)
pgoff_t num_prefault,
pgoff_t fault_page_size)
{
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = vma->vm_private_data;
@@ -268,6 +353,13 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
prot = pgprot_decrypted(prot);
}

/* We don't prefault on huge faults. Yet. */
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) {
ret = ttm_bo_vm_insert_huge(vmf, bo, page_offset,
fault_page_size, prot);
goto out_io_unlock;
}

/*
* Speculatively prefault a number of pages. Only error on
* first page.
@@ -334,7 +426,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return ret;

prot = vma->vm_page_prot;
ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;

@@ -344,6 +436,66 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(ttm_bo_vm_fault);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
* ttm_pgprot_is_wrprotecting - Is a page protection value write-protecting?
* @prot: The page protection value
*
* Return: true if @prot is write-protecting, false otherwise.
*/
static bool ttm_pgprot_is_wrprotecting(pgprot_t prot)
{
/*
* This is meant to say "pgprot_wrprotect(prot) == prot" in a generic
* way. Unfortunately there is no generic pgprot_wrprotect.
*/
return pte_val(pte_wrprotect(__pte(pgprot_val(prot)))) ==
pgprot_val(prot);
}
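
Reduced to a userspace sketch with one invented write-enable bit (real pgprot layouts are architecture-specific), the check above asks: does clearing the write bit leave the value unchanged? If so, the protection was already write-protecting and a huge write fault has to fall back:

#include <stdbool.h>
#include <stdio.h>

#define FAKE_PAGE_RW 0x2UL   /* invented write-enable bit, illustration only */

static unsigned long fake_wrprotect(unsigned long prot)
{
    return prot & ~FAKE_PAGE_RW;
}

static bool is_wrprotecting(unsigned long prot)
{
    /* "wrprotect(prot) == prot" means the write bit was already clear */
    return fake_wrprotect(prot) == prot;
}

int main(void)
{
    printf("%d %d\n", is_wrprotecting(0x1UL), is_wrprotecting(0x3UL)); /* prints "1 0" */
    return 0;
}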

static vm_fault_t ttm_bo_vm_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
{
struct vm_area_struct *vma = vmf->vma;
pgprot_t prot;
struct ttm_buffer_object *bo = vma->vm_private_data;
vm_fault_t ret;
pgoff_t fault_page_size = 0;
bool write = vmf->flags & FAULT_FLAG_WRITE;

switch (pe_size) {
case PE_SIZE_PMD:
fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
case PE_SIZE_PUD:
fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
break;
#endif
default:
WARN_ON_ONCE(1);
return VM_FAULT_FALLBACK;
}

/* Fallback on write dirty-tracking or COW */
if (write && ttm_pgprot_is_wrprotecting(vma->vm_page_prot))
return VM_FAULT_FALLBACK;

ret = ttm_bo_vm_reserve(bo, vmf);
if (ret)
return ret;

prot = vm_get_page_prot(vma->vm_flags);
ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;

dma_resv_unlock(bo->base.resv);

return ret;
}
#endif

void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo = vma->vm_private_data;
@@ -445,7 +597,10 @@ static const struct vm_operations_struct ttm_bo_vm_ops = {
.fault = ttm_bo_vm_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
.access = ttm_bo_vm_access
.access = ttm_bo_vm_access,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
.huge_fault = ttm_bo_vm_huge_fault,
#endif
};

static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
1 change: 1 addition & 0 deletions drivers/gpu/drm/vmwgfx/Makefile
@@ -11,4 +11,5 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \
ttm_object.o ttm_lock.o

vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
13 changes: 13 additions & 0 deletions drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1247,6 +1247,18 @@ static void vmw_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}

static unsigned long
vmw_get_unmapped_area(struct file *file, unsigned long uaddr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
struct drm_file *file_priv = file->private_data;
struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);

return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
&dev_priv->vma_manager);
}

static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
void *ptr)
{
@@ -1418,6 +1430,7 @@ static const struct file_operations vmwgfx_driver_fops = {
.compat_ioctl = vmw_compat_ioctl,
#endif
.llseek = noop_llseek,
.get_unmapped_area = vmw_get_unmapped_area,
};

static struct drm_driver driver = {