commit 4e8c0c8 (parent 628c28e)

xen/privcmd: improve performance of MMAPBATCH_V2
Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version) map
multiple frames at a time rather than one at a time, despite the pages
being non-consecutive GFNs.

xen_remap_domain_mfn_array() is added, which maps an array of GFNs
(instead of a consecutive range of GFNs).

Since per-frame errors are returned in an array, privcmd must set the
MMAPBATCH_V1 error bits as part of the "report errors" phase, after
all the frames are mapped.

Migration times are significantly improved (when using a PV toolstack
domain).  For example, for an idle 12 GiB PV guest:

        Before     After
  real  0m38.179s  0m26.868s
  user  0m15.096s  0m13.652s
  sys   0m28.988s  0m18.732s

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
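
For reference, a V1 caller recovers per-frame status from the top nibble of
each returned frame number. Below is a minimal userspace-style sketch of that
decoding, assuming the PRIVCMD_MMAPBATCH_* masks from Xen's uapi privcmd.h;
the helper name is illustrative and not part of this patch:

	#include <errno.h>
	#include <stdint.h>

	/* Error encoding used by IOCTL_PRIVCMD_MMAPBATCH (V1). */
	#define PRIVCMD_MMAPBATCH_MFN_ERROR	0xf0000000U
	#define PRIVCMD_MMAPBATCH_PAGED_ERROR	0x80000000U

	/* Decode one V1 result slot: 0 on success, -ENOENT if the frame
	 * was paged out (the caller may retry), -EINVAL otherwise. */
	static int v1_frame_status(uint64_t mfn)
	{
		if ((mfn & PRIVCMD_MMAPBATCH_MFN_ERROR) ==
		    PRIVCMD_MMAPBATCH_PAGED_ERROR)
			return -ENOENT;
		if (mfn & PRIVCMD_MMAPBATCH_MFN_ERROR)
			return -EINVAL;
		return 0;
	}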
David Vrabel committed on Mar 16, 2015.
5 changed files: 249 additions, 80 deletions.
arch/arm/xen/enlighten.c (16 additions, 4 deletions)

@@ -53,15 +53,27 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 
 static __read_mostly int xen_events_irq = -1;
 
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
 			       unsigned long addr,
-			       xen_pfn_t mfn, int nr,
-			       pgprot_t prot, unsigned domid,
+			       xen_pfn_t *mfn, int nr,
+			       int *err_ptr, pgprot_t prot,
+			       unsigned domid,
 			       struct page **pages)
 {
-	return xen_xlate_remap_gfn_range(vma, addr, mfn, nr,
+	return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
 					 prot, domid, pages);
 }
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+/* Not used by XENFEAT_auto_translated guests. */
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t mfn, int nr,
+			       pgprot_t prot, unsigned domid,
+			       struct page **pages)
+{
+	return -ENOSYS;
+}
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
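
To make the new entry point concrete, here is a hypothetical kernel-side
caller (illustrative only; the helper name and GFN values are invented):
where xen_remap_domain_mfn_range() takes a single starting GFN for a
consecutive block, the array variant takes one GFN per frame plus a
per-frame error buffer.

	/* Hypothetical example, not part of this patch. */
	static int demo_map_scattered_frames(struct vm_area_struct *vma,
					     unsigned long addr,
					     unsigned domid,
					     struct page **pages)
	{
		/* The frames need not be consecutive GFNs. */
		xen_pfn_t gfns[] = { 0x1000, 0x2513, 0x1002, 0x9000 };
		int errs[ARRAY_SIZE(gfns)];

		/* Returns the number of frames mapped (or -errno on a
		 * global failure); per-frame -errno values land in
		 * errs[]. */
		return xen_remap_domain_mfn_array(vma, addr, gfns,
						  ARRAY_SIZE(gfns), errs,
						  vma->vm_page_prot,
						  domid, pages);
	}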
arch/x86/xen/mmu.c (82 additions, 19 deletions)

@@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void)
 #define REMAP_BATCH_SIZE 16
 
 struct remap_data {
-	unsigned long mfn;
+	xen_pfn_t *mfn;
+	bool contiguous;
 	pgprot_t prot;
 	struct mmu_update *mmu_update;
 };
@@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
 				 unsigned long addr, void *data)
 {
 	struct remap_data *rmd = data;
-	pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
+	pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
+
+	/* If we have a contiguous range, just update the mfn itself;
+	 * otherwise advance the pointer to the next mfn. */
+	if (rmd->contiguous)
+		(*rmd->mfn)++;
+	else
+		rmd->mfn++;
 
 	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
 	rmd->mmu_update->val = pte_val_ma(pte);
@@ -2457,36 +2465,42 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
 	return 0;
 }
 
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t mfn, int nr,
-			       pgprot_t prot, unsigned domid,
-			       struct page **pages)
+static int do_remap_mfn(struct vm_area_struct *vma,
+			unsigned long addr,
+			xen_pfn_t *mfn, int nr,
+			int *err_ptr, pgprot_t prot,
+			unsigned domid,
+			struct page **pages)
 {
+	int err = 0;
 	struct remap_data rmd;
 	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
-	int batch;
 	unsigned long range;
-	int err = 0;
+	int mapped = 0;
 
 	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
 	if (xen_feature(XENFEAT_auto_translated_physmap)) {
 #ifdef CONFIG_XEN_PVH
 		/* We need to update the local page tables and the xen HAP */
-		return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
-						 domid, pages);
+		return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
+						 prot, domid, pages);
#else
 		return -EINVAL;
#endif
 	}
 
 	rmd.mfn = mfn;
 	rmd.prot = prot;
+	/* We use err_ptr to indicate whether we are doing a contiguous
+	 * mapping or a discontiguous mapping. */
+	rmd.contiguous = !err_ptr;
 
 	while (nr) {
-		batch = min(REMAP_BATCH_SIZE, nr);
+		int index = 0;
+		int done = 0;
+		int batch = min(REMAP_BATCH_SIZE, nr);
+		int batch_left = batch;
 		range = (unsigned long)batch << PAGE_SHIFT;
 
 		rmd.mmu_update = mmu_update;
@@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 		if (err)
 			goto out;
 
-		err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
-		if (err < 0)
-			goto out;
+		/* We record the error for each page that gives an error,
+		 * but continue mapping until the whole set is done. */
+		do {
+			int i;
+
+			err = HYPERVISOR_mmu_update(&mmu_update[index],
+						    batch_left, &done, domid);
+
+			/*
+			 * @err_ptr may be the same buffer as @mfn, so
+			 * only clear it after each chunk of @mfn is
+			 * used.
+			 */
+			if (err_ptr) {
+				for (i = index; i < index + done; i++)
+					err_ptr[i] = 0;
+			}
+			if (err < 0) {
+				if (!err_ptr)
+					goto out;
+				err_ptr[i] = err;
+				done++; /* Skip failed frame. */
+			} else
+				mapped += done;
+			batch_left -= done;
+			index += done;
+		} while (batch_left);
 
 		nr -= batch;
 		addr += range;
+		if (err_ptr)
+			err_ptr += batch;
 	}
 
 	err = 0;
 out:
 
 	xen_flush_tlb_all();
 
-	return err;
+	return err < 0 ? err : mapped;
 }
 
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t mfn, int nr,
+			       pgprot_t prot, unsigned domid,
+			       struct page **pages)
+{
+	return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
+}
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t *mfn, int nr,
+			       int *err_ptr, pgprot_t prot,
+			       unsigned domid, struct page **pages)
+{
+	/* We BUG_ON because it's a programmer error to pass a NULL
+	 * err_ptr, and it is quite hard to trace the consequences
+	 * back to their actual cause of "wrong memory was mapped in".
+	 */
+	BUG_ON(err_ptr == NULL);
+	return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
+}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+
 /* Returns: 0 success */
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
 			       int numpgs, struct page **pages)
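
The do/while loop above relies on HYPERVISOR_mmu_update reporting, through
its third argument, how many entries it completed before hitting an error.
A standalone sketch of that resumable-batch pattern (illustration only, not
kernel code; process_batch and op are invented names):

	/* @op processes items until one fails or all are done, stores
	 * the number completed in *done, and returns 0 or a negative
	 * error for the failing item.  Record per-item errors, skip the
	 * bad item, and resume until the whole batch has been visited. */
	static int process_batch(long *items, int *errs, int n,
				 int (*op)(long *items, int n, int *done))
	{
		int index = 0, mapped = 0;

		while (index < n) {
			int i, done = 0;
			int err = op(&items[index], n - index, &done);

			for (i = index; i < index + done; i++)
				errs[i] = 0;	/* these succeeded */
			if (err < 0) {
				errs[index + done] = err; /* record it... */
				done++;		/* ...and skip that item */
			} else {
				mapped += done;
			}
			index += done;
		}
		return mapped;
	}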
drivers/xen/privcmd.c (82 additions, 35 deletions)

@@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size,
 	return ret;
 }
 
+/*
+ * Similar to traverse_pages, but use each page as a "block" of
+ * data to be processed as one unit.
+ */
+static int traverse_pages_block(unsigned nelem, size_t size,
+				struct list_head *pos,
+				int (*fn)(void *data, int nr, void *state),
+				void *state)
+{
+	void *pagedata;
+	unsigned pageidx;
+	int ret = 0;
+
+	BUG_ON(size > PAGE_SIZE);
+
+	pageidx = PAGE_SIZE;
+
+	while (nelem) {
+		int nr = (PAGE_SIZE/size);
+		struct page *page;
+		if (nr > nelem)
+			nr = nelem;
+		pos = pos->next;
+		page = list_entry(pos, struct page, lru);
+		pagedata = page_address(page);
+		ret = (*fn)(pagedata, nr, state);
+		if (ret)
+			break;
+		nelem -= nr;
+	}
+
+	return ret;
+}
+
 struct mmap_mfn_state {
 	unsigned long va;
 	struct vm_area_struct *vma;
@@ -274,39 +308,25 @@ struct mmap_batch_state {
 /* auto translated dom0 note: if domU being created is PV, then mfn is
  * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
  */
-static int mmap_batch_fn(void *data, void *state)
+static int mmap_batch_fn(void *data, int nr, void *state)
 {
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
 	struct vm_area_struct *vma = st->vma;
 	struct page **pages = vma->vm_private_data;
-	struct page *cur_page = NULL;
+	struct page **cur_pages = NULL;
 	int ret;
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
-		cur_page = pages[st->index++];
+		cur_pages = &pages[st->index];
 
-	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-					 st->vma->vm_page_prot, st->domain,
-					 &cur_page);
+	BUG_ON(nr < 0);
+	ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
+					 (int *)mfnp, st->vma->vm_page_prot,
+					 st->domain, cur_pages);
 
-	/* Store error code for second pass. */
-	if (st->version == 1) {
-		if (ret < 0) {
-			/*
-			 * V1 encodes the error codes in the 32bit top
-			 * nibble of the mfn (with its known limitations
-			 * vis-a-vis 64 bit callers).
-			 */
-			*mfnp |= (ret == -ENOENT) ?
-				PRIVCMD_MMAPBATCH_PAGED_ERROR :
-				PRIVCMD_MMAPBATCH_MFN_ERROR;
-		}
-	} else { /* st->version == 2 */
-		*((int *) mfnp) = ret;
-	}
-
-	/* And see if it affects the global_error. */
-	if (ret < 0) {
+	/* Adjust the global_error? */
+	if (ret != nr) {
 		if (ret == -ENOENT)
 			st->global_error = -ENOENT;
 		else {
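
Note the aliasing trick above: mmap_batch_fn passes the same buffer as both
the GFN array (mfnp) and the per-frame error array ((int *)mfnp). Because an
int is half the width of a 64-bit xen_pfn_t, errs[i] overlays gfns[i/2];
this is why the x86 side clears error slots only after the corresponding
chunk of GFNs has been consumed. A standalone sketch of why the overlay is
safe (illustration only; like the kernel, it assumes a build with
-fno-strict-aliasing):

	#include <stdint.h>

	typedef uint64_t xen_pfn_t;

	/* errs[i] occupies bytes [4*i, 4*i + 4), which lie inside
	 * gfns[i/2]; since i/2 <= i, writing an error only ever
	 * overwrites input that has already been read. */
	static void demo_inplace_errors(xen_pfn_t *gfns, int nr)
	{
		int *errs = (int *)gfns;	/* two views of one buffer */
		int i;

		for (i = 0; i < nr; i++) {
			xen_pfn_t gfn = gfns[i]; /* consume the input first */
			(void)gfn;		 /* ... map the frame ... */
			errs[i] = 0;	/* then record its per-frame status */
		}
	}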
@@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state)
 			st->global_error = 1;
 		}
 	}
-	st->va += PAGE_SIZE;
+	st->va += PAGE_SIZE * nr;
+	st->index += nr;
 
 	return 0;
 }
 
-static int mmap_return_errors(void *data, void *state)
+static int mmap_return_error(int err, struct mmap_batch_state *st)
 {
-	struct mmap_batch_state *st = state;
 	int ret;
 
 	if (st->version == 1) {
-		xen_pfn_t mfnp = *((xen_pfn_t *) data);
-		if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
-			return __put_user(mfnp, st->user_mfn++);
-		else
+		if (err) {
+			xen_pfn_t mfn;
+
+			ret = get_user(mfn, st->user_mfn);
+			if (ret < 0)
+				return ret;
+			/*
+			 * V1 encodes the error codes in the 32bit top
+			 * nibble of the mfn (with its known
+			 * limitations vis-a-vis 64 bit callers).
+			 */
+			mfn |= (err == -ENOENT) ?
+				PRIVCMD_MMAPBATCH_PAGED_ERROR :
+				PRIVCMD_MMAPBATCH_MFN_ERROR;
+			return __put_user(mfn, st->user_mfn++);
+		} else
 			st->user_mfn++;
 	} else { /* st->version == 2 */
-		int err = *((int *) data);
 		if (err)
 			return __put_user(err, st->user_err++);
 		else
@@ -341,6 +373,21 @@
 	return 0;
 }
 
+static int mmap_return_errors(void *data, int nr, void *state)
+{
+	struct mmap_batch_state *st = state;
+	int *errs = data;
+	int i;
+	int ret;
+
+	for (i = 0; i < nr; i++) {
+		ret = mmap_return_error(errs[i], st);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
 /* Allocate pfns that are then mapped with gmfns from foreign domid. Update
  * the vma with the page info to use later.
  * Returns: 0 if success, otherwise -errno
@@ -472,17 +519,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 	state.version = version;
 
 	/* mmap_batch_fn guarantees ret == 0 */
-	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
-			      &pagelist, mmap_batch_fn, &state));
+	BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
+				    &pagelist, mmap_batch_fn, &state));
 
 	up_write(&mm->mmap_sem);
 
 	if (state.global_error) {
 		/* Write back errors in second pass. */
 		state.user_mfn = (xen_pfn_t *)m.arr;
 		state.user_err = m.err;
-		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-				     &pagelist, mmap_return_errors, &state);
+		ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
+					   &pagelist, mmap_return_errors, &state);
 	} else
 		ret = 0;
(The diffs for the remaining two changed files did not render on the original page and are not shown here.)
