afs: Set up the iov_iter before calling afs_extract_data()
Each time it is called, afs_extract_data() sets up a temporary iov_iter
describing the remaining buffer to be filled and passes it to AF_RXRPC.

Instead:

 (1) Put an iterator in the afs_call struct.

 (2) Set the iterator for each marshalling stage to load data into the
     appropriate places.  A number of convenience functions are provided to
     this end (e.g. afs_extract_to_buf(); see the first sketch below).

     This iterator is then passed to afs_extract_data().

 (3) Use the new ITER_XARRAY iterator when reading data to load directly
     into the inode's pages without needing to create a list of them (see
     the second sketch below).

This will allow O_DIRECT calls to be supported in future patches.
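
For illustration, a minimal sketch of the pattern behind (1) and (2).  The
helper name mirrors afs_extract_to_buf() from this patch, but the struct
shown is a simplified stand-in for the real afs_call, not its actual
layout:

    struct afs_call {
            struct iov_iter def_iter;   /* Default buffered iterator */
            struct iov_iter *iter;      /* Iterator currently in use */
            struct kvec     kvec[1];    /* Segment backing def_iter */
            void            *buffer;    /* Scratch space for small extractions */
    };

    /* Point the call's iterator at size bytes of the scratch buffer,
     * ready for afs_extract_data() to fill.
     */
    static void afs_extract_to_buf(struct afs_call *call, size_t size)
    {
            call->kvec[0].iov_base = call->buffer;
            call->kvec[0].iov_len = size;
            iov_iter_kvec(&call->def_iter, READ, call->kvec, 1, size);
            call->iter = &call->def_iter;
    }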
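A second sketch for (3): the directory read path in the fs/afs/dir.c hunk
below aims the request's iterator straight at the inode's page cache using
the new ITER_XARRAY type, rather than assembling an array of page pointers
first:

    iov_iter_xarray(&req->def_iter, READ,
                    &dvnode->vfs_inode.i_mapping->i_pages, 0, i_size);
    req->iter = &req->def_iter;

The pages are then looked up through the xarray at extraction time, so the
request only needs to hold refs on them (dropped afterwards by the new
afs_dir_read_cleanup()) instead of carrying its own page list.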

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/152898380012.11616.12094591785228251717.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/153685394431.14766.3178466345696987059.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/153999787395.866.11218209749223643998.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/154033911195.12041.3882700371848894587.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/158861250059.340223.1248231474865140653.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/159465827399.1377938.11181327349704960046.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/160588533776.3465195.3612752083351956948.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161118151238.1232039.17015723405750601161.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161161047240.2537118.14721975104810564022.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/161340410333.1303470.16260122230371140878.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/161539554187.286939.15305559004905459852.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/161653810525.2770958.4630666029125411789.stgit@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/161789093719.6155.7877160739235087723.stgit@warthog.procyon.org.uk/ # v6
David Howells committed Apr 23, 2021
1 parent 0509275 commit c450846
Showing 6 changed files with 314 additions and 249 deletions.
222 changes: 149 additions & 73 deletions fs/afs/dir.c
@@ -102,6 +102,35 @@ struct afs_lookup_cookie {
struct afs_fid fids[50];
};

/*
* Drop the refs that we're holding on the pages we were reading into. We've
* got refs on the first nr_pages pages.
*/
static void afs_dir_read_cleanup(struct afs_read *req)
{
struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
struct page *page;
pgoff_t last = req->nr_pages - 1;

XA_STATE(xas, &mapping->i_pages, 0);

if (unlikely(!req->nr_pages))
return;

rcu_read_lock();
xas_for_each(&xas, page, last) {
if (xas_retry(&xas, page))
continue;
BUG_ON(xa_is_value(page));
BUG_ON(PageCompound(page));
ASSERTCMP(page->mapping, ==, mapping);

put_page(page);
}

rcu_read_unlock();
}

/*
* check that a directory page is valid
*/
@@ -127,7 +156,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
qty /= sizeof(union afs_xdr_dir_block);

/* check them */
dbuf = kmap(page);
dbuf = kmap_atomic(page);
for (tmp = 0; tmp < qty; tmp++) {
if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
@@ -146,7 +175,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
}

kunmap(page);
kunmap_atomic(dbuf);

checked:
afs_stat_v(dvnode, n_read_dir);
@@ -157,35 +186,74 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
}

/*
* Check the contents of a directory that we've just read.
* Dump the contents of a directory.
*/
static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
{
struct afs_xdr_dir_page *dbuf;
unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
struct address_space *mapping = dvnode->vfs_inode.i_mapping;
struct page *page;
unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
pgoff_t last = req->nr_pages - 1;

for (i = 0; i < req->nr_pages; i++)
if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
goto bad;
return true;
XA_STATE(xas, &mapping->i_pages, 0);

bad:
pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
dvnode->fid.vid, dvnode->fid.vnode,
req->file_size, req->len, req->actual_len, req->remain);
pr_warn("DIR %llx %x %x %x\n",
req->pos, req->index, req->nr_pages, req->offset);
req->file_size, req->len, req->actual_len);
pr_warn("DIR %llx %x %zx %zx\n",
req->pos, req->nr_pages,
req->iter->iov_offset, iov_iter_count(req->iter));

for (i = 0; i < req->nr_pages; i++) {
dbuf = kmap(req->pages[i]);
for (j = 0; j < qty; j++) {
union afs_xdr_dir_block *block = &dbuf->blocks[j];
xas_for_each(&xas, page, last) {
if (xas_retry(&xas, page))
continue;

BUG_ON(PageCompound(page));
BUG_ON(page->mapping != mapping);

dbuf = kmap_atomic(page);
for (i = 0; i < qty; i++) {
union afs_xdr_dir_block *block = &dbuf->blocks[i];

pr_warn("[%02x] %32phN\n", i * qty + j, block);
pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
}
kunmap(req->pages[i]);
kunmap_atomic(dbuf);
}
return false;
}

/*
* Check all the pages in a directory. All the pages are held pinned.
*/
static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
{
struct address_space *mapping = dvnode->vfs_inode.i_mapping;
struct page *page;
pgoff_t last = req->nr_pages - 1;
int ret = 0;

XA_STATE(xas, &mapping->i_pages, 0);

if (unlikely(!req->nr_pages))
return 0;

rcu_read_lock();
xas_for_each(&xas, page, last) {
if (xas_retry(&xas, page))
continue;

BUG_ON(PageCompound(page));
BUG_ON(page->mapping != mapping);

if (!afs_dir_check_page(dvnode, page, req->file_size)) {
afs_dir_dump(dvnode, req);
ret = -EIO;
break;
}
}

rcu_read_unlock();
return ret;
}

/*
@@ -214,81 +282,82 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
{
struct afs_read *req;
loff_t i_size;
int nr_pages, nr_inline, i, n;
int ret = -ENOMEM;
int nr_pages, i, n;
int ret;

_enter("");

retry:
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);

refcount_set(&req->usage, 1);
req->vnode = dvnode;
req->key = key_get(key);
req->cleanup = afs_dir_read_cleanup;

expand:
i_size = i_size_read(&dvnode->vfs_inode);
if (i_size < 2048)
return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
if (i_size < 2048) {
ret = afs_bad(dvnode, afs_file_error_dir_small);
goto error;
}
if (i_size > 2048 * 1024) {
trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
return ERR_PTR(-EFBIG);
ret = -EFBIG;
goto error;
}

_enter("%llu", i_size);

/* Get a request record to hold the page list. We want to hold it
* inline if we can, but we don't want to make an order 1 allocation.
*/
nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
nr_inline = nr_pages;
if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
nr_inline = 0;

req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);

refcount_set(&req->usage, 1);
req->key = key_get(key);
req->nr_pages = nr_pages;
req->actual_len = i_size; /* May change */
req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
req->data_version = dvnode->status.data_version; /* May change */
if (nr_inline > 0) {
req->pages = req->array;
} else {
req->pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_KERNEL);
if (!req->pages)
goto error;
}
iov_iter_xarray(&req->def_iter, READ, &dvnode->vfs_inode.i_mapping->i_pages,
0, i_size);
req->iter = &req->def_iter;

/* Get a list of all the pages that hold or will hold the directory
* content. We need to fill in any gaps that we might find where the
* memory reclaimer has been at work. If there are any gaps, we will
/* Fill in any gaps that we might find where the memory reclaimer has
* been at work and pin all the pages. If there are any gaps, we will
* need to reread the entire directory contents.
*/
i = 0;
do {
i = req->nr_pages;
while (i < nr_pages) {
struct page *pages[8], *page;

n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
req->nr_pages - i,
req->pages + i);
_debug("find %u at %u/%u", n, i, req->nr_pages);
min_t(unsigned int, nr_pages - i,
ARRAY_SIZE(pages)),
pages);
_debug("find %u at %u/%u", n, i, nr_pages);

if (n == 0) {
gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;

if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_stat_v(dvnode, n_inval);

ret = -ENOMEM;
req->pages[i] = __page_cache_alloc(gfp);
if (!req->pages[i])
page = __page_cache_alloc(gfp);
if (!page)
goto error;
ret = add_to_page_cache_lru(req->pages[i],
ret = add_to_page_cache_lru(page,
dvnode->vfs_inode.i_mapping,
i, gfp);
if (ret < 0)
goto error;

attach_page_private(req->pages[i], (void *)1);
unlock_page(req->pages[i]);
attach_page_private(page, (void *)1);
unlock_page(page);
req->nr_pages++;
i++;
} else {
req->nr_pages += n;
i += n;
}
} while (i < req->nr_pages);
}

/* If we're going to reload, we need to lock all the pages to prevent
* races.
@@ -312,12 +381,17 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)

task_io_account_read(PAGE_SIZE * req->nr_pages);

if (req->len < req->file_size)
goto content_has_grown;
if (req->len < req->file_size) {
/* The content has grown, so we need to expand the
* buffer.
*/
up_write(&dvnode->validate_lock);
goto expand;
}

/* Validate the data we just read. */
ret = -EIO;
if (!afs_dir_check_pages(dvnode, req))
ret = afs_dir_check(dvnode, req);
if (ret < 0)
goto error_unlock;

// TODO: Trim excess pages
@@ -335,11 +409,6 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
afs_put_read(req);
_leave(" = %d", ret);
return ERR_PTR(ret);

content_has_grown:
up_write(&dvnode->validate_lock);
afs_put_read(req);
goto retry;
}

/*
@@ -449,6 +518,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
struct afs_read *req;
struct page *page;
unsigned blkoff, limit;
void __rcu **slot;
int ret;

_enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -473,9 +543,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);

/* Fetch the appropriate page from the directory and re-add it
* to the LRU.
* to the LRU. We have all the pages pinned with an extra ref.
*/
page = req->pages[blkoff / PAGE_SIZE];
rcu_read_lock();
page = NULL;
slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
blkoff / PAGE_SIZE);
if (slot)
page = radix_tree_deref_slot(slot);
rcu_read_unlock();
if (!page) {
ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
break;