netfs: Replace PG_fscache by setting folio->private and marking dirty
When dirty data is being written to the cache, setting/waiting on/clearing
the fscache flag is always done in tandem with setting/waiting on/clearing
the writeback flag.  The netfslib buffered write routines wait on and set
both flags, and the write request cleanup clears both flags, so the fscache
flag is almost superfluous.
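
For reference, the tandem pattern being dropped looks like this in the
buffered write-through path (a simplified excerpt of the old code in the
fs/netfs/buffered_write.c hunk below; the two fscache calls are what go
away):

	if (!folio_test_writeback(folio)) {
		folio_wait_fscache(folio);
		folio_start_writeback(folio);
		folio_start_fscache(folio);
	}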

The reason it isn't entirely superfluous is that the fscache flag is also
used to indicate that data just read from the server is being written to the
cache.  There, the flag is used to prevent a race involving overlapping
direct-I/O writes to the cache.

Change this to indicate that a folio needs to be copied to the cache by
placing a magic value in folio->private and marking the folio dirty.  Then,
when the writeback code sees a folio marked in this way, it writes it only
to the cache and not to the server.
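
As a rough sketch (not the literal code; the real logic is in the fs/netfs
changes below), the read side marks the folio and the writeback side checks
for the marker:

	/* After reading data from the server, note that the folio still
	 * needs to be copied to the cache.
	 */
	if (!folio_get_private(folio)) {
		folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
		filemap_dirty_folio(folio->mapping, folio);
	}

	/* During writeback, a folio carrying only the marker is written to
	 * the cache and is not uploaded to the server.
	 */
	if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE)
		__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);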

If a folio that has this magic value set is modified, the value is just
replaced and the folio will then be uploaded to the server as well.
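
That replacement happens where a write attaches its group to the folio; the
reworked netfs_set_group() in the buffered_write.c hunk below does roughly:

	void *priv = folio_get_private(folio);

	if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
		folio_attach_private(folio, netfs_get_group(netfs_group));
	else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
		folio_detach_private(folio);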

With this, PG_fscache is no longer required by the netfslib core, 9p and
afs.

Ceph and nfs, however, still need to use the old PG_fscache-based tracking.
To deal with this, a flag, NETFS_ICTX_USE_PGPRIV2, now has to be set in the
flags of the netfs_inode struct for those filesystems.  This re-enables the
use of PG_fscache for that inode.  9p and afs use the netfslib write helpers
and so are switched over; cifs, for the moment, does page-by-page manual
access to the cache, so it doesn't use PG_fscache and is unaffected.
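
For example, ceph opts back in at inode initialisation time (see the
fs/ceph/inode.c hunk below):

	/* Set parameters for the netfs library */
	netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
	__set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);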

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Ronnie Sahlberg <ronniesahlberg@gmail.com>
cc: Shyam Prasad N <sprasad@microsoft.com>
cc: Tom Talpey <tom@talpey.com>
cc: Bharath SM <bharathsm@microsoft.com>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: Anna Schumaker <anna@kernel.org>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: linux-nfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
David Howells committed Apr 29, 2024
1 parent 5f24162 commit 2ff1e97
Showing 14 changed files with 143 additions and 97 deletions.
2 changes: 1 addition & 1 deletion fs/ceph/addr.c
@@ -517,7 +517,7 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
struct fscache_cookie *cookie = ceph_fscache_cookie(ci);

fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode),
ceph_fscache_write_terminated, inode, caching);
ceph_fscache_write_terminated, inode, true, caching);
}
#else
static inline void ceph_set_page_fscache(struct page *page)
2 changes: 2 additions & 0 deletions fs/ceph/inode.c
@@ -577,6 +577,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)

/* Set parameters for the netfs library */
netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
__set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);

spin_lock_init(&ci->i_ceph_lock);

36 changes: 26 additions & 10 deletions fs/netfs/buffered_read.c
@@ -10,8 +10,11 @@
#include "internal.h"

/*
* Unlock the folios in a read operation. We need to set PG_fscache on any
* Unlock the folios in a read operation. We need to set PG_writeback on any
* folios we're going to write back before we unlock them.
*
* Note that if the deprecated NETFS_RREQ_USE_PGPRIV2 is set then we use
* PG_private_2 and do a direct write to the cache from here instead.
*/
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
@@ -48,25 +51,31 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
xas_for_each(&xas, folio, last_page) {
loff_t pg_end;
bool pg_failed = false;
bool folio_started;
bool wback_to_cache = false;
bool folio_started = false;

if (xas_retry(&xas, folio))
continue;

pg_end = folio_pos(folio) + folio_size(folio) - 1;

folio_started = false;
for (;;) {
loff_t sreq_end;

if (!subreq) {
pg_failed = true;
break;
}
if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
folio_start_fscache(folio);
folio_started = true;
if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE,
&subreq->flags)) {
trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
folio_start_fscache(folio);
folio_started = true;
}
} else {
wback_to_cache |=
test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
}
pg_failed |= subreq_failed;
sreq_end = subreq->start + subreq->len - 1;
@@ -98,6 +107,11 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
kfree(finfo);
}
folio_mark_uptodate(folio);
if (wback_to_cache && !WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
filemap_dirty_folio(folio->mapping, folio);
}
}

if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
@@ -491,9 +505,11 @@ int netfs_write_begin(struct netfs_inode *ctx,
netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
ret = folio_wait_fscache_killable(folio);
if (ret < 0)
goto error;
if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) {
ret = folio_wait_fscache_killable(folio);
if (ret < 0)
goto error;
}
have_folio_no_wait:
*_folio = folio;
_leave(" = 0");
93 changes: 44 additions & 49 deletions fs/netfs/buffered_write.c
@@ -30,21 +30,13 @@ static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq);

static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
{
if (netfs_group && !folio_get_private(folio))
folio_attach_private(folio, netfs_get_group(netfs_group));
}
void *priv = folio_get_private(folio);

#if IS_ENABLED(CONFIG_FSCACHE)
static void netfs_folio_start_fscache(bool caching, struct folio *folio)
{
if (caching)
folio_start_fscache(folio);
}
#else
static void netfs_folio_start_fscache(bool caching, struct folio *folio)
{
if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
folio_attach_private(folio, netfs_get_group(netfs_group));
else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
folio_detach_private(folio);
}
#endif

/*
* Decide how we should modify a folio. We might be attempting to do
@@ -63,11 +55,12 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
bool maybe_trouble)
{
struct netfs_folio *finfo = netfs_folio_info(folio);
struct netfs_group *group = netfs_folio_group(folio);
loff_t pos = folio_file_pos(folio);

_enter("");

if (netfs_folio_group(folio) != netfs_group)
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE)
return NETFS_FLUSH_CONTENT;

if (folio_test_uptodate(folio))
@@ -396,9 +389,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
folio_clear_dirty_for_io(folio);
/* We make multiple writes to the folio... */
if (!folio_test_writeback(folio)) {
folio_wait_fscache(folio);
folio_start_writeback(folio);
folio_start_fscache(folio);
if (wreq->iter.count == 0)
trace_netfs_folio(folio, netfs_folio_trace_wthru);
else
@@ -528,6 +519,7 @@ EXPORT_SYMBOL(netfs_file_write_iter);
*/
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
{
struct netfs_group *group;
struct folio *folio = page_folio(vmf->page);
struct file *file = vmf->vma->vm_file;
struct inode *inode = file_inode(file);
@@ -550,7 +542,8 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
goto out;
}

if (netfs_folio_group(folio) != netfs_group) {
group = netfs_folio_group(folio);
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) {
folio_unlock(folio);
err = filemap_fdatawait_range(inode->i_mapping,
folio_pos(folio),
@@ -606,8 +599,6 @@ static void netfs_kill_pages(struct address_space *mapping,

trace_netfs_folio(folio, netfs_folio_trace_kill);
folio_clear_uptodate(folio);
if (folio_test_fscache(folio))
folio_end_fscache(folio);
folio_end_writeback(folio);
folio_lock(folio);
generic_error_remove_folio(mapping, folio);
@@ -643,8 +634,6 @@ static void netfs_redirty_pages(struct address_space *mapping,
next = folio_next_index(folio);
trace_netfs_folio(folio, netfs_folio_trace_redirty);
filemap_dirty_folio(mapping, folio);
if (folio_test_fscache(folio))
folio_end_fscache(folio);
folio_end_writeback(folio);
folio_put(folio);
} while (index = next, index <= last);
@@ -700,7 +689,11 @@ static void netfs_pages_written_back(struct netfs_io_request *wreq)
if (!folio_test_dirty(folio)) {
folio_detach_private(folio);
gcount++;
trace_netfs_folio(folio, netfs_folio_trace_clear_g);
if (group == NETFS_FOLIO_COPY_TO_CACHE)
trace_netfs_folio(folio,
netfs_folio_trace_end_copy);
else
trace_netfs_folio(folio, netfs_folio_trace_clear_g);
} else {
trace_netfs_folio(folio, netfs_folio_trace_redirtied);
}
@@ -724,8 +717,6 @@ static void netfs_pages_written_back(struct netfs_io_request *wreq)
trace_netfs_folio(folio, netfs_folio_trace_clear);
}
end_wb:
if (folio_test_fscache(folio))
folio_end_fscache(folio);
xas_advance(&xas, folio_next_index(folio) - 1);
folio_end_writeback(folio);
}
@@ -795,7 +786,6 @@ static void netfs_extend_writeback(struct address_space *mapping,
long *_count,
loff_t start,
loff_t max_len,
bool caching,
size_t *_len,
size_t *_top)
{
@@ -846,8 +836,7 @@ static void netfs_extend_writeback(struct address_space *mapping,
break;
}
if (!folio_test_dirty(folio) ||
folio_test_writeback(folio) ||
folio_test_fscache(folio)) {
folio_test_writeback(folio)) {
folio_unlock(folio);
folio_put(folio);
xas_reset(xas);
@@ -860,7 +849,8 @@ static void netfs_extend_writeback(struct address_space *mapping,
if ((const struct netfs_group *)priv != group) {
stop = true;
finfo = netfs_folio_info(folio);
if (finfo->netfs_group != group ||
if (!finfo ||
finfo->netfs_group != group ||
finfo->dirty_offset > 0) {
folio_unlock(folio);
folio_put(folio);
@@ -894,12 +884,14 @@

for (i = 0; i < folio_batch_count(&fbatch); i++) {
folio = fbatch.folios[i];
trace_netfs_folio(folio, netfs_folio_trace_store_plus);
if (group == NETFS_FOLIO_COPY_TO_CACHE)
trace_netfs_folio(folio, netfs_folio_trace_copy_plus);
else
trace_netfs_folio(folio, netfs_folio_trace_store_plus);

if (!folio_clear_dirty_for_io(folio))
BUG();
folio_start_writeback(folio);
netfs_folio_start_fscache(caching, folio);
folio_unlock(folio);
}

@@ -925,14 +917,14 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
struct netfs_inode *ctx = netfs_inode(mapping->host);
unsigned long long i_size = i_size_read(&ctx->inode);
size_t len, max_len;
bool caching = netfs_is_cache_enabled(ctx);
long count = wbc->nr_to_write;
int ret;

_enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching);
_enter(",%lx,%llx-%llx", folio->index, start, end);

wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio),
NETFS_WRITEBACK);
group == NETFS_FOLIO_COPY_TO_CACHE ?
NETFS_COPY_TO_CACHE : NETFS_WRITEBACK);
if (IS_ERR(wreq)) {
folio_unlock(folio);
return PTR_ERR(wreq);
@@ -941,7 +933,6 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
if (!folio_clear_dirty_for_io(folio))
BUG();
folio_start_writeback(folio);
netfs_folio_start_fscache(caching, folio);

count -= folio_nr_pages(folio);

@@ -950,7 +941,10 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
* immediately lockable, is not dirty or is missing, or we reach the
* end of the range.
*/
trace_netfs_folio(folio, netfs_folio_trace_store);
if (group == NETFS_FOLIO_COPY_TO_CACHE)
trace_netfs_folio(folio, netfs_folio_trace_copy);
else
trace_netfs_folio(folio, netfs_folio_trace_store);

len = wreq->len;
finfo = netfs_folio_info(folio);
@@ -973,7 +967,7 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,

if (len < max_len)
netfs_extend_writeback(mapping, group, xas, &count, start,
max_len, caching, &len, &wreq->upper_len);
max_len, &len, &wreq->upper_len);
}

cant_expand:
@@ -997,15 +991,18 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,

iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start,
wreq->upper_len);
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback);
if (group != NETFS_FOLIO_COPY_TO_CACHE) {
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback);
} else {
ret = netfs_begin_write(wreq, true, netfs_write_trace_copy_to_cache);
}
if (ret == 0 || ret == -EIOCBQUEUED)
wbc->nr_to_write -= len / PAGE_SIZE;
} else {
_debug("write discard %zx @%llx [%llx]", len, start, i_size);

/* The dirty region was entirely beyond the EOF. */
fscache_clear_page_bits(mapping, start, len, caching);
netfs_pages_written_back(wreq);
ret = 0;
}
@@ -1058,9 +1055,11 @@ static ssize_t netfs_writepages_begin(struct address_space *mapping,

/* Skip any dirty folio that's not in the group of interest. */
priv = folio_get_private(folio);
if ((const struct netfs_group *)priv != group) {
finfo = netfs_folio_info(folio);
if (finfo->netfs_group != group) {
if ((const struct netfs_group *)priv == NETFS_FOLIO_COPY_TO_CACHE) {
group = NETFS_FOLIO_COPY_TO_CACHE;
} else if ((const struct netfs_group *)priv != group) {
finfo = __netfs_folio_info(priv);
if (!finfo || finfo->netfs_group != group) {
folio_put(folio);
continue;
}
@@ -1099,14 +1098,10 @@ static ssize_t netfs_writepages_begin(struct address_space *mapping,
goto search_again;
}

if (folio_test_writeback(folio) ||
folio_test_fscache(folio)) {
if (folio_test_writeback(folio)) {
folio_unlock(folio);
if (wbc->sync_mode != WB_SYNC_NONE) {
folio_wait_writeback(folio);
#ifdef CONFIG_FSCACHE
folio_wait_fscache(folio);
#endif
goto lock_again;
}

@@ -1265,7 +1260,8 @@ int netfs_launder_folio(struct folio *folio)

bvec_set_folio(&bvec, folio, len, offset);
iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len);
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
if (group != NETFS_FOLIO_COPY_TO_CACHE)
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
ret = netfs_begin_write(wreq, true, netfs_write_trace_launder);

out_put:
@@ -1274,7 +1270,6 @@ int netfs_launder_folio(struct folio *folio)
kfree(finfo);
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
out:
folio_wait_fscache(folio);
_leave(" = %d", ret);
return ret;
}
(Diffs for the remaining changed files were not loaded.)
