From 0fac04e4e0ea9ca51ac16d1a3d0e5dfe2919a6dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Apr 2024 08:16:14 +0200 Subject: [PATCH 1/6] iomap: convert iomap_writepages to writeack_iter This removes one indirect function call per folio, and adds type safety by not casting through a void pointer. Based on a patch by Matthew Wilcox. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240412061614.1511629-1-hch@lst.de Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 4e8e41c8b3c0e..e09441f4fceb6 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1958,18 +1958,13 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, return error; } -static int iomap_do_writepage(struct folio *folio, - struct writeback_control *wbc, void *data) -{ - return iomap_writepage_map(data, wbc, folio); -} - int iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, const struct iomap_writeback_ops *ops) { - int ret; + struct folio *folio = NULL; + int error; /* * Writeback from reclaim context should never happen except in the case @@ -1980,8 +1975,9 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, return -EIO; wpc->ops = ops; - ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc); - return iomap_submit_ioend(wpc, ret); + while ((folio = writeback_iter(mapping, wbc, folio, &error))) + error = iomap_writepage_map(wpc, wbc, folio); + return iomap_submit_ioend(wpc, error); } EXPORT_SYMBOL_GPL(iomap_writepages); From 89c6c1d91ab2dc05c6f4ad504b03169aa32694cc Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:44 +0800 Subject: [PATCH 2/6] iomap: drop the write failure handles when unsharing and zeroing Unsharing and zeroing can only happen within EOF, so there is never a need to perform posteof pagecache truncation if write begin fails, also partial write could never theoretically happened from iomap_write_end(), so remove both of them. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-6-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index e09441f4fceb6..2bd9e780ff34d 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -824,7 +824,6 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, out_unlock: __iomap_put_folio(iter, pos, 0, folio); - iomap_write_failed(iter->inode, pos, len); return status; } @@ -901,8 +900,6 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); - if (ret < len) - iomap_write_failed(iter->inode, pos + ret, len - ret); return ret; } @@ -950,8 +947,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) } status = iomap_write_begin(iter, pos, bytes, &folio); - if (unlikely(status)) + if (unlikely(status)) { + iomap_write_failed(iter->inode, pos, bytes); break; + } if (iter->iomap.flags & IOMAP_F_STALE) break; @@ -965,6 +964,9 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); status = iomap_write_end(iter, pos, bytes, copied, folio); + if (status < bytes) + iomap_write_failed(iter->inode, pos + status, + bytes - status); if (unlikely(copied != status)) iov_iter_revert(i, copied - status); From 943bc0882cebf482422640924062a7daac5a27ba Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:45 +0800 Subject: [PATCH 3/6] iomap: don't increase i_size if it's not a write operation Increase i_size in iomap_zero_range() and iomap_unshare_iter() is not needed, the caller should handle it. Especially, when truncate partial block, we should not increase i_size beyond the new EOF here. It doesn't affect xfs and gfs2 now because they set the new file size after zero out, it doesn't matter that a transient increase in i_size, but it will affect ext4 because it set file size before truncate. So move the i_size updating logic to iomap_write_iter(). Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-7-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 2bd9e780ff34d..620c830dfec4e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -875,32 +875,13 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); - loff_t old_size = iter->inode->i_size; - size_t ret; - - if (srcmap->type == IOMAP_INLINE) { - ret = iomap_write_end_inline(iter, folio, pos, copied); - } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { - ret = block_write_end(NULL, iter->inode->i_mapping, pos, len, - copied, &folio->page, NULL); - } else { - ret = __iomap_write_end(iter->inode, pos, len, copied, folio); - } - - /* - * Update the in-memory inode size after copying the data into the page - * cache. It's up to the file system to write the updated size to disk, - * preferably after I/O completion so that no stale data is exposed. - */ - if (pos + ret > old_size) { - i_size_write(iter->inode, pos + ret); - iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; - } - __iomap_put_folio(iter, pos, ret, folio); - if (old_size < pos) - pagecache_isize_extended(iter->inode, old_size, pos); - return ret; + if (srcmap->type == IOMAP_INLINE) + return iomap_write_end_inline(iter, folio, pos, copied); + if (srcmap->flags & IOMAP_F_BUFFER_HEAD) + return block_write_end(NULL, iter->inode->i_mapping, pos, len, + copied, &folio->page, NULL); + return __iomap_write_end(iter->inode, pos, len, copied, folio); } static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) @@ -915,6 +896,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) do { struct folio *folio; + loff_t old_size; size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ @@ -964,6 +946,22 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); status = iomap_write_end(iter, pos, bytes, copied, folio); + /* + * Update the in-memory inode size after copying the data into + * the page cache. It's up to the file system to write the + * updated size to disk, preferably after I/O completion so that + * no stale data is exposed. Only once that's done can we + * unlock and release the folio. + */ + old_size = iter->inode->i_size; + if (pos + status > old_size) { + i_size_write(iter->inode, pos + status); + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; + } + __iomap_put_folio(iter, pos, status, folio); + + if (old_size < pos) + pagecache_isize_extended(iter->inode, old_size, pos); if (status < bytes) iomap_write_failed(iter->inode, pos + status, bytes - status); @@ -1336,6 +1334,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) bytes = folio_size(folio) - offset; bytes = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(bytes == 0)) return -EIO; @@ -1400,6 +1399,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_mark_accessed(folio); bytes = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(bytes == 0)) return -EIO; From 1a61d74932d460f47ffaf08927dbe9d43315841f Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:46 +0800 Subject: [PATCH 4/6] iomap: use a new variable to handle the written bytes in iomap_write_iter() In iomap_write_iter(), the status variable used to receive the return value from iomap_write_end() is confusing, replace it with a new written variable to represent the written bytes in each cycle, no logic changes. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-8-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 620c830dfec4e..1333a0d96f832 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -889,7 +889,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) loff_t length = iomap_length(iter); size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; loff_t pos = iter->pos; - ssize_t written = 0; + ssize_t total_written = 0; long status = 0; struct address_space *mapping = iter->inode->i_mapping; unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0; @@ -900,6 +900,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ + size_t written; /* Bytes have been written */ bytes = iov_iter_count(i); retry: @@ -944,7 +945,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); - status = iomap_write_end(iter, pos, bytes, copied, folio); + written = iomap_write_end(iter, pos, bytes, copied, folio); /* * Update the in-memory inode size after copying the data into @@ -954,22 +955,22 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) * unlock and release the folio. */ old_size = iter->inode->i_size; - if (pos + status > old_size) { - i_size_write(iter->inode, pos + status); + if (pos + written > old_size) { + i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } - __iomap_put_folio(iter, pos, status, folio); + __iomap_put_folio(iter, pos, written, folio); if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); - if (status < bytes) - iomap_write_failed(iter->inode, pos + status, - bytes - status); - if (unlikely(copied != status)) - iov_iter_revert(i, copied - status); + if (written < bytes) + iomap_write_failed(iter->inode, pos + written, + bytes - written); + if (unlikely(copied != written)) + iov_iter_revert(i, copied - written); cond_resched(); - if (unlikely(status == 0)) { + if (unlikely(written == 0)) { /* * A short copy made iomap_write_end() reject the * thing entirely. Might be memory poisoning @@ -983,17 +984,17 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) goto retry; } } else { - pos += status; - written += status; - length -= status; + pos += written; + total_written += written; + length -= written; } } while (iov_iter_count(i) && length); if (status == -EAGAIN) { - iov_iter_revert(i, written); + iov_iter_revert(i, total_written); return -EAGAIN; } - return written ? written : status; + return total_written ? total_written : status; } ssize_t From 815f4b633ba1c9c6a151f251616f23e2407fcf24 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:47 +0800 Subject: [PATCH 5/6] iomap: make iomap_write_end() return a boolean For now, we can make sure iomap_write_end() always return 0 or copied bytes, so instead of return written bytes, convert to return a boolean to indicate the copied bytes have been written to the pagecache. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-9-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 48 +++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 1333a0d96f832..73241fbaff6bc 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -828,7 +828,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, return status; } -static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, +static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len, size_t copied, struct folio *folio) { flush_dcache_folio(folio); @@ -845,14 +845,14 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, * redo the whole thing. */ if (unlikely(copied < len && !folio_test_uptodate(folio))) - return 0; + return false; iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len); iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied); filemap_dirty_folio(inode->i_mapping, folio); - return copied; + return true; } -static size_t iomap_write_end_inline(const struct iomap_iter *iter, +static void iomap_write_end_inline(const struct iomap_iter *iter, struct folio *folio, loff_t pos, size_t copied) { const struct iomap *iomap = &iter->iomap; @@ -867,20 +867,31 @@ static size_t iomap_write_end_inline(const struct iomap_iter *iter, kunmap_local(addr); mark_inode_dirty(iter->inode); - return copied; } -/* Returns the number of bytes copied. May be 0. Cannot be an errno. */ -static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, +/* + * Returns true if all copied bytes have been written to the pagecache, + * otherwise return false. + */ +static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); - if (srcmap->type == IOMAP_INLINE) - return iomap_write_end_inline(iter, folio, pos, copied); - if (srcmap->flags & IOMAP_F_BUFFER_HEAD) - return block_write_end(NULL, iter->inode->i_mapping, pos, len, - copied, &folio->page, NULL); + if (srcmap->type == IOMAP_INLINE) { + iomap_write_end_inline(iter, folio, pos, copied); + return true; + } + + if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { + size_t bh_written; + + bh_written = block_write_end(NULL, iter->inode->i_mapping, pos, + len, copied, &folio->page, NULL); + WARN_ON_ONCE(bh_written != copied && bh_written != 0); + return bh_written == copied; + } + return __iomap_write_end(iter->inode, pos, len, copied, folio); } @@ -945,7 +956,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); - written = iomap_write_end(iter, pos, bytes, copied, folio); + written = iomap_write_end(iter, pos, bytes, copied, folio) ? + copied : 0; /* * Update the in-memory inode size after copying the data into @@ -1323,6 +1335,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) int status; size_t offset; size_t bytes = min_t(u64, SIZE_MAX, length); + bool ret; status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) @@ -1334,9 +1347,9 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) if (bytes > folio_size(folio) - offset) bytes = folio_size(folio) - offset; - bytes = iomap_write_end(iter, pos, bytes, bytes, folio); + ret = iomap_write_end(iter, pos, bytes, bytes, folio); __iomap_put_folio(iter, pos, bytes, folio); - if (WARN_ON_ONCE(bytes == 0)) + if (WARN_ON_ONCE(!ret)) return -EIO; cond_resched(); @@ -1385,6 +1398,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) int status; size_t offset; size_t bytes = min_t(u64, SIZE_MAX, length); + bool ret; status = iomap_write_begin(iter, pos, bytes, &folio); if (status) @@ -1399,9 +1413,9 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio); - bytes = iomap_write_end(iter, pos, bytes, bytes, folio); + ret = iomap_write_end(iter, pos, bytes, bytes, folio); __iomap_put_folio(iter, pos, bytes, folio); - if (WARN_ON_ONCE(bytes == 0)) + if (WARN_ON_ONCE(!ret)) return -EIO; pos += bytes; From e1f453d4336d5d7fbbd1910532201b4a07a20a5c Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:48 +0800 Subject: [PATCH 6/6] iomap: do some small logical cleanup in buffered write Since iomap_write_end() can never return a partial write length, the comparison between written, copied and bytes becomes useless, just merge them with the unwritten branch. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-10-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 73241fbaff6bc..41c8f0c68ef56 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -975,11 +975,6 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); - if (written < bytes) - iomap_write_failed(iter->inode, pos + written, - bytes - written); - if (unlikely(copied != written)) - iov_iter_revert(i, copied - written); cond_resched(); if (unlikely(written == 0)) { @@ -989,6 +984,9 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) * halfway through, might be a race with munmap, * might be severe memory pressure. */ + iomap_write_failed(iter->inode, pos, bytes); + iov_iter_revert(i, copied); + if (chunk > PAGE_SIZE) chunk /= 2; if (copied) {