From 964688b32d9ada55a7fce2e650d85ef24188f73f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 17 May 2022 18:03:27 -0400 Subject: [PATCH 01/41] btrfs: Use a folio in wait_dev_supers() Remove a use of PageError and optimise putting the page reference twice. Signed-off-by: Matthew Wilcox (Oracle) --- fs/btrfs/disk-io.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ba005c419836..dbac856565718 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4178,7 +4178,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) max_mirrors = BTRFS_SUPER_MIRROR_MAX; for (i = 0; i < max_mirrors; i++) { - struct page *page; + struct folio *folio; ret = btrfs_sb_log_location(device, i, READ, &bytenr); if (ret == -ENOENT) { @@ -4193,27 +4193,24 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) device->commit_total_bytes) break; - page = find_get_page(device->bdev->bd_inode->i_mapping, + folio = filemap_get_folio(device->bdev->bd_inode->i_mapping, bytenr >> PAGE_SHIFT); - if (!page) { + if (!folio) { errors++; if (i == 0) primary_failed = true; continue; } - /* Page is submitted locked and unlocked once the IO completes */ - wait_on_page_locked(page); - if (PageError(page)) { + /* Folio is unlocked once the IO completes */ + folio_wait_locked(folio); + if (!folio_test_uptodate(folio)) { errors++; if (i == 0) primary_failed = true; } - /* Drop our reference */ - put_page(page); - - /* Drop the reference from the writing run */ - put_page(page); + /* Drop our reference and the one from the writing run */ + folio_put_refs(folio, 2); } /* log error, force error return */ From ba457436991abc8e3cc830cfc91e9f54b53a07ed Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 26 May 2022 23:19:49 -0400 Subject: [PATCH 02/41] buffer: Don't test folio error in block_read_full_folio() We can cache this information in a local variable instead of communicating from one part of the function to another via folio flags. Signed-off-by: Matthew Wilcox (Oracle) --- fs/buffer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index a0214e3f90d36..ce9844d7c10fa 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2259,6 +2259,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) unsigned int blocksize, bbits; int nr, i; int fully_mapped = 1; + bool page_error = false; VM_BUG_ON_FOLIO(folio_test_large(folio), folio); @@ -2283,8 +2284,10 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) if (iblock < lblock) { WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); - if (err) + if (err) { folio_set_error(folio); + page_error = true; + } } if (!buffer_mapped(bh)) { folio_zero_range(folio, i * blocksize, @@ -2311,7 +2314,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) * All buffers are uptodate - we can set the folio uptodate * as well. But not if get_block() returned an error. 
*/ - if (!folio_test_error(folio)) + if (!page_error) folio_mark_uptodate(folio); folio_unlock(folio); return 0; From 2c62b172802a648cb9bad9b1ad2415a8ecbb7f41 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 26 May 2022 23:33:43 -0400 Subject: [PATCH 03/41] squashfs: Return the actual error from squashfs_read_folio() Since we actually know what error happened, we can report it instead of having the generic code return -EIO for pages that were unlocked without being marked uptodate. Also remove a test of PageError since we have the return value at this point. Signed-off-by: Matthew Wilcox (Oracle) --- fs/squashfs/file.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index a8e495d8eb860..7f0904b203294 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -454,7 +454,7 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) int expected = index == file_end ? (i_size_read(inode) & (msblk->block_size - 1)) : msblk->block_size; - int res; + int res = 0; void *pageaddr; TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", @@ -467,14 +467,15 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) if (index < file_end || squashfs_i(inode)->fragment_block == SQUASHFS_INVALID_BLK) { u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - if (bsize < 0) + + res = read_blocklist(inode, index, &block); + if (res < 0) goto error_out; - if (bsize == 0) + if (res == 0) res = squashfs_readpage_sparse(page, expected); else - res = squashfs_readpage_block(page, block, bsize, expected); + res = squashfs_readpage_block(page, block, res, expected); } else res = squashfs_readpage_fragment(page, expected); @@ -488,11 +489,11 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) memset(pageaddr, 0, PAGE_SIZE); kunmap_atomic(pageaddr); flush_dcache_page(page); - if (!PageError(page)) + if (res == 0) SetPageUptodate(page); unlock_page(page); - return 0; + return res; } From 4188e3e2860d65a43397d4658f74136bf3297808 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 27 May 2022 11:20:56 -0400 Subject: [PATCH 04/41] hostfs: Handle page write errors correctly If a page can't be written back, we need to call mapping_set_error(), not clear the page's Uptodate flag. Also remove the clearing of PageError on success; that flag is used for read errors, not write errors. Signed-off-by: Matthew Wilcox (Oracle) --- fs/hostfs/hostfs_kern.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index cc1bc6f93a010..07881b76d42f9 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -416,15 +416,15 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc) err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); if (err != count) { - ClearPageUptodate(page); + if (err >= 0) + err = -EIO; + mapping_set_error(mapping, err); goto out; } if (base > inode->i_size) inode->i_size = base; - if (PageError(page)) - ClearPageError(page); err = 0; out: From 437084d7c5b825e714d0b50ba6624e6ba840fa32 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 27 May 2022 11:29:24 -0400 Subject: [PATCH 05/41] ocfs2: Use filemap_write_and_wait_range() in ocfs2_cow_sync_writeback() Remove the open-coding of filemap_fdatawait_range(). 
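For reference, filemap_write_and_wait_range() bundles the write-out and the wait that this function previously split across two steps; a rough sketch of the equivalent sequence (not the exact mm/filemap.c implementation; the helper name below is illustrative only):

	static int example_sync_range(struct address_space *mapping,
				      loff_t start, loff_t end)
	{
		/* Kick off writeback for the byte range... */
		int ret = filemap_fdatawrite_range(mapping, start, end);

		/* ...then wait for it and collect any writeback error. */
		if (ret != -EIO)
			ret = filemap_fdatawait_range(mapping, start, end);
		return ret;
	}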
Signed-off-by: Matthew Wilcox (Oracle) --- fs/ocfs2/refcounttree.c | 42 ++++++----------------------------------- 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index e04358a46b680..1358981e80a36 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -3146,48 +3146,18 @@ int ocfs2_cow_sync_writeback(struct super_block *sb, struct inode *inode, u32 cpos, u32 num_clusters) { - int ret = 0; - loff_t offset, end, map_end; - pgoff_t page_index; - struct page *page; + int ret; + loff_t start, end; if (ocfs2_should_order_data(inode)) return 0; - offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; - end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); + start = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; + end = start + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits) - 1; - ret = filemap_fdatawrite_range(inode->i_mapping, - offset, end - 1); - if (ret < 0) { + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret < 0) mlog_errno(ret); - return ret; - } - - while (offset < end) { - page_index = offset >> PAGE_SHIFT; - map_end = ((loff_t)page_index + 1) << PAGE_SHIFT; - if (map_end > end) - map_end = end; - - page = find_or_create_page(inode->i_mapping, - page_index, GFP_NOFS); - BUG_ON(!page); - - wait_on_page_writeback(page); - if (PageError(page)) { - ret = -EIO; - mlog_errno(ret); - } else - mark_page_accessed(page); - - unlock_page(page); - put_page(page); - page = NULL; - offset = map_end; - if (ret) - break; - } return ret; } From 0ba02b002594cc1809da3a05a44bb5b9654448f6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 18 May 2022 08:54:42 -0400 Subject: [PATCH 06/41] cramfs: read_mapping_page() is synchronous Since commit 67f9fd91f93c, the code to wait for the read to complete has been dead. That commit wrongly stated that the read was synchronous already; this seems to have been a confusion about which ->readpage operation was being called. Instead of reintroducing an asynchronous version of read_mapping_page(), call the readahead code directly to submit all reads first before waiting for them in read_mapping_page(). Signed-off-by: Matthew Wilcox (Oracle) --- fs/cramfs/inode.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 7ae59a6afc5c1..61ccf7722fc3c 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -183,6 +183,7 @@ static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset, unsigned int len) { struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + struct file_ra_state ra; struct page *pages[BLKS_PER_BUF]; unsigned i, blocknr, buffer; unsigned long devsize; @@ -212,6 +213,9 @@ static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset, devsize = bdev_nr_bytes(sb->s_bdev) >> PAGE_SHIFT; /* Ok, read in BLKS_PER_BUF pages completely first. 
*/ + file_ra_state_init(&ra, mapping); + page_cache_sync_readahead(mapping, &ra, NULL, blocknr, BLKS_PER_BUF); + for (i = 0; i < BLKS_PER_BUF; i++) { struct page *page = NULL; @@ -224,19 +228,6 @@ static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset, pages[i] = page; } - for (i = 0; i < BLKS_PER_BUF; i++) { - struct page *page = pages[i]; - - if (page) { - wait_on_page_locked(page); - if (!PageUptodate(page)) { - /* asynchronous error */ - put_page(page); - pages[i] = NULL; - } - } - } - buffer = next_buffer; next_buffer = NEXT_BUFFER(buffer); buffer_blocknr[buffer] = blocknr; From 1cf29f882fa8e28e18cccc9b0a7e94e391b3291f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 17 May 2022 23:36:55 -0400 Subject: [PATCH 07/41] block: Simplify read_part_sector() That rather complicated expression is just trying to find the offset of this sector within a page, and there are easier ways to express that. Signed-off-by: Matthew Wilcox (Oracle) --- block/partitions/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index a9a51bac42df8..52871fa224eeb 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -718,8 +718,7 @@ void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p) goto out; p->v = page; - return (unsigned char *)page_address(page) + - ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << SECTOR_SHIFT); + return page_address(page) + offset_in_page(n * SECTOR_SIZE); out: p->v = NULL; return NULL; From a340b79b299109ded6928157dc24cf0fb6a90823 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 17 May 2022 23:38:37 -0400 Subject: [PATCH 08/41] block: Handle partition read errors more consistently Set p->v to NULL if we try to read beyond the end of the disk, just like we do if we get an error returned from trying to read the disk. Signed-off-by: Matthew Wilcox (Oracle) --- block/partitions/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index 52871fa224eeb..58034dd2d2155 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -709,7 +709,7 @@ void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p) if (n >= get_capacity(state->disk)) { state->access_beyond_eod = true; - return NULL; + goto out; } page = read_mapping_page(mapping, From 4639d0da923efd6704974893ba19eb1aaf396538 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 17 May 2022 23:40:45 -0400 Subject: [PATCH 09/41] block: Use PAGE_SECTORS_SHIFT The bare use of '9' confuses some people. We also don't need this cast, since the compiler does exactly that cast for us. 
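For context, the constants relate as follows (a sketch of the usual definitions; which header they live in varies by kernel version):

	#define SECTOR_SHIFT		9
	#define SECTOR_SIZE		(1 << SECTOR_SHIFT)
	#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
	#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)

so 'n >> (PAGE_SHIFT - 9)' and 'n >> PAGE_SECTORS_SHIFT' yield the same page index, and because read_mapping_page() takes a pgoff_t the conversion from sector_t happens implicitly at the call site.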
Signed-off-by: Matthew Wilcox (Oracle) --- block/partitions/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index 58034dd2d2155..269c86523e67d 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -712,8 +712,7 @@ void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p) goto out; } - page = read_mapping_page(mapping, - (pgoff_t)(n >> (PAGE_SHIFT - 9)), NULL); + page = read_mapping_page(mapping, n >> PAGE_SECTORS_SHIFT, NULL); if (IS_ERR(page)) goto out; From 5b15f72a828b44a06d50ab739e89ac492298bec4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 17 May 2022 23:43:35 -0400 Subject: [PATCH 10/41] block: Convert read_part_sector() to use a folio This relatively straightforward converion saves a call to compound_head() hidden inside put_page(). Signed-off-by: Matthew Wilcox (Oracle) --- block/partitions/check.h | 4 ++-- block/partitions/core.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/partitions/check.h b/block/partitions/check.h index 4ffa2359b1a37..8d70a880c3720 100644 --- a/block/partitions/check.h +++ b/block/partitions/check.h @@ -24,13 +24,13 @@ struct parsed_partitions { }; typedef struct { - struct page *v; + struct folio *v; } Sector; void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p); static inline void put_dev_sector(Sector p) { - put_page(p.v); + folio_put(p.v); } static inline void diff --git a/block/partitions/core.c b/block/partitions/core.c index 269c86523e67d..e103ad08a948d 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -705,19 +705,19 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p) { struct address_space *mapping = state->disk->part0->bd_inode->i_mapping; - struct page *page; + struct folio *folio; if (n >= get_capacity(state->disk)) { state->access_beyond_eod = true; goto out; } - page = read_mapping_page(mapping, n >> PAGE_SECTORS_SHIFT, NULL); - if (IS_ERR(page)) + folio = read_mapping_folio(mapping, n >> PAGE_SECTORS_SHIFT, NULL); + if (IS_ERR(folio)) goto out; - p->v = page; - return page_address(page) + offset_in_page(n * SECTOR_SIZE); + p->v = folio; + return folio_address(folio) + offset_in_folio(folio, n * SECTOR_SIZE); out: p->v = NULL; return NULL; From fa19fbd23186e43714bf7694ec83b7a519e618c0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 Apr 2022 11:12:16 -0400 Subject: [PATCH 11/41] befs: Convert befs_symlink_read_folio() to use a folio This is a straightforward conversion from the page APIs to the folio APIs. Symlinks are not allowed to be larger than PAGE_SIZE, so there is little work to do here. Signed-off-by: Matthew Wilcox (Oracle) --- fs/befs/linuxvfs.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index be383fa46b12a..32749fcee090a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -108,8 +108,7 @@ static const struct export_operations befs_export_operations = { * passes it the address of befs_get_block, for mapping file * positions to disk blocks. 
*/ -static int -befs_read_folio(struct file *file, struct folio *folio) +static int befs_read_folio(struct file *file, struct folio *folio) { return block_read_full_folio(folio, befs_get_block); } @@ -470,13 +469,12 @@ befs_destroy_inodecache(void) */ static int befs_symlink_read_folio(struct file *unused, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct super_block *sb = inode->i_sb; struct befs_inode_info *befs_ino = BEFS_I(inode); befs_data_stream *data = &befs_ino->i_data.ds; befs_off_t len = data->size; - char *link = page_address(page); + char *link = folio_address(folio); if (len == 0 || len > PAGE_SIZE) { befs_error(sb, "Long symlink with illegal length"); @@ -489,12 +487,12 @@ static int befs_symlink_read_folio(struct file *unused, struct folio *folio) goto fail; } link[len - 1] = '\0'; - SetPageUptodate(page); - unlock_page(page); + folio_mark_uptodate(folio); + folio_unlock(folio); return 0; fail: - SetPageError(page); - unlock_page(page); + folio_set_error(folio); + folio_unlock(folio); return -EIO; } From 1a6b7e5cb5504bf4f0f4e63b8bedaff8aad5798b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 Apr 2022 11:12:16 -0400 Subject: [PATCH 12/41] coda: Convert coda_symlink_filler() to use a folio This is a straightforward conversion from the page APIs to the folio APIs. Symlinks are not allowed to be larger than PAGE_SIZE, so there is little work to do here. Signed-off-by: Matthew Wilcox (Oracle) --- fs/coda/symlink.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c index 8adf810424986..ccdbec388091a 100644 --- a/fs/coda/symlink.c +++ b/fs/coda/symlink.c @@ -22,25 +22,24 @@ static int coda_symlink_filler(struct file *file, struct folio *folio) { - struct page *page = &folio->page; struct inode *inode = folio->mapping->host; int error; struct coda_inode_info *cii; unsigned int len = PAGE_SIZE; - char *p = page_address(page); + char *p = folio_address(folio); cii = ITOC(inode); error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len); if (error) goto fail; - SetPageUptodate(page); - unlock_page(page); + folio_mark_uptodate(folio); + folio_unlock(folio); return 0; fail: - SetPageError(page); - unlock_page(page); + folio_set_error(folio); + folio_unlock(folio); return error; } From 6e2a48222bc97d4028b1ff8b8bfdfadec7a72923 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 Apr 2022 11:12:16 -0400 Subject: [PATCH 13/41] freevxfs: Convert vxfs_immed_read_folio() to use a folio Reorganise the file to remove the forward declaration. Use folios throughout vxfs_immed_read_folio(). Use memcpy_to_page() instead of an open-coded kmap()/kunmap(). Remove flush_dcache_page() as this is embedded in memcpy_to_page(). Use folio_pos() instead of opencoding it. Handle multi-page folios. Signed-off-by: Matthew Wilcox (Oracle) --- fs/freevxfs/vxfs_immed.c | 43 ++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index c2ef9f0debbde..9b49ec36e6670 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -13,16 +13,6 @@ #include "vxfs_extern.h" #include "vxfs_inode.h" - -static int vxfs_immed_read_folio(struct file *, struct folio *); - -/* - * Address space operations for immed files and directories. 
- */ -const struct address_space_operations vxfs_immed_aops = { - .read_folio = vxfs_immed_read_folio, -}; - /** * vxfs_immed_read_folio - read part of an immed inode into pagecache * @file: file context (unused) @@ -30,7 +20,7 @@ const struct address_space_operations vxfs_immed_aops = { * * Description: * vxfs_immed_read_folio reads a part of the immed area of the - * file that hosts @pp into the pagecache. + * file that hosts @folio into the pagecache. * * Returns: * Zero on success, else a negative error code. @@ -38,21 +28,26 @@ const struct address_space_operations vxfs_immed_aops = { * Locking status: * @folio is locked and will be unlocked. */ -static int -vxfs_immed_read_folio(struct file *fp, struct folio *folio) +static int vxfs_immed_read_folio(struct file *fp, struct folio *folio) { - struct page *pp = &folio->page; - struct vxfs_inode_info *vip = VXFS_INO(pp->mapping->host); - u_int64_t offset = (u_int64_t)pp->index << PAGE_SHIFT; - caddr_t kaddr; + struct vxfs_inode_info *vip = VXFS_INO(folio->mapping->host); + void *src = vip->vii_immed.vi_immed + folio_pos(folio); + unsigned long i; - kaddr = kmap(pp); - memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_SIZE); - kunmap(pp); - - flush_dcache_page(pp); - SetPageUptodate(pp); - unlock_page(pp); + for (i = 0; i < folio_nr_pages(folio); i++) { + memcpy_to_page(folio_page(folio, i), 0, src, PAGE_SIZE); + src += PAGE_SIZE; + } + + folio_mark_uptodate(folio); + folio_unlock(folio); return 0; } + +/* + * Address space operations for immed files and directories. + */ +const struct address_space_operations vxfs_immed_aops = { + .read_folio = vxfs_immed_read_folio, +}; From 71864cbf4617f06ff2434235049e9c3daa2806a6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 Apr 2022 11:12:16 -0400 Subject: [PATCH 14/41] ocfs2: Convert ocfs2_read_folio() to use a folio Use the folio API throughout. There are a few places where we convert back to a page to call into the rest of the filesystem, so folio usage needs to be pushed down to those functions later. Signed-off-by: Matthew Wilcox (Oracle) --- fs/ocfs2/aops.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 35d40a67204c4..767df51f8657a 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -277,16 +277,14 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) static int ocfs2_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ocfs2_inode_info *oi = OCFS2_I(inode); - loff_t start = (loff_t)page->index << PAGE_SHIFT; + loff_t start = folio_pos(folio); int ret, unlock = 1; - trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, - (page ? 
page->index : 0)); + trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, folio->index); - ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); + ret = ocfs2_inode_lock_with_page(inode, NULL, 0, &folio->page); if (ret != 0) { if (ret == AOP_TRUNCATED_PAGE) unlock = 0; @@ -296,11 +294,11 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio) if (down_read_trylock(&oi->ip_alloc_sem) == 0) { /* - * Unlock the page and cycle ip_alloc_sem so that we don't + * Unlock the folio and cycle ip_alloc_sem so that we don't * busyloop waiting for ip_alloc_sem to unlock */ ret = AOP_TRUNCATED_PAGE; - unlock_page(page); + folio_unlock(folio); unlock = 0; down_read(&oi->ip_alloc_sem); up_read(&oi->ip_alloc_sem); @@ -313,21 +311,21 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio) * block_read_full_folio->get_block freaks out if it is asked to read * beyond the end of a file, so we check here. Callers * (generic_file_read, vm_ops->fault) are clever enough to check i_size - * and notice that the page they just read isn't needed. + * and notice that the folio they just read isn't needed. * * XXX sys_readahead() seems to get that wrong? */ if (start >= i_size_read(inode)) { - zero_user(page, 0, PAGE_SIZE); - SetPageUptodate(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); ret = 0; goto out_alloc; } if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) - ret = ocfs2_readpage_inline(inode, page); + ret = ocfs2_readpage_inline(inode, &folio->page); else - ret = block_read_full_folio(page_folio(page), ocfs2_get_block); + ret = block_read_full_folio(folio, ocfs2_get_block); unlock = 0; out_alloc: @@ -336,7 +334,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio) ocfs2_inode_unlock(inode, 0); out: if (unlock) - unlock_page(page); + folio_unlock(folio); return ret; } From d862e2d593685f8a895202493f1a059932cdb0e0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 13 May 2022 21:21:11 -0400 Subject: [PATCH 15/41] gfs2: Convert gfs2_jhead_process_page() to use a folio Use folio_put_refs() to perform only one atomic operation instead of two. The other changes are straightforward conversions from page APIs to their folio equivalents. Signed-off-by: Matthew Wilcox (Oracle) --- fs/gfs2/lops.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 6ba51cbb94cf2..1f67d37cd225c 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -452,36 +452,36 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, * @head: The journal head to start from * @done: If set, perform only cleanup, else search and set if found. * - * Find the page with 'index' in the journal's mapping. Search the page for + * Find the folio with 'index' in the journal's mapping. Search the folio for * the journal head if requested (cleanup == false). Release refs on the - * page so the page cache can reclaim it (put_page() twice). We grabbed a - * reference on this page two times, first when we did a find_or_create_page() - * to obtain the page to add it to the bio and second when we do a - * find_get_page() here to get the page to wait on while I/O on it is being + * folio so the page cache can reclaim it. We grabbed a + * reference on this folio twice, first when we did a find_or_create_page() + * to obtain the folio to add it to the bio and second when we do a + * filemap_get_folio() here to get the folio to wait on while I/O on it is being * completed. 
- * This function is also used to free up a page we might've grabbed but not + * This function is also used to free up a folio we might've grabbed but not * used. Maybe we added it to a bio, but not submitted it for I/O. Or we * submitted the I/O, but we already found the jhead so we only need to drop - * our references to the page. + * our references to the folio. */ static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index, struct gfs2_log_header_host *head, bool *done) { - struct page *page; + struct folio *folio; - page = find_get_page(jd->jd_inode->i_mapping, index); - wait_on_page_locked(page); + folio = filemap_get_folio(jd->jd_inode->i_mapping, index); - if (PageError(page)) + folio_wait_locked(folio); + if (folio_test_error(folio)) *done = true; if (!*done) - *done = gfs2_jhead_pg_srch(jd, head, page); + *done = gfs2_jhead_pg_srch(jd, head, &folio->page); - put_page(page); /* Once for find_get_page */ - put_page(page); /* Once more for find_or_create_page */ + /* filemap_get_folio() and the earlier find_or_create_page() */ + folio_put_refs(folio, 2); } static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs) From 1662afcecae535147b1cb143fde8d99a6a98d8fe Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 17 May 2022 18:06:23 -0400 Subject: [PATCH 16/41] ext2: Use a folio in ext2_get_page() Remove a call to read_mapping_page(). Signed-off-by: Matthew Wilcox (Oracle) --- fs/ext2/dir.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 8326b63f0b707..8f597753ac129 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -200,18 +200,19 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n, int quiet, void **page_addr) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_mapping_page(mapping, n, NULL); - if (!IS_ERR(page)) { - *page_addr = kmap_local_page(page); - if (unlikely(!PageChecked(page))) { - if (!ext2_check_page(page, quiet, *page_addr)) - goto fail; - } + struct folio *folio = read_mapping_folio(mapping, n, NULL); + + if (IS_ERR(folio)) + return &folio->page; + *page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1)); + if (unlikely(!folio_test_checked(folio))) { + if (!ext2_check_page(&folio->page, quiet, *page_addr)) + goto fail; } - return page; + return &folio->page; fail: - ext2_put_page(page, *page_addr); + ext2_put_page(&folio->page, *page_addr); return ERR_PTR(-EIO); } From 1a22e12f94c9f647cdd8e4e56bc313043e9eb5c2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Jun 2022 15:37:50 -0400 Subject: [PATCH 17/41] secretmem: Remove isolate_page The isolate_page operation is never called for filesystems, only for device drivers which call SetPageMovable. 
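For illustration, ->isolate_page() can only ever be reached for a page that a driver has explicitly opted in with __SetPageMovable(); a hypothetical driver-side sketch (not secretmem code) looks like:

	/* Hypothetical driver snippet, for illustration only. */
	static void example_make_page_movable(struct page *page,
					      struct address_space *mapping)
	{
		lock_page(page);
		/* After this, compaction may call mapping->a_ops->isolate_page(). */
		__SetPageMovable(page, mapping);
		unlock_page(page);
	}

secretmem pages are ordinary page cache pages and are never marked this way, so its isolate_page() implementation was unreachable.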
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand --- mm/secretmem.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/secretmem.c b/mm/secretmem.c index 206ed6b40c1d0..1c7f1775b56e7 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -133,11 +133,6 @@ static const struct file_operations secretmem_fops = { .mmap = secretmem_mmap, }; -static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode) -{ - return false; -} - static int secretmem_migratepage(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode) @@ -155,7 +150,6 @@ const struct address_space_operations secretmem_aops = { .dirty_folio = noop_dirty_folio, .free_folio = secretmem_free_folio, .migratepage = secretmem_migratepage, - .isolate_page = secretmem_isolate_page, }; static int secretmem_setattr(struct user_namespace *mnt_userns, From b361f39863ed2d6ca644a2d53638c3fd87d32d14 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Jun 2022 15:38:48 -0400 Subject: [PATCH 18/41] mm: Convert all PageMovable users to movable_operations These drivers are rather uncomfortably hammered into the address_space_operations hole. They aren't filesystems and don't behave like filesystems. They just need their own movable_operations structure, which we can point to directly from page->mapping. Signed-off-by: Matthew Wilcox (Oracle) --- Documentation/filesystems/locking.rst | 4 - Documentation/filesystems/vfs.rst | 12 --- Documentation/vm/page_migration.rst | 113 +++----------------------- arch/powerpc/platforms/pseries/cmm.c | 60 +------------- drivers/misc/vmw_balloon.c | 61 +------------- drivers/virtio/virtio_balloon.c | 47 +---------- include/linux/balloon_compaction.h | 6 +- include/linux/fs.h | 2 - include/linux/migrate.h | 56 +++++++++++-- include/linux/page-flags.h | 2 +- include/uapi/linux/magic.h | 4 - mm/balloon_compaction.c | 10 +-- mm/compaction.c | 29 +++---- mm/migrate.c | 24 +++--- mm/util.c | 4 +- mm/z3fold.c | 84 ++----------------- mm/zsmalloc.c | 102 +++++------------------ 17 files changed, 134 insertions(+), 486 deletions(-) diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index c0fe711f14d3f..9963d9600b717 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -252,9 +252,7 @@ prototypes:: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); int (*direct_IO)(struct kiocb *, struct iov_iter *iter); - bool (*isolate_page) (struct page *, isolate_mode_t); int (*migratepage)(struct address_space *, struct page *, struct page *); - void (*putback_page) (struct page *); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count); int (*error_remove_page)(struct address_space *, struct page *); @@ -280,9 +278,7 @@ invalidate_folio: yes exclusive release_folio: yes free_folio: yes direct_IO: -isolate_page: yes migratepage: yes (both) -putback_page: yes launder_folio: yes is_partially_uptodate: yes error_remove_page: yes diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index a08c652467d7c..b51665cdabc46 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -737,12 +737,8 @@ cache in your filesystem. 
The following members are defined: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); - /* isolate a page for migration */ - bool (*isolate_page) (struct page *, isolate_mode_t); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); - /* put migration-failed page back to right list */ - void (*putback_page) (struct page *); int (*launder_folio) (struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, @@ -930,11 +926,6 @@ cache in your filesystem. The following members are defined: data directly between the storage and the application's address space. -``isolate_page`` - Called by the VM when isolating a movable non-lru page. If page - is successfully isolated, VM marks the page as PG_isolated via - __SetPageIsolated. - ``migrate_page`` This is used to compact the physical memory usage. If the VM wants to relocate a page (maybe off a memory card that is @@ -942,9 +933,6 @@ cache in your filesystem. The following members are defined: page to this function. migrate_page should transfer any private data across and update any references that it has to the page. -``putback_page`` - Called by the VM when isolated page's migration fails. - ``launder_folio`` Called before freeing a folio - it writes back the dirty folio. To prevent redirtying the folio, it is kept locked during the diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst index 8c5cb8147e55e..11493bad71125 100644 --- a/Documentation/vm/page_migration.rst +++ b/Documentation/vm/page_migration.rst @@ -152,110 +152,15 @@ Steps: Non-LRU page migration ====================== -Although migration originally aimed for reducing the latency of memory accesses -for NUMA, compaction also uses migration to create high-order pages. +Although migration originally aimed for reducing the latency of memory +accesses for NUMA, compaction also uses migration to create high-order +pages. For compaction purposes, it is also useful to be able to move +non-LRU pages, such as zsmalloc and virtio-balloon pages. -Current problem of the implementation is that it is designed to migrate only -*LRU* pages. However, there are potential non-LRU pages which can be migrated -in drivers, for example, zsmalloc, virtio-balloon pages. - -For virtio-balloon pages, some parts of migration code path have been hooked -up and added virtio-balloon specific functions to intercept migration logics. -It's too specific to a driver so other drivers who want to make their pages -movable would have to add their own specific hooks in the migration path. - -To overcome the problem, VM supports non-LRU page migration which provides -generic functions for non-LRU movable pages without driver specific hooks -in the migration path. - -If a driver wants to make its pages movable, it should define three functions -which are function pointers of struct address_space_operations. - -1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);`` - - What VM expects from isolate_page() function of driver is to return *true* - if driver isolates the page successfully. On returning true, VM marks the page - as PG_isolated so concurrent isolation in several CPUs skip the page - for isolation. If a driver cannot isolate the page, it should return *false*. - - Once page is successfully isolated, VM uses page.lru fields so driver - shouldn't expect to preserve values in those fields. - -2. 
``int (*migratepage) (struct address_space *mapping,`` -| ``struct page *newpage, struct page *oldpage, enum migrate_mode);`` - - After isolation, VM calls migratepage() of driver with the isolated page. - The function of migratepage() is to move the contents of the old page to the - new page - and set up fields of struct page newpage. Keep in mind that you should - indicate to the VM the oldpage is no longer movable via __ClearPageMovable() - under page_lock if you migrated the oldpage successfully and returned - MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver - can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time - because VM interprets -EAGAIN as "temporary migration failure". On returning - any error except -EAGAIN, VM will give up the page migration without - retrying. - - Driver shouldn't touch the page.lru field while in the migratepage() function. - -3. ``void (*putback_page)(struct page *);`` - - If migration fails on the isolated page, VM should return the isolated page - to the driver so VM calls the driver's putback_page() with the isolated page. - In this function, the driver should put the isolated page back into its own data - structure. - -Non-LRU movable page flags - - There are two page flags for supporting non-LRU movable page. - - * PG_movable - - Driver should use the function below to make page movable under page_lock:: - - void __SetPageMovable(struct page *page, struct address_space *mapping) - - It needs argument of address_space for registering migration - family functions which will be called by VM. Exactly speaking, - PG_movable is not a real flag of struct page. Rather, VM - reuses the page->mapping's lower bits to represent it:: - - #define PAGE_MAPPING_MOVABLE 0x2 - page->mapping = page->mapping | PAGE_MAPPING_MOVABLE; - - so driver shouldn't access page->mapping directly. Instead, driver should - use page_mapping() which masks off the low two bits of page->mapping under - page lock so it can get the right struct address_space. - - For testing of non-LRU movable pages, VM supports __PageMovable() function. - However, it doesn't guarantee to identify non-LRU movable pages because - the page->mapping field is unified with other variables in struct page. - If the driver releases the page after isolation by VM, page->mapping - doesn't have a stable value although it has PAGE_MAPPING_MOVABLE set - (look at __ClearPageMovable). But __PageMovable() is cheap to call whether - page is LRU or non-LRU movable once the page has been isolated because LRU - pages can never have PAGE_MAPPING_MOVABLE set in page->mapping. It is also - good for just peeking to test non-LRU movable pages before more expensive - checking with lock_page() in pfn scanning to select a victim. - - For guaranteeing non-LRU movable page, VM provides PageMovable() function. - Unlike __PageMovable(), PageMovable() validates page->mapping and - mapping->a_ops->isolate_page under lock_page(). The lock_page() prevents - sudden destroying of page->mapping. - - Drivers using __SetPageMovable() should clear the flag via - __ClearMovablePage() under page_lock() before the releasing the page. - - * PG_isolated - - To prevent concurrent isolation among several CPUs, VM marks isolated page - as PG_isolated under lock_page(). So if a CPU encounters PG_isolated - non-LRU movable page, it can skip it. Driver doesn't need to manipulate the - flag because VM will set/clear it automatically. 
Keep in mind that if the - driver sees a PG_isolated page, it means the page has been isolated by the - VM so it shouldn't touch the page.lru field. - The PG_isolated flag is aliased with the PG_reclaim flag so drivers - shouldn't use PG_isolated for its own purposes. +If a driver wants to make its pages movable, it should define a struct +movable_operations. It then needs to call __SetPageMovable() on each +page that it may be able to move. This uses the ``page->mapping`` field, +so this field is not available for the driver to use for other purposes. Monitoring Migration ===================== @@ -286,3 +191,5 @@ THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase. Christoph Lameter, May 8, 2006. Minchan Kim, Mar 28, 2016. + +.. kernel-doc:: include/linux/migrate.h diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 15ed8206c4630..5f4037c1d7fe8 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -19,9 +19,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -500,19 +497,6 @@ static struct notifier_block cmm_mem_nb = { }; #ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; - -static int cmm_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "ppc-cmm", - .init_fs_context = cmm_init_fs_context, - .kill_sb = kill_anon_super, -}; - static int cmm_migratepage(struct balloon_dev_info *b_dev_info, struct page *newpage, struct page *page, enum migrate_mode mode) @@ -564,47 +548,13 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info, return MIGRATEPAGE_SUCCESS; } -static int cmm_balloon_compaction_init(void) +static void cmm_balloon_compaction_init(void) { - int rc; - balloon_devinfo_init(&b_dev_info); b_dev_info.migratepage = cmm_migratepage; - - balloon_mnt = kern_mount(&balloon_fs); - if (IS_ERR(balloon_mnt)) { - rc = PTR_ERR(balloon_mnt); - balloon_mnt = NULL; - return rc; - } - - b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); - if (IS_ERR(b_dev_info.inode)) { - rc = PTR_ERR(b_dev_info.inode); - b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; - return rc; - } - - b_dev_info.inode->i_mapping->a_ops = &balloon_aops; - return 0; -} -static void cmm_balloon_compaction_deinit(void) -{ - if (b_dev_info.inode) - iput(b_dev_info.inode); - b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; } #else /* CONFIG_BALLOON_COMPACTION */ -static int cmm_balloon_compaction_init(void) -{ - return 0; -} - -static void cmm_balloon_compaction_deinit(void) +static void cmm_balloon_compaction_init(void) { } #endif /* CONFIG_BALLOON_COMPACTION */ @@ -622,9 +572,7 @@ static int cmm_init(void) if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate) return -EOPNOTSUPP; - rc = cmm_balloon_compaction_init(); - if (rc) - return rc; + cmm_balloon_compaction_init(); rc = register_oom_notifier(&cmm_oom_nb); if (rc < 0) @@ -658,7 +606,6 @@ static int cmm_init(void) out_oom_notifier: unregister_oom_notifier(&cmm_oom_nb); out_balloon_compaction: - cmm_balloon_compaction_deinit(); return rc; } @@ -677,7 +624,6 @@ static void cmm_exit(void) unregister_memory_notifier(&cmm_mem_nb); cmm_free_pages(atomic_long_read(&loaned_pages)); cmm_unregister_sysfs(&cmm_dev); - cmm_balloon_compaction_deinit(); } /** diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 086ce77d9074e..85dd6aa33df69 100644 
--- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -29,8 +29,6 @@ #include #include #include -#include -#include #include #include #include @@ -1730,20 +1728,6 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b) #ifdef CONFIG_BALLOON_COMPACTION - -static int vmballoon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type vmballoon_fs = { - .name = "balloon-vmware", - .init_fs_context = vmballoon_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static struct vfsmount *vmballoon_mnt; - /** * vmballoon_migratepage() - migrates a balloon page. * @b_dev_info: balloon device information descriptor. @@ -1862,21 +1846,6 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, return ret; } -/** - * vmballoon_compaction_deinit() - removes compaction related data. - * - * @b: pointer to the balloon. - */ -static void vmballoon_compaction_deinit(struct vmballoon *b) -{ - if (!IS_ERR(b->b_dev_info.inode)) - iput(b->b_dev_info.inode); - - b->b_dev_info.inode = NULL; - kern_unmount(vmballoon_mnt); - vmballoon_mnt = NULL; -} - /** * vmballoon_compaction_init() - initialized compaction for the balloon. * @@ -1888,33 +1857,15 @@ static void vmballoon_compaction_deinit(struct vmballoon *b) * * Return: zero on success or error code on failure. */ -static __init int vmballoon_compaction_init(struct vmballoon *b) +static __init void vmballoon_compaction_init(struct vmballoon *b) { - vmballoon_mnt = kern_mount(&vmballoon_fs); - if (IS_ERR(vmballoon_mnt)) - return PTR_ERR(vmballoon_mnt); - b->b_dev_info.migratepage = vmballoon_migratepage; - b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb); - - if (IS_ERR(b->b_dev_info.inode)) - return PTR_ERR(b->b_dev_info.inode); - - b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops; - return 0; } #else /* CONFIG_BALLOON_COMPACTION */ - -static void vmballoon_compaction_deinit(struct vmballoon *b) -{ -} - -static int vmballoon_compaction_init(struct vmballoon *b) +static inline void vmballoon_compaction_init(struct vmballoon *b) { - return 0; } - #endif /* CONFIG_BALLOON_COMPACTION */ static int __init vmballoon_init(void) @@ -1939,9 +1890,7 @@ static int __init vmballoon_init(void) * balloon_devinfo_init() . 
*/ balloon_devinfo_init(&balloon.b_dev_info); - error = vmballoon_compaction_init(&balloon); - if (error) - goto fail; + vmballoon_compaction_init(&balloon); INIT_LIST_HEAD(&balloon.huge_pages); spin_lock_init(&balloon.comm_lock); @@ -1958,7 +1907,6 @@ static int __init vmballoon_init(void) return 0; fail: vmballoon_unregister_shrinker(&balloon); - vmballoon_compaction_deinit(&balloon); return error; } @@ -1985,8 +1933,5 @@ static void __exit vmballoon_exit(void) */ vmballoon_send_start(&balloon, 0); vmballoon_pop(&balloon); - - /* Only once we popped the balloon, compaction can be deinit */ - vmballoon_compaction_deinit(&balloon); } module_exit(vmballoon_exit); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index b9737da6c4ddb..bd360b91e9d3e 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -17,9 +17,6 @@ #include #include #include -#include -#include -#include #include /* @@ -42,10 +39,6 @@ (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT)) #define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER) -#ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; -#endif - enum virtio_balloon_vq { VIRTIO_BALLOON_VQ_INFLATE, VIRTIO_BALLOON_VQ_DEFLATE, @@ -805,18 +798,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, return MIGRATEPAGE_SUCCESS; } - -static int balloon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "balloon-kvm", - .init_fs_context = balloon_init_fs_context, - .kill_sb = kill_anon_super, -}; - #endif /* CONFIG_BALLOON_COMPACTION */ static unsigned long shrink_free_pages(struct virtio_balloon *vb, @@ -909,19 +890,7 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_free_vb; #ifdef CONFIG_BALLOON_COMPACTION - balloon_mnt = kern_mount(&balloon_fs); - if (IS_ERR(balloon_mnt)) { - err = PTR_ERR(balloon_mnt); - goto out_del_vqs; - } - vb->vb_dev_info.migratepage = virtballoon_migratepage; - vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); - if (IS_ERR(vb->vb_dev_info.inode)) { - err = PTR_ERR(vb->vb_dev_info.inode); - goto out_kern_unmount; - } - vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { /* @@ -930,13 +899,13 @@ static int virtballoon_probe(struct virtio_device *vdev) */ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { err = -ENOSPC; - goto out_iput; + goto out_del_vqs; } vb->balloon_wq = alloc_workqueue("balloon-wq", WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); if (!vb->balloon_wq) { err = -ENOMEM; - goto out_iput; + goto out_del_vqs; } INIT_WORK(&vb->report_free_page_work, report_free_page_func); vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; @@ -1030,13 +999,7 @@ static int virtballoon_probe(struct virtio_device *vdev) out_del_balloon_wq: if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) destroy_workqueue(vb->balloon_wq); -out_iput: -#ifdef CONFIG_BALLOON_COMPACTION - iput(vb->vb_dev_info.inode); -out_kern_unmount: - kern_unmount(balloon_mnt); out_del_vqs: -#endif vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); @@ -1083,12 +1046,6 @@ static void virtballoon_remove(struct virtio_device *vdev) } remove_common(vb); -#ifdef CONFIG_BALLOON_COMPACTION - if (vb->vb_dev_info.inode) - iput(vb->vb_dev_info.inode); - - kern_unmount(balloon_mnt); -#endif kfree(vb); } diff --git 
a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index edb7f6d41faa0..5ca2d56996201 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -57,7 +57,6 @@ struct balloon_dev_info { struct list_head pages; /* Pages enqueued & handled to Host */ int (*migratepage)(struct balloon_dev_info *, struct page *newpage, struct page *page, enum migrate_mode mode); - struct inode *inode; }; extern struct page *balloon_page_alloc(void); @@ -75,11 +74,10 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) spin_lock_init(&balloon->pages_lock); INIT_LIST_HEAD(&balloon->pages); balloon->migratepage = NULL; - balloon->inode = NULL; } #ifdef CONFIG_BALLOON_COMPACTION -extern const struct address_space_operations balloon_aops; +extern const struct movable_operations balloon_mops; /* * balloon_page_insert - insert a page into the balloon's page list and make @@ -94,7 +92,7 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, struct page *page) { __SetPageOffline(page); - __SetPageMovable(page, balloon->inode->i_mapping); + __SetPageMovable(page, &balloon_mops); set_page_private(page, (unsigned long)balloon); list_add(&page->lru, &balloon->pages); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae5..5d8ee3155ca2e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -367,8 +367,6 @@ struct address_space_operations { */ int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); - bool (*isolate_page)(struct page *, isolate_mode_t); - void (*putback_page)(struct page *); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 069a89e847f34..82c735ba6109f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -19,6 +19,43 @@ struct migration_target_control; */ #define MIGRATEPAGE_SUCCESS 0 +/** + * struct movable_operations - Driver page migration + * @isolate_page: + * The VM calls this function to prepare the page to be moved. The page + * is locked and the driver should not unlock it. The driver should + * return ``true`` if the page is movable and ``false`` if it is not + * currently movable. After this function returns, the VM uses the + * page->lru field, so the driver must preserve any information which + * is usually stored here. + * + * @migrate_page: + * After isolation, the VM calls this function with the isolated + * @src page. The driver should copy the contents of the + * @src page to the @dst page and set up the fields of @dst page. + * Both pages are locked. + * If page migration is successful, the driver should call + * __ClearPageMovable(@src) and return MIGRATEPAGE_SUCCESS. + * If the driver cannot migrate the page at the moment, it can return + * -EAGAIN. The VM interprets this as a temporary migration failure and + * will retry it later. Any other error value is a permanent migration + * failure and migration will not be retried. + * The driver shouldn't touch the @src->lru field while in the + * migrate_page() function. It may write to @dst->lru. + * + * @putback_page: + * If migration fails on the isolated page, the VM informs the driver + * that the page is no longer a candidate for migration by calling + * this function. The driver should put the isolated page back into + * its own data structure. 
+ */ +struct movable_operations { + bool (*isolate_page)(struct page *, isolate_mode_t); + int (*migrate_page)(struct page *dst, struct page *src, + enum migrate_mode); + void (*putback_page)(struct page *); +}; + /* Defined in mm/debug.c: */ extern const char *migrate_reason_names[MR_TYPES]; @@ -91,13 +128,13 @@ static inline int next_demotion_node(int node) #endif #ifdef CONFIG_COMPACTION -extern int PageMovable(struct page *page); -extern void __SetPageMovable(struct page *page, struct address_space *mapping); -extern void __ClearPageMovable(struct page *page); +bool PageMovable(struct page *page); +void __SetPageMovable(struct page *page, const struct movable_operations *ops); +void __ClearPageMovable(struct page *page); #else -static inline int PageMovable(struct page *page) { return 0; } +static inline bool PageMovable(struct page *page) { return false; } static inline void __SetPageMovable(struct page *page, - struct address_space *mapping) + const struct movable_operations *ops) { } static inline void __ClearPageMovable(struct page *page) @@ -110,6 +147,15 @@ static inline bool folio_test_movable(struct folio *folio) return PageMovable(&folio->page); } +static inline +const struct movable_operations *page_movable_ops(struct page *page) +{ + VM_BUG_ON(!__PageMovable(page)); + + return (const struct movable_operations *) + ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE); +} + #ifdef CONFIG_NUMA_BALANCING extern int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e66f7aa3191df..3f5490f6f0385 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -639,7 +639,7 @@ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then page->mapping points a struct address_space. + * page and then page->mapping points to a struct movable_operations. * * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f724129c04255..6325d1d0e90f5 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -98,12 +98,8 @@ /* Since UDF 2.01 is ISO 13346 based... 
*/ #define UDF_SUPER_MAGIC 0x15013346 -#define BALLOON_KVM_MAGIC 0x13661366 -#define ZSMALLOC_MAGIC 0x58295829 #define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ -#define Z3FOLD_MAGIC 0x33 -#define PPC_CMM_MAGIC 0xc7571590 #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #endif /* __LINUX_MAGIC_H__ */ diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 4b8eab4b3f456..22c96fed70b59 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -228,10 +228,8 @@ static void balloon_page_putback(struct page *page) spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); } - /* move_to_new_page() counterpart for a ballooned page */ -static int balloon_page_migrate(struct address_space *mapping, - struct page *newpage, struct page *page, +static int balloon_page_migrate(struct page *newpage, struct page *page, enum migrate_mode mode) { struct balloon_dev_info *balloon = balloon_page_device(page); @@ -250,11 +248,11 @@ static int balloon_page_migrate(struct address_space *mapping, return balloon->migratepage(balloon, newpage, page, mode); } -const struct address_space_operations balloon_aops = { - .migratepage = balloon_page_migrate, +const struct movable_operations balloon_mops = { + .migrate_page = balloon_page_migrate, .isolate_page = balloon_page_isolate, .putback_page = balloon_page_putback, }; -EXPORT_SYMBOL_GPL(balloon_aops); +EXPORT_SYMBOL_GPL(balloon_mops); #endif /* CONFIG_BALLOON_COMPACTION */ diff --git a/mm/compaction.c b/mm/compaction.c index 1f89b969c12bf..f23efba1d118c 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -110,28 +110,27 @@ static void split_map_pages(struct list_head *list) } #ifdef CONFIG_COMPACTION - -int PageMovable(struct page *page) +bool PageMovable(struct page *page) { - struct address_space *mapping; + const struct movable_operations *mops; VM_BUG_ON_PAGE(!PageLocked(page), page); if (!__PageMovable(page)) - return 0; + return false; - mapping = page_mapping(page); - if (mapping && mapping->a_ops && mapping->a_ops->isolate_page) - return 1; + mops = page_movable_ops(page); + if (mops) + return true; - return 0; + return false; } EXPORT_SYMBOL(PageMovable); -void __SetPageMovable(struct page *page, struct address_space *mapping) +void __SetPageMovable(struct page *page, const struct movable_operations *mops) { VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page); - page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE); + VM_BUG_ON_PAGE((unsigned long)mops & PAGE_MAPPING_MOVABLE, page); + page->mapping = (void *)((unsigned long)mops | PAGE_MAPPING_MOVABLE); } EXPORT_SYMBOL(__SetPageMovable); @@ -139,12 +138,10 @@ void __ClearPageMovable(struct page *page) { VM_BUG_ON_PAGE(!PageMovable(page), page); /* - * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE - * flag so that VM can catch up released page by driver after isolation. - * With it, VM migration doesn't try to put it back. + * This page still has the type of a movable page, but it's + * actually not movable any more. 
*/ - page->mapping = (void *)((unsigned long)page->mapping & - PAGE_MAPPING_MOVABLE); + page->mapping = (void *)PAGE_MAPPING_MOVABLE; } EXPORT_SYMBOL(__ClearPageMovable); diff --git a/mm/migrate.c b/mm/migrate.c index 6c1ea61f39d80..491f037478324 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -59,7 +59,7 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) { - struct address_space *mapping; + const struct movable_operations *mops; /* * Avoid burning cycles with pages that are yet under __free_pages(), @@ -97,10 +97,10 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) if (!PageMovable(page) || PageIsolated(page)) goto out_no_isolated; - mapping = page_mapping(page); - VM_BUG_ON_PAGE(!mapping, page); + mops = page_movable_ops(page); + VM_BUG_ON_PAGE(!mops, page); - if (!mapping->a_ops->isolate_page(page, mode)) + if (!mops->isolate_page(page, mode)) goto out_no_isolated; /* Driver shouldn't use PG_isolated bit of page->flags */ @@ -120,10 +120,9 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) static void putback_movable_page(struct page *page) { - struct address_space *mapping; + const struct movable_operations *mops = page_movable_ops(page); - mapping = page_mapping(page); - mapping->a_ops->putback_page(page); + mops->putback_page(page); ClearPageIsolated(page); } @@ -846,16 +845,15 @@ static int fallback_migrate_page(struct address_space *mapping, static int move_to_new_folio(struct folio *dst, struct folio *src, enum migrate_mode mode) { - struct address_space *mapping; int rc = -EAGAIN; bool is_lru = !__PageMovable(&src->page); VM_BUG_ON_FOLIO(!folio_test_locked(src), src); VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst); - mapping = folio_mapping(src); - if (likely(is_lru)) { + struct address_space *mapping = folio_mapping(src); + if (!mapping) rc = migrate_page(mapping, &dst->page, &src->page, mode); else if (mapping->a_ops->migratepage) @@ -872,6 +870,8 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, rc = fallback_migrate_page(mapping, &dst->page, &src->page, mode); } else { + const struct movable_operations *mops; + /* * In case of non-lru page, it could be released after * isolation step. In that case, we shouldn't try migration. 
@@ -883,8 +883,8 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, goto out; } - rc = mapping->a_ops->migratepage(mapping, &dst->page, - &src->page, mode); + mops = page_movable_ops(&src->page); + rc = mops->migrate_page(&dst->page, &src->page, mode); WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS && !folio_test_isolated(src)); } diff --git a/mm/util.c b/mm/util.c index 0837570c92251..53af0e79d3e47 100644 --- a/mm/util.c +++ b/mm/util.c @@ -804,10 +804,10 @@ struct address_space *folio_mapping(struct folio *folio) return swap_address_space(folio_swap_entry(folio)); mapping = folio->mapping; - if ((unsigned long)mapping & PAGE_MAPPING_ANON) + if ((unsigned long)mapping & PAGE_MAPPING_FLAGS) return NULL; - return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS); + return mapping; } EXPORT_SYMBOL(folio_mapping); diff --git a/mm/z3fold.c b/mm/z3fold.c index f41f8b0d9e9a0..cf71da10d04e7 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -34,15 +34,11 @@ #include #include #include -#include -#include -#include #include #include #include #include #include -#include #include /* @@ -149,7 +145,6 @@ struct z3fold_header { * @compact_wq: workqueue for page layout background optimization * @release_wq: workqueue for safe page release * @work: work_struct for safe page release - * @inode: inode for z3fold pseudo filesystem * * This structure is allocated at pool creation time and maintains metadata * pertaining to a particular z3fold pool. @@ -169,7 +164,6 @@ struct z3fold_pool { struct workqueue_struct *compact_wq; struct workqueue_struct *release_wq; struct work_struct work; - struct inode *inode; }; /* @@ -334,54 +328,6 @@ static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr) } } -static int z3fold_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, Z3FOLD_MAGIC) ? 
0 : -ENOMEM; -} - -static struct file_system_type z3fold_fs = { - .name = "z3fold", - .init_fs_context = z3fold_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static struct vfsmount *z3fold_mnt; -static int __init z3fold_mount(void) -{ - int ret = 0; - - z3fold_mnt = kern_mount(&z3fold_fs); - if (IS_ERR(z3fold_mnt)) - ret = PTR_ERR(z3fold_mnt); - - return ret; -} - -static void z3fold_unmount(void) -{ - kern_unmount(z3fold_mnt); -} - -static const struct address_space_operations z3fold_aops; -static int z3fold_register_migration(struct z3fold_pool *pool) -{ - pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb); - if (IS_ERR(pool->inode)) { - pool->inode = NULL; - return 1; - } - - pool->inode->i_mapping->private_data = pool; - pool->inode->i_mapping->a_ops = &z3fold_aops; - return 0; -} - -static void z3fold_unregister_migration(struct z3fold_pool *pool) -{ - if (pool->inode) - iput(pool->inode); -} - /* Initializes the z3fold header of a newly allocated z3fold page */ static struct z3fold_header *init_z3fold_page(struct page *page, bool headless, struct z3fold_pool *pool, gfp_t gfp) @@ -1002,14 +948,10 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, pool->release_wq = create_singlethread_workqueue(pool->name); if (!pool->release_wq) goto out_wq; - if (z3fold_register_migration(pool)) - goto out_rwq; INIT_WORK(&pool->work, free_pages_work); pool->ops = ops; return pool; -out_rwq: - destroy_workqueue(pool->release_wq); out_wq: destroy_workqueue(pool->compact_wq); out_unbuddied: @@ -1043,11 +985,12 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool) destroy_workqueue(pool->compact_wq); destroy_workqueue(pool->release_wq); - z3fold_unregister_migration(pool); free_percpu(pool->unbuddied); kfree(pool); } +static const struct movable_operations z3fold_mops; + /** * z3fold_alloc() - allocates a region of a given size * @pool: z3fold pool from which to allocate @@ -1117,11 +1060,11 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, } if (can_sleep) { lock_page(page); - __SetPageMovable(page, pool->inode->i_mapping); + __SetPageMovable(page, &z3fold_mops); unlock_page(page); } else { WARN_ON(!trylock_page(page)); - __SetPageMovable(page, pool->inode->i_mapping); + __SetPageMovable(page, &z3fold_mops); unlock_page(page); } z3fold_page_lock(zhdr); @@ -1554,12 +1497,11 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) return false; } -static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) +static int z3fold_page_migrate(struct page *newpage, struct page *page, + enum migrate_mode mode) { struct z3fold_header *zhdr, *new_zhdr; struct z3fold_pool *pool; - struct address_space *new_mapping; VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); @@ -1592,7 +1534,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa * so we only have to reinitialize it. 
*/ INIT_LIST_HEAD(&new_zhdr->buddy); - new_mapping = page_mapping(page); __ClearPageMovable(page); get_page(newpage); @@ -1608,7 +1549,7 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa spin_lock(&pool->lock); list_add(&newpage->lru, &pool->lru); spin_unlock(&pool->lock); - __SetPageMovable(newpage, new_mapping); + __SetPageMovable(newpage, &z3fold_mops); z3fold_page_unlock(new_zhdr); queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); @@ -1642,9 +1583,9 @@ static void z3fold_page_putback(struct page *page) z3fold_page_unlock(zhdr); } -static const struct address_space_operations z3fold_aops = { +static const struct movable_operations z3fold_mops = { .isolate_page = z3fold_page_isolate, - .migratepage = z3fold_page_migrate, + .migrate_page = z3fold_page_migrate, .putback_page = z3fold_page_putback, }; @@ -1746,17 +1687,11 @@ MODULE_ALIAS("zpool-z3fold"); static int __init init_z3fold(void) { - int ret; - /* * Make sure the z3fold header is not larger than the page size and * there has remaining spaces for its buddy. */ BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE); - ret = z3fold_mount(); - if (ret) - return ret; - zpool_register_driver(&z3fold_zpool_driver); return 0; @@ -1764,7 +1699,6 @@ static int __init init_z3fold(void) static void __exit exit_z3fold(void) { - z3fold_unmount(); zpool_unregister_driver(&z3fold_zpool_driver); } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5d5fc04385b8d..71d6edcbea488 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -59,8 +58,6 @@ #include #include #include -#include -#include #include #include #include @@ -177,10 +174,6 @@ struct zs_size_stat { static struct dentry *zs_stat_root; #endif -#ifdef CONFIG_COMPACTION -static struct vfsmount *zsmalloc_mnt; -#endif - /* * We assign a page to ZS_ALMOST_EMPTY fullness group when: * n <= N / f, where @@ -252,7 +245,6 @@ struct zs_pool { struct dentry *stat_dentry; #endif #ifdef CONFIG_COMPACTION - struct inode *inode; struct work_struct free_work; #endif /* protect page/zspage migration */ @@ -271,6 +263,7 @@ struct zspage { unsigned int freeobj; struct page *first_page; struct list_head list; /* fullness list */ + struct zs_pool *pool; #ifdef CONFIG_COMPACTION rwlock_t lock; #endif @@ -295,8 +288,6 @@ static bool ZsHugePage(struct zspage *zspage) } #ifdef CONFIG_COMPACTION -static int zs_register_migration(struct zs_pool *pool); -static void zs_unregister_migration(struct zs_pool *pool); static void migrate_lock_init(struct zspage *zspage); static void migrate_read_lock(struct zspage *zspage); static void migrate_read_unlock(struct zspage *zspage); @@ -307,10 +298,6 @@ static void kick_deferred_free(struct zs_pool *pool); static void init_deferred_free(struct zs_pool *pool); static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage); #else -static int zsmalloc_mount(void) { return 0; } -static void zsmalloc_unmount(void) {} -static int zs_register_migration(struct zs_pool *pool) { return 0; } -static void zs_unregister_migration(struct zs_pool *pool) {} static void migrate_lock_init(struct zspage *zspage) {} static void migrate_read_lock(struct zspage *zspage) {} static void migrate_read_unlock(struct zspage *zspage) {} @@ -1083,6 +1070,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool, create_page_chain(class, zspage, pages); init_zspage(class, zspage); + zspage->pool = pool; return zspage; } @@ -1754,33 +1742,6 @@ static void 
lock_zspage(struct zspage *zspage) migrate_read_unlock(zspage); } -static int zs_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type zsmalloc_fs = { - .name = "zsmalloc", - .init_fs_context = zs_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static int zsmalloc_mount(void) -{ - int ret = 0; - - zsmalloc_mnt = kern_mount(&zsmalloc_fs); - if (IS_ERR(zsmalloc_mnt)) - ret = PTR_ERR(zsmalloc_mnt); - - return ret; -} - -static void zsmalloc_unmount(void) -{ - kern_unmount(zsmalloc_mnt); -} - static void migrate_lock_init(struct zspage *zspage) { rwlock_init(&zspage->lock); @@ -1823,6 +1784,8 @@ static void dec_zspage_isolation(struct zspage *zspage) zspage->isolated--; } +static const struct movable_operations zsmalloc_mops; + static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -1843,7 +1806,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage, set_first_obj_offset(newpage, get_first_obj_offset(oldpage)); if (unlikely(ZsHugePage(zspage))) newpage->index = oldpage->index; - __SetPageMovable(newpage, page_mapping(oldpage)); + __SetPageMovable(newpage, &zsmalloc_mops); } static bool zs_page_isolate(struct page *page, isolate_mode_t mode) @@ -1865,8 +1828,8 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode) return true; } -static int zs_page_migrate(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) +static int zs_page_migrate(struct page *newpage, struct page *page, + enum migrate_mode mode) { struct zs_pool *pool; struct size_class *class; @@ -1889,14 +1852,15 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); - pool = mapping->private_data; + /* The page is locked, so this pointer must remain valid */ + zspage = get_zspage(page); + pool = zspage->pool; /* * The pool migrate_lock protects the race between zpage migration * and zs_free. */ write_lock(&pool->migrate_lock); - zspage = get_zspage(page); class = zspage_class(pool, zspage); /* @@ -1964,31 +1928,12 @@ static void zs_page_putback(struct page *page) migrate_write_unlock(zspage); } -static const struct address_space_operations zsmalloc_aops = { +static const struct movable_operations zsmalloc_mops = { .isolate_page = zs_page_isolate, - .migratepage = zs_page_migrate, + .migrate_page = zs_page_migrate, .putback_page = zs_page_putback, }; -static int zs_register_migration(struct zs_pool *pool) -{ - pool->inode = alloc_anon_inode(zsmalloc_mnt->mnt_sb); - if (IS_ERR(pool->inode)) { - pool->inode = NULL; - return 1; - } - - pool->inode->i_mapping->private_data = pool; - pool->inode->i_mapping->a_ops = &zsmalloc_aops; - return 0; -} - -static void zs_unregister_migration(struct zs_pool *pool) -{ - flush_work(&pool->free_work); - iput(pool->inode); -} - /* * Caller should hold page_lock of all pages in the zspage * In here, we cannot use zspage meta data. 
@@ -2032,6 +1977,11 @@ static void kick_deferred_free(struct zs_pool *pool) schedule_work(&pool->free_work); } +static void zs_flush_migration(struct zs_pool *pool) +{ + flush_work(&pool->free_work); +} + static void init_deferred_free(struct zs_pool *pool) { INIT_WORK(&pool->free_work, async_free_zspage); @@ -2043,10 +1993,12 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) do { WARN_ON(!trylock_page(page)); - __SetPageMovable(page, pool->inode->i_mapping); + __SetPageMovable(page, &zsmalloc_mops); unlock_page(page); } while ((page = get_next_page(page)) != NULL); } +#else +static inline void zs_flush_migration(struct zs_pool *pool) { } #endif /* @@ -2324,9 +2276,6 @@ struct zs_pool *zs_create_pool(const char *name) /* debug only, don't abort if it fails */ zs_pool_stat_create(pool, name); - if (zs_register_migration(pool)) - goto err; - /* * Not critical since shrinker is only used to trigger internal * defragmentation of the pool which is pretty optional thing. If @@ -2348,7 +2297,7 @@ void zs_destroy_pool(struct zs_pool *pool) int i; zs_unregister_shrinker(pool); - zs_unregister_migration(pool); + zs_flush_migration(pool); zs_pool_stat_destroy(pool); for (i = 0; i < ZS_SIZE_CLASSES; i++) { @@ -2380,14 +2329,10 @@ static int __init zs_init(void) { int ret; - ret = zsmalloc_mount(); - if (ret) - goto out; - ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare", zs_cpu_prepare, zs_cpu_dead); if (ret) - goto hp_setup_fail; + goto out; #ifdef CONFIG_ZPOOL zpool_register_driver(&zs_zpool_driver); @@ -2397,8 +2342,6 @@ static int __init zs_init(void) return 0; -hp_setup_fail: - zsmalloc_unmount(); out: return ret; } @@ -2408,7 +2351,6 @@ static void __exit zs_exit(void) #ifdef CONFIG_ZPOOL zpool_unregister_driver(&zs_zpool_driver); #endif - zsmalloc_unmount(); cpuhp_remove_state(CPUHP_MM_ZS_PREPARE); zs_stat_exit(); From 662389777689c17a77849af822bac1677be56e37 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 09:00:16 -0400 Subject: [PATCH 19/41] fs: Add aops->migrate_folio Provide a folio-based replacement for aops->migratepage. Update the documentation to document migrate_folio instead of migratepage. 
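For illustration only (this snippet is not part of the patch and the
function name is made up): a filesystem that keeps no private data on
its folios could implement the new method along these lines, using the
folio migration helpers that already exist in mm/migrate.c:

	static int example_migrate_folio(struct address_space *mapping,
			struct folio *dst, struct folio *src,
			enum migrate_mode mode)
	{
		int ret;

		/* Replace src with dst in the page cache, moving the refcount. */
		ret = folio_migrate_mapping(mapping, dst, src, 0);
		if (ret != MIGRATEPAGE_SUCCESS)
			return ret;

		/*
		 * MIGRATE_SYNC_NO_COPY means the caller copies the data
		 * itself; otherwise copy both contents and flags here.
		 */
		if (mode != MIGRATE_SYNC_NO_COPY)
			folio_migrate_copy(dst, src);
		else
			folio_migrate_flags(dst, src);
		return MIGRATEPAGE_SUCCESS;
	}
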
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- Documentation/filesystems/locking.rst | 5 +++-- Documentation/filesystems/vfs.rst | 14 +++++++------- include/linux/fs.h | 4 +++- mm/compaction.c | 4 +++- mm/migrate.c | 11 +++++++---- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 9963d9600b717..4bb2627026ec8 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -252,7 +252,8 @@ prototypes:: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); int (*direct_IO)(struct kiocb *, struct iov_iter *iter); - int (*migratepage)(struct address_space *, struct page *, struct page *); + int (*migrate_folio)(struct address_space *, struct folio *dst, + struct folio *src, enum migrate_mode); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count); int (*error_remove_page)(struct address_space *, struct page *); @@ -278,7 +279,7 @@ invalidate_folio: yes exclusive release_folio: yes free_folio: yes direct_IO: -migratepage: yes (both) +migrate_folio: yes (both) launder_folio: yes is_partially_uptodate: yes error_remove_page: yes diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index b51665cdabc46..6cd6953e175b3 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -737,8 +737,8 @@ cache in your filesystem. The following members are defined: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); - /* migrate the contents of a page to the specified target */ - int (*migratepage) (struct page *, struct page *); + int (*migrate_folio)(struct mapping *, struct folio *dst, + struct folio *src, enum migrate_mode); int (*launder_folio) (struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, @@ -926,12 +926,12 @@ cache in your filesystem. The following members are defined: data directly between the storage and the application's address space. -``migrate_page`` +``migrate_folio`` This is used to compact the physical memory usage. If the VM - wants to relocate a page (maybe off a memory card that is - signalling imminent failure) it will pass a new page and an old - page to this function. migrate_page should transfer any private - data across and update any references that it has to the page. + wants to relocate a folio (maybe from a memory device that is + signalling imminent failure) it will pass a new folio and an old + folio to this function. migrate_folio should transfer any private + data across and update any references that it has to the folio. ``launder_folio`` Called before freeing a folio - it writes back the dirty folio. diff --git a/include/linux/fs.h b/include/linux/fs.h index 5d8ee3155ca2e..47431cf8fbb3d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -362,9 +362,11 @@ struct address_space_operations { void (*free_folio)(struct folio *folio); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* - * migrate the contents of a page to the specified target. If + * migrate the contents of a folio to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. 
*/ + int (*migrate_folio)(struct address_space *, struct folio *dst, + struct folio *src, enum migrate_mode); int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); int (*launder_folio)(struct folio *); diff --git a/mm/compaction.c b/mm/compaction.c index f23efba1d118c..458f49f9ab09b 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1042,7 +1042,9 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, goto isolate_fail_put; mapping = page_mapping(page); - migrate_dirty = !mapping || mapping->a_ops->migratepage; + migrate_dirty = !mapping || + mapping->a_ops->migrate_folio || + mapping->a_ops->migratepage; unlock_page(page); if (!migrate_dirty) goto isolate_fail_put; diff --git a/mm/migrate.c b/mm/migrate.c index 491f037478324..3c3c168097dd5 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -856,14 +856,17 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, if (!mapping) rc = migrate_page(mapping, &dst->page, &src->page, mode); - else if (mapping->a_ops->migratepage) + else if (mapping->a_ops->migrate_folio) /* - * Most pages have a mapping and most filesystems - * provide a migratepage callback. Anonymous pages + * Most folios have a mapping and most filesystems + * provide a migrate_folio callback. Anonymous folios * are part of swap space which also has its own - * migratepage callback. This is the most common path + * migrate_folio callback. This is the most common path * for page migration. */ + rc = mapping->a_ops->migrate_folio(mapping, dst, src, + mode); + else if (mapping->a_ops->migratepage) rc = mapping->a_ops->migratepage(mapping, &dst->page, &src->page, mode); else From 765acf9085a1188244ec6294e3039637685c209e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 09:34:36 -0400 Subject: [PATCH 20/41] mm/migrate: Convert fallback_migrate_page() to fallback_migrate_folio() Use a folio throughout. migrate_page() will be converted to migrate_folio() later. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/migrate.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 3c3c168097dd5..c5278440f74dc 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -805,11 +805,11 @@ static int writeout(struct address_space *mapping, struct page *page) /* * Default handling if a filesystem does not provide a migration function. */ -static int fallback_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) +static int fallback_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { - if (PageDirty(page)) { - /* Only writeback pages in full synchronous migration */ + if (folio_test_dirty(src)) { + /* Only writeback folios in full synchronous migration */ switch (mode) { case MIGRATE_SYNC: case MIGRATE_SYNC_NO_COPY: @@ -817,18 +817,18 @@ static int fallback_migrate_page(struct address_space *mapping, default: return -EBUSY; } - return writeout(mapping, page); + return writeout(mapping, &src->page); } /* * Buffers may be managed in a filesystem specific way. * We must have no buffers or drop them. */ - if (page_has_private(page) && - !try_to_release_page(page, GFP_KERNEL)) + if (folio_test_private(src) && + !filemap_release_folio(src, GFP_KERNEL)) return mode == MIGRATE_SYNC ? 
-EAGAIN : -EBUSY; - return migrate_page(mapping, newpage, page, mode); + return migrate_page(mapping, &dst->page, &src->page, mode); } /* @@ -870,8 +870,7 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, rc = mapping->a_ops->migratepage(mapping, &dst->page, &src->page, mode); else - rc = fallback_migrate_page(mapping, &dst->page, - &src->page, mode); + rc = fallback_migrate_folio(mapping, dst, src, mode); } else { const struct movable_operations *mops; From 7d474706ff4fb035f896710fa1274e3050afb461 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 09:41:03 -0400 Subject: [PATCH 21/41] mm/migrate: Convert writeout() to take a folio Use a folio throughout this function. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/migrate.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index c5278440f74dc..75b171425c458 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -761,11 +761,10 @@ int buffer_migrate_page_norefs(struct address_space *mapping, #endif /* - * Writeback a page to clean the dirty state + * Writeback a folio to clean the dirty state */ -static int writeout(struct address_space *mapping, struct page *page) +static int writeout(struct address_space *mapping, struct folio *folio) { - struct folio *folio = page_folio(page); struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .nr_to_write = 1, @@ -779,25 +778,25 @@ static int writeout(struct address_space *mapping, struct page *page) /* No write method for the address space */ return -EINVAL; - if (!clear_page_dirty_for_io(page)) + if (!folio_clear_dirty_for_io(folio)) /* Someone else already triggered a write */ return -EAGAIN; /* - * A dirty page may imply that the underlying filesystem has - * the page on some queue. So the page must be clean for - * migration. Writeout may mean we loose the lock and the - * page state is no longer what we checked for earlier. + * A dirty folio may imply that the underlying filesystem has + * the folio on some queue. So the folio must be clean for + * migration. Writeout may mean we lose the lock and the + * folio state is no longer what we checked for earlier. * At this point we know that the migration attempt cannot * be successful. */ remove_migration_ptes(folio, folio, false); - rc = mapping->a_ops->writepage(page, &wbc); + rc = mapping->a_ops->writepage(&folio->page, &wbc); if (rc != AOP_WRITEPAGE_ACTIVATE) /* unlocked. Relock */ - lock_page(page); + folio_lock(folio); return (rc < 0) ? -EIO : -EAGAIN; } @@ -817,7 +816,7 @@ static int fallback_migrate_folio(struct address_space *mapping, default: return -EBUSY; } - return writeout(mapping, &src->page); + return writeout(mapping, src); } /* From e267a3198014ec93389b1930b776047082bb271f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:20:31 -0400 Subject: [PATCH 22/41] mm/migrate: Convert buffer_migrate_page() to buffer_migrate_folio() Use a folio throughout __buffer_migrate_folio(), add kernel-doc for buffer_migrate_folio() and buffer_migrate_folio_norefs(), move their declarations to buffer.h and switch all filesystems that have wired them up. 
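One convention worth noting (used here and by several of the later
conversions in this series): when CONFIG_MIGRATION is disabled, the
helpers are defined to NULL rather than left undeclared, so an
address_space_operations initializer can reference them without any
#ifdef of its own. A minimal sketch, with hypothetical names:

	#ifdef CONFIG_MIGRATION
	int example_migrate_folio(struct address_space *, struct folio *dst,
			struct folio *src, enum migrate_mode);
	#else
	#define example_migrate_folio NULL
	#endif

	static const struct address_space_operations example_aops = {
		/* other methods elided */
		.migrate_folio	= example_migrate_folio,
	};
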
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- block/fops.c | 2 +- fs/ext2/inode.c | 4 +- fs/ext4/inode.c | 4 +- fs/ntfs/aops.c | 6 +-- fs/ocfs2/aops.c | 2 +- include/linux/buffer_head.h | 10 +++++ include/linux/fs.h | 12 ------ mm/migrate.c | 76 ++++++++++++++++++++++--------------- 8 files changed, 65 insertions(+), 51 deletions(-) diff --git a/block/fops.c b/block/fops.c index d6b3276a6c680..743fc46d0aad7 100644 --- a/block/fops.c +++ b/block/fops.c @@ -417,7 +417,7 @@ const struct address_space_operations def_blk_aops = { .write_end = blkdev_write_end, .writepages = blkdev_writepages, .direct_IO = blkdev_direct_IO, - .migratepage = buffer_migrate_page_norefs, + .migrate_folio = buffer_migrate_folio_norefs, .is_dirty_writeback = buffer_check_dirty_writeback, }; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e6b932219803e..58a9d061f17d1 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -973,7 +973,7 @@ const struct address_space_operations ext2_aops = { .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -989,7 +989,7 @@ const struct address_space_operations ext2_nobh_aops = { .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 06cc688781766..87a8b4382bce0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3633,7 +3633,7 @@ static const struct address_space_operations ext4_aops = { .invalidate_folio = ext4_invalidate_folio, .release_folio = ext4_release_folio, .direct_IO = noop_direct_IO, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .swap_activate = ext4_iomap_swap_activate, @@ -3668,7 +3668,7 @@ static const struct address_space_operations ext4_da_aops = { .invalidate_folio = ext4_invalidate_folio, .release_folio = ext4_release_folio, .direct_IO = noop_direct_IO, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .swap_activate = ext4_iomap_swap_activate, diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 9e3964ea2ea03..5f4fb6ca6f2e9 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1659,7 +1659,7 @@ const struct address_space_operations ntfs_normal_aops = { .dirty_folio = block_dirty_folio, #endif /* NTFS_RW */ .bmap = ntfs_bmap, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -1673,7 +1673,7 @@ const struct address_space_operations ntfs_compressed_aops = { .writepage = ntfs_writepage, .dirty_folio = block_dirty_folio, #endif /* NTFS_RW */ - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -1688,7 +1688,7 @@ const struct address_space_operations ntfs_mst_aops = { .writepage = ntfs_writepage, /* Write dirty page to disk. 
*/ .dirty_folio = filemap_dirty_folio, #endif /* NTFS_RW */ - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 767df51f8657a..1d489003f99dc 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2462,7 +2462,7 @@ const struct address_space_operations ocfs2_aops = { .direct_IO = ocfs2_direct_IO, .invalidate_folio = block_invalidate_folio, .release_folio = ocfs2_release_folio, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c9d1463bb20f3..b0366c89d6a4d 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -267,6 +267,16 @@ int nobh_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_writepage(struct page *page, get_block_t *get_block, struct writeback_control *wbc); +#ifdef CONFIG_MIGRATION +extern int buffer_migrate_folio(struct address_space *, + struct folio *dst, struct folio *src, enum migrate_mode); +extern int buffer_migrate_folio_norefs(struct address_space *, + struct folio *dst, struct folio *src, enum migrate_mode); +#else +#define buffer_migrate_folio NULL +#define buffer_migrate_folio_norefs NULL +#endif + void buffer_init(void); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 47431cf8fbb3d..9e6b17da4e11b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3215,18 +3215,6 @@ extern int generic_check_addressable(unsigned, u64); extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); -#ifdef CONFIG_MIGRATION -extern int buffer_migrate_page(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -extern int buffer_migrate_page_norefs(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -#else -#define buffer_migrate_page NULL -#define buffer_migrate_page_norefs NULL -#endif - int may_setattr(struct user_namespace *mnt_userns, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *); diff --git a/mm/migrate.c b/mm/migrate.c index 75b171425c458..ea5398d0f7f1f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -656,23 +656,23 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, return true; } -static int __buffer_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode, +static int __buffer_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode, bool check_refs) { struct buffer_head *bh, *head; int rc; int expected_count; - if (!page_has_buffers(page)) - return migrate_page(mapping, newpage, page, mode); + head = folio_buffers(src); + if (!head) + return migrate_page(mapping, &dst->page, &src->page, mode); /* Check whether page does not have extra refs before we do more work */ - expected_count = expected_page_refs(mapping, page); - if (page_count(page) != expected_count) + expected_count = expected_page_refs(mapping, &src->page); + if (folio_ref_count(src) != expected_count) return -EAGAIN; - head = page_buffers(page); if (!buffer_migrate_lock_buffers(head, mode)) return -EAGAIN; @@ -703,23 +703,22 @@ static int __buffer_migrate_page(struct address_space *mapping, } } - rc = 
migrate_page_move_mapping(mapping, newpage, page, 0); + rc = folio_migrate_mapping(mapping, dst, src, 0); if (rc != MIGRATEPAGE_SUCCESS) goto unlock_buffers; - attach_page_private(newpage, detach_page_private(page)); + folio_attach_private(dst, folio_detach_private(src)); bh = head; do { - set_bh_page(bh, newpage, bh_offset(bh)); + set_bh_page(bh, &dst->page, bh_offset(bh)); bh = bh->b_this_page; - } while (bh != head); if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(dst, src); else - migrate_page_states(newpage, page); + folio_migrate_flags(dst, src); rc = MIGRATEPAGE_SUCCESS; unlock_buffers: @@ -729,34 +728,51 @@ static int __buffer_migrate_page(struct address_space *mapping, do { unlock_buffer(bh); bh = bh->b_this_page; - } while (bh != head); return rc; } -/* - * Migration function for pages with buffers. This function can only be used - * if the underlying filesystem guarantees that no other references to "page" - * exist. For example attached buffer heads are accessed only under page lock. +/** + * buffer_migrate_folio() - Migration function for folios with buffers. + * @mapping: The address space containing @src. + * @dst: The folio to migrate to. + * @src: The folio to migrate from. + * @mode: How to migrate the folio. + * + * This function can only be used if the underlying filesystem guarantees + * that no other references to @src exist. For example attached buffer + * heads are accessed only under the folio lock. If your filesystem cannot + * provide this guarantee, buffer_migrate_folio_norefs() may be more + * appropriate. + * + * Return: 0 on success or a negative errno on failure. */ -int buffer_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) +int buffer_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { - return __buffer_migrate_page(mapping, newpage, page, mode, false); + return __buffer_migrate_folio(mapping, dst, src, mode, false); } -EXPORT_SYMBOL(buffer_migrate_page); +EXPORT_SYMBOL(buffer_migrate_folio); -/* - * Same as above except that this variant is more careful and checks that there - * are also no buffer head references. This function is the right one for - * mappings where buffer heads are directly looked up and referenced (such as - * block device mappings). +/** + * buffer_migrate_folio_norefs() - Migration function for folios with buffers. + * @mapping: The address space containing @src. + * @dst: The folio to migrate to. + * @src: The folio to migrate from. + * @mode: How to migrate the folio. + * + * Like buffer_migrate_folio() except that this variant is more careful + * and checks that there are also no buffer head references. This function + * is the right one for mappings where buffer heads are directly looked + * up and referenced (such as block device mappings). + * + * Return: 0 on success or a negative errno on failure. 
*/ -int buffer_migrate_page_norefs(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) +int buffer_migrate_folio_norefs(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { - return __buffer_migrate_page(mapping, newpage, page, mode, true); + return __buffer_migrate_folio(mapping, dst, src, mode, true); } #endif From 55553f3437c8b185b71fe4bd8106141a4e4192de Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 16:25:10 -0400 Subject: [PATCH 23/41] mm/migrate: Convert expected_page_refs() to folio_expected_refs() Now that both callers have a folio, convert this function to take a folio & rename it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/migrate.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index ea5398d0f7f1f..61cd8d270b030 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -336,13 +336,18 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) } #endif -static int expected_page_refs(struct address_space *mapping, struct page *page) +static int folio_expected_refs(struct address_space *mapping, + struct folio *folio) { - int expected_count = 1; + int refs = 1; + if (!mapping) + return refs; - if (mapping) - expected_count += compound_nr(page) + page_has_private(page); - return expected_count; + refs += folio_nr_pages(folio); + if (folio_test_private(folio)) + refs++; + + return refs; } /* @@ -359,7 +364,7 @@ int folio_migrate_mapping(struct address_space *mapping, XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct zone *oldzone, *newzone; int dirty; - int expected_count = expected_page_refs(mapping, &folio->page) + extra_count; + int expected_count = folio_expected_refs(mapping, folio) + extra_count; long nr = folio_nr_pages(folio); if (!mapping) { @@ -669,7 +674,7 @@ static int __buffer_migrate_folio(struct address_space *mapping, return migrate_page(mapping, &dst->page, &src->page, mode); /* Check whether page does not have extra refs before we do more work */ - expected_count = expected_page_refs(mapping, &src->page); + expected_count = folio_expected_refs(mapping, src); if (folio_ref_count(src) != expected_count) return -EAGAIN; From e8172b8e1728b41160dc8fef7fdd9ffbcaa152c6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 09:22:19 -0400 Subject: [PATCH 24/41] btrfs: Convert btree_migratepage to migrate_folio Use a folio throughout this function. migrate_page() will be converted later. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: David Sterba --- fs/btrfs/disk-io.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dbac856565718..e3ecc38a9ebcc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -952,28 +952,28 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_ } #ifdef CONFIG_MIGRATION -static int btree_migratepage(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode) +static int btree_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { /* * we can't safely write a btree page from here, * we haven't done the locking hook */ - if (PageDirty(page)) + if (folio_test_dirty(src)) return -EAGAIN; /* * Buffers may be managed in a filesystem specific way. 
* We must have no buffers or drop them. */ - if (page_has_private(page) && - !try_to_release_page(page, GFP_KERNEL)) + if (folio_get_private(src) && + !filemap_release_folio(src, GFP_KERNEL)) return -EAGAIN; - return migrate_page(mapping, newpage, page, mode); + return migrate_page(mapping, &dst->page, &src->page, mode); } +#else +#define btree_migrate_folio NULL #endif - static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1073,10 +1073,8 @@ static const struct address_space_operations btree_aops = { .writepages = btree_writepages, .release_folio = btree_release_folio, .invalidate_folio = btree_invalidate_folio, -#ifdef CONFIG_MIGRATION - .migratepage = btree_migratepage, -#endif - .dirty_folio = btree_dirty_folio, + .migrate_folio = btree_migrate_folio, + .dirty_folio = btree_dirty_folio, }; struct extent_buffer *btrfs_find_create_tree_block( From 27826326e888a185d7d191670cf445dec88e9218 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 09:22:19 -0400 Subject: [PATCH 25/41] nfs: Convert to migrate_folio Use a folio throughout this function. migrate_page() will be converted later. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Anna Schumaker Reviewed-by: Christoph Hellwig --- fs/nfs/file.c | 4 +--- fs/nfs/internal.h | 6 ++++-- fs/nfs/write.c | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2d72b1b7ed74c..549baed763513 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -533,9 +533,7 @@ const struct address_space_operations nfs_file_aops = { .write_end = nfs_write_end, .invalidate_folio = nfs_invalidate_folio, .release_folio = nfs_release_folio, -#ifdef CONFIG_MIGRATION - .migratepage = nfs_migrate_page, -#endif + .migrate_folio = nfs_migrate_folio, .launder_folio = nfs_launder_folio, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8f8cd6e2d4dbc..437ebe544aafb 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -578,8 +578,10 @@ void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) #endif #ifdef CONFIG_MIGRATION -extern int nfs_migrate_page(struct address_space *, - struct page *, struct page *, enum migrate_mode); +int nfs_migrate_folio(struct address_space *, struct folio *dst, + struct folio *src, enum migrate_mode); +#else +#define nfs_migrate_folio NULL #endif static inline int diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1c706465d090b..649b9e6334597 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2119,27 +2119,27 @@ int nfs_wb_page(struct inode *inode, struct page *page) } #ifdef CONFIG_MIGRATION -int nfs_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) +int nfs_migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode) { /* - * If PagePrivate is set, then the page is currently associated with + * If the private flag is set, the folio is currently associated with * an in-progress read or write request. Don't try to migrate it. * * FIXME: we could do this in principle, but we'll need a way to ensure * that we can safely release the inode reference while holding - * the page lock. + * the folio lock. 
*/ - if (PagePrivate(page)) + if (folio_test_private(src)) return -EBUSY; - if (PageFsCache(page)) { + if (folio_test_fscache(src)) { if (mode == MIGRATE_ASYNC) return -EBUSY; - wait_on_page_fscache(page); + folio_wait_fscache(src); } - return migrate_page(mapping, newpage, page, mode); + return migrate_page(mapping, &dst->page, &src->page, mode); } #endif From af9c33968b722c5871974541067dc180377501df Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:27:41 -0400 Subject: [PATCH 26/41] mm/migrate: Convert migrate_page() to migrate_folio() Convert all callers to pass a folio. Most have the folio already available. Switch all users from aops->migratepage to aops->migrate_folio. Also turn the documentation into kerneldoc. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: David Sterba --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 4 +-- fs/btrfs/disk-io.c | 2 +- fs/nfs/write.c | 2 +- include/linux/migrate.h | 5 ++- mm/migrate.c | 37 +++++++++++---------- mm/migrate_device.c | 3 +- mm/shmem.c | 2 +- mm/swap_state.c | 2 +- 8 files changed, 30 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 094f06b4ce335..8423df021b713 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -216,8 +216,8 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, * However...! * * The mmu-notifier can be invalidated for a - * migrate_page, that is alreadying holding the lock - * on the page. Such a try_to_unmap() will result + * migrate_folio, that is alreadying holding the lock + * on the folio. Such a try_to_unmap() will result * in us calling put_pages() and so recursively try * to lock the page. 
We avoid that deadlock with * a trylock_page() and in exchange we risk missing diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e3ecc38a9ebcc..03bbb7e96bf34 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -968,7 +968,7 @@ static int btree_migrate_folio(struct address_space *mapping, if (folio_get_private(src) && !filemap_release_folio(src, GFP_KERNEL)) return -EAGAIN; - return migrate_page(mapping, &dst->page, &src->page, mode); + return migrate_folio(mapping, dst, src, mode); } #else #define btree_migrate_folio NULL diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 649b9e6334597..69569696dde0b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2139,7 +2139,7 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst, folio_wait_fscache(src); } - return migrate_page(mapping, &dst->page, &src->page, mode); + return migrate_folio(mapping, dst, src, mode); } #endif diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 82c735ba6109f..c9986d5da335f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -62,9 +62,8 @@ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); -extern int migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode); +int migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded); diff --git a/mm/migrate.c b/mm/migrate.c index 61cd8d270b030..77aeb7e12f629 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -593,34 +593,37 @@ EXPORT_SYMBOL(folio_migrate_copy); * Migration functions ***********************************************************/ -/* - * Common logic to directly migrate a single LRU page suitable for - * pages that do not use PagePrivate/PagePrivate2. +/** + * migrate_folio() - Simple folio migration. + * @mapping: The address_space containing the folio. + * @dst: The folio to migrate the data to. + * @src: The folio containing the current data. + * @mode: How to migrate the page. * - * Pages are locked upon entry and exit. + * Common logic to directly migrate a single LRU folio suitable for + * folios that do not use PagePrivate/PagePrivate2. + * + * Folios are locked upon entry and exit. 
*/ -int migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode) +int migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode) { - struct folio *newfolio = page_folio(newpage); - struct folio *folio = page_folio(page); int rc; - BUG_ON(folio_test_writeback(folio)); /* Writeback must be complete */ + BUG_ON(folio_test_writeback(src)); /* Writeback must be complete */ - rc = folio_migrate_mapping(mapping, newfolio, folio, 0); + rc = folio_migrate_mapping(mapping, dst, src, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; if (mode != MIGRATE_SYNC_NO_COPY) - folio_migrate_copy(newfolio, folio); + folio_migrate_copy(dst, src); else - folio_migrate_flags(newfolio, folio); + folio_migrate_flags(dst, src); return MIGRATEPAGE_SUCCESS; } -EXPORT_SYMBOL(migrate_page); +EXPORT_SYMBOL(migrate_folio); #ifdef CONFIG_BLOCK /* Returns true if all buffers are successfully locked */ @@ -671,7 +674,7 @@ static int __buffer_migrate_folio(struct address_space *mapping, head = folio_buffers(src); if (!head) - return migrate_page(mapping, &dst->page, &src->page, mode); + return migrate_folio(mapping, dst, src, mode); /* Check whether page does not have extra refs before we do more work */ expected_count = folio_expected_refs(mapping, src); @@ -848,7 +851,7 @@ static int fallback_migrate_folio(struct address_space *mapping, !filemap_release_folio(src, GFP_KERNEL)) return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY; - return migrate_page(mapping, &dst->page, &src->page, mode); + return migrate_folio(mapping, dst, src, mode); } /* @@ -875,7 +878,7 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, struct address_space *mapping = folio_mapping(src); if (!mapping) - rc = migrate_page(mapping, &dst->page, &src->page, mode); + rc = migrate_folio(mapping, dst, src, mode); else if (mapping->a_ops->migrate_folio) /* * Most folios have a mapping and most filesystems diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 5052093d0262d..5dd97c39ca6ae 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -718,7 +718,8 @@ void migrate_vma_pages(struct migrate_vma *migrate) continue; } - r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY); + r = migrate_folio(mapping, page_folio(newpage), + page_folio(page), MIGRATE_SYNC_NO_COPY); if (r != MIGRATEPAGE_SUCCESS) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; } diff --git a/mm/shmem.c b/mm/shmem.c index 28a62be1d41e5..15c61456e0875 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3801,7 +3801,7 @@ const struct address_space_operations shmem_aops = { .write_end = shmem_write_end, #endif #ifdef CONFIG_MIGRATION - .migratepage = migrate_page, + .migrate_folio = migrate_folio, #endif .error_remove_page = shmem_error_remove_page, }; diff --git a/mm/swap_state.c b/mm/swap_state.c index f5b6f56389080..0a2021fc55ade 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = { .writepage = swap_writepage, .dirty_folio = noop_dirty_folio, #ifdef CONFIG_MIGRATION - .migratepage = migrate_page, + .migrate_folio = migrate_folio, #endif }; From 441b3afcb2e31aca89f6e2cd6642f141c7bbe142 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 12:55:08 -0400 Subject: [PATCH 27/41] mm/migrate: Add filemap_migrate_folio() There is nothing iomap-specific about iomap_migratepage(), and it fits a pattern used by several other filesystems, so move it to mm/migrate.c, convert it to be 
filemap_migrate_folio() and convert the iomap filesystems to use it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/gfs2/aops.c | 2 +- fs/iomap/buffered-io.c | 25 ------------------------- fs/xfs/xfs_aops.c | 2 +- fs/zonefs/super.c | 2 +- include/linux/iomap.h | 6 ------ include/linux/pagemap.h | 6 ++++++ mm/migrate.c | 20 ++++++++++++++++++++ 7 files changed, 29 insertions(+), 34 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 106e90a365838..57ff883d432c7 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -774,7 +774,7 @@ static const struct address_space_operations gfs2_aops = { .invalidate_folio = iomap_invalidate_folio, .bmap = gfs2_bmap, .direct_IO = noop_direct_IO, - .migratepage = iomap_migrate_page, + .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 66278a14bfa7c..5a91aa1db945b 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -489,31 +489,6 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len) } EXPORT_SYMBOL_GPL(iomap_invalidate_folio); -#ifdef CONFIG_MIGRATION -int -iomap_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) -{ - struct folio *folio = page_folio(page); - struct folio *newfolio = page_folio(newpage); - int ret; - - ret = folio_migrate_mapping(mapping, newfolio, folio, 0); - if (ret != MIGRATEPAGE_SUCCESS) - return ret; - - if (folio_test_private(folio)) - folio_attach_private(newfolio, folio_detach_private(folio)); - - if (mode != MIGRATE_SYNC_NO_COPY) - folio_migrate_copy(newfolio, folio); - else - folio_migrate_flags(newfolio, folio); - return MIGRATEPAGE_SUCCESS; -} -EXPORT_SYMBOL_GPL(iomap_migrate_page); -#endif /* CONFIG_MIGRATION */ - static void iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) { diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 8ec38b25187bd..5d1a995b15f83 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -570,7 +570,7 @@ const struct address_space_operations xfs_address_space_operations = { .invalidate_folio = iomap_invalidate_folio, .bmap = xfs_vm_bmap, .direct_IO = noop_direct_IO, - .migratepage = iomap_migrate_page, + .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .swap_activate = xfs_iomap_swapfile_activate, diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 053299758deb9..cc6d4cf580ac6 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -271,7 +271,7 @@ static const struct address_space_operations zonefs_file_aops = { .dirty_folio = filemap_dirty_folio, .release_folio = iomap_release_folio, .invalidate_folio = iomap_invalidate_folio, - .migratepage = iomap_migrate_page, + .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .direct_IO = noop_direct_IO, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e552097c67e0b..758a1125e72fb 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -231,12 +231,6 @@ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct 
folio *folio, size_t offset, size_t len); -#ifdef CONFIG_MIGRATION -int iomap_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode); -#else -#define iomap_migrate_page NULL -#endif int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 87d4ea571240e..cc9adbaddb591 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1078,6 +1078,12 @@ static inline int __must_check write_one_page(struct page *page) int __set_page_dirty_nobuffers(struct page *page); bool noop_dirty_folio(struct address_space *mapping, struct folio *folio); +#ifdef CONFIG_MIGRATION +int filemap_migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode); +#else +#define filemap_migrate_folio NULL +#endif void page_endio(struct page *page, bool is_write, int err); void folio_end_private_2(struct folio *folio); diff --git a/mm/migrate.c b/mm/migrate.c index 77aeb7e12f629..4ed8f0d53c77c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -784,6 +784,26 @@ int buffer_migrate_folio_norefs(struct address_space *mapping, } #endif +int filemap_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) +{ + int ret; + + ret = folio_migrate_mapping(mapping, dst, src, 0); + if (ret != MIGRATEPAGE_SUCCESS) + return ret; + + if (folio_get_private(src)) + folio_attach_private(dst, folio_detach_private(src)); + + if (mode != MIGRATE_SYNC_NO_COPY) + folio_migrate_copy(dst, src); + else + folio_migrate_flags(dst, src); + return MIGRATEPAGE_SUCCESS; +} +EXPORT_SYMBOL_GPL(filemap_migrate_folio); + /* * Writeback a folio to clean the dirty state */ From 9c5161d1eefb24389a077b43e8e11322f2e4cd42 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:47:21 -0400 Subject: [PATCH 28/41] btrfs: Convert btrfs_migratepage to migrate_folio Use filemap_migrate_folio() to do the bulk of the work, and then copy the ordered flag across if needed. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: David Sterba --- fs/btrfs/inode.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 81737eff92f3d..5f41d869c6484 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8255,30 +8255,24 @@ static bool btrfs_release_folio(struct folio *folio, gfp_t gfp_flags) } #ifdef CONFIG_MIGRATION -static int btrfs_migratepage(struct address_space *mapping, - struct page *newpage, struct page *page, +static int btrfs_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { - int ret; + int ret = filemap_migrate_folio(mapping, dst, src, mode); - ret = migrate_page_move_mapping(mapping, newpage, page, 0); if (ret != MIGRATEPAGE_SUCCESS) return ret; - if (page_has_private(page)) - attach_page_private(newpage, detach_page_private(page)); - - if (PageOrdered(page)) { - ClearPageOrdered(page); - SetPageOrdered(newpage); + if (folio_test_ordered(src)) { + folio_clear_ordered(src); + folio_set_ordered(dst); } - if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); - else - migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } +#else +#define btrfs_migrate_folio NULL #endif static void btrfs_invalidate_folio(struct folio *folio, size_t offset, @@ -11422,9 +11416,7 @@ static const struct address_space_operations btrfs_aops = { .direct_IO = noop_direct_IO, .invalidate_folio = btrfs_invalidate_folio, .release_folio = btrfs_release_folio, -#ifdef CONFIG_MIGRATION - .migratepage = btrfs_migratepage, -#endif + .migrate_folio = btrfs_migrate_folio, .dirty_folio = filemap_dirty_folio, .error_remove_page = generic_error_remove_page, .swap_activate = btrfs_swap_activate, From 9f11d68b27211e6a73b0540dc757c91c01ae74bf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:47:21 -0400 Subject: [PATCH 29/41] ubifs: Convert to filemap_migrate_folio() filemap_migrate_folio() is a little more general than ubifs really needs, but it's better to share the code. 
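To make the "a little more general" point concrete: the private-data
handling in filemap_migrate_folio() (added earlier in this series)
amounts to

	if (folio_get_private(src))
		folio_attach_private(dst, folio_detach_private(src));

while ubifs, as the removed code below shows, only ever attaches the
constant (void *)1 as its private marker. Transferring the existing
value is therefore equivalent to detaching it from the source folio and
re-attaching 1 to the destination, so the generic helper is safe to use
here.
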
Signed-off-by: Matthew Wilcox (Oracle) --- fs/ubifs/file.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 04ced154960fa..f2353dd676ef0 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1461,29 +1461,6 @@ static bool ubifs_dirty_folio(struct address_space *mapping, return ret; } -#ifdef CONFIG_MIGRATION -static int ubifs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) -{ - int rc; - - rc = migrate_page_move_mapping(mapping, newpage, page, 0); - if (rc != MIGRATEPAGE_SUCCESS) - return rc; - - if (PagePrivate(page)) { - detach_page_private(page); - attach_page_private(newpage, (void *)1); - } - - if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); - else - migrate_page_states(newpage, page); - return MIGRATEPAGE_SUCCESS; -} -#endif - static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags) { struct inode *inode = folio->mapping->host; @@ -1649,10 +1626,8 @@ const struct address_space_operations ubifs_file_address_operations = { .write_end = ubifs_write_end, .invalidate_folio = ubifs_invalidate_folio, .dirty_folio = ubifs_dirty_folio, -#ifdef CONFIG_MIGRATION - .migratepage = ubifs_migrate_page, -#endif - .release_folio = ubifs_release_folio, + .migrate_folio = filemap_migrate_folio, + .release_folio = ubifs_release_folio, }; const struct inode_operations ubifs_file_inode_operations = { From 9c16c4c68213afc512aadb854c9603f4eff6b977 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:47:21 -0400 Subject: [PATCH 30/41] f2fs: Convert to filemap_migrate_folio() filemap_migrate_folio() fits f2fs's needs perfectly. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Chao Yu --- fs/f2fs/checkpoint.c | 4 +--- fs/f2fs/data.c | 40 +--------------------------------------- fs/f2fs/f2fs.h | 4 ---- fs/f2fs/node.c | 4 +--- 4 files changed, 3 insertions(+), 49 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6d8b2bf14de0f..8259e0fa97e1f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -463,9 +463,7 @@ const struct address_space_operations f2fs_meta_aops = { .dirty_folio = f2fs_dirty_meta_folio, .invalidate_folio = f2fs_invalidate_folio, .release_folio = f2fs_release_folio, -#ifdef CONFIG_MIGRATION - .migratepage = f2fs_migrate_page, -#endif + .migrate_folio = filemap_migrate_folio, }; static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7fcbcf9797372..318a3f91ad74b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3751,42 +3751,6 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) return blknr; } -#ifdef CONFIG_MIGRATION -#include - -int f2fs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) -{ - int rc, extra_count = 0; - - BUG_ON(PageWriteback(page)); - - rc = migrate_page_move_mapping(mapping, newpage, - page, extra_count); - if (rc != MIGRATEPAGE_SUCCESS) - return rc; - - /* guarantee to start from no stale private field */ - set_page_private(newpage, 0); - if (PagePrivate(page)) { - set_page_private(newpage, page_private(page)); - SetPagePrivate(newpage); - get_page(newpage); - - set_page_private(page, 0); - ClearPagePrivate(page); - put_page(page); - } - - if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); - else - migrate_page_states(newpage, page); - - return 
MIGRATEPAGE_SUCCESS; -} -#endif - #ifdef CONFIG_SWAP static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, unsigned int blkcnt) @@ -4018,15 +3982,13 @@ const struct address_space_operations f2fs_dblock_aops = { .write_begin = f2fs_write_begin, .write_end = f2fs_write_end, .dirty_folio = f2fs_dirty_data_folio, + .migrate_folio = filemap_migrate_folio, .invalidate_folio = f2fs_invalidate_folio, .release_folio = f2fs_release_folio, .direct_IO = noop_direct_IO, .bmap = f2fs_bmap, .swap_activate = f2fs_swap_activate, .swap_deactivate = f2fs_swap_deactivate, -#ifdef CONFIG_MIGRATION - .migratepage = f2fs_migrate_page, -#endif }; void f2fs_clear_page_cache_dirty_tag(struct page *page) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d9bbecd008d22..f258a1b6faed3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3764,10 +3764,6 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); bool f2fs_release_folio(struct folio *folio, gfp_t wait); -#ifdef CONFIG_MIGRATION -int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode); -#endif bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); void f2fs_clear_page_cache_dirty_tag(struct page *page); int f2fs_init_post_read_processing(void); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index cf6f7fc83c082..12bba66a8a300 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2165,9 +2165,7 @@ const struct address_space_operations f2fs_node_aops = { .dirty_folio = f2fs_dirty_node_folio, .invalidate_folio = f2fs_invalidate_folio, .release_folio = f2fs_release_folio, -#ifdef CONFIG_MIGRATION - .migratepage = f2fs_migrate_page, -#endif + .migrate_folio = filemap_migrate_folio, }; static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, From 9b553d25025600ecaeb903b9250279b50c1c6054 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:47:21 -0400 Subject: [PATCH 31/41] aio: Convert to migrate_folio Use a folio throughout this function. 
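The aio ring buffer keeps its pages in an array of struct page pointers, so the conversion below bridges between the two representations with page_folio() and &folio->page rather than converting the ring itself. A minimal sketch of that interop (the example_ring helpers are hypothetical):

	#include <linux/mm.h>
	#include <linux/pagemap.h>

	/* Store a folio in a legacy array of struct page pointers. */
	static void example_ring_store(struct page **ring, pgoff_t idx,
				       struct folio *folio)
	{
		ring[idx] = &folio->page;
	}

	/* Recover the folio from the stored head page. */
	static struct folio *example_ring_lookup(struct page **ring, pgoff_t idx)
	{
		return page_folio(ring[idx]);
	}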
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- fs/aio.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 3c249b9386327..a1911e86859c7 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -400,8 +400,8 @@ static const struct file_operations aio_ring_fops = { }; #if IS_ENABLED(CONFIG_MIGRATION) -static int aio_migratepage(struct address_space *mapping, struct page *new, - struct page *old, enum migrate_mode mode) +static int aio_migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode) { struct kioctx *ctx; unsigned long flags; @@ -435,10 +435,10 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, goto out; } - idx = old->index; + idx = src->index; if (idx < (pgoff_t)ctx->nr_pages) { - /* Make sure the old page hasn't already been changed */ - if (ctx->ring_pages[idx] != old) + /* Make sure the old folio hasn't already been changed */ + if (ctx->ring_pages[idx] != &src->page) rc = -EAGAIN; } else rc = -EINVAL; @@ -447,27 +447,27 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, goto out_unlock; /* Writeback must be complete */ - BUG_ON(PageWriteback(old)); - get_page(new); + BUG_ON(folio_test_writeback(src)); + folio_get(dst); - rc = migrate_page_move_mapping(mapping, new, old, 1); + rc = folio_migrate_mapping(mapping, dst, src, 1); if (rc != MIGRATEPAGE_SUCCESS) { - put_page(new); + folio_put(dst); goto out_unlock; } /* Take completion_lock to prevent other writes to the ring buffer - * while the old page is copied to the new. This prevents new + * while the old folio is copied to the new. This prevents new * events from being lost. */ spin_lock_irqsave(&ctx->completion_lock, flags); - migrate_page_copy(new, old); - BUG_ON(ctx->ring_pages[idx] != old); - ctx->ring_pages[idx] = new; + folio_migrate_copy(dst, src); + BUG_ON(ctx->ring_pages[idx] != &src->page); + ctx->ring_pages[idx] = &dst->page; spin_unlock_irqrestore(&ctx->completion_lock, flags); - /* The old page is no longer accessible. */ - put_page(old); + /* The old folio is no longer accessible. */ + folio_put(src); out_unlock: mutex_unlock(&ctx->ring_lock); @@ -475,13 +475,13 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, spin_unlock(&mapping->private_lock); return rc; } +#else +#define aio_migrate_folio NULL #endif static const struct address_space_operations aio_ctx_aops = { .dirty_folio = noop_dirty_folio, -#if IS_ENABLED(CONFIG_MIGRATION) - .migratepage = aio_migratepage, -#endif + .migrate_folio = aio_migrate_folio, }; static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) From 5567427fd70edcd4809a5b1df03f363e72f997bf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:47:21 -0400 Subject: [PATCH 32/41] hugetlb: Convert to migrate_folio This involves converting migrate_huge_page_move_mapping(). We also need a folio variant of hugetlb_set_page_subpool(), but that's for a later patch. 
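Until that later patch lands, the conversion below keeps passing &folio->page to the existing subpool helpers. A folio variant would presumably be a thin wrapper along these lines (hugetlb_set_folio_subpool is an assumed name, not something this series introduces):

	#include <linux/hugetlb.h>

	/* Hypothetical wrapper; this patch still uses &folio->page directly. */
	static inline void hugetlb_set_folio_subpool(struct folio *folio,
						     struct hugepage_subpool *subpool)
	{
		hugetlb_set_page_subpool(&folio->page, subpool);
	}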
Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Muchun Song Reviewed-by: Mike Kravetz --- fs/hugetlbfs/inode.c | 23 ++++++++++++++--------- include/linux/migrate.h | 6 +++--- mm/migrate.c | 18 +++++++++--------- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 14d33f725e059..eca1d0fabd7ea 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -954,28 +954,33 @@ static int hugetlbfs_symlink(struct user_namespace *mnt_userns, return error; } -static int hugetlbfs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, +#ifdef CONFIG_MIGRATION +static int hugetlbfs_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { int rc; - rc = migrate_huge_page_move_mapping(mapping, newpage, page); + rc = migrate_huge_page_move_mapping(mapping, dst, src); if (rc != MIGRATEPAGE_SUCCESS) return rc; - if (hugetlb_page_subpool(page)) { - hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page)); - hugetlb_set_page_subpool(page, NULL); + if (hugetlb_page_subpool(&src->page)) { + hugetlb_set_page_subpool(&dst->page, + hugetlb_page_subpool(&src->page)); + hugetlb_set_page_subpool(&src->page, NULL); } if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(dst, src); else - migrate_page_states(newpage, page); + folio_migrate_flags(dst, src); return MIGRATEPAGE_SUCCESS; } +#else +#define hugetlbfs_migrate_folio NULL +#endif static int hugetlbfs_error_remove_page(struct address_space *mapping, struct page *page) @@ -1142,7 +1147,7 @@ static const struct address_space_operations hugetlbfs_aops = { .write_begin = hugetlbfs_write_begin, .write_end = hugetlbfs_write_end, .dirty_folio = noop_dirty_folio, - .migratepage = hugetlbfs_migrate_page, + .migrate_folio = hugetlbfs_migrate_folio, .error_remove_page = hugetlbfs_error_remove_page, }; diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c9986d5da335f..13f793309b753 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -72,8 +72,8 @@ extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); -extern int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page); +int migrate_huge_page_move_mapping(struct address_space *mapping, + struct folio *dst, struct folio *src); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, @@ -104,7 +104,7 @@ static inline void migrate_page_copy(struct page *newpage, struct page *page) {} static inline int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) + struct folio *dst, struct folio *src) { return -ENOSYS; } diff --git a/mm/migrate.c b/mm/migrate.c index 4ed8f0d53c77c..0dd3ec9525b35 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -474,26 +474,26 @@ EXPORT_SYMBOL(folio_migrate_mapping); * of folio_migrate_mapping(). 
*/ int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) + struct folio *dst, struct folio *src) { - XA_STATE(xas, &mapping->i_pages, page_index(page)); + XA_STATE(xas, &mapping->i_pages, folio_index(src)); int expected_count; xas_lock_irq(&xas); - expected_count = 2 + page_has_private(page); - if (!page_ref_freeze(page, expected_count)) { + expected_count = 2 + folio_has_private(src); + if (!folio_ref_freeze(src, expected_count)) { xas_unlock_irq(&xas); return -EAGAIN; } - newpage->index = page->index; - newpage->mapping = page->mapping; + dst->index = src->index; + dst->mapping = src->mapping; - get_page(newpage); + folio_get(dst); - xas_store(&xas, newpage); + xas_store(&xas, dst); - page_ref_unfreeze(page, expected_count - 1); + folio_ref_unfreeze(src, expected_count - 1); xas_unlock_irq(&xas); From 48e7ede1b860e44edfd0231f888a7f16af86d6a0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 11:30:43 -0400 Subject: [PATCH 33/41] secretmem: Convert to migrate_folio This is little more than changing the types over; there's no real work being done in this function. Signed-off-by: Matthew Wilcox (Oracle) --- mm/secretmem.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/secretmem.c b/mm/secretmem.c index 1c7f1775b56e7..658a7486efa97 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -133,9 +133,8 @@ static const struct file_operations secretmem_fops = { .mmap = secretmem_mmap, }; -static int secretmem_migratepage(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode) +static int secretmem_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { return -EBUSY; } @@ -149,7 +148,7 @@ static void secretmem_free_folio(struct folio *folio) const struct address_space_operations secretmem_aops = { .dirty_folio = noop_dirty_folio, .free_folio = secretmem_free_folio, - .migratepage = secretmem_migratepage, + .migrate_folio = secretmem_migrate_folio, }; static int secretmem_setattr(struct user_namespace *mnt_userns, From c33b866a97842ac96f4373f737ba608dd157f08a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 11:53:31 -0400 Subject: [PATCH 34/41] fs: Remove aops->migratepage() With all users converted to migrate_folio(), remove this operation. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/fs.h | 2 -- mm/compaction.c | 5 ++--- mm/migrate.c | 3 --- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9e6b17da4e11b..7e06919b8f603 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -367,8 +367,6 @@ struct address_space_operations { */ int (*migrate_folio)(struct address_space *, struct folio *dst, struct folio *src, enum migrate_mode); - int (*migratepage) (struct address_space *, - struct page *, struct page *, enum migrate_mode); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); diff --git a/mm/compaction.c b/mm/compaction.c index 458f49f9ab09b..a2c53fcf933e6 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1031,7 +1031,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* * Only pages without mappings or that have a - * ->migratepage callback are possible to migrate + * ->migrate_folio callback are possible to migrate * without blocking. 
However, we can be racing with * truncation so it's necessary to lock the page * to stabilise the mapping as truncation holds @@ -1043,8 +1043,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, mapping = page_mapping(page); migrate_dirty = !mapping || - mapping->a_ops->migrate_folio || - mapping->a_ops->migratepage; + mapping->a_ops->migrate_folio; unlock_page(page); if (!migrate_dirty) goto isolate_fail_put; diff --git a/mm/migrate.c b/mm/migrate.c index 0dd3ec9525b35..1b4b977809a1c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -909,9 +909,6 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, */ rc = mapping->a_ops->migrate_folio(mapping, dst, src, mode); - else if (mapping->a_ops->migratepage) - rc = mapping->a_ops->migratepage(mapping, &dst->page, - &src->page, mode); else rc = fallback_migrate_folio(mapping, dst, src, mode); } else { From 84578adbb0e0657003e646e0af699ef74b99386e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 13:29:10 -0400 Subject: [PATCH 35/41] mm/folio-compat: Remove migration compatibility functions migrate_page_move_mapping(), migrate_page_copy() and migrate_page_states() are all now unused after converting all the filesystems from aops->migratepage() to aops->migrate_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/migrate.h | 11 ----------- mm/folio-compat.c | 22 ---------------------- mm/ksm.c | 2 +- 3 files changed, 1 insertion(+), 34 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 13f793309b753..ae5bb67a9ba1f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -70,12 +70,8 @@ extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); -extern void migrate_page_states(struct page *newpage, struct page *page); -extern void migrate_page_copy(struct page *newpage, struct page *page); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -extern int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count); void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, spinlock_t *ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); @@ -96,13 +92,6 @@ static inline struct page *alloc_migration_target(struct page *page, static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } -static inline void migrate_page_states(struct page *newpage, struct page *page) -{ -} - -static inline void migrate_page_copy(struct page *newpage, - struct page *page) {} - static inline int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src) { diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 20bc15b57d93e..458618c7302c3 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -51,28 +51,6 @@ void mark_page_accessed(struct page *page) } EXPORT_SYMBOL(mark_page_accessed); -#ifdef CONFIG_MIGRATION -int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count) -{ - return folio_migrate_mapping(mapping, page_folio(newpage), - page_folio(page), extra_count); -} -EXPORT_SYMBOL(migrate_page_move_mapping); - -void migrate_page_states(struct page 
*newpage, struct page *page) -{ - folio_migrate_flags(page_folio(newpage), page_folio(page)); -} -EXPORT_SYMBOL(migrate_page_states); - -void migrate_page_copy(struct page *newpage, struct page *page) -{ - folio_migrate_copy(page_folio(newpage), page_folio(page)); -} -EXPORT_SYMBOL(migrate_page_copy); -#endif - bool set_page_writeback(struct page *page) { return folio_start_writeback(page_folio(page)); diff --git a/mm/ksm.c b/mm/ksm.c index 54f78c9eecaee..e8f8c1a2bb396 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -712,7 +712,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node, * however, it might mean that the page is under page_ref_freeze(). * The __remove_mapping() case is easy, again the node is now stale; * the same is in reuse_ksm_page() case; but if page is swapcache - * in migrate_page_move_mapping(), it might still be our page, + * in folio_migrate_mapping(), it might still be our page, * in which case it's essential to keep the node. */ while (!get_page_unless_zero(page)) { From 9594da4cec1db0491d35b38d5988eb989720d6f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Jun 2022 07:37:10 +0200 Subject: [PATCH 36/41] ntfs3: refactor ntfs_writepages Handle the resident case with an explicit generic_writepages call instead of using the obscure overload that makes mpage_writepages with a NULL get_block do the same thing. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox (Oracle) --- fs/ntfs3/inode.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index be4ebdd8048b0..28c09c25b823d 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -851,12 +851,10 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc) static int ntfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct inode *inode = mapping->host; - struct ntfs_inode *ni = ntfs_i(inode); /* Redirect call to 'ntfs_writepage' for resident files. */ - get_block_t *get_block = is_resident(ni) ? NULL : &ntfs_get_block; - - return mpage_writepages(mapping, wbc, get_block); + if (is_resident(ntfs_i(mapping->host))) + return generic_writepages(mapping, wbc); + return mpage_writepages(mapping, wbc, ntfs_get_block); } static int ntfs_get_block_write_begin(struct inode *inode, sector_t vbn, From 8862fa5da9f144d0554c2177aea7ce0b6f97d8c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Jun 2022 07:37:11 +0200 Subject: [PATCH 37/41] ext2: remove nobh support The nobh mode is an obscure feature to save lowlevel for large memory 32-bit configurations while trading for much slower performance and has been long obsolete. Remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox (Oracle) --- Documentation/filesystems/ext2.rst | 2 -- fs/ext2/ext2.h | 1 - fs/ext2/inode.c | 51 ++---------------------------- fs/ext2/namei.c | 10 ++---- fs/ext2/super.c | 6 ++-- 5 files changed, 7 insertions(+), 63 deletions(-) diff --git a/Documentation/filesystems/ext2.rst b/Documentation/filesystems/ext2.rst index 154101cf0e4f5..92aae683e16a7 100644 --- a/Documentation/filesystems/ext2.rst +++ b/Documentation/filesystems/ext2.rst @@ -59,8 +59,6 @@ acl Enable POSIX Access Control Lists support (requires CONFIG_EXT2_FS_POSIX_ACL). noacl Don't support POSIX ACLs. -nobh Do not attach buffer_heads to file pagecache. - quota, usrquota Enable user disk quota support (requires CONFIG_QUOTA). 
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index d4f306aa5aceb..28de11a22e5f6 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -795,7 +795,6 @@ extern const struct file_operations ext2_file_operations; /* inode.c */ extern void ext2_set_file_ops(struct inode *inode); extern const struct address_space_operations ext2_aops; -extern const struct address_space_operations ext2_nobh_aops; extern const struct iomap_ops ext2_iomap_ops; /* namei.c */ diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 58a9d061f17d1..c5229033baf05 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -908,25 +908,6 @@ static int ext2_write_end(struct file *file, struct address_space *mapping, return ret; } -static int -ext2_nobh_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, struct page **pagep, void **fsdata) -{ - int ret; - - ret = nobh_write_begin(mapping, pos, len, pagep, fsdata, - ext2_get_block); - if (ret < 0) - ext2_write_failed(mapping, pos + len); - return ret; -} - -static int ext2_nobh_writepage(struct page *page, - struct writeback_control *wbc) -{ - return nobh_writepage(page, ext2_get_block, wbc); -} - static sector_t ext2_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,ext2_get_block); @@ -978,21 +959,6 @@ const struct address_space_operations ext2_aops = { .error_remove_page = generic_error_remove_page, }; -const struct address_space_operations ext2_nobh_aops = { - .dirty_folio = block_dirty_folio, - .invalidate_folio = block_invalidate_folio, - .read_folio = ext2_read_folio, - .readahead = ext2_readahead, - .writepage = ext2_nobh_writepage, - .write_begin = ext2_nobh_write_begin, - .write_end = nobh_write_end, - .bmap = ext2_bmap, - .direct_IO = ext2_direct_IO, - .writepages = ext2_writepages, - .migrate_folio = buffer_migrate_folio, - .error_remove_page = generic_error_remove_page, -}; - static const struct address_space_operations ext2_dax_aops = { .writepages = ext2_dax_writepages, .direct_IO = noop_direct_IO, @@ -1298,13 +1264,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) inode_dio_wait(inode); - if (IS_DAX(inode)) { + if (IS_DAX(inode)) error = dax_zero_range(inode, newsize, PAGE_ALIGN(newsize) - newsize, NULL, &ext2_iomap_ops); - } else if (test_opt(inode->i_sb, NOBH)) - error = nobh_truncate_page(inode->i_mapping, - newsize, ext2_get_block); else error = block_truncate_page(inode->i_mapping, newsize, ext2_get_block); @@ -1396,8 +1359,6 @@ void ext2_set_file_ops(struct inode *inode) inode->i_fop = &ext2_file_operations; if (IS_DAX(inode)) inode->i_mapping->a_ops = &ext2_dax_aops; - else if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; else inode->i_mapping->a_ops = &ext2_aops; } @@ -1497,10 +1458,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode->i_mapping->a_ops = &ext2_aops; } else if (S_ISLNK(inode->i_mode)) { if (ext2_inode_is_fast_symlink(inode)) { inode->i_link = (char *)ei->i_data; @@ -1510,10 +1468,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) } else { inode->i_op = &ext2_symlink_inode_operations; inode_nohighmem(inode); - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + 
inode->i_mapping->a_ops = &ext2_aops; } } else { inode->i_op = &ext2_special_inode_operations; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 5f6b7560eb3f3..5fd9a22d2b70c 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -178,10 +178,7 @@ static int ext2_symlink (struct user_namespace * mnt_userns, struct inode * dir, /* slow symlink */ inode->i_op = &ext2_symlink_inode_operations; inode_nohighmem(inode); - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode->i_mapping->a_ops = &ext2_aops; err = page_symlink(inode, symname, l); if (err) goto out_fail; @@ -247,10 +244,7 @@ static int ext2_mkdir(struct user_namespace * mnt_userns, inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; - if (test_opt(inode->i_sb, NOBH)) - inode->i_mapping->a_ops = &ext2_nobh_aops; - else - inode->i_mapping->a_ops = &ext2_aops; + inode->i_mapping->a_ops = &ext2_aops; inode_inc_link_count(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f6a19f6d9f6d5..a1c1263c07ab3 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -296,9 +296,6 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noacl"); #endif - if (test_opt(sb, NOBH)) - seq_puts(seq, ",nobh"); - if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); @@ -551,7 +548,8 @@ static int parse_options(char *options, struct super_block *sb, clear_opt (opts->s_mount_opt, OLDALLOC); break; case Opt_nobh: - set_opt (opts->s_mount_opt, NOBH); + ext2_msg(sb, KERN_INFO, + "nobh option not supported"); break; #ifdef CONFIG_EXT2_FS_XATTR case Opt_user_xattr: From 3f05372ce9ddbd643a763f60c0d2a115cb2de008 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Jun 2022 07:37:12 +0200 Subject: [PATCH 38/41] jfs: stop using the nobh helper The nobh mode is an obscure feature to save lowlevel for large memory 32-bit configurations while trading for much slower performance and has been long obsolete. Switch to the regular buffer head based helpers instead. 
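For jfs the switch maps nobh_write_begin() to block_write_begin(), nobh_write_end() to a small wrapper around generic_write_end(), and nobh_truncate_page() to block_truncate_page(), with the filesystem trimming any blocks it instantiated past the data actually copied. A hedged sketch of the begin/end pairing the patch below adopts (the examplefs helpers are hypothetical stand-ins):

	#include <linux/buffer_head.h>

	/* Hypothetical helpers: block mapper and "truncate blocks past EOF". */
	static int examplefs_get_block(struct inode *inode, sector_t block,
				       struct buffer_head *bh, int create);
	static void examplefs_write_failed(struct address_space *mapping, loff_t to);

	static int examplefs_write_begin(struct file *file,
			struct address_space *mapping, loff_t pos, unsigned len,
			struct page **pagep, void **fsdata)
	{
		int ret = block_write_begin(mapping, pos, len, pagep,
					    examplefs_get_block);

		if (unlikely(ret))
			examplefs_write_failed(mapping, pos + len);
		return ret;
	}

	static int examplefs_write_end(struct file *file,
			struct address_space *mapping, loff_t pos, unsigned len,
			unsigned copied, struct page *page, void *fsdata)
	{
		int ret = generic_write_end(file, mapping, pos, len, copied,
					    page, fsdata);

		/* A short copy can leave freshly allocated blocks past i_size. */
		if (ret < len)
			examplefs_write_failed(mapping, pos + len);
		return ret;
	}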
Signed-off-by: Christoph Hellwig Signed-off-by: Matthew Wilcox (Oracle) --- fs/jfs/inode.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 259326556ada6..d1ec920aa030a 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -301,13 +301,25 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping, { int ret; - ret = nobh_write_begin(mapping, pos, len, pagep, fsdata, jfs_get_block); + ret = block_write_begin(mapping, pos, len, pagep, jfs_get_block); if (unlikely(ret)) jfs_write_failed(mapping, pos + len); return ret; } +static int jfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, struct page *page, + void *fsdata) +{ + int ret; + + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (ret < len) + jfs_write_failed(mapping, pos + len); + return ret; +} + static sector_t jfs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping, block, jfs_get_block); @@ -346,7 +358,7 @@ const struct address_space_operations jfs_aops = { .writepage = jfs_writepage, .writepages = jfs_writepages, .write_begin = jfs_write_begin, - .write_end = nobh_write_end, + .write_end = jfs_write_end, .bmap = jfs_bmap, .direct_IO = jfs_direct_IO, }; @@ -399,7 +411,7 @@ void jfs_truncate(struct inode *ip) { jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); - nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); + block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); IWRITE_LOCK(ip, RDWRLOCK_NORMAL); jfs_truncate_nolock(ip, ip->i_size); From 215e71b6ee7ad3363c6e6bd979adbb56e070f6de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Jun 2022 07:37:13 +0200 Subject: [PATCH 39/41] fs: remove the nobh helpers All callers are gone, so remove the now dead code. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox (Oracle) --- fs/buffer.c | 324 ------------------------------------ fs/mpage.c | 25 +-- include/linux/buffer_head.h | 8 - include/linux/mpage.h | 2 - 4 files changed, 1 insertion(+), 358 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index ce9844d7c10fa..5717d1881d2fa 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2537,330 +2537,6 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, } EXPORT_SYMBOL(block_page_mkwrite); -/* - * nobh_write_begin()'s prereads are special: the buffer_heads are freed - * immediately, while under the page lock. So it needs a special end_io - * handler which does not touch the bh after unlocking it. - */ -static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate) -{ - __end_buffer_read_notouch(bh, uptodate); -} - -/* - * Attach the singly-linked list of buffers created by nobh_write_begin, to - * the page (converting it to circular linked list and taking care of page - * dirty races). - */ -static void attach_nobh_buffers(struct page *page, struct buffer_head *head) -{ - struct buffer_head *bh; - - BUG_ON(!PageLocked(page)); - - spin_lock(&page->mapping->private_lock); - bh = head; - do { - if (PageDirty(page)) - set_buffer_dirty(bh); - if (!bh->b_this_page) - bh->b_this_page = head; - bh = bh->b_this_page; - } while (bh != head); - attach_page_private(page, head); - spin_unlock(&page->mapping->private_lock); -} - -/* - * On entry, the page is fully not uptodate. 
- * On exit the page is fully uptodate in the areas outside (from,to) - * The filesystem needs to handle block truncation upon failure. - */ -int nobh_write_begin(struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata, - get_block_t *get_block) -{ - struct inode *inode = mapping->host; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; - struct buffer_head *head, *bh; - struct page *page; - pgoff_t index; - unsigned from, to; - unsigned block_in_page; - unsigned block_start, block_end; - sector_t block_in_file; - int nr_reads = 0; - int ret = 0; - int is_mapped_to_disk = 1; - - index = pos >> PAGE_SHIFT; - from = pos & (PAGE_SIZE - 1); - to = from + len; - - page = grab_cache_page_write_begin(mapping, index); - if (!page) - return -ENOMEM; - *pagep = page; - *fsdata = NULL; - - if (page_has_buffers(page)) { - ret = __block_write_begin(page, pos, len, get_block); - if (unlikely(ret)) - goto out_release; - return ret; - } - - if (PageMappedToDisk(page)) - return 0; - - /* - * Allocate buffers so that we can keep track of state, and potentially - * attach them to the page if an error occurs. In the common case of - * no error, they will just be freed again without ever being attached - * to the page (which is all OK, because we're under the page lock). - * - * Be careful: the buffer linked list is a NULL terminated one, rather - * than the circular one we're used to. - */ - head = alloc_page_buffers(page, blocksize, false); - if (!head) { - ret = -ENOMEM; - goto out_release; - } - - block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); - - /* - * We loop across all blocks in the page, whether or not they are - * part of the affected region. This is so we can discover if the - * page is fully mapped-to-disk. - */ - for (block_start = 0, block_in_page = 0, bh = head; - block_start < PAGE_SIZE; - block_in_page++, block_start += blocksize, bh = bh->b_this_page) { - int create; - - block_end = block_start + blocksize; - bh->b_state = 0; - create = 1; - if (block_start >= to) - create = 0; - ret = get_block(inode, block_in_file + block_in_page, - bh, create); - if (ret) - goto failed; - if (!buffer_mapped(bh)) - is_mapped_to_disk = 0; - if (buffer_new(bh)) - clean_bdev_bh_alias(bh); - if (PageUptodate(page)) { - set_buffer_uptodate(bh); - continue; - } - if (buffer_new(bh) || !buffer_mapped(bh)) { - zero_user_segments(page, block_start, from, - to, block_end); - continue; - } - if (buffer_uptodate(bh)) - continue; /* reiserfs does this */ - if (block_start < from || block_end > to) { - lock_buffer(bh); - bh->b_end_io = end_buffer_read_nobh; - submit_bh(REQ_OP_READ, 0, bh); - nr_reads++; - } - } - - if (nr_reads) { - /* - * The page is locked, so these buffers are protected from - * any VM or truncate activity. Hence we don't need to care - * for the buffer_head refcounts. - */ - for (bh = head; bh; bh = bh->b_this_page) { - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - ret = -EIO; - } - if (ret) - goto failed; - } - - if (is_mapped_to_disk) - SetPageMappedToDisk(page); - - *fsdata = head; /* to be released by nobh_write_end */ - - return 0; - -failed: - BUG_ON(!ret); - /* - * Error recovery is a bit difficult. We need to zero out blocks that - * were newly allocated, and dirty them to ensure they get written out. 
- * Buffers need to be attached to the page at this point, otherwise - * the handling of potential IO errors during writeout would be hard - * (could try doing synchronous writeout, but what if that fails too?) - */ - attach_nobh_buffers(page, head); - page_zero_new_buffers(page, from, to); - -out_release: - unlock_page(page); - put_page(page); - *pagep = NULL; - - return ret; -} -EXPORT_SYMBOL(nobh_write_begin); - -int nobh_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = page->mapping->host; - struct buffer_head *head = fsdata; - struct buffer_head *bh; - BUG_ON(fsdata != NULL && page_has_buffers(page)); - - if (unlikely(copied < len) && head) - attach_nobh_buffers(page, head); - if (page_has_buffers(page)) - return generic_write_end(file, mapping, pos, len, - copied, page, fsdata); - - SetPageUptodate(page); - set_page_dirty(page); - if (pos+copied > inode->i_size) { - i_size_write(inode, pos+copied); - mark_inode_dirty(inode); - } - - unlock_page(page); - put_page(page); - - while (head) { - bh = head; - head = head->b_this_page; - free_buffer_head(bh); - } - - return copied; -} -EXPORT_SYMBOL(nobh_write_end); - -/* - * nobh_writepage() - based on block_full_write_page() except - * that it tries to operate without attaching bufferheads to - * the page. - */ -int nobh_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) -{ - struct inode * const inode = page->mapping->host; - loff_t i_size = i_size_read(inode); - const pgoff_t end_index = i_size >> PAGE_SHIFT; - unsigned offset; - int ret; - - /* Is the page fully inside i_size? */ - if (page->index < end_index) - goto out; - - /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_SIZE-1); - if (page->index >= end_index+1 || !offset) { - unlock_page(page); - return 0; /* don't care */ - } - - /* - * The page straddles i_size. It must be zeroed out on each and every - * writepage invocation because it may be mmapped. "A file is mapped - * in multiples of the page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when mapped, and - * writes to that region are not written out to the file." - */ - zero_user_segment(page, offset, PAGE_SIZE); -out: - ret = mpage_writepage(page, get_block, wbc); - if (ret == -EAGAIN) - ret = __block_write_full_page(inode, page, get_block, wbc, - end_buffer_async_write); - return ret; -} -EXPORT_SYMBOL(nobh_writepage); - -int nobh_truncate_page(struct address_space *mapping, - loff_t from, get_block_t *get_block) -{ - pgoff_t index = from >> PAGE_SHIFT; - struct inode *inode = mapping->host; - unsigned blocksize = i_blocksize(inode); - struct folio *folio; - struct buffer_head map_bh; - size_t offset; - sector_t iblock; - int err; - - /* Block boundary? Nothing to do */ - if (!(from & (blocksize - 1))) - return 0; - - folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_CREAT, - mapping_gfp_mask(mapping)); - err = -ENOMEM; - if (!folio) - goto out; - - if (folio_buffers(folio)) - goto has_buffers; - - iblock = from >> inode->i_blkbits; - map_bh.b_size = blocksize; - map_bh.b_state = 0; - err = get_block(inode, iblock, &map_bh, 0); - if (err) - goto unlock; - /* unmapped? It's a hole - nothing to do */ - if (!buffer_mapped(&map_bh)) - goto unlock; - - /* Ok, it's mapped. 
Make sure it's up-to-date */ - if (!folio_test_uptodate(folio)) { - err = mapping->a_ops->read_folio(NULL, folio); - if (err) { - folio_put(folio); - goto out; - } - folio_lock(folio); - if (!folio_test_uptodate(folio)) { - err = -EIO; - goto unlock; - } - if (folio_buffers(folio)) - goto has_buffers; - } - offset = offset_in_folio(folio, from); - folio_zero_segment(folio, offset, round_up(offset, blocksize)); - folio_mark_dirty(folio); - err = 0; - -unlock: - folio_unlock(folio); - folio_put(folio); -out: - return err; - -has_buffers: - folio_unlock(folio); - folio_put(folio); - return block_truncate_page(mapping, from, get_block); -} -EXPORT_SYMBOL(nobh_truncate_page); - int block_truncate_page(struct address_space *mapping, loff_t from, get_block_t *get_block) { diff --git a/fs/mpage.c b/fs/mpage.c index 681a4b9a36e3c..b7e0b7fbb41f2 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -404,7 +404,6 @@ struct mpage_data { struct bio *bio; sector_t last_block_in_bio; get_block_t *get_block; - unsigned use_writepage; }; /* @@ -624,15 +623,10 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, if (bio) bio = mpage_bio_submit(bio); - if (mpd->use_writepage) { - ret = mapping->a_ops->writepage(page, wbc); - } else { - ret = -EAGAIN; - goto out; - } /* * The caller has a ref on the inode, so *mapping is stable */ + ret = mapping->a_ops->writepage(page, wbc); mapping_set_error(mapping, ret); out: mpd->bio = bio; @@ -674,7 +668,6 @@ mpage_writepages(struct address_space *mapping, .bio = NULL, .last_block_in_bio = 0, .get_block = get_block, - .use_writepage = 1, }; ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); @@ -685,19 +678,3 @@ mpage_writepages(struct address_space *mapping, return ret; } EXPORT_SYMBOL(mpage_writepages); - -int mpage_writepage(struct page *page, get_block_t get_block, - struct writeback_control *wbc) -{ - struct mpage_data mpd = { - .bio = NULL, - .last_block_in_bio = 0, - .get_block = get_block, - .use_writepage = 0, - }; - int ret = __mpage_writepage(page, wbc, &mpd); - if (mpd.bio) - mpage_bio_submit(mpd.bio); - return ret; -} -EXPORT_SYMBOL(mpage_writepage); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index b0366c89d6a4d..61afb81cfdaea 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -258,14 +258,6 @@ static inline vm_fault_t block_page_mkwrite_return(int err) } sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); -int nobh_write_begin(struct address_space *, loff_t, unsigned len, - struct page **, void **, get_block_t*); -int nobh_write_end(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page *, void *); -int nobh_truncate_page(struct address_space *, loff_t, get_block_t *); -int nobh_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); #ifdef CONFIG_MIGRATION extern int buffer_migrate_folio(struct address_space *, diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 43986f7ec4dd3..1bdc39daac0a3 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -19,7 +19,5 @@ void mpage_readahead(struct readahead_control *, get_block_t get_block); int mpage_read_folio(struct folio *folio, get_block_t get_block); int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); -int mpage_writepage(struct page *page, get_block_t *get_block, - struct writeback_control 
*wbc); #endif From cf95d50205f62c4f5f538676def847292cf39fa9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Jun 2022 07:37:14 +0200 Subject: [PATCH 40/41] fs: don't call ->writepage from __mpage_writepage All callers of mpage_writepage use block_write_full_page as their ->writepage implementation when called from mpage_writepages (although for ntfs3 this is obfuscated a bit). Just call block_write_full_page directly instead of going through the ->writepage indirection. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox (Oracle) --- fs/mpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/mpage.c b/fs/mpage.c index b7e0b7fbb41f2..bf7d1cf621e2f 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -626,7 +626,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, /* * The caller has a ref on the inode, so *mapping is stable */ - ret = mapping->a_ops->writepage(page, wbc); + ret = block_write_full_page(page, mpd->get_block, wbc); mapping_set_error(mapping, ret); out: mpd->bio = bio; From 03b33c09ea22fa89dd204ad0a2058e512c691b9f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Jun 2022 07:37:15 +0200 Subject: [PATCH 41/41] fs: remove the NULL get_block case in mpage_writepages No one calls mpage_writepages with a NULL get_block parameter, so remove support for that case. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox (Oracle) --- fs/mpage.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/fs/mpage.c b/fs/mpage.c index bf7d1cf621e2f..8326ff8a7a96e 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -638,8 +638,6 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @get_block: the filesystem's block mapper function. - * If this is NULL then use a_ops->writepage. Otherwise, go - * direct-to-BIO. * * This is a library function, which implements the writepages() * address_space_operation. @@ -656,24 +654,16 @@ int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block) { + struct mpage_data mpd = { + .get_block = get_block, + }; struct blk_plug plug; int ret; blk_start_plug(&plug); - - if (!get_block) - ret = generic_writepages(mapping, wbc); - else { - struct mpage_data mpd = { - .bio = NULL, - .last_block_in_bio = 0, - .get_block = get_block, - }; - - ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); - if (mpd.bio) - mpage_bio_submit(mpd.bio); - } + ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); + if (mpd.bio) + mpage_bio_submit(mpd.bio); blk_finish_plug(&plug); return ret; }
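With the NULL get_block case gone, a filesystem whose pages cannot all be mapped through get_block (for example inline or resident data) follows the ntfs3 pattern from patch 36 and makes the choice in its own ->writepages rather than relying on the removed overload. A hedged sketch (the examplefs names and the inline-data test are assumptions):

	#include <linux/buffer_head.h>
	#include <linux/fs.h>
	#include <linux/mpage.h>
	#include <linux/writeback.h>

	static bool examplefs_has_inline_data(struct inode *inode);	/* hypothetical */
	static int examplefs_get_block(struct inode *inode, sector_t block,
				       struct buffer_head *bh, int create);

	static int examplefs_writepages(struct address_space *mapping,
					struct writeback_control *wbc)
	{
		/* Inline data is written by ->writepage, not by a block mapper. */
		if (examplefs_has_inline_data(mapping->host))
			return generic_writepages(mapping, wbc);

		/* Everything else goes direct-to-BIO with a real get_block. */
		return mpage_writepages(mapping, wbc, examplefs_get_block);
	}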