From a426ce9d6751cc8e709f031fa546900e4239f125 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vladimir@tuxera.com>
Date: Fri, 30 Oct 2020 14:28:39 +0200
Subject: [PATCH 1/6] erofs: remove a void EROFS_VERSION macro set in Makefile
Since commit 4f761fa253b4 ("erofs: rename errln/infoln/debugln to
erofs_{err, info, dbg}") the defined macro EROFS_VERSION has no effect,
therefore removing it from the Makefile is a non-functional change.
Link: https://lore.kernel.org/r/20201030122839.25431-1-vladimir@tuxera.com
Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Vladimir Zapolskiy <vladimir@tuxera.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
fs/erofs/Makefile | 5 -----
1 file changed, 5 deletions(-)
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 46f2aa4ba46c2..af159539fc1b2 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -1,11 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-EROFS_VERSION = "1.0"
-
-ccflags-y += -DEROFS_VERSION=\"$(EROFS_VERSION)\"
-
obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
-
From 6aaa7b0664e6886f6154070edbc24435d6e1f86b Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@redhat.com>
Date: Tue, 8 Dec 2020 17:58:32 +0800
Subject: [PATCH 2/6] erofs: get rid of magical Z_EROFS_MAPPING_STAGING
Previously, we played around with magical page->mapping for short-lived
temporary pages since we need to identify different types of pages in
the same pcluster but both invalidated and short-lived temporary pages
can have page->mapping == NULL. It was considered safe because
temporary pages are all non-LRU / non-movable pages.
This patch uses a specific page->private value to identify short-lived
pages instead, so it won't rely on page->mapping anymore. Details are
described in "compress.h" as well.
Link: https://lore.kernel.org/r/20201208095834.3133565-1-hsiangkao@redhat.com
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
fs/erofs/compress.h | 51 ++++++++++++++++++++++++++----------
fs/erofs/decompressor.c | 2 +-
fs/erofs/zdata.c | 57 ++++++++++++++++++++++-------------------
fs/erofs/zdata.h | 1 +
4 files changed, 71 insertions(+), 40 deletions(-)
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 3d452443c545b..4dadde18cdf19 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -26,30 +26,55 @@ struct z_erofs_decompress_req {
bool inplace_io, partial_decoding;
};
+/* some special page->private (unsigned long, see below) */
+#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
+
/*
- * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
- * used to mark temporary allocated pages from other
- * file/cached pages and NULL mapping pages.
+ * For all pages in a pcluster, page->private should be one of
+ * Type Last 2bits page->private
+ * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
+ * cached/managed page 00 pointer to z_erofs_pcluster
+ * online page (file-backed, 01/10/11 sub-index << 2 | count
+ * some pages can be used for inplace I/O)
+ *
+ * page->mapping should be one of
+ * Type page->mapping
+ * short-lived page NULL
+ * cached/managed page non-NULL or NULL (invalidated/truncated page)
+ * online page non-NULL
+ *
+ * For all managed pages, PG_private should be set with 1 extra refcount,
+ * which is used for page reclaim / migration.
*/
-#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
-/* check if a page is marked as staging */
-static inline bool z_erofs_page_is_staging(struct page *page)
+/*
+ * short-lived pages are pages directly from buddy system with specific
+ * page->private (no need to set PagePrivate since these are non-LRU /
+ * non-movable pages and bypass reclaim / migration code).
+ */
+static inline bool z_erofs_is_shortlived_page(struct page *page)
{
- return page->mapping == Z_EROFS_MAPPING_STAGING;
+ if (page->private != Z_EROFS_SHORTLIVED_PAGE)
+ return false;
+
+ DBG_BUGON(page->mapping);
+ return true;
}
-static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
- struct page *page)
+static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
+ struct page *page)
{
- if (!z_erofs_page_is_staging(page))
+ if (!z_erofs_is_shortlived_page(page))
return false;
- /* staging pages should not be used by others at the same time */
- if (page_ref_count(page) > 1)
+ /* short-lived pages should not be used by others at the same time */
+ if (page_ref_count(page) > 1) {
put_page(page);
- else
+ } else {
+ /* follow the pcluster rule above. */
+ set_page_private(page, 0);
list_add(&page->lru, pagepool);
+ }
return true;
}
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index cbadbf55c6c20..1cb1ffd105698 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -76,7 +76,7 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
victim = erofs_allocpage(pagepool, GFP_KERNEL);
if (!victim)
return -ENOMEM;
- victim->mapping = Z_EROFS_MAPPING_STAGING;
+ set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
}
rq->out[i] = victim;
}
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 86fd3bf62af61..cfb0d11f893b7 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -226,11 +226,8 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
/* barrier is implied in the following 'unlock_page' */
WRITE_ONCE(pcl->compressed_pages[i], NULL);
- set_page_private(page, 0);
- ClearPagePrivate(page);
-
+ detach_page_private(page);
unlock_page(page);
- put_page(page);
}
return 0;
}
@@ -254,10 +251,8 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
}
erofs_workgroup_unfreeze(&pcl->obj, 1);
- if (ret) {
- ClearPagePrivate(page);
- put_page(page);
- }
+ if (ret)
+ detach_page_private(page);
}
return ret;
}
@@ -648,12 +643,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
retry:
err = z_erofs_attach_page(clt, page, page_type);
- /* should allocate an additional staging page for pagevec */
+ /* should allocate an additional short-lived page for pagevec */
if (err == -EAGAIN) {
struct page *const newpage =
alloc_page(GFP_NOFS | __GFP_NOFAIL);
- newpage->mapping = Z_EROFS_MAPPING_STAGING;
+ set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
err = z_erofs_attach_page(clt, newpage,
Z_EROFS_PAGE_TYPE_EXCLUSIVE);
if (!err)
@@ -710,6 +705,11 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
queue_work(z_erofs_workqueue, &io->u.work);
}
+static bool z_erofs_page_is_invalidated(struct page *page)
+{
+ return !page->mapping && !z_erofs_is_shortlived_page(page);
+}
+
static void z_erofs_decompressqueue_endio(struct bio *bio)
{
tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
@@ -722,7 +722,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
struct page *page = bvec->bv_page;
DBG_BUGON(PageUptodate(page));
- DBG_BUGON(!page->mapping);
+ DBG_BUGON(z_erofs_page_is_invalidated(page));
if (err)
SetPageError(page);
@@ -795,9 +795,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
/* all pages in pagevec ought to be valid */
DBG_BUGON(!page);
- DBG_BUGON(!page->mapping);
+ DBG_BUGON(z_erofs_page_is_invalidated(page));
- if (z_erofs_put_stagingpage(pagepool, page))
+ if (z_erofs_put_shortlivedpage(pagepool, page))
continue;
if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
@@ -831,9 +831,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
/* all compressed pages ought to be valid */
DBG_BUGON(!page);
- DBG_BUGON(!page->mapping);
+ DBG_BUGON(z_erofs_page_is_invalidated(page));
- if (!z_erofs_page_is_staging(page)) {
+ if (!z_erofs_is_shortlived_page(page)) {
if (erofs_page_is_managed(sbi, page)) {
if (!PageUptodate(page))
err = -EIO;
@@ -858,7 +858,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
overlapped = true;
}
- /* PG_error needs checking for inplaced and staging pages */
+ /* PG_error needs checking for all non-managed pages */
if (PageError(page)) {
DBG_BUGON(PageUptodate(page));
err = -EIO;
@@ -897,8 +897,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
if (erofs_page_is_managed(sbi, page))
continue;
- /* recycle all individual staging pages */
- (void)z_erofs_put_stagingpage(pagepool, page);
+ /* recycle all individual short-lived pages */
+ (void)z_erofs_put_shortlivedpage(pagepool, page);
WRITE_ONCE(compressed_pages[i], NULL);
}
@@ -908,10 +908,10 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
if (!page)
continue;
- DBG_BUGON(!page->mapping);
+ DBG_BUGON(z_erofs_page_is_invalidated(page));
- /* recycle all individual staging pages */
- if (z_erofs_put_stagingpage(pagepool, page))
+ /* recycle all individual short-lived pages */
+ if (z_erofs_put_shortlivedpage(pagepool, page))
continue;
if (err < 0)
@@ -1011,13 +1011,17 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
mapping = READ_ONCE(page->mapping);
/*
- * unmanaged (file) pages are all locked solidly,
+ * file-backed online pages in pcluster are all locked steady,
* therefore it is impossible for `mapping' to be NULL.
*/
if (mapping && mapping != mc)
/* ought to be unmanaged pages */
goto out;
+ /* directly return for shortlived page as well */
+ if (z_erofs_is_shortlived_page(page))
+ goto out;
+
lock_page(page);
/* only true if page reclaim goes wrong, should never happen */
@@ -1062,8 +1066,8 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
out_allocpage:
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
- /* non-LRU / non-movable temporary page is needed */
- page->mapping = Z_EROFS_MAPPING_STAGING;
+ /* turn into temporary page if fails */
+ set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
tocache = false;
}
@@ -1080,8 +1084,9 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
}
if (tocache) {
- set_page_private(page, (unsigned long)pcl);
- SetPagePrivate(page);
+ attach_page_private(page, pcl);
+ /* drop a ref added by allocpage (then we have 2 refs here) */
+ put_page(page);
}
out: /* the only exit (for tracing and debugging) */
return page;
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 68c9b29fc0ca5..b503b353d4abc 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -173,6 +173,7 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
v = atomic_dec_return(u.o);
if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
+ set_page_private(page, 0);
ClearPagePrivate(page);
if (!PageError(page))
SetPageUptodate(page);
From bf225074ff211f219cff2166cea17b158a0d06a9 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@redhat.com>
Date: Tue, 8 Dec 2020 17:58:33 +0800
Subject: [PATCH 3/6] erofs: insert to managed cache after adding to pcl
Previously, there could be some concern about calling
add_to_page_cache_lru() with page->mapping == Z_EROFS_MAPPING_STAGING
(!= NULL). In contrast, page->private is used instead now, so partially
revert commit 5ddcee1f3a1c ("erofs: get rid of __stagingpage_alloc
helper") with some adaptation for simplicity.
Link: https://lore.kernel.org/r/20201208095834.3133565-2-hsiangkao@redhat.com
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
fs/erofs/zdata.c | 26 +++++++++-----------------
1 file changed, 9 insertions(+), 17 deletions(-)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index cfb0d11f893b7..37fee144f0e7d 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1065,29 +1065,21 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
put_page(page);
out_allocpage:
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
- if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
- /* turn into temporary page if fails */
- set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
- tocache = false;
- }
-
if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
- if (tocache) {
- /* since it added to managed cache successfully */
- unlock_page(page);
- put_page(page);
- } else {
- list_add(&page->lru, pagepool);
- }
+ list_add(&page->lru, pagepool);
cond_resched();
goto repeat;
}
- if (tocache) {
- attach_page_private(page, pcl);
- /* drop a ref added by allocpage (then we have 2 refs here) */
- put_page(page);
+ if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
+ /* turn into temporary page if fails (1 ref) */
+ set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
+ goto out;
}
+ attach_page_private(page, pcl);
+ /* drop a refcount added by allocpage (then we have 2 refs here) */
+ put_page(page);
+
out: /* the only exit (for tracing and debugging) */
return page;
}
From 473e15b0c0f7cf63a48f776937a02cb9dfcab252 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@redhat.com>
Date: Tue, 8 Dec 2020 17:58:34 +0800
Subject: [PATCH 4/6] erofs: simplify try_to_claim_pcluster()
Simplify try_to_claim_pcluster() by directly using cmpxchg() here
(the retry loop caused more overhead). Also, move the chain loop
detection in and rename it to z_erofs_try_to_claim_pcluster().
Link: https://lore.kernel.org/r/20201208095834.3133565-3-hsiangkao@redhat.com
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
fs/erofs/zdata.c | 51 +++++++++++++++++++++++-------------------------
1 file changed, 24 insertions(+), 27 deletions(-)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 37fee144f0e7d..777790038bc9d 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -292,34 +292,33 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt,
return ret ? 0 : -EAGAIN;
}
-static enum z_erofs_collectmode
-try_to_claim_pcluster(struct z_erofs_pcluster *pcl,
- z_erofs_next_pcluster_t *owned_head)
+static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
{
- /* let's claim these following types of pclusters */
-retry:
- if (pcl->next == Z_EROFS_PCLUSTER_NIL) {
- /* type 1, nil pcluster */
- if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
- *owned_head) != Z_EROFS_PCLUSTER_NIL)
- goto retry;
+ struct z_erofs_pcluster *pcl = clt->pcl;
+ z_erofs_next_pcluster_t *owned_head = &clt->owned_head;
+ /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */
+ if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
+ *owned_head) == Z_EROFS_PCLUSTER_NIL) {
*owned_head = &pcl->next;
- /* lucky, I am the followee :) */
- return COLLECT_PRIMARY_FOLLOWED;
- } else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) {
- /*
- * type 2, link to the end of a existing open chain,
- * be careful that its submission itself is governed
- * by the original owned chain.
- */
- if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
- *owned_head) != Z_EROFS_PCLUSTER_TAIL)
- goto retry;
+ /* so we can attach this pcluster to our submission chain. */
+ clt->mode = COLLECT_PRIMARY_FOLLOWED;
+ return;
+ }
+
+ /*
+ * type 2, link to the end of an existing open chain, be careful
+ * that its submission is controlled by the original attached chain.
+ */
+ if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+ *owned_head) == Z_EROFS_PCLUSTER_TAIL) {
*owned_head = Z_EROFS_PCLUSTER_TAIL;
- return COLLECT_PRIMARY_HOOKED;
+ clt->mode = COLLECT_PRIMARY_HOOKED;
+ clt->tailpcl = NULL;
+ return;
}
- return COLLECT_PRIMARY; /* :( better luck next time */
+ /* type 3, it belongs to a chain, but it isn't the end of the chain */
+ clt->mode = COLLECT_PRIMARY;
}
static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
@@ -364,10 +363,8 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
/* used to check tail merging loop due to corrupted images */
if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
clt->tailpcl = pcl;
- clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head);
- /* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
- if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
- clt->tailpcl = NULL;
+
+ z_erofs_try_to_claim_pcluster(clt);
clt->cl = cl;
return 0;
}
From 1825c8d7ce93c4725cb04ad09627fc2829de32ca Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@redhat.com>
Date: Wed, 9 Dec 2020 20:37:17 +0800
Subject: [PATCH 5/6] erofs: force inplace I/O under low memory scenario
Try to forcibly switch to inplace I/O under low memory scenarios in
order to avoid direct memory reclaim due to cached page allocation.
Link: https://lore.kernel.org/r/20201209123717.12430-1-hsiangkao@aol.com
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
fs/erofs/compress.h | 3 +++
fs/erofs/zdata.c | 48 +++++++++++++++++++++++++++++++++++++--------
2 files changed, 43 insertions(+), 8 deletions(-)
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 4dadde18cdf19..aea129ddda74b 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -28,11 +28,13 @@ struct z_erofs_decompress_req {
/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
+#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
/*
* For all pages in a pcluster, page->private should be one of
* Type Last 2bits page->private
* short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
+ * preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE
* cached/managed page 00 pointer to z_erofs_pcluster
* online page (file-backed, 01/10/11 sub-index << 2 | count
* some pages can be used for inplace I/O)
@@ -40,6 +42,7 @@ struct z_erofs_decompress_req {
* page->mapping should be one of
* Type page->mapping
* short-lived page NULL
+ * preallocated page NULL
* cached/managed page non-NULL or NULL (invalidated/truncated page)
* online page non-NULL
*
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 777790038bc9d..6cb356c4217b2 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -20,6 +20,11 @@
enum z_erofs_cache_alloctype {
DONTALLOC, /* don't allocate any cached pages */
DELAYEDALLOC, /* delayed allocation (at the time of submitting io) */
+ /*
+ * try to use cached I/O if page allocation succeeds or fallback
+ * to in-place I/O instead to avoid any direct reclaim.
+ */
+ TRYALLOC,
};
/*
@@ -154,13 +159,16 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
static void preload_compressed_pages(struct z_erofs_collector *clt,
struct address_space *mc,
- enum z_erofs_cache_alloctype type)
+ enum z_erofs_cache_alloctype type,
+ struct list_head *pagepool)
{
const struct z_erofs_pcluster *pcl = clt->pcl;
const unsigned int clusterpages = BIT(pcl->clusterbits);
struct page **pages = clt->compressedpages;
pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
bool standalone = true;
+ gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
+ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
return;
@@ -168,6 +176,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
struct page *page;
compressed_page_t t;
+ struct page *newpage = NULL;
/* the compressed page was loaded before */
if (READ_ONCE(*pages))
@@ -179,7 +188,15 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
t = tag_compressed_page_justfound(page);
} else if (type == DELAYEDALLOC) {
t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
+ } else if (type == TRYALLOC) {
+ newpage = erofs_allocpage(pagepool, gfp);
+ if (!newpage)
+ goto dontalloc;
+
+ set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
+ t = tag_compressed_page_justfound(newpage);
} else { /* DONTALLOC */
+dontalloc:
if (standalone)
clt->compressedpages = pages;
standalone = false;
@@ -189,8 +206,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
continue;
- if (page)
+ if (page) {
put_page(page);
+ } else if (newpage) {
+ set_page_private(newpage, 0);
+ list_add(&newpage->lru, pagepool);
+ }
}
if (standalone) /* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
@@ -554,7 +575,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
}
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
- struct page *page)
+ struct page *page, struct list_head *pagepool)
{
struct inode *const inode = fe->inode;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -607,11 +628,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
/* preload all compressed pages (maybe downgrade role if necessary) */
if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
- cache_strategy = DELAYEDALLOC;
+ cache_strategy = TRYALLOC;
else
cache_strategy = DONTALLOC;
- preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
+ preload_compressed_pages(clt, MNGD_MAPPING(sbi),
+ cache_strategy, pagepool);
hitted:
/*
@@ -1005,6 +1027,16 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
justfound = tagptr_unfold_tags(t);
page = tagptr_unfold_ptr(t);
+ /*
+ * preallocated cached pages, which are used to avoid direct reclaim;
+ * otherwise, it will go inplace I/O path instead.
+ */
+ if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
+ WRITE_ONCE(pcl->compressed_pages[nr], page);
+ set_page_private(page, 0);
+ tocache = true;
+ goto out_tocache;
+ }
mapping = READ_ONCE(page->mapping);
/*
@@ -1067,7 +1099,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
cond_resched();
goto repeat;
}
-
+out_tocache:
if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
/* turn into temporary page if fails (1 ref) */
set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
@@ -1278,7 +1310,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
- err = z_erofs_do_read_page(&f, page);
+ err = z_erofs_do_read_page(&f, page, &pagepool);
(void)z_erofs_collector_end(&f.clt);
/* if some compressed cluster ready, need submit them anyway */
@@ -1332,7 +1364,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
/* traversal in reverse order */
head = (void *)page_private(page);
- err = z_erofs_do_read_page(&f, page);
+ err = z_erofs_do_read_page(&f, page, &pagepool);
if (err)
erofs_err(inode->i_sb,
"readahead error at page %lu @ nid %llu",
From d8b3df8b1048405e73558b88cba2adf29490d468 Mon Sep 17 00:00:00 2001
From: Huang Jianan <huangjianan@oppo.com>
Date: Wed, 9 Dec 2020 19:57:40 +0800
Subject: [PATCH 6/6] erofs: avoid using generic_block_bmap
Surprisingly, `block' in sector_t indicates the number of
i_blkbits-sized blocks rather than sectors for bmap.
In addition, since buffer_head limits the mapped size to 32 bits,
generic_block_bmap should be avoided.
Link: https://lore.kernel.org/r/20201209115740.18802-1-huangjianan@oppo.com
Fixes: 9da681e017a3 ("staging: erofs: support bmap")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
Signed-off-by: Huang Jianan <huangjianan@oppo.com>
Signed-off-by: Guo Weichao <guoweichao@oppo.com>
[ Gao Xiang: slightly update the commit message description. ]
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
fs/erofs/data.c | 26 +++++++-------------------
1 file changed, 7 insertions(+), 19 deletions(-)
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 347be146884c3..ea4f693bee224 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -312,27 +312,12 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
}
-static int erofs_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create)
-{
- struct erofs_map_blocks map = {
- .m_la = iblock << 9,
- };
- int err;
-
- err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
- if (err)
- return err;
-
- if (map.m_flags & EROFS_MAP_MAPPED)
- bh->b_blocknr = erofs_blknr(map.m_pa);
-
- return err;
-}
-
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
+ struct erofs_map_blocks map = {
+ .m_la = blknr_to_addr(block),
+ };
if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
@@ -341,7 +326,10 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
return 0;
}
- return generic_block_bmap(mapping, block, erofs_get_block);
+ if (!erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW))
+ return erofs_blknr(map.m_pa);
+
+ return 0;
}
/* for uncompressed (aligned) files and raw access for other files */