From 43a511c44e58e357a687d61a20cf5ef1dc9e5a7c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 10 May 2021 12:25:59 +0200 Subject: [PATCH 1/7] gfs2: Prevent direct-I/O write fallback errors from getting lost When a direct I/O write falls entirely and falls back to buffered I/O and the buffered I/O fails, the write failed with return value 0 instead of the error number reported by the buffered I/O. Fix that. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index a0b542d84cd9e..493a83e3f5906 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -911,8 +911,11 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) current->backing_dev_info = inode_to_bdi(inode); buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); current->backing_dev_info = NULL; - if (unlikely(buffered <= 0)) + if (unlikely(buffered <= 0)) { + if (!ret) + ret = buffered; goto out_unlock; + } /* * We need to ensure that the page cache pages are written to From 4194dec4b4169e5a9a5171db60c2ec00c4d8cf16 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 19 May 2021 14:45:56 -0400 Subject: [PATCH 2/7] gfs2: Fix I_NEW check in gfs2_dinode_in Patch 4a378d8a0d96 added a new check for I_NEW inodes, but unfortunately it used the wrong variable, i_flags. This caused GFS2 to withdraw when gfs2_lookup_by_inum needed to refresh an I_NEW inode. This patch switches to use the correct variable, i_state. Fixes: 4a378d8a0d96 ("gfs2: be careful with inode refresh") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 454095e9fedfd..54d3fbeb3002f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -396,7 +396,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) struct timespec64 atime; u16 height, depth; umode_t mode = be32_to_cpu(str->di_mode); - bool is_new = ip->i_inode.i_flags & I_NEW; + bool is_new = ip->i_inode.i_state & I_NEW; if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) goto corrupt; From 20265d9a67e40eafd39a8884658ca2e36f05985d Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 18 May 2021 09:12:10 -0400 Subject: [PATCH 3/7] gfs2: fix scheduling while atomic bug in glocks Before this patch, in the unlikely event that gfs2_glock_dq encountered a withdraw, it would do a wait_on_bit to wait for its journal to be recovered, but it never released the glock's spin_lock, which caused a scheduling-while-atomic error. This patch unlocks the lockref spin_lock before waiting for recovery. Fixes: 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish") Cc: stable@vger.kernel.org # v5.7+ Reported-by: Alexander Aring Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ea7fc5c641c7e..8c547db210fbc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1466,9 +1466,11 @@ void gfs2_glock_dq(struct gfs2_holder *gh) glock_blocked_by_withdraw(gl) && gh->gh_gl != sdp->sd_jinode_gl) { sdp->sd_glock_dqs_held++; + spin_unlock(&gl->gl_lockref.lock); might_sleep(); wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, TASK_UNINTERRUPTIBLE); + spin_lock(&gl->gl_lockref.lock); } if (gh->gh_flags & GL_NOCACHE) handle_callback(gl, LM_ST_UNLOCKED, 0, false); From 865cc3e9cc0b1d4b81c10d53174bced76decf888 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 18 May 2021 09:14:31 -0400 Subject: [PATCH 4/7] gfs2: fix a deadlock on withdraw-during-mount Before this patch, gfs2 would deadlock because of the following sequence during mount: mount gfs2_fill_super gfs2_make_fs_rw <--- Detects IO error with glock kthread_stop(sdp->sd_quotad_process); <--- Blocked waiting for quotad to finish logd Detects IO error and the need to withdraw calls gfs2_withdraw gfs2_make_fs_ro kthread_stop(sdp->sd_quotad_process); <--- Blocked waiting for quotad to finish gfs2_quotad gfs2_statfs_sync gfs2_glock_wait <---- Blocked waiting for statfs glock to be granted glock_work_func do_xmote <---Detects IO error, can't release glock: blocked on withdraw glops->go_inval glock_blocked_by_withdraw requeue glock work & exit <--- work requeued, blocked by withdraw This patch makes a special exception for the statfs system inode glock, which allows the statfs glock UNLOCK to proceed normally. That allows the quotad daemon to exit during the withdraw, which allows the logd daemon to exit during the withdraw, which allows the mount to exit. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8c547db210fbc..797949e784ccd 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -582,6 +582,16 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) spin_unlock(&gl->gl_lockref.lock); } +static bool is_system_glock(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + if (gl == m_ip->i_gl) + return true; + return false; +} + /** * do_xmote - Calls the DLM to change the state of a lock * @gl: The lock state @@ -671,17 +681,25 @@ __acquires(&gl->gl_lockref.lock) * to see sd_log_error and withdraw, and in the meantime, requeue the * work for later. * + * We make a special exception for some system glocks, such as the + * system statfs inode glock, which needs to be granted before the + * gfs2_quotad daemon can exit, and that exit needs to finish before + * we can unmount the withdrawn file system. + * * However, if we're just unlocking the lock (say, for unmount, when * gfs2_gl_hash_clear calls clear_glock) and recovery is complete * then it's okay to tell dlm to unlock it. */ if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp))) gfs2_withdraw_delayed(sdp); - if (glock_blocked_by_withdraw(gl)) { - if (target != LM_ST_UNLOCKED || - test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags)) { + if (glock_blocked_by_withdraw(gl) && + (target != LM_ST_UNLOCKED || + test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { + if (!is_system_glock(gl)) { gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); goto out; + } else { + clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); } } From f5456b5d67cf812fd31fe3e130ca216b2e0908e5 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 19 May 2021 14:54:02 -0400 Subject: [PATCH 5/7] gfs2: Clean up revokes on normal withdraws Before this patch, the system ail lists were cleaned up if the logd process withdrew, but on other withdraws, they were not cleaned up. This included the cleaning up of the revokes as well. This patch reorganizes things a bit so that all withdraws (not just logd) clean up the ail lists, including any pending revokes. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 6 +++--- fs/gfs2/log.h | 1 + fs/gfs2/lops.c | 7 ++++++- fs/gfs2/lops.h | 1 + fs/gfs2/util.c | 1 + 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 97d54e581a7bd..42c15cfc08219 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -926,10 +926,10 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) } /** - * ail_drain - drain the ail lists after a withdraw + * gfs2_ail_drain - drain the ail lists after a withdraw * @sdp: Pointer to GFS2 superblock */ -static void ail_drain(struct gfs2_sbd *sdp) +void gfs2_ail_drain(struct gfs2_sbd *sdp) { struct gfs2_trans *tr; @@ -956,6 +956,7 @@ static void ail_drain(struct gfs2_sbd *sdp) list_del(&tr->tr_list); gfs2_trans_free(sdp, tr); } + gfs2_drain_revokes(sdp); spin_unlock(&sdp->sd_ail_lock); } @@ -1162,7 +1163,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) if (tr && list_empty(&tr->tr_list)) list_add(&tr->tr_list, &sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); - ail_drain(sdp); /* frees all transactions */ tr = NULL; goto out_end; } diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index eea58015710e7..fc905c2af53ce 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -93,5 +93,6 @@ extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); extern void gfs2_flush_revokes(struct gfs2_sbd *sdp); +extern void gfs2_ail_drain(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 221e7118cc3b9..8ee05d25dfa6c 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -885,7 +885,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_log_write_page(sdp, page); } -static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +void gfs2_drain_revokes(struct gfs2_sbd *sdp) { struct list_head *head = &sdp->sd_log_revokes; struct gfs2_bufdata *bd; @@ -900,6 +900,11 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) } } +static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +{ + gfs2_drain_revokes(sdp); +} + static void revoke_lo_before_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, int pass) { diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 31b6dd0d2e5d8..f707601597dcc 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -20,6 +20,7 @@ extern void gfs2_log_submit_bio(struct bio **biop, int opf); extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, bool keep_cache); +extern void gfs2_drain_revokes(struct gfs2_sbd *sdp); static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { return sdp->sd_ldptrs; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 3e08027a6c81b..f4325b44956dc 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -131,6 +131,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc) return; + gfs2_ail_drain(sdp); /* frees all transactions */ inode = sdp->sd_jdesc->jd_inode; ip = GFS2_I(inode); i_gl = ip->i_gl; From b7f55d928e75557295c1ac280c291b738905b6fb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 15 May 2021 17:27:14 +0200 Subject: [PATCH 6/7] gfs2: Fix mmap locking for write faults When a write fault occurs, we need to take the inode glock of the underlying inode in exclusive mode. Otherwise, there's no guarantee that the dirty page will be written back to disk. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 493a83e3f5906..8a35a0196b6da 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -540,9 +540,11 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; vm_fault_t ret; + u16 state; int err; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + state = (vmf->flags & FAULT_FLAG_WRITE) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; + gfs2_holder_init(ip->i_gl, state, 0, &gh); err = gfs2_glock_nq(&gh); if (err) { ret = block_page_mkwrite_return(err); From 1ab19c5de4c537ec0d9b21020395a5b5a6c059b2 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Tue, 18 May 2021 16:46:25 +0800 Subject: [PATCH 7/7] gfs2: Fix use-after-free in gfs2_glock_shrink_scan The GLF_LRU flag is checked under lru_lock in gfs2_glock_remove_from_lru() to remove the glock from the lru list in __gfs2_glock_put(). On the shrink scan path, the same flag is cleared under lru_lock but because of cond_resched_lock(&lru_lock) in gfs2_dispose_glock_lru(), progress on the put side can be made without deleting the glock from the lru list. Keep GLF_LRU across the race window opened by cond_resched_lock(&lru_lock) to ensure correct behavior on both sides - clear GLF_LRU after list_del under lru_lock. Reported-by: syzbot Signed-off-by: Hillf Danton Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 797949e784ccd..d9cb261f55b06 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1795,6 +1795,7 @@ __acquires(&lru_lock) while(!list_empty(list)) { gl = list_first_entry(list, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); + clear_bit(GLF_LRU, &gl->gl_flags); if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: list_add(&gl->gl_lru, &lru_list); @@ -1840,7 +1841,6 @@ static long gfs2_scan_glock_lru(int nr) if (!test_bit(GLF_LOCK, &gl->gl_flags)) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); - clear_bit(GLF_LRU, &gl->gl_flags); freed++; continue; }