Skip to content

Commit

Permalink
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix kconfig typo and extra whitespace
  ext4: fix build failure without procfs
  ext4: add an option to control error handling on file data
  jbd2: don't dirty original metadata buffer on abort
  ext4: add checks for errors from jbd2
  jbd2: fix error handling for checkpoint io
  jbd2: abort when failed to log metadata buffers
  • Loading branch information
Linus Torvalds committed Oct 12, 2008
2 parents f1b2a5a + f319fb8 commit 3280fb3
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 32 deletions.
5 changes: 5 additions & 0 deletions Documentation/filesystems/ext4.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error.
errors=continue Keep going on a filesystem error.
errors=panic Panic and halt the machine if an error occurs.

data_err=ignore(*) Just print an error message if an error occurs
in a file data buffer in ordered mode.
data_err=abort Abort the journal if an error occurs in a file
data buffer in ordered mode.

grpid Give objects the same group ID as their creator.
bsdgroups

Expand Down
4 changes: 2 additions & 2 deletions fs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ config EXT4DEV_COMPAT
help
Starting with 2.6.28, the name of the ext4 filesystem was
renamed from ext4dev to ext4. Unfortunately there are some
lagecy userspace programs (such as klibc's fstype) have
"ext4dev" hardcoded.
legacy userspace programs (such as klibc's fstype) have
"ext4dev" hardcoded.

To enable backwards compatibility so that systems that are
still expecting to mount ext4 filesystems using ext4dev,
Expand Down
2 changes: 2 additions & 0 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,8 @@ do { \
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */

/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
Expand Down
12 changes: 8 additions & 4 deletions fs/ext4/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT4_IOC_GROUP_EXTEND: {
ext4_fsblk_t n_blocks_count;
struct super_block *sb = inode->i_sb;
int err;
int err, err2;

if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
Expand All @@ -206,16 +206,18 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)

err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
jbd2_journal_flush(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
if (err == 0)
err = err2;
mnt_drop_write(filp->f_path.mnt);

return err;
}
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
struct super_block *sb = inode->i_sb;
int err;
int err, err2;

if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
Expand All @@ -230,8 +232,10 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)

err = ext4_group_add(sb, &input);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
jbd2_journal_flush(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
if (err == 0)
err = err2;
mnt_drop_write(filp->f_path.mnt);

return err;
Expand Down
41 changes: 37 additions & 4 deletions fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,8 @@ static void ext4_put_super(struct super_block *sb)
ext4_mb_release(sb);
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
jbd2_journal_destroy(sbi->s_journal);
if (jbd2_journal_destroy(sbi->s_journal) < 0)
ext4_abort(sb, __func__, "Couldn't clean up the journal");
sbi->s_journal = NULL;
if (!(sb->s_flags & MS_RDONLY)) {
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
Expand Down Expand Up @@ -777,6 +778,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_printf(seq, ",inode_readahead_blks=%u",
sbi->s_inode_readahead_blks);

if (test_opt(sb, DATA_ERR_ABORT))
seq_puts(seq, ",data_err=abort");

ext4_show_quota_options(seq, sb);
return 0;
}
Expand Down Expand Up @@ -906,6 +910,7 @@ enum {
Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
Expand Down Expand Up @@ -952,6 +957,8 @@ static match_table_t tokens = {
{Opt_data_journal, "data=journal"},
{Opt_data_ordered, "data=ordered"},
{Opt_data_writeback, "data=writeback"},
{Opt_data_err_abort, "data_err=abort"},
{Opt_data_err_ignore, "data_err=ignore"},
{Opt_offusrjquota, "usrjquota="},
{Opt_usrjquota, "usrjquota=%s"},
{Opt_offgrpjquota, "grpjquota="},
Expand Down Expand Up @@ -1186,6 +1193,12 @@ static int parse_options(char *options, struct super_block *sb,
sbi->s_mount_opt |= data_opt;
}
break;
case Opt_data_err_abort:
set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
break;
case Opt_data_err_ignore:
clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
break;
#ifdef CONFIG_QUOTA
case Opt_usrjquota:
qtype = USRQUOTA;
Expand Down Expand Up @@ -2218,13 +2231,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}

#ifdef CONFIG_PROC_FS
if (ext4_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);

if (sbi->s_proc)
proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
&ext4_ui_proc_fops,
&sbi->s_inode_readahead_blks);
#endif

bgl_lock_init(&sbi->s_blockgroup_lock);

Expand Down Expand Up @@ -2534,6 +2549,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_flags |= JBD2_BARRIER;
else
journal->j_flags &= ~JBD2_BARRIER;
if (test_opt(sb, DATA_ERR_ABORT))
journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
else
journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
spin_unlock(&journal->j_state_lock);
}

Expand Down Expand Up @@ -2853,7 +2872,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
journal_t *journal = EXT4_SB(sb)->s_journal;

jbd2_journal_lock_updates(journal);
jbd2_journal_flush(journal);
if (jbd2_journal_flush(journal) < 0)
goto out;

lock_super(sb);
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
sb->s_flags & MS_RDONLY) {
Expand All @@ -2862,6 +2883,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
ext4_commit_super(sb, es, 1);
}
unlock_super(sb);

out:
jbd2_journal_unlock_updates(journal);
}

Expand Down Expand Up @@ -2962,7 +2985,13 @@ static void ext4_write_super_lockfs(struct super_block *sb)

/* Now we set up the journal barrier. */
jbd2_journal_lock_updates(journal);
jbd2_journal_flush(journal);

/*
* We don't want to clear needs_recovery flag when we failed
* to flush the journal.
*/
if (jbd2_journal_flush(journal) < 0)
return;

/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
Expand Down Expand Up @@ -3402,8 +3431,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* otherwise be livelocked...
*/
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
jbd2_journal_flush(EXT4_SB(sb)->s_journal);
err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
if (err) {
path_put(&nd.path);
return err;
}
}

err = vfs_quota_on_path(sb, type, format_id, &nd.path);
Expand Down
49 changes: 37 additions & 12 deletions fs/jbd2/checkpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
int ret = 0;
struct buffer_head *bh = jh2bh(jh);

if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
Expand Down Expand Up @@ -176,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
* buffers. Note that we take the buffers in the opposite ordering
* from the one in which they were submitted for IO.
*
* Return 0 on success, and return <0 if some buffers have failed
* to be written out.
*
* Called with j_list_lock held.
*/
static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
{
struct journal_head *jh;
struct buffer_head *bh;
tid_t this_tid;
int released = 0;
int ret = 0;

this_tid = transaction->t_tid;
restart:
/* Did somebody clean up the transaction in the meanwhile? */
if (journal->j_checkpoint_transactions != transaction ||
transaction->t_tid != this_tid)
return;
return ret;
while (!released && transaction->t_checkpoint_io_list) {
jh = transaction->t_checkpoint_io_list;
bh = jh2bh(jh);
Expand All @@ -210,6 +215,9 @@ static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
spin_lock(&journal->j_list_lock);
goto restart;
}
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;

/*
* Now in whatever state the buffer currently is, we know that
* it has been written out and so we can drop it from the list
Expand All @@ -219,6 +227,8 @@ static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
jbd2_journal_remove_journal_head(bh);
__brelse(bh);
}

return ret;
}

#define NR_BATCH 64
Expand All @@ -242,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
* Try to flush one buffer from the checkpoint list to disk.
*
* Return 1 if something happened which requires us to abort the current
* scan of the checkpoint list.
* scan of the checkpoint list. Return <0 if the buffer has failed to
* be written out.
*
* Called with j_list_lock held and drops it if 1 is returned
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
Expand Down Expand Up @@ -274,14 +285,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
jbd2_log_wait_commit(journal, tid);
ret = 1;
} else if (!buffer_dirty(bh)) {
ret = 1;
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
jbd2_journal_remove_journal_head(bh);
__brelse(bh);
ret = 1;
} else {
/*
* Important: we are about to write the buffer, and
Expand Down Expand Up @@ -314,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
* to disk. We submit larger chunks of data at once.
*
* The journal should be locked before calling this function.
* Called with j_checkpoint_mutex held.
*/
int jbd2_log_do_checkpoint(journal_t *journal)
{
Expand All @@ -339,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
* OK, we need to start writing disk blocks. Take one transaction
* and write it.
*/
result = 0;
spin_lock(&journal->j_list_lock);
if (!journal->j_checkpoint_transactions)
goto out;
Expand All @@ -357,7 +372,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
int batch_count = 0;
struct buffer_head *bhs[NR_BATCH];
struct journal_head *jh;
int retry = 0;
int retry = 0, err;

while (!retry && transaction->t_checkpoint_list) {
struct buffer_head *bh;
Expand All @@ -371,6 +386,8 @@ int jbd2_log_do_checkpoint(journal_t *journal)
}
retry = __process_buffer(journal, jh, bhs, &batch_count,
transaction);
if (retry < 0 && !result)
result = retry;
if (!retry && (need_resched() ||
spin_needbreak(&journal->j_list_lock))) {
spin_unlock(&journal->j_list_lock);
Expand All @@ -395,14 +412,18 @@ int jbd2_log_do_checkpoint(journal_t *journal)
* Now we have cleaned up the first transaction's checkpoint
* list. Let's clean up the second one
*/
__wait_cp_io(journal, transaction);
err = __wait_cp_io(journal, transaction);
if (!result)
result = err;
}
out:
spin_unlock(&journal->j_list_lock);
result = jbd2_cleanup_journal_tail(journal);
if (result < 0)
return result;
return 0;
jbd2_journal_abort(journal, result);
else
result = jbd2_cleanup_journal_tail(journal);

return (result < 0) ? result : 0;
}

/*
Expand All @@ -418,8 +439,9 @@ int jbd2_log_do_checkpoint(journal_t *journal)
* This is the only part of the journaling code which really needs to be
* aware of transaction aborts. Checkpointing involves writing to the
* main filesystem area rather than to the journal, so it can proceed
* even in abort state, but we must not update the journal superblock if
* we have an abort error outstanding.
* even in abort state, but we must not update the super block if
* checkpointing may have failed. Otherwise, we would lose some metadata
* buffers which should be written-back to the filesystem.
*/

int jbd2_cleanup_journal_tail(journal_t *journal)
Expand All @@ -428,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
tid_t first_tid;
unsigned long blocknr, freed;

if (is_journal_aborted(journal))
return 1;

/* OK, work out the oldest transaction remaining in the log, and
* the log block it starts at.
*
Expand Down
Loading

0 comments on commit 3280fb3

Please sign in to comment.