Skip to content

Commit

Permalink
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Browse files Browse the repository at this point in the history

Pull ext4 updates from Ted Ts'o:
 "Scalability improvements when allocating inodes, and some
  miscellaneous bug fixes and cleanups"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: avoid Y2038 overflow in recently_deleted()
  ext4: fix fault handling when mounted with -o dax,ro
  ext4: fix quota inconsistency during orphan cleanup for read-only mounts
  ext4: fix incorrect quotaoff if the quota feature is enabled
  ext4: remove useless test and assignment in strtohash functions
  ext4: backward compatibility support for Lustre ea_inode implementation
  ext4: remove timebomb in ext4_decode_extra_time()
  ext4: use sizeof(*ptr)
  ext4: in ext4_seek_{hole,data}, return -ENXIO for negative offsets
  ext4: reduce lock contention in __ext4_new_inode
  ext4: cleanup goto next group
  ext4: do not unnecessarily allocate buffer in recently_deleted()
  • Loading branch information
Linus Torvalds committed Sep 6, 2017
2 parents 5791577 + b5f5157 commit be6297e
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 111 deletions.
2 changes: 1 addition & 1 deletion fs/ext4/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
{
struct dir_private_info *p;

p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return NULL;
p->curr_hash = pos2maj_hash(filp, pos);
Expand Down
11 changes: 5 additions & 6 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -838,13 +838,11 @@ static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
{
if (unlikely(sizeof(time->tv_sec) > 4 &&
(extra & cpu_to_le32(EXT4_EPOCH_MASK)))) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0)

#if 1
/* Handle legacy encoding of pre-1970 dates with epoch
* bits 1,1. We assume that by kernel version 4.20,
* everyone will have run fsck over the affected
* filesystems to correct the problem. (This
* backwards compatibility may be removed before this
* time, at the discretion of the ext4 developers.)
* bits 1,1. (This backwards compatibility may be removed
* at the discretion of the ext4 developers.)
*/
u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK;
if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0)
Expand Down Expand Up @@ -1567,6 +1565,7 @@ enum {
nolocking */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
};

#define EXT4_INODE_BIT_FNS(name, field, offset) \
Expand Down
19 changes: 16 additions & 3 deletions fs/ext4/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,20 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
handle_t *handle = NULL;
struct inode *inode = file_inode(vmf->vma->vm_file);
struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;

/*
* We have to distinguish real writes from writes which will result in a
* COW page; COW writes should *not* poke the journal (the file will not
* be changed). Doing so would cause unintended failures when mounted
* read-only.
*
* We check for VM_SHARED rather than vmf->cow_page since the latter is
* unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
* other sizes, dax_iomap_fault will handle splitting / fallback so that
* we eventually come back with a COW page.
*/
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
(vmf->vma->vm_flags & VM_SHARED);

if (write) {
sb_start_pagefault(sb);
Expand Down Expand Up @@ -595,7 +608,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
inode_lock(inode);

isize = i_size_read(inode);
if (offset >= isize) {
if (offset < 0 || offset >= isize) {
inode_unlock(inode);
return -ENXIO;
}
Expand Down Expand Up @@ -658,7 +671,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
inode_lock(inode);

isize = i_size_read(inode);
if (offset >= isize) {
if (offset < 0 || offset >= isize) {
inode_unlock(inode);
return -ENXIO;
}
Expand Down
4 changes: 0 additions & 4 deletions fs/ext4/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,6 @@ static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
if (len > num*4)
len = num * 4;
for (i = 0; i < len; i++) {
if ((i % 4) == 0)
val = pad;
val = ((int) scp[i]) + (val << 8);
if ((i % 4) == 3) {
*buf++ = val;
Expand All @@ -176,8 +174,6 @@ static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
if (len > num*4)
len = num * 4;
for (i = 0; i < len; i++) {
if ((i % 4) == 0)
val = pad;
val = ((int) ucp[i]) + (val << 8);
if ((i % 4) == 3) {
*buf++ = val;
Expand Down
93 changes: 60 additions & 33 deletions fs/ext4/ialloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -692,24 +692,25 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
* somewhat arbitrary...)
*/
#define RECENTCY_MIN 5
#define RECENTCY_DIRTY 30
#define RECENTCY_DIRTY 300

static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
{
struct ext4_group_desc *gdp;
struct ext4_inode *raw_inode;
struct buffer_head *bh;
unsigned long dtime, now;
int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
int offset, ret = 0, recentcy = RECENTCY_MIN;
int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
int offset, ret = 0;
int recentcy = RECENTCY_MIN;
u32 dtime, now;

gdp = ext4_get_group_desc(sb, group, NULL);
if (unlikely(!gdp))
return 0;

bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) +
(ino / inodes_per_block));
if (unlikely(!bh) || !buffer_uptodate(bh))
if (!bh || !buffer_uptodate(bh))
/*
* If the block is not in the buffer cache, then it
* must have been written out.
Expand All @@ -718,18 +719,45 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)

offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
raw_inode = (struct ext4_inode *) (bh->b_data + offset);

/* i_dtime is only 32 bits on disk, but we only care about relative
* times in the range of a few minutes (i.e. long enough to sync a
* recently-deleted inode to disk), so using the low 32 bits of the
* clock (a 68 year range) is enough, see time_before32() */
dtime = le32_to_cpu(raw_inode->i_dtime);
now = get_seconds();
now = ktime_get_real_seconds();
if (buffer_dirty(bh))
recentcy += RECENTCY_DIRTY;

if (dtime && (dtime < now) && (now < dtime + recentcy))
if (dtime && time_before32(dtime, now) &&
time_before32(now, dtime + recentcy))
ret = 1;
out:
brelse(bh);
return ret;
}

/*
 * Scan the inode bitmap of @group for a usable clear bit.
 *
 * @sb:     superblock of the filesystem being searched
 * @group:  block group the bitmap belongs to (passed to recently_deleted())
 * @bitmap: buffer holding the group's inode bitmap
 * @ino:    in: zero-based bit index to start searching from;
 *          out: index of the bit that was found, or a value at or past
 *          EXT4_INODES_PER_GROUP(sb) when the search ran off the end
 *
 * When the filesystem has no journal (s_journal == NULL), a clear bit whose
 * inode recently_deleted() reports as recently deleted is skipped and the
 * scan resumes at the following bit.
 *
 * Returns 1 if a usable bit was found (its index is stored in *ino),
 * 0 if the group has no usable bit at or after the starting position.
 */
static int find_inode_bit(struct super_block *sb, ext4_group_t group,
			  struct buffer_head *bitmap, unsigned long *ino)
{
	for (;;) {
		*ino = ext4_find_next_zero_bit((unsigned long *)bitmap->b_data,
					       EXT4_INODES_PER_GROUP(sb),
					       *ino);
		/* No clear bit left in this group. */
		if (*ino >= EXT4_INODES_PER_GROUP(sb))
			return 0;

		/*
		 * With a journal the recently-deleted check is not made at
		 * all; without one, only a bit that is not recently deleted
		 * is accepted.
		 */
		if (EXT4_SB(sb)->s_journal != NULL ||
		    !recently_deleted(sb, group, *ino))
			return 1;

		/* Step over the recently deleted inode and keep looking. */
		*ino = *ino + 1;
		if (*ino >= EXT4_INODES_PER_GROUP(sb))
			return 0;
	}
}

/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
Expand Down Expand Up @@ -892,47 +920,34 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
/*
* Check free inodes count before loading bitmap.
*/
if (ext4_free_inodes_count(sb, gdp) == 0) {
if (++group == ngroups)
group = 0;
continue;
}
if (ext4_free_inodes_count(sb, gdp) == 0)
goto next_group;

grp = ext4_get_group_info(sb, group);
/* Skip groups with already-known suspicious inode tables */
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
if (++group == ngroups)
group = 0;
continue;
}
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
goto next_group;

brelse(inode_bitmap_bh);
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
/* Skip groups with suspicious inode tables */
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) ||
IS_ERR(inode_bitmap_bh)) {
inode_bitmap_bh = NULL;
if (++group == ngroups)
group = 0;
continue;
goto next_group;
}

repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data,
EXT4_INODES_PER_GROUP(sb), ino);
if (ino >= EXT4_INODES_PER_GROUP(sb))
ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
if (!ret2)
goto next_group;
if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {

if (group == 0 && (ino + 1) < EXT4_FIRST_INO(sb)) {
ext4_error(sb, "reserved inode found cleared - "
"inode=%lu", ino + 1);
continue;
}
if ((EXT4_SB(sb)->s_journal == NULL) &&
recently_deleted(sb, group, ino)) {
ino++;
goto next_inode;
goto next_group;
}

if (!handle) {
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
Expand All @@ -952,11 +967,23 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
}
ext4_lock_group(sb, group);
ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
if (ret2) {
/* Someone already took the bit. Repeat the search
* with lock held.
*/
ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
if (ret2) {
ext4_set_bit(ino, inode_bitmap_bh->b_data);
ret2 = 0;
} else {
ret2 = 1; /* we didn't grab the inode */
}
}
ext4_unlock_group(sb, group);
ino++; /* the inode bitmap is zero-based */
if (!ret2)
goto got; /* we grabbed the inode! */
next_inode:

if (ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
next_group:
Expand Down
8 changes: 0 additions & 8 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -4897,14 +4897,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
brelse(iloc.bh);
ext4_set_inode_flags(inode);

if (ei->i_flags & EXT4_EA_INODE_FL) {
ext4_xattr_inode_set_class(inode);

inode_lock(inode);
inode->i_flags |= S_NOQUOTA;
inode_unlock(inode);
}

unlock_new_inode(inode);
return inode;

Expand Down
2 changes: 1 addition & 1 deletion fs/ext4/mmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ int ext4_multi_mount_protect(struct super_block *sb,
goto failed;
}

mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
mmpd_data = kmalloc(sizeof(*mmpd_data), GFP_KERNEL);
if (!mmpd_data) {
ext4_warning(sb, "not enough memory for mmpd_data");
goto failed;
Expand Down
38 changes: 31 additions & 7 deletions fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -2404,6 +2404,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
unsigned int s_flags = sb->s_flags;
int ret, nr_orphans = 0, nr_truncates = 0;
#ifdef CONFIG_QUOTA
int quota_update = 0;
int i;
#endif
if (!es->s_last_orphan) {
Expand Down Expand Up @@ -2442,14 +2443,32 @@ static void ext4_orphan_cleanup(struct super_block *sb,
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sb->s_flags |= MS_ACTIVE;
/* Turn on quotas so that they are updated correctly */

/*
* Turn on quotas which were not enabled for read-only mounts if
* filesystem has quota feature, so that they are updated correctly.
*/
if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) {
int ret = ext4_enable_quotas(sb);

if (!ret)
quota_update = 1;
else
ext4_msg(sb, KERN_ERR,
"Cannot turn on quotas: error %d", ret);
}

/* Turn on journaled quotas used for old style */
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
if (EXT4_SB(sb)->s_qf_names[i]) {
int ret = ext4_quota_on_mount(sb, i);
if (ret < 0)

if (!ret)
quota_update = 1;
else
ext4_msg(sb, KERN_ERR,
"Cannot turn on journaled "
"quota: error %d", ret);
"quota: type %d: error %d", i, ret);
}
}
#endif
Expand Down Expand Up @@ -2510,10 +2529,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
PLURAL(nr_truncates));
#ifdef CONFIG_QUOTA
/* Turn quotas off */
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
if (sb_dqopt(sb)->files[i])
dquot_quota_off(sb, i);
/* Turn off quotas if they were enabled for orphan cleanup */
if (quota_update) {
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
if (sb_dqopt(sb)->files[i])
dquot_quota_off(sb, i);
}
}
#endif
sb->s_flags = s_flags; /* Restore MS_RDONLY status */
Expand Down Expand Up @@ -5512,6 +5533,9 @@ static int ext4_enable_quotas(struct super_block *sb)
DQUOT_USAGE_ENABLED |
(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
for (type--; type >= 0; type--)
dquot_quota_off(sb, type);

ext4_warning(sb,
"Failed to enable quota tracking "
"(type=%d, err=%d). Please run "
Expand Down
Loading

0 comments on commit be6297e

Please sign in to comment.