Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  jbd2: call __jbd2_log_start_commit with j_state_lock write locked
  ext4: serialize unaligned asynchronous DIO
  ext4: make grpinfo slab cache names static
  ext4: Fix data corruption with multi-block writepages support
  ext4: fix up ext4 error handling
  ext4: unregister features interface on module unload
  ext4: fix panic on module unload when stopping lazyinit thread
Linus Torvalds committed Feb 12, 2011
2 parents 3c6c0d6 + e447183 commit c8e0b00
Showing 8 changed files with 221 additions and 91 deletions.
10 changes: 10 additions & 0 deletions fs/ext4/ext4.h
@@ -848,6 +848,7 @@ struct ext4_inode_info {
 	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
 	/* current io_end structure for async DIO write*/
 	ext4_io_end_t *cur_aio_dio;
+	atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
 
 	spinlock_t i_block_reservation_lock;
 
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
 
 #define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
 
+/* For ioend & aio unwritten conversion wait queues */
+#define EXT4_WQ_HASH_SZ		37
+#define ext4_ioend_wq(v)   (&ext4__ioend_wq[((unsigned long)(v)) %\
+					    EXT4_WQ_HASH_SZ])
+#define ext4_aio_mutex(v)  (&ext4__aio_mutex[((unsigned long)(v)) %\
+					     EXT4_WQ_HASH_SZ])
+extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+
 #endif	/* __KERNEL__ */
 
 #endif	/* _EXT4_H */
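The macros added above implement a hashed-lock table: instead of embedding a dedicated wait queue and mutex in every inode, the inode pointer is reduced modulo a prime table size (37) to pick a shared slot. Colliding inodes merely share a slot, which costs some contention but no correctness, and the memory cost stays constant. A minimal userspace sketch of the same idea using pthreads; WQ_HASH_SZ and obj_mutex_for() are illustrative names, not kernel APIs:

/* Hashed-lock table: fixed array of locks indexed by an object's address. */
#include <pthread.h>
#include <stdio.h>

#define WQ_HASH_SZ 37	/* prime, as in EXT4_WQ_HASH_SZ */

static pthread_mutex_t obj_mutex[WQ_HASH_SZ];

/* Same reduction as ext4_aio_mutex(): pointer value modulo table size. */
static pthread_mutex_t *obj_mutex_for(const void *v)
{
	return &obj_mutex[(unsigned long)v % WQ_HASH_SZ];
}

int main(void)
{
	int i, x;

	for (i = 0; i < WQ_HASH_SZ; i++)
		pthread_mutex_init(&obj_mutex[i], NULL);

	pthread_mutex_lock(obj_mutex_for(&x));
	printf("%p hashes to slot %lu\n", (void *)&x,
	       (unsigned long)&x % WQ_HASH_SZ);
	pthread_mutex_unlock(obj_mutex_for(&x));
	return 0;
}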
10 changes: 6 additions & 4 deletions fs/ext4/extents.c
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 		 * that this IO needs to convertion to written when IO is
 		 * completed
 		 */
-		if (io)
+		if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
 			io->flag = EXT4_IO_END_UNWRITTEN;
-		else
+			atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+		} else
 			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
 		if (ext4_should_dioread_nolock(inode))
 			map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		 * that we need to perform convertion when IO is done.
 		 */
 		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-			if (io)
+			if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
 				io->flag = EXT4_IO_END_UNWRITTEN;
-			else
+				atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+			} else
 				ext4_set_inode_state(inode,
 						     EXT4_STATE_DIO_UNWRITTEN);
 		}
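Both hunks apply the same rule: only the first transition of an io_end to EXT4_IO_END_UNWRITTEN bumps i_aiodio_unwritten, so the counter tracks distinct pending conversions rather than calls. The matching decrement and wake-up belong to the conversion-completion path, which is in one of the files this page did not load, so the sketch below of the whole wait/wake protocol is hedged: submit_unwritten(), conversion_done() and wait_unwritten() are invented names standing in for the kernel's atomic_inc above, the completion-side decrement, and ext4_aiodio_wait() in the next file's hunk.

/* Userspace sketch of the pending-conversion counter protocol. */
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  zero = PTHREAD_COND_INITIALIZER;
static int unwritten_pending;	/* plays the role of i_aiodio_unwritten */

static void submit_unwritten(void)	/* inc side, as in the hunks above */
{
	pthread_mutex_lock(&lock);
	unwritten_pending++;
	pthread_mutex_unlock(&lock);
}

static void conversion_done(void)	/* completion side (not in this excerpt) */
{
	pthread_mutex_lock(&lock);
	if (--unwritten_pending == 0)
		pthread_cond_broadcast(&zero);
	pthread_mutex_unlock(&lock);
}

static void wait_unwritten(void)	/* analogue of ext4_aiodio_wait() */
{
	pthread_mutex_lock(&lock);
	while (unwritten_pending != 0)
		pthread_cond_wait(&zero, &lock);
	pthread_mutex_unlock(&lock);
}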
60 changes: 59 additions & 1 deletion fs/ext4/file.c
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static void ext4_aiodio_wait(struct inode *inode)
+{
+	wait_queue_head_t *wq = ext4_ioend_wq(inode);
+
+	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
+}
+
+/*
+ * This tests whether the IO in question is block-aligned or not.
+ * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
+ * are converted to written only after the IO is complete.  Until they are
+ * mapped, these blocks appear as holes, so dio_zero_block() will assume that
+ * it needs to zero out portions of the start and/or end block.  If 2 AIO
+ * threads are at work on the same unwritten block, they must be synchronized
+ * or one thread will zero the other's data, causing corruption.
+ */
+static int
+ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
+		   unsigned long nr_segs, loff_t pos)
+{
+	struct super_block *sb = inode->i_sb;
+	int blockmask = sb->s_blocksize - 1;
+	size_t count = iov_length(iov, nr_segs);
+	loff_t final_size = pos + count;
+
+	if (pos >= inode->i_size)
+		return 0;
+
+	if ((pos & blockmask) || (final_size & blockmask))
+		return 1;
+
+	return 0;
+}
+
 static ssize_t
 ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+	int unaligned_aio = 0;
+	int ret;
 
 	/*
 	 * If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 			nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
 					      sbi->s_bitmap_maxbytes - pos);
 		}
+	} else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
+		   !is_sync_kiocb(iocb))) {
+		unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
 	}
 
-	return generic_file_aio_write(iocb, iov, nr_segs, pos);
+	/* Unaligned direct AIO must be serialized; see comment above */
+	if (unaligned_aio) {
+		static unsigned long unaligned_warn_time;
+
+		/* Warn about this once per day */
+		if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
+			ext4_msg(inode->i_sb, KERN_WARNING,
+				 "Unaligned AIO/DIO on inode %ld by %s; "
+				 "performance will be poor.",
+				 inode->i_ino, current->comm);
+		mutex_lock(ext4_aio_mutex(inode));
+		ext4_aiodio_wait(inode);
+	}
+
+	ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+
+	if (unaligned_aio)
+		mutex_unlock(ext4_aio_mutex(inode));
+
+	return ret;
 }
 
 static const struct vm_operations_struct ext4_file_vm_ops = {
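The alignment test in ext4_unaligned_aio() is two mask operations: with a power-of-two block size, an I/O is unaligned when either its start or its end offset falls inside a block. (The kernel version additionally returns 0 for writes at or beyond i_size, since those cannot race with zeroing of an already-visible block.) A standalone illustration; is_unaligned() is an invented name:

/* With 4096-byte blocks: (pos, count) of (4096, 4096) is aligned;
 * (512, 4096) and (4096, 1000) are not. */
#include <stdio.h>

static int is_unaligned(long long pos, long long count, int blocksize)
{
	int blockmask = blocksize - 1;	/* blocksize must be a power of 2 */
	long long final_size = pos + count;

	return (pos & blockmask) || (final_size & blockmask);
}

int main(void)
{
	printf("%d %d %d\n",	/* prints: 0 1 1 */
	       is_unaligned(4096, 4096, 4096),
	       is_unaligned(512,  4096, 4096),
	       is_unaligned(4096, 1000, 4096));
	return 0;
}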
100 changes: 60 additions & 40 deletions fs/ext4/mballoc.c
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
 /* We create slab caches for groupinfo data structures based on the
  * superblock block size.  There will be one per mounted filesystem for
  * each unique s_blocksize_bits */
-#define NR_GRPINFO_CACHES \
-	(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
+#define NR_GRPINFO_CACHES 8
 static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
 
+static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
+	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
+	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
+	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
+};
+
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group);
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,16 +2419,62 @@ static int ext4_mb_init_backend(struct super_block *sb)
 	return -ENOMEM;
 }
 
+static void ext4_groupinfo_destroy_slabs(void)
+{
+	int i;
+
+	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
+		if (ext4_groupinfo_caches[i])
+			kmem_cache_destroy(ext4_groupinfo_caches[i]);
+		ext4_groupinfo_caches[i] = NULL;
+	}
+}
+
+static int ext4_groupinfo_create_slab(size_t size)
+{
+	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
+	int slab_size;
+	int blocksize_bits = order_base_2(size);
+	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+	struct kmem_cache *cachep;
+
+	if (cache_index >= NR_GRPINFO_CACHES)
+		return -EINVAL;
+
+	if (unlikely(cache_index < 0))
+		cache_index = 0;
+
+	mutex_lock(&ext4_grpinfo_slab_create_mutex);
+	if (ext4_groupinfo_caches[cache_index]) {
+		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
+		return 0;	/* Already created */
+	}
+
+	slab_size = offsetof(struct ext4_group_info,
+				bb_counters[blocksize_bits + 2]);
+
+	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
+					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
+					NULL);
+
+	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
+	if (!cachep) {
+		printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
+		return -ENOMEM;
+	}
+
+	ext4_groupinfo_caches[cache_index] = cachep;
+
+	return 0;
+}
+
 int ext4_mb_init(struct super_block *sb, int needs_recovery)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	unsigned i, j;
 	unsigned offset;
 	unsigned max;
 	int ret;
-	int cache_index;
-	struct kmem_cache *cachep;
-	char *namep = NULL;
 
 	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
 
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 		goto out;
 	}
 
-	cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
-	cachep = ext4_groupinfo_caches[cache_index];
-	if (!cachep) {
-		char name[32];
-		int len = offsetof(struct ext4_group_info,
-				bb_counters[sb->s_blocksize_bits + 2]);
-
-		sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
-		namep = kstrdup(name, GFP_KERNEL);
-		if (!namep) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		/* Need to free the kmem_cache_name() when we
-		 * destroy the slab */
-		cachep = kmem_cache_create(namep, len, 0,
-					     SLAB_RECLAIM_ACCOUNT, NULL);
-		if (!cachep) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		ext4_groupinfo_caches[cache_index] = cachep;
-	}
+	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
+	if (ret < 0)
+		goto out;
 
 	/* order 0 is regular bitmap */
 	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	if (ret) {
 		kfree(sbi->s_mb_offsets);
 		kfree(sbi->s_mb_maxs);
-		kfree(namep);
 	}
 	return ret;
 }
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
 
 void ext4_exit_mballoc(void)
 {
-	int i;
 	/*
 	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
 	 * before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
 	kmem_cache_destroy(ext4_pspace_cachep);
 	kmem_cache_destroy(ext4_ac_cachep);
 	kmem_cache_destroy(ext4_free_ext_cachep);
-
-	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
-		struct kmem_cache *cachep = ext4_groupinfo_caches[i];
-		if (cachep) {
-			char *name = (char *)kmem_cache_name(cachep);
-			kmem_cache_destroy(cachep);
-			kfree(name);
-		}
-	}
+	ext4_groupinfo_destroy_slabs();
 	ext4_remove_debugfs_entry();
 }
 
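ext4_groupinfo_create_slab() sizes each per-blocksize cache with offsetof() into the trailing bb_counters[] array, so a filesystem with blocksize_bits b gets objects exactly large enough for b + 2 buddy counters and nothing more. A standalone illustration of that sizing idiom; struct group_info_demo is invented for the example, not the kernel structure:

/* offsetof() into a flexible array member yields the allocation size
 * needed for exactly n trailing elements. */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>

struct group_info_demo {
	unsigned long free_blocks;
	unsigned int  counters[];	/* like bb_counters[] */
};

int main(void)
{
	int blocksize_bits = 12;	/* 4K blocks */
	size_t size = offsetof(struct group_info_demo,
			       counters[blocksize_bits + 2]);
	struct group_info_demo *gi = malloc(size);

	if (!gi)
		return 1;
	printf("%zu bytes holds %d counters\n", size, blocksize_bits + 2);
	free(gi);
	return 0;
}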