Skip to content

Commit

Permalink
Fix a crash when block device is read and block size is changed at th…
Browse files Browse the repository at this point in the history
…e same time

The kernel may crash when block size is changed and I/O is issued
simultaneously.

Because some subsystems (udev or lvm) may read any block device anytime,
the bug actually puts any code that changes a block device size in
jeopardy.

The crash can be reproduced if you place "msleep(1000)" to
blkdev_get_blocks just before "bh->b_size = max_blocks <<
inode->i_blkbits;".
Then, run "dd if=/dev/ram0 of=/dev/null bs=4k count=1 iflag=direct"
While it is waiting in msleep, run "blockdev --setbsz 2048 /dev/ram0"
You get a BUG.

The direct and non-direct I/O is written with the assumption that block
size does not change. It doesn't seem practical to fix these crashes
one-by-one there may be many crash possibilities when block size changes
at a certain place and it is impossible to find them all and verify the
code.

This patch introduces a new rw-lock bd_block_size_semaphore. The lock is
taken for read during I/O. It is taken for write when changing block
size. Consequently, block size can't be changed while I/O is being
submitted.

For asynchronous I/O, the patch only prevents block size change while
the I/O is being submitted. The block size can change when the I/O is in
progress or when the I/O is being finished. This is acceptable because
there are no accesses to block size when asynchronous I/O is being
finished.

The patch prevents block size changing while the device is mapped with
mmap.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
Mikulas Patocka authored and Jens Axboe committed Sep 26, 2012
1 parent 60ea822 commit b87570f
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 3 deletions.
2 changes: 1 addition & 1 deletion drivers/char/raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,

static const struct file_operations raw_fops = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_read = blkdev_aio_read,
.write = do_sync_write,
.aio_write = blkdev_aio_write,
.fsync = blkdev_fsync,
Expand Down
62 changes: 60 additions & 2 deletions fs/block_dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ EXPORT_SYMBOL(invalidate_bdev);

int set_blocksize(struct block_device *bdev, int size)
{
struct address_space *mapping;

/* Size must be a power of two, and between 512 and PAGE_SIZE */
if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
return -EINVAL;
Expand All @@ -124,13 +126,30 @@ int set_blocksize(struct block_device *bdev, int size)
if (size < bdev_logical_block_size(bdev))
return -EINVAL;

/* Prevent starting I/O or mapping the device */
down_write(&bdev->bd_block_size_semaphore);

/* Check that the block device is not memory mapped */
mapping = bdev->bd_inode->i_mapping;
mutex_lock(&mapping->i_mmap_mutex);
if (!prio_tree_empty(&mapping->i_mmap) ||
!list_empty(&mapping->i_mmap_nonlinear)) {
mutex_unlock(&mapping->i_mmap_mutex);
up_write(&bdev->bd_block_size_semaphore);
return -EBUSY;
}
mutex_unlock(&mapping->i_mmap_mutex);

/* Don't change the size if it is same as current */
if (bdev->bd_block_size != size) {
sync_blockdev(bdev);
bdev->bd_block_size = size;
bdev->bd_inode->i_blkbits = blksize_bits(size);
kill_bdev(bdev);
}

up_write(&bdev->bd_block_size_semaphore);

return 0;
}

Expand Down Expand Up @@ -472,6 +491,7 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
init_rwsem(&bdev->bd_block_size_semaphore);
}

static inline void __bd_forget(struct inode *inode)
Expand Down Expand Up @@ -1567,6 +1587,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return blkdev_ioctl(bdev, mode, cmd, arg);
}

ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
ssize_t ret;
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);

down_read(&bdev->bd_block_size_semaphore);

ret = generic_file_aio_read(iocb, iov, nr_segs, pos);

up_read(&bdev->bd_block_size_semaphore);

return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_read);

/*
* Write data to the block device. Only intended for the block device itself
* and the raw driver which basically is a fake block device.
Expand All @@ -1578,12 +1614,16 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct block_device *bdev = I_BDEV(file->f_mapping->host);
struct blk_plug plug;
ssize_t ret;

BUG_ON(iocb->ki_pos != pos);

blk_start_plug(&plug);

down_read(&bdev->bd_block_size_semaphore);

ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
Expand All @@ -1592,11 +1632,29 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}

up_read(&bdev->bd_block_size_semaphore);

blk_finish_plug(&plug);

return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);

int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
{
int ret;
struct block_device *bdev = I_BDEV(file->f_mapping->host);

down_read(&bdev->bd_block_size_semaphore);

ret = generic_file_mmap(file, vma);

up_read(&bdev->bd_block_size_semaphore);

return ret;
}

/*
* Try to release a page associated with block device when the system
* is under memory pressure.
Expand Down Expand Up @@ -1627,9 +1685,9 @@ const struct file_operations def_blk_fops = {
.llseek = block_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_read = blkdev_aio_read,
.aio_write = blkdev_aio_write,
.mmap = generic_file_mmap,
.mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT
Expand Down
4 changes: 4 additions & 0 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,8 @@ struct block_device {
int bd_fsfreeze_count;
/* Mutex for freeze */
struct mutex bd_fsfreeze_mutex;
/* A semaphore that prevents I/O while block size is being changed */
struct rw_semaphore bd_block_size_semaphore;
};

/*
Expand Down Expand Up @@ -2565,6 +2567,8 @@ extern int generic_segment_checks(const struct iovec *iov,
unsigned long *nr_segs, size_t *count, int access_flags);

/* fs/block_dev.c */
extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
Expand Down

0 comments on commit b87570f

Please sign in to comment.