Merge branch 'block-dev'
Merge 'block-dev' branch.

I was going to just mark everything here for stable and leave it to the
3.8 merge window, but having decided on doing another -rc, I might as
well merge it now.

This removes the bd_block_size_semaphore semaphore that was added in
this release to fix a race condition between block size changes and
block IO, and replaces it with atomicity guarantees in fs/buffer.c
instead, along with simplifying fs/block_dev.c.

This removes more lines than it adds, makes the code generally simpler,
and avoids the latency/rt issues that the block size semaphore
introduced for mount.

I'm not happy with the timing, but it wouldn't be much better doing this
during the merge window and then having some delayed back-port of it
into stable.

* block-dev:
  blkdev_max_block: make private to fs/buffer.c
  direct-io: don't read inode->i_blkbits multiple times
  blockdev: remove bd_block_size_semaphore again
  fs/buffer.c: make block-size be per-page and protected by the page lock
Linus Torvalds committed Dec 3, 2012
2 parents 7e5530a + bbec027 commit d3594ea
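
The core of the replacement, per the fs/buffer.c and direct-io entries
in the shortlog above, is that readers now sample the block size once
and use that snapshot consistently, instead of holding a semaphore to
exclude a concurrent set_blocksize().  What follows is a minimal,
hypothetical userspace C sketch of that read-once idiom: the names
(demo_inode, demo_bytes_to_blocks) are invented, and the C11 atomics
only stand in for "this field can change underneath you"; they are not
how the kernel code spells it.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for the inode field that set_blocksize() updates. */
struct demo_inode {
	_Atomic unsigned int i_blkbits;	/* log2 of the block size */
};

/*
 * Convert a byte count into a block count.  The shift is loaded ONCE into a
 * local and every derived value uses that local; re-reading the shared field
 * could mix an old and a new block size if it changes mid-calculation.
 */
static size_t demo_bytes_to_blocks(struct demo_inode *inode, size_t len)
{
	unsigned int blkbits = atomic_load(&inode->i_blkbits);
	size_t blocksize = (size_t)1 << blkbits;

	return (len + blocksize - 1) >> blkbits;
}

int main(void)
{
	struct demo_inode inode = { .i_blkbits = 12 };	/* 4096-byte blocks */

	/* 10000 bytes need 3 blocks of 4096 bytes. */
	printf("%zu\n", demo_bytes_to_blocks(&inode, 10000));
	return 0;
}

Roughly speaking, the actual patches get the same effect with a plain
local copy of inode->i_blkbits taken at the start of a direct-IO
request (fs/direct-io.c) and by keying buffer state to the page under
the page lock (fs/buffer.c), which is why the per-CPU semaphore can go
away from the read/write/mmap paths in the diff below.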
Showing 5 changed files with 72 additions and 196 deletions.
2 changes: 1 addition & 1 deletion drivers/char/raw.c
@@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,
 
 static const struct file_operations raw_fops = {
 	.read = do_sync_read,
-	.aio_read = blkdev_aio_read,
+	.aio_read = generic_file_aio_read,
 	.write = do_sync_write,
 	.aio_write = blkdev_aio_write,
 	.fsync = blkdev_fsync,
160 changes: 5 additions & 155 deletions fs/block_dev.c
@@ -70,19 +70,6 @@ static void bdev_inode_switch_bdi(struct inode *inode,
 	spin_unlock(&dst->wb.list_lock);
 }
 
-sector_t blkdev_max_block(struct block_device *bdev)
-{
-	sector_t retval = ~((sector_t)0);
-	loff_t sz = i_size_read(bdev->bd_inode);
-
-	if (sz) {
-		unsigned int size = block_size(bdev);
-		unsigned int sizebits = blksize_bits(size);
-		retval = (sz >> sizebits);
-	}
-	return retval;
-}
-
 /* Kill _all_ buffers and pagecache , dirty or not.. */
 void kill_bdev(struct block_device *bdev)
 {
@@ -116,8 +103,6 @@ EXPORT_SYMBOL(invalidate_bdev);
 
 int set_blocksize(struct block_device *bdev, int size)
 {
-	struct address_space *mapping;
-
 	/* Size must be a power of two, and between 512 and PAGE_SIZE */
 	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
 		return -EINVAL;
@@ -126,29 +111,13 @@ int set_blocksize(struct block_device *bdev, int size)
 	if (size < bdev_logical_block_size(bdev))
 		return -EINVAL;
 
-	/* Prevent starting I/O or mapping the device */
-	percpu_down_write(&bdev->bd_block_size_semaphore);
-
-	/* Check that the block device is not memory mapped */
-	mapping = bdev->bd_inode->i_mapping;
-	mutex_lock(&mapping->i_mmap_mutex);
-	if (mapping_mapped(mapping)) {
-		mutex_unlock(&mapping->i_mmap_mutex);
-		percpu_up_write(&bdev->bd_block_size_semaphore);
-		return -EBUSY;
-	}
-	mutex_unlock(&mapping->i_mmap_mutex);
-
 	/* Don't change the size if it is same as current */
 	if (bdev->bd_block_size != size) {
 		sync_blockdev(bdev);
 		bdev->bd_block_size = size;
 		bdev->bd_inode->i_blkbits = blksize_bits(size);
 		kill_bdev(bdev);
 	}
-
-	percpu_up_write(&bdev->bd_block_size_semaphore);
-
 	return 0;
 }
 
@@ -181,52 +150,12 @@ static
 blkdev_get_block(struct inode *inode, sector_t iblock,
 		struct buffer_head *bh, int create)
 {
-	if (iblock >= blkdev_max_block(I_BDEV(inode))) {
-		if (create)
-			return -EIO;
-
-		/*
-		 * for reads, we're just trying to fill a partial page.
-		 * return a hole, they will have to call get_block again
-		 * before they can fill it, and they will get -EIO at that
-		 * time
-		 */
-		return 0;
-	}
 	bh->b_bdev = I_BDEV(inode);
 	bh->b_blocknr = iblock;
 	set_buffer_mapped(bh);
 	return 0;
 }
 
-static int
-blkdev_get_blocks(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh, int create)
-{
-	sector_t end_block = blkdev_max_block(I_BDEV(inode));
-	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
-
-	if ((iblock + max_blocks) > end_block) {
-		max_blocks = end_block - iblock;
-		if ((long)max_blocks <= 0) {
-			if (create)
-				return -EIO; /* write fully beyond EOF */
-			/*
-			 * It is a read which is fully beyond EOF. We return
-			 * a !buffer_mapped buffer
-			 */
-			max_blocks = 0;
-		}
-	}
-
-	bh->b_bdev = I_BDEV(inode);
-	bh->b_blocknr = iblock;
-	bh->b_size = max_blocks << inode->i_blkbits;
-	if (max_blocks)
-		set_buffer_mapped(bh);
-	return 0;
-}
-
 static ssize_t
 blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs)
@@ -235,7 +164,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct inode *inode = file->f_mapping->host;
 
 	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
-				nr_segs, blkdev_get_blocks, NULL, NULL, 0);
+				nr_segs, blkdev_get_block, NULL, NULL, 0);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
@@ -459,12 +388,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
 	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
-
-	if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
-		kmem_cache_free(bdev_cachep, ei);
-		return NULL;
-	}
-
 	return &ei->vfs_inode;
 }
 
@@ -473,8 +396,6 @@ static void bdev_i_callback(struct rcu_head *head)
 	struct inode *inode = container_of(head, struct inode, i_rcu);
 	struct bdev_inode *bdi = BDEV_I(inode);
 
-	percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
-
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
@@ -1593,22 +1514,6 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	return blkdev_ioctl(bdev, mode, cmd, arg);
 }
 
-ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
-			unsigned long nr_segs, loff_t pos)
-{
-	ssize_t ret;
-	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
-	ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
-
-	percpu_up_read(&bdev->bd_block_size_semaphore);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(blkdev_aio_read);
-
 /*
  * Write data to the block device. Only intended for the block device itself
  * and the raw driver which basically is a fake block device.
@@ -1620,16 +1525,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
 			unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
 	struct blk_plug plug;
 	ssize_t ret;
 
 	BUG_ON(iocb->ki_pos != pos);
 
 	blk_start_plug(&plug);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
 	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
 	if (ret > 0 || ret == -EIOCBQUEUED) {
 		ssize_t err;
@@ -1638,62 +1539,11 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		if (err < 0 && ret > 0)
 			ret = err;
 	}
-
-	percpu_up_read(&bdev->bd_block_size_semaphore);
-
 	blk_finish_plug(&plug);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(blkdev_aio_write);
 
-static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	int ret;
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
-	ret = generic_file_mmap(file, vma);
-
-	percpu_up_read(&bdev->bd_block_size_semaphore);
-
-	return ret;
-}
-
-static ssize_t blkdev_splice_read(struct file *file, loff_t *ppos,
-			struct pipe_inode_info *pipe, size_t len,
-			unsigned int flags)
-{
-	ssize_t ret;
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
-	ret = generic_file_splice_read(file, ppos, pipe, len, flags);
-
-	percpu_up_read(&bdev->bd_block_size_semaphore);
-
-	return ret;
-}
-
-static ssize_t blkdev_splice_write(struct pipe_inode_info *pipe,
-			struct file *file, loff_t *ppos, size_t len,
-			unsigned int flags)
-{
-	ssize_t ret;
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
-	ret = generic_file_splice_write(pipe, file, ppos, len, flags);
-
-	percpu_up_read(&bdev->bd_block_size_semaphore);
-
-	return ret;
-}
-
-
 /*
  * Try to release a page associated with block device when the system
  * is under memory pressure.
@@ -1724,16 +1574,16 @@ const struct file_operations def_blk_fops = {
 	.llseek = block_llseek,
 	.read = do_sync_read,
 	.write = do_sync_write,
-	.aio_read = blkdev_aio_read,
+	.aio_read = generic_file_aio_read,
 	.aio_write = blkdev_aio_write,
-	.mmap = blkdev_mmap,
+	.mmap = generic_file_mmap,
 	.fsync = blkdev_fsync,
 	.unlocked_ioctl = block_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = compat_blkdev_ioctl,
 #endif
-	.splice_read = blkdev_splice_read,
-	.splice_write = blkdev_splice_write,
+	.splice_read = generic_file_splice_read,
+	.splice_write = generic_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
[Diffs for the remaining 3 changed files are not shown.]
