Skip to content

Commit

Permalink
Merge branch 'raid56-experimental' into for-linus-3.9
Browse files Browse the repository at this point in the history
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

Conflicts:
	fs/btrfs/ctree.h
	fs/btrfs/extent-tree.c
	fs/btrfs/inode.c
	fs/btrfs/volumes.c
  • Loading branch information
Chris Mason committed Feb 20, 2013
2 parents b2c6b3e + 0e4e026 commit e942f88
Show file tree
Hide file tree
Showing 18 changed files with 2,814 additions and 120 deletions.
3 changes: 3 additions & 0 deletions fs/btrfs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ config BTRFS_FS
select ZLIB_DEFLATE
select LZO_COMPRESS
select LZO_DECOMPRESS
select RAID6_PQ
select XOR_BLOCKS

help
Btrfs is a new filesystem with extents, writable snapshotting,
support for multiple devices and many more features.
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o

btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
4 changes: 2 additions & 2 deletions fs/btrfs/compression.c
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_size)
ret = io_tree->ops->merge_bio_hook(page, 0,
ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
PAGE_CACHE_SIZE,
bio, 0);
else
Expand Down Expand Up @@ -655,7 +655,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->index = em_start >> PAGE_CACHE_SHIFT;

if (comp_bio->bi_size)
ret = tree->ops->merge_bio_hook(page, 0,
ret = tree->ops->merge_bio_hook(READ, page, 0,
PAGE_CACHE_SIZE,
comp_bio, 0);
else
Expand Down
44 changes: 41 additions & 3 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)

#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)

#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
Expand All @@ -515,6 +516,7 @@ struct btrfs_super_block {
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
BTRFS_FEATURE_INCOMPAT_RAID56 | \
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)

/*
Expand Down Expand Up @@ -956,6 +958,8 @@ struct btrfs_dev_replace_item {
#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
#define BTRFS_BLOCK_GROUP_RAID5 (1 << 7)
#define BTRFS_BLOCK_GROUP_RAID6 (1 << 8)
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE

enum btrfs_raid_types {
Expand All @@ -964,6 +968,8 @@ enum btrfs_raid_types {
BTRFS_RAID_DUP,
BTRFS_RAID_RAID0,
BTRFS_RAID_SINGLE,
BTRFS_RAID_RAID5,
BTRFS_RAID_RAID6,
BTRFS_NR_RAID_TYPES
};

Expand All @@ -973,6 +979,8 @@ enum btrfs_raid_types {

#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID1 | \
BTRFS_BLOCK_GROUP_RAID5 | \
BTRFS_BLOCK_GROUP_RAID6 | \
BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID10)
/*
Expand Down Expand Up @@ -1197,6 +1205,10 @@ struct btrfs_block_group_cache {
u64 flags;
u64 sectorsize;
u64 cache_generation;

/* for raid56, this is a full stripe, without parity */
unsigned long full_stripe_len;

unsigned int ro:1;
unsigned int dirty:1;
unsigned int iref:1;
Expand Down Expand Up @@ -1242,6 +1254,23 @@ enum btrfs_orphan_cleanup_state {
ORPHAN_CLEANUP_DONE = 2,
};

/* used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash {
struct list_head hash_list;
wait_queue_head_t wait;
spinlock_t lock;
};

/* used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash_table {
struct list_head stripe_cache;
spinlock_t cache_lock;
int cache_size;
struct btrfs_stripe_hash table[];
};

#define BTRFS_STRIPE_HASH_TABLE_BITS 11

/* fs_info */
struct reloc_control;
struct btrfs_device;
Expand Down Expand Up @@ -1341,6 +1370,13 @@ struct btrfs_fs_info {
struct mutex cleaner_mutex;
struct mutex chunk_mutex;
struct mutex volume_mutex;

/* this is used during read/modify/write to make sure
* no two ios are trying to mod the same stripe at the same
* time
*/
struct btrfs_stripe_hash_table *stripe_hash_table;

/*
* this protects the ordered operations list only while we are
* processing all of the entries on it. This way we make
Expand Down Expand Up @@ -1423,6 +1459,8 @@ struct btrfs_fs_info {
struct btrfs_workers flush_workers;
struct btrfs_workers endio_workers;
struct btrfs_workers endio_meta_workers;
struct btrfs_workers endio_raid56_workers;
struct btrfs_workers rmw_workers;
struct btrfs_workers endio_meta_write_workers;
struct btrfs_workers endio_write_workers;
struct btrfs_workers endio_freespace_worker;
Expand Down Expand Up @@ -3490,9 +3528,9 @@ int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root, u64 new_dirid);
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio, unsigned long bio_flags);

int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
Expand Down
9 changes: 9 additions & 0 deletions fs/btrfs/delayed-ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,15 @@ struct btrfs_delayed_ref_root {
/* total number of head nodes ready for processing */
unsigned long num_heads_ready;

/*
* bumped when someone is making progress on the delayed
* refs, so that other procs know they are just adding to
* contention intead of helping
*/
atomic_t procs_running_refs;
atomic_t ref_seq;
wait_queue_head_t wait;

/*
* set when the tree is flushing before a transaction commit,
* used by the throttling code to decide if new updates need
Expand Down
62 changes: 53 additions & 9 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
#include "raid56.h"

#ifdef CONFIG_X86
#include <asm/cpufeature.h>
Expand Down Expand Up @@ -640,8 +641,15 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
btree_readahead_hook(root, eb, eb->start, ret);
}

if (ret)
if (ret) {
/*
* our io error hook is going to dec the io pages
* again, we have to make sure it has something
* to decrement
*/
atomic_inc(&eb->io_pages);
clear_extent_buffer_uptodate(eb);
}
free_extent_buffer(eb);
out:
return ret;
Expand All @@ -655,6 +663,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
eb = (struct extent_buffer *)page->private;
set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
eb->read_mirror = failed_mirror;
atomic_dec(&eb->io_pages);
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(root, eb, eb->start, -EIO);
return -EIO; /* we fixed nothing */
Expand All @@ -671,17 +680,23 @@ static void end_workqueue_bio(struct bio *bio, int err)
end_io_wq->work.flags = 0;

if (bio->bi_rw & REQ_WRITE) {
if (end_io_wq->metadata == 1)
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
&end_io_wq->work);
else if (end_io_wq->metadata == 2)
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
btrfs_queue_worker(&fs_info->endio_freespace_worker,
&end_io_wq->work);
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
btrfs_queue_worker(&fs_info->endio_raid56_workers,
&end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
} else {
if (end_io_wq->metadata)
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
btrfs_queue_worker(&fs_info->endio_raid56_workers,
&end_io_wq->work);
else if (end_io_wq->metadata)
btrfs_queue_worker(&fs_info->endio_meta_workers,
&end_io_wq->work);
else
Expand All @@ -696,6 +711,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
* 0 - if data
* 1 - if normal metadta
* 2 - if writing to the free space cache area
* 3 - raid parity work
*/
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata)
Expand Down Expand Up @@ -2179,6 +2195,12 @@ int open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);

ret = btrfs_alloc_stripe_hash_table(fs_info);
if (ret) {
err = -ENOMEM;
goto fail_alloc;
}

__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);

Expand Down Expand Up @@ -2349,6 +2371,12 @@ int open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->endio_meta_write_workers,
"endio-meta-write", fs_info->thread_pool_size,
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->endio_raid56_workers,
"endio-raid56", fs_info->thread_pool_size,
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->rmw_workers,
"rmw", fs_info->thread_pool_size,
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
fs_info->thread_pool_size,
&fs_info->generic_worker);
Expand All @@ -2367,6 +2395,8 @@ int open_ctree(struct super_block *sb,
*/
fs_info->endio_workers.idle_thresh = 4;
fs_info->endio_meta_workers.idle_thresh = 4;
fs_info->endio_raid56_workers.idle_thresh = 4;
fs_info->rmw_workers.idle_thresh = 2;

fs_info->endio_write_workers.idle_thresh = 2;
fs_info->endio_meta_write_workers.idle_thresh = 2;
Expand All @@ -2383,6 +2413,8 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->fixup_workers);
ret |= btrfs_start_workers(&fs_info->endio_workers);
ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
ret |= btrfs_start_workers(&fs_info->rmw_workers);
ret |= btrfs_start_workers(&fs_info->endio_raid56_workers);
ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
ret |= btrfs_start_workers(&fs_info->endio_write_workers);
ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
Expand Down Expand Up @@ -2726,6 +2758,8 @@ int open_ctree(struct super_block *sb,
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_raid56_workers);
btrfs_stop_workers(&fs_info->rmw_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->endio_freespace_worker);
Expand All @@ -2747,6 +2781,7 @@ int open_ctree(struct super_block *sb,
fail_srcu:
cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
btrfs_free_stripe_hash_table(fs_info);
btrfs_close_devices(fs_info->fs_devices);
return err;

Expand Down Expand Up @@ -3094,11 +3129,16 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
== 0)))
num_tolerated_disk_barrier_failures = 0;
else if (num_tolerated_disk_barrier_failures > 1
&&
(flags & (BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10)))
num_tolerated_disk_barrier_failures = 1;
else if (num_tolerated_disk_barrier_failures > 1) {
if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID10)) {
num_tolerated_disk_barrier_failures = 1;
} else if (flags &
BTRFS_BLOCK_GROUP_RAID5) {
num_tolerated_disk_barrier_failures = 2;
}
}
}
}
up_read(&sinfo->groups_sem);
Expand Down Expand Up @@ -3402,6 +3442,8 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_raid56_workers);
btrfs_stop_workers(&fs_info->rmw_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->endio_freespace_worker);
Expand All @@ -3424,6 +3466,8 @@ int close_ctree(struct btrfs_root *root)
bdi_destroy(&fs_info->bdi);
cleanup_srcu_struct(&fs_info->subvol_srcu);

btrfs_free_stripe_hash_table(fs_info);

return 0;
}

Expand Down
7 changes: 7 additions & 0 deletions fs/btrfs/disk-io.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
#define BTRFS_SUPER_MIRROR_MAX 3
#define BTRFS_SUPER_MIRROR_SHIFT 12

enum {
BTRFS_WQ_ENDIO_DATA = 0,
BTRFS_WQ_ENDIO_METADATA = 1,
BTRFS_WQ_ENDIO_FREE_SPACE = 2,
BTRFS_WQ_ENDIO_RAID56 = 3,
};

static inline u64 btrfs_sb_offset(int mirror)
{
u64 start = 16 * 1024;
Expand Down
Loading

0 comments on commit e942f88

Please sign in to comment.