diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 0c3323e0adb2a..63682d27fc8dd 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3949,7 +3949,9 @@ static int __load_dirty_region_bitmap(struct raid_set *rs) /* Try loading the bitmap unless "raid0", which does not have one */ if (!rs_is_raid0(rs) && !test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) { - r = md_bitmap_load(&rs->md); + struct mddev *mddev = &rs->md; + + r = mddev->bitmap_ops->load(mddev); if (r) DMERR("Failed to load bitmap"); } @@ -4066,7 +4068,8 @@ static int raid_preresume(struct dm_target *ti) mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) { int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize; - r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0); + r = mddev->bitmap_ops->resize(mddev, mddev->dev_sectors, + chunksize, false); if (r) DMERR("Failed to resize bitmap"); } diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 08232d8dc815e..864418c8c0288 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -32,11 +32,210 @@ #include "md.h" #include "md-bitmap.h" +#define BITMAP_MAJOR_LO 3 +/* version 4 insists the bitmap is in little-endian order + * with version 3, it is host-endian which is non-portable + * Version 5 is currently set only for clustered devices + */ +#define BITMAP_MAJOR_HI 4 +#define BITMAP_MAJOR_CLUSTERED 5 +#define BITMAP_MAJOR_HOSTENDIAN 3 + +/* + * in-memory bitmap: + * + * Use 16 bit block counters to track pending writes to each "chunk". + * The 2 high order bits are special-purpose, the first is a flag indicating + * whether a resync is needed. The second is a flag indicating whether a + * resync is active. + * This means that the counter is actually 14 bits: + * + * +--------+--------+------------------------------------------------+ + * | resync | resync | counter | + * | needed | active | | + * | (0-1) | (0-1) | (0-16383) | + * +--------+--------+------------------------------------------------+ + * + * The "resync needed" bit is set when: + * a '1' bit is read from storage at startup. + * a write request fails on some drives + * a resync is aborted on a chunk with 'resync active' set + * It is cleared (and resync-active set) when a resync starts across all drives + * of the chunk. + * + * + * The "resync active" bit is set when: + * a resync is started on all drives, and resync_needed is set. + * resync_needed will be cleared (as long as resync_active wasn't already set). + * It is cleared when a resync completes. + * + * The counter counts pending write requests, plus the on-disk bit. + * When the counter is '1' and the resync bits are clear, the on-disk + * bit can be cleared as well, thus setting the counter to 0. + * When we set a bit, or in the counter (to start a write), if the fields is + * 0, we first set the disk bit and set the counter to 1. + * + * If the counter is 0, the on-disk bit is clear and the stripe is clean + * Anything that dirties the stripe pushes the counter to 2 (at least) + * and sets the on-disk bit (lazily). + * If a periodic sweep find the counter at 2, it is decremented to 1. + * If the sweep find the counter at 1, the on-disk bit is cleared and the + * counter goes to zero. + * + * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block + * counters as a fallback when "page" memory cannot be allocated: + * + * Normal case (page memory allocated): + * + * page pointer (32-bit) + * + * [ ] ------+ + * | + * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) + * c1 c2 c2048 + * + * Hijacked case (page memory allocation failed): + * + * hijacked page pointer (32-bit) + * + * [ ][ ] (no page memory allocated) + * counter #1 (16-bit) counter #2 (16-bit) + * + */ + +#define PAGE_BITS (PAGE_SIZE << 3) +#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) + +#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) +#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) +#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) + +/* how many counters per page? */ +#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) +/* same, except a shift value for more efficient bitops */ +#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) +/* same, except a mask value for more efficient bitops */ +#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) + +#define BITMAP_BLOCK_SHIFT 9 + +/* + * bitmap structures: + */ + +/* the in-memory bitmap is represented by bitmap_pages */ +struct bitmap_page { + /* + * map points to the actual memory page + */ + char *map; + /* + * in emergencies (when map cannot be alloced), hijack the map + * pointer and use it as two counters itself + */ + unsigned int hijacked:1; + /* + * If any counter in this page is '1' or '2' - and so could be + * cleared then that page is marked as 'pending' + */ + unsigned int pending:1; + /* + * count of dirty bits on the page + */ + unsigned int count:30; +}; + +/* the main bitmap structure - one per mddev */ +struct bitmap { + + struct bitmap_counts { + spinlock_t lock; + struct bitmap_page *bp; + /* total number of pages in the bitmap */ + unsigned long pages; + /* number of pages not yet allocated */ + unsigned long missing_pages; + /* chunksize = 2^chunkshift (for bitops) */ + unsigned long chunkshift; + /* total number of data chunks for the array */ + unsigned long chunks; + } counts; + + struct mddev *mddev; /* the md device that the bitmap is for */ + + __u64 events_cleared; + int need_sync; + + struct bitmap_storage { + /* backing disk file */ + struct file *file; + /* cached copy of the bitmap file superblock */ + struct page *sb_page; + unsigned long sb_index; + /* list of cache pages for the file */ + struct page **filemap; + /* attributes associated filemap pages */ + unsigned long *filemap_attr; + /* number of pages in the file */ + unsigned long file_pages; + /* total bytes in the bitmap */ + unsigned long bytes; + } storage; + + unsigned long flags; + + int allclean; + + atomic_t behind_writes; + /* highest actual value at runtime */ + unsigned long behind_writes_used; + + /* + * the bitmap daemon - periodically wakes up and sweeps the bitmap + * file, cleaning up bits and flushing out pages to disk as necessary + */ + unsigned long daemon_lastrun; /* jiffies of last run */ + /* + * when we lasted called end_sync to update bitmap with resync + * progress. + */ + unsigned long last_end_sync; + + /* pending writes to the bitmap file */ + atomic_t pending_writes; + wait_queue_head_t write_wait; + wait_queue_head_t overflow_wait; + wait_queue_head_t behind_wait; + + struct kernfs_node *sysfs_can_clear; + /* slot offset for clustered env */ + int cluster_slot; +}; + +static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, + int chunksize, bool init); + static inline char *bmname(struct bitmap *bitmap) { return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; } +static bool __bitmap_enabled(struct bitmap *bitmap) +{ + return bitmap->storage.filemap && + !test_bit(BITMAP_STALE, &bitmap->flags); +} + +static bool bitmap_enabled(struct mddev *mddev) +{ + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) + return false; + + return __bitmap_enabled(bitmap); +} + /* * check a page and, if necessary, allocate it (or hijack it if the alloc fails) * @@ -472,9 +671,10 @@ static void md_bitmap_wait_writes(struct bitmap *bitmap) /* update the event counter and sync the superblock to disk */ -void md_bitmap_update_sb(struct bitmap *bitmap) +static void bitmap_update_sb(void *data) { bitmap_super_t *sb; + struct bitmap *bitmap = data; if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ return; @@ -510,10 +710,8 @@ void md_bitmap_update_sb(struct bitmap *bitmap) write_sb_page(bitmap, bitmap->storage.sb_index, bitmap->storage.sb_page, 1); } -EXPORT_SYMBOL(md_bitmap_update_sb); -/* print out the bitmap file superblock */ -void md_bitmap_print_sb(struct bitmap *bitmap) +static void bitmap_print_sb(struct bitmap *bitmap) { bitmap_super_t *sb; @@ -760,7 +958,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap) bitmap->mddev->bitmap_info.space > sectors_reserved) bitmap->mddev->bitmap_info.space = sectors_reserved; } else { - md_bitmap_print_sb(bitmap); + bitmap_print_sb(bitmap); if (bitmap->cluster_slot < 0) md_cluster_stop(bitmap->mddev); } @@ -893,7 +1091,7 @@ static void md_bitmap_file_unmap(struct bitmap_storage *store) static void md_bitmap_file_kick(struct bitmap *bitmap) { if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { - md_bitmap_update_sb(bitmap); + bitmap_update_sb(bitmap); if (bitmap->storage.file) { pr_warn("%s: kicking failed bitmap file %pD4 from array!\n", @@ -1028,13 +1226,13 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) /* this gets called when the md device is ready to unplug its underlying * (slave) device queues -- before we let any writes go down, we need to * sync the dirty pages of the bitmap file to disk */ -void md_bitmap_unplug(struct bitmap *bitmap) +static void __bitmap_unplug(struct bitmap *bitmap) { unsigned long i; int dirty, need_write; int writing = 0; - if (!md_bitmap_enabled(bitmap)) + if (!__bitmap_enabled(bitmap)) return; /* look at each page to see if there are any set bits that need to be @@ -1060,7 +1258,6 @@ void md_bitmap_unplug(struct bitmap *bitmap) if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) md_bitmap_file_kick(bitmap); } -EXPORT_SYMBOL(md_bitmap_unplug); struct bitmap_unplug_work { struct work_struct work; @@ -1073,11 +1270,11 @@ static void md_bitmap_unplug_fn(struct work_struct *work) struct bitmap_unplug_work *unplug_work = container_of(work, struct bitmap_unplug_work, work); - md_bitmap_unplug(unplug_work->bitmap); + __bitmap_unplug(unplug_work->bitmap); complete(unplug_work->done); } -void md_bitmap_unplug_async(struct bitmap *bitmap) +static void bitmap_unplug_async(struct bitmap *bitmap) { DECLARE_COMPLETION_ONSTACK(done); struct bitmap_unplug_work unplug_work; @@ -1089,7 +1286,19 @@ void md_bitmap_unplug_async(struct bitmap *bitmap) queue_work(md_bitmap_wq, &unplug_work.work); wait_for_completion(&done); } -EXPORT_SYMBOL(md_bitmap_unplug_async); + +static void bitmap_unplug(struct mddev *mddev, bool sync) +{ + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) + return; + + if (sync) + __bitmap_unplug(bitmap); + else + bitmap_unplug_async(bitmap); +} static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); @@ -1226,22 +1435,21 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) return ret; } -void md_bitmap_write_all(struct bitmap *bitmap) +/* just flag bitmap pages as needing to be written. */ +static void bitmap_write_all(struct mddev *mddev) { - /* We don't actually write all bitmap blocks here, - * just flag them as needing to be written - */ int i; + struct bitmap *bitmap = mddev->bitmap; if (!bitmap || !bitmap->storage.filemap) return; + + /* Only one copy, so nothing needed */ if (bitmap->storage.file) - /* Only one copy, so nothing needed */ return; for (i = 0; i < bitmap->storage.file_pages; i++) - set_page_attr(bitmap, i, - BITMAP_PAGE_NEEDWRITE); + set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); bitmap->allclean = 0; } @@ -1290,7 +1498,7 @@ static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout, * bitmap daemon -- periodically wakes up to clean bits and flush pages * out to disk */ -void md_bitmap_daemon_work(struct mddev *mddev) +static void bitmap_daemon_work(struct mddev *mddev) { struct bitmap *bitmap; unsigned long j; @@ -1461,8 +1669,11 @@ __acquires(bitmap->lock) &(bitmap->bp[page].map[pageoff]); } -int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) +static int bitmap_startwrite(struct mddev *mddev, sector_t offset, + unsigned long sectors, bool behind) { + struct bitmap *bitmap = mddev->bitmap; + if (!bitmap) return 0; @@ -1523,13 +1734,15 @@ int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long s } return 0; } -EXPORT_SYMBOL(md_bitmap_startwrite); -void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int success, int behind) +static void bitmap_endwrite(struct mddev *mddev, sector_t offset, + unsigned long sectors, bool success, bool behind) { + struct bitmap *bitmap = mddev->bitmap; + if (!bitmap) return; + if (behind) { if (atomic_dec_and_test(&bitmap->behind_writes)) wake_up(&bitmap->behind_wait); @@ -1576,26 +1789,27 @@ void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset, sectors = 0; } } -EXPORT_SYMBOL(md_bitmap_endwrite); -static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, - int degraded) +static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, + sector_t *blocks, bool degraded) { bitmap_counter_t *bmc; - int rv; + bool rv; + if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ *blocks = 1024; - return 1; /* always resync if no bitmap */ + return true; /* always resync if no bitmap */ } spin_lock_irq(&bitmap->counts.lock); + + rv = false; bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); - rv = 0; if (bmc) { /* locked */ - if (RESYNC(*bmc)) - rv = 1; - else if (NEEDED(*bmc)) { - rv = 1; + if (RESYNC(*bmc)) { + rv = true; + } else if (NEEDED(*bmc)) { + rv = true; if (!degraded) { /* don't set/clear bits if degraded */ *bmc |= RESYNC_MASK; *bmc &= ~NEEDED_MASK; @@ -1603,11 +1817,12 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t } } spin_unlock_irq(&bitmap->counts.lock); + return rv; } -int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, - int degraded) +static bool bitmap_start_sync(struct mddev *mddev, sector_t offset, + sector_t *blocks, bool degraded) { /* bitmap_start_sync must always report on multiples of whole * pages, otherwise resync (which is very PAGE_SIZE based) will @@ -1616,21 +1831,22 @@ int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *block * At least PAGE_SIZE>>9 blocks are covered. * Return the 'or' of the result. */ - int rv = 0; + bool rv = false; sector_t blocks1; *blocks = 0; while (*blocks < (PAGE_SIZE>>9)) { - rv |= __bitmap_start_sync(bitmap, offset, + rv |= __bitmap_start_sync(mddev->bitmap, offset, &blocks1, degraded); offset += blocks1; *blocks += blocks1; } + return rv; } -EXPORT_SYMBOL(md_bitmap_start_sync); -void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted) +static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset, + sector_t *blocks, bool aborted) { bitmap_counter_t *bmc; unsigned long flags; @@ -1659,9 +1875,14 @@ void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks unlock: spin_unlock_irqrestore(&bitmap->counts.lock, flags); } -EXPORT_SYMBOL(md_bitmap_end_sync); -void md_bitmap_close_sync(struct bitmap *bitmap) +static void bitmap_end_sync(struct mddev *mddev, sector_t offset, + sector_t *blocks) +{ + __bitmap_end_sync(mddev->bitmap, offset, blocks, true); +} + +static void bitmap_close_sync(struct mddev *mddev) { /* Sync has finished, and any bitmap chunks that weren't synced * properly have been aborted. It remains to us to clear the @@ -1669,19 +1890,23 @@ void md_bitmap_close_sync(struct bitmap *bitmap) */ sector_t sector = 0; sector_t blocks; + struct bitmap *bitmap = mddev->bitmap; + if (!bitmap) return; + while (sector < bitmap->mddev->resync_max_sectors) { - md_bitmap_end_sync(bitmap, sector, &blocks, 0); + __bitmap_end_sync(bitmap, sector, &blocks, false); sector += blocks; } } -EXPORT_SYMBOL(md_bitmap_close_sync); -void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) +static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector, + bool force) { sector_t s = 0; sector_t blocks; + struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return; @@ -1700,34 +1925,32 @@ void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); s = 0; while (s < sector && s < bitmap->mddev->resync_max_sectors) { - md_bitmap_end_sync(bitmap, s, &blocks, 0); + __bitmap_end_sync(bitmap, s, &blocks, false); s += blocks; } bitmap->last_end_sync = jiffies; sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed); } -EXPORT_SYMBOL(md_bitmap_cond_end_sync); -void md_bitmap_sync_with_cluster(struct mddev *mddev, - sector_t old_lo, sector_t old_hi, - sector_t new_lo, sector_t new_hi) +static void bitmap_sync_with_cluster(struct mddev *mddev, + sector_t old_lo, sector_t old_hi, + sector_t new_lo, sector_t new_hi) { struct bitmap *bitmap = mddev->bitmap; sector_t sector, blocks = 0; for (sector = old_lo; sector < new_lo; ) { - md_bitmap_end_sync(bitmap, sector, &blocks, 0); + __bitmap_end_sync(bitmap, sector, &blocks, false); sector += blocks; } WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n"); for (sector = old_hi; sector < new_hi; ) { - md_bitmap_start_sync(bitmap, sector, &blocks, 0); + bitmap_start_sync(mddev, sector, &blocks, false); sector += blocks; } WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n"); } -EXPORT_SYMBOL(md_bitmap_sync_with_cluster); static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) { @@ -1756,12 +1979,18 @@ static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, in } /* dirty the memory and file bits for bitmap chunks "s" to "e" */ -void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) +static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s, + unsigned long e) { unsigned long chunk; + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) + return; for (chunk = s; chunk <= e; chunk++) { sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; + md_bitmap_set_memory_bits(bitmap, sec, 1); md_bitmap_file_set_bit(bitmap, sec); if (sec < bitmap->mddev->recovery_cp) @@ -1773,10 +2002,7 @@ void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long } } -/* - * flush out any pending updates - */ -void md_bitmap_flush(struct mddev *mddev) +static void bitmap_flush(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; long sleep; @@ -1789,23 +2015,21 @@ void md_bitmap_flush(struct mddev *mddev) */ sleep = mddev->bitmap_info.daemon_sleep * 2; bitmap->daemon_lastrun -= sleep; - md_bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); bitmap->daemon_lastrun -= sleep; - md_bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); bitmap->daemon_lastrun -= sleep; - md_bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); if (mddev->bitmap_info.external) md_super_wait(mddev); - md_bitmap_update_sb(bitmap); + bitmap_update_sb(bitmap); } -/* - * free memory that was allocated - */ -void md_bitmap_free(struct bitmap *bitmap) +static void md_bitmap_free(void *data) { unsigned long k, pages; struct bitmap_page *bp; + struct bitmap *bitmap = data; if (!bitmap) /* there was no bitmap */ return; @@ -1836,9 +2060,8 @@ void md_bitmap_free(struct bitmap *bitmap) kfree(bp); kfree(bitmap); } -EXPORT_SYMBOL(md_bitmap_free); -void md_bitmap_wait_behind_writes(struct mddev *mddev) +static void bitmap_wait_behind_writes(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; @@ -1852,14 +2075,14 @@ void md_bitmap_wait_behind_writes(struct mddev *mddev) } } -void md_bitmap_destroy(struct mddev *mddev) +static void bitmap_destroy(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) /* there was no bitmap */ return; - md_bitmap_wait_behind_writes(mddev); + bitmap_wait_behind_writes(mddev); if (!mddev->serialize_policy) mddev_destroy_serial_pool(mddev, NULL); @@ -1878,7 +2101,7 @@ void md_bitmap_destroy(struct mddev *mddev) * if this returns an error, bitmap_destroy must be called to do clean up * once mddev->bitmap is set */ -struct bitmap *md_bitmap_create(struct mddev *mddev, int slot) +static struct bitmap *__bitmap_create(struct mddev *mddev, int slot) { struct bitmap *bitmap; sector_t blocks = mddev->resync_max_sectors; @@ -1948,7 +2171,8 @@ struct bitmap *md_bitmap_create(struct mddev *mddev, int slot) goto error; bitmap->daemon_lastrun = jiffies; - err = md_bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1); + err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, + true); if (err) goto error; @@ -1965,7 +2189,18 @@ struct bitmap *md_bitmap_create(struct mddev *mddev, int slot) return ERR_PTR(err); } -int md_bitmap_load(struct mddev *mddev) +static int bitmap_create(struct mddev *mddev, int slot) +{ + struct bitmap *bitmap = __bitmap_create(mddev, slot); + + if (IS_ERR(bitmap)) + return PTR_ERR(bitmap); + + mddev->bitmap = bitmap; + return 0; +} + +static int bitmap_load(struct mddev *mddev) { int err = 0; sector_t start = 0; @@ -1989,10 +2224,10 @@ int md_bitmap_load(struct mddev *mddev) */ while (sector < mddev->resync_max_sectors) { sector_t blocks; - md_bitmap_start_sync(bitmap, sector, &blocks, 0); + bitmap_start_sync(mddev, sector, &blocks, false); sector += blocks; } - md_bitmap_close_sync(bitmap); + bitmap_close_sync(mddev); if (mddev->degraded == 0 || bitmap->events_cleared == mddev->events) @@ -2014,22 +2249,21 @@ int md_bitmap_load(struct mddev *mddev) mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); md_wakeup_thread(mddev->thread); - md_bitmap_update_sb(bitmap); + bitmap_update_sb(bitmap); if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) err = -EIO; out: return err; } -EXPORT_SYMBOL_GPL(md_bitmap_load); /* caller need to free returned bitmap with md_bitmap_free() */ -struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot) +static void *bitmap_get_from_slot(struct mddev *mddev, int slot) { int rv = 0; struct bitmap *bitmap; - bitmap = md_bitmap_create(mddev, slot); + bitmap = __bitmap_create(mddev, slot); if (IS_ERR(bitmap)) { rv = PTR_ERR(bitmap); return ERR_PTR(rv); @@ -2043,20 +2277,19 @@ struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot) return bitmap; } -EXPORT_SYMBOL(get_bitmap_from_slot); /* Loads the bitmap associated with slot and copies the resync information * to our bitmap */ -int md_bitmap_copy_from_slot(struct mddev *mddev, int slot, - sector_t *low, sector_t *high, bool clear_bits) +static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low, + sector_t *high, bool clear_bits) { int rv = 0, i, j; sector_t block, lo = 0, hi = 0; struct bitmap_counts *counts; struct bitmap *bitmap; - bitmap = get_bitmap_from_slot(mddev, slot); + bitmap = bitmap_get_from_slot(mddev, slot); if (IS_ERR(bitmap)) { pr_err("%s can't get bitmap from slot %d\n", __func__, slot); return -1; @@ -2076,53 +2309,59 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot, } if (clear_bits) { - md_bitmap_update_sb(bitmap); + bitmap_update_sb(bitmap); /* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */ for (i = 0; i < bitmap->storage.file_pages; i++) if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING)) set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); - md_bitmap_unplug(bitmap); + __bitmap_unplug(bitmap); } - md_bitmap_unplug(mddev->bitmap); + __bitmap_unplug(mddev->bitmap); *low = lo; *high = hi; md_bitmap_free(bitmap); return rv; } -EXPORT_SYMBOL_GPL(md_bitmap_copy_from_slot); +static void bitmap_set_pages(void *data, unsigned long pages) +{ + struct bitmap *bitmap = data; + + bitmap->counts.pages = pages; +} -void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap) +static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) { - unsigned long chunk_kb; + struct bitmap_storage *storage; struct bitmap_counts *counts; + struct bitmap *bitmap = data; + bitmap_super_t *sb; if (!bitmap) - return; + return -ENOENT; + + sb = kmap_local_page(bitmap->storage.sb_page); + stats->sync_size = le64_to_cpu(sb->sync_size); + kunmap_local(sb); counts = &bitmap->counts; + stats->missing_pages = counts->missing_pages; + stats->pages = counts->pages; - chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10; - seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " - "%lu%s chunk", - counts->pages - counts->missing_pages, - counts->pages, - (counts->pages - counts->missing_pages) - << (PAGE_SHIFT - 10), - chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize, - chunk_kb ? "KB" : "B"); - if (bitmap->storage.file) { - seq_printf(seq, ", file: "); - seq_file_path(seq, bitmap->storage.file, " \t\n"); - } + storage = &bitmap->storage; + stats->file_pages = storage->file_pages; + stats->file = storage->file; - seq_printf(seq, "\n"); + stats->behind_writes = atomic_read(&bitmap->behind_writes); + stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait); + stats->events_cleared = bitmap->events_cleared; + return 0; } -int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, - int chunksize, int init) +static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, + int chunksize, bool init) { /* If chunk_size is 0, choose an appropriate chunk size. * Then possibly allocate new storage space. @@ -2320,14 +2559,24 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, spin_unlock_irq(&bitmap->counts.lock); if (!init) { - md_bitmap_unplug(bitmap); + __bitmap_unplug(bitmap); bitmap->mddev->pers->quiesce(bitmap->mddev, 0); } ret = 0; err: return ret; } -EXPORT_SYMBOL_GPL(md_bitmap_resize); + +static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize, + bool init) +{ + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) + return 0; + + return __bitmap_resize(bitmap, blocks, chunksize, init); +} static ssize_t location_show(struct mddev *mddev, char *page) @@ -2367,7 +2616,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len) goto out; } - md_bitmap_destroy(mddev); + bitmap_destroy(mddev); mddev->bitmap_info.offset = 0; if (mddev->bitmap_info.file) { struct file *f = mddev->bitmap_info.file; @@ -2377,7 +2626,6 @@ location_store(struct mddev *mddev, const char *buf, size_t len) } else { /* No bitmap, OK to set a location */ long long offset; - struct bitmap *bitmap; if (strncmp(buf, "none", 4) == 0) /* nothing to be done */; @@ -2404,17 +2652,14 @@ location_store(struct mddev *mddev, const char *buf, size_t len) } mddev->bitmap_info.offset = offset; - bitmap = md_bitmap_create(mddev, -1); - if (IS_ERR(bitmap)) { - rv = PTR_ERR(bitmap); + rv = bitmap_create(mddev, -1); + if (rv) goto out; - } - mddev->bitmap = bitmap; - rv = md_bitmap_load(mddev); + rv = bitmap_load(mddev); if (rv) { mddev->bitmap_info.offset = 0; - md_bitmap_destroy(mddev); + bitmap_destroy(mddev); goto out; } } @@ -2450,6 +2695,7 @@ space_show(struct mddev *mddev, char *page) static ssize_t space_store(struct mddev *mddev, const char *buf, size_t len) { + struct bitmap *bitmap; unsigned long sectors; int rv; @@ -2460,8 +2706,8 @@ space_store(struct mddev *mddev, const char *buf, size_t len) if (sectors == 0) return -EINVAL; - if (mddev->bitmap && - sectors < (mddev->bitmap->storage.bytes + 511) >> 9) + bitmap = mddev->bitmap; + if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9) return -EFBIG; /* Bitmap is too big for this small space */ /* could make sure it isn't too big, but that isn't really @@ -2569,7 +2815,7 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len) mddev_create_serial_pool(mddev, rdev); } if (old_mwb != backlog) - md_bitmap_update_sb(mddev->bitmap); + bitmap_update_sb(mddev->bitmap); mddev_unlock_and_resume(mddev); return len; @@ -2638,10 +2884,13 @@ __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); static ssize_t can_clear_show(struct mddev *mddev, char *page) { int len; + struct bitmap *bitmap; + spin_lock(&mddev->lock); - if (mddev->bitmap) - len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ? - "false" : "true")); + bitmap = mddev->bitmap; + if (bitmap) + len = sprintf(page, "%s\n", (bitmap->need_sync ? "false" : + "true")); else len = sprintf(page, "\n"); spin_unlock(&mddev->lock); @@ -2650,17 +2899,24 @@ static ssize_t can_clear_show(struct mddev *mddev, char *page) static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) { - if (mddev->bitmap == NULL) + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) return -ENOENT; - if (strncmp(buf, "false", 5) == 0) - mddev->bitmap->need_sync = 1; - else if (strncmp(buf, "true", 4) == 0) { + + if (strncmp(buf, "false", 5) == 0) { + bitmap->need_sync = 1; + return len; + } + + if (strncmp(buf, "true", 4) == 0) { if (mddev->degraded) return -EBUSY; - mddev->bitmap->need_sync = 0; - } else - return -EINVAL; - return len; + bitmap->need_sync = 0; + return len; + } + + return -EINVAL; } static struct md_sysfs_entry bitmap_can_clear = @@ -2670,21 +2926,26 @@ static ssize_t behind_writes_used_show(struct mddev *mddev, char *page) { ssize_t ret; + struct bitmap *bitmap; + spin_lock(&mddev->lock); - if (mddev->bitmap == NULL) + bitmap = mddev->bitmap; + if (!bitmap) ret = sprintf(page, "0\n"); else - ret = sprintf(page, "%lu\n", - mddev->bitmap->behind_writes_used); + ret = sprintf(page, "%lu\n", bitmap->behind_writes_used); spin_unlock(&mddev->lock); + return ret; } static ssize_t behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) { - if (mddev->bitmap) - mddev->bitmap->behind_writes_used = 0; + struct bitmap *bitmap = mddev->bitmap; + + if (bitmap) + bitmap->behind_writes_used = 0; return len; } @@ -2707,3 +2968,38 @@ const struct attribute_group md_bitmap_group = { .name = "bitmap", .attrs = md_bitmap_attrs, }; + +static struct bitmap_operations bitmap_ops = { + .enabled = bitmap_enabled, + .create = bitmap_create, + .resize = bitmap_resize, + .load = bitmap_load, + .destroy = bitmap_destroy, + .flush = bitmap_flush, + .write_all = bitmap_write_all, + .dirty_bits = bitmap_dirty_bits, + .unplug = bitmap_unplug, + .daemon_work = bitmap_daemon_work, + .wait_behind_writes = bitmap_wait_behind_writes, + + .startwrite = bitmap_startwrite, + .endwrite = bitmap_endwrite, + .start_sync = bitmap_start_sync, + .end_sync = bitmap_end_sync, + .cond_end_sync = bitmap_cond_end_sync, + .close_sync = bitmap_close_sync, + + .update_sb = bitmap_update_sb, + .get_stats = bitmap_get_stats, + + .sync_with_cluster = bitmap_sync_with_cluster, + .get_from_slot = bitmap_get_from_slot, + .copy_from_slot = bitmap_copy_from_slot, + .set_pages = bitmap_set_pages, + .free = md_bitmap_free, +}; + +void mddev_set_bitmap_ops(struct mddev *mddev) +{ + mddev->bitmap_ops = &bitmap_ops; +} diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h index bb9eb418780a6..662e6fc141a77 100644 --- a/drivers/md/md-bitmap.h +++ b/drivers/md/md-bitmap.h @@ -7,81 +7,7 @@ #ifndef BITMAP_H #define BITMAP_H 1 -#define BITMAP_MAJOR_LO 3 -/* version 4 insists the bitmap is in little-endian order - * with version 3, it is host-endian which is non-portable - * Version 5 is currently set only for clustered devices - */ -#define BITMAP_MAJOR_HI 4 -#define BITMAP_MAJOR_CLUSTERED 5 -#define BITMAP_MAJOR_HOSTENDIAN 3 - -/* - * in-memory bitmap: - * - * Use 16 bit block counters to track pending writes to each "chunk". - * The 2 high order bits are special-purpose, the first is a flag indicating - * whether a resync is needed. The second is a flag indicating whether a - * resync is active. - * This means that the counter is actually 14 bits: - * - * +--------+--------+------------------------------------------------+ - * | resync | resync | counter | - * | needed | active | | - * | (0-1) | (0-1) | (0-16383) | - * +--------+--------+------------------------------------------------+ - * - * The "resync needed" bit is set when: - * a '1' bit is read from storage at startup. - * a write request fails on some drives - * a resync is aborted on a chunk with 'resync active' set - * It is cleared (and resync-active set) when a resync starts across all drives - * of the chunk. - * - * - * The "resync active" bit is set when: - * a resync is started on all drives, and resync_needed is set. - * resync_needed will be cleared (as long as resync_active wasn't already set). - * It is cleared when a resync completes. - * - * The counter counts pending write requests, plus the on-disk bit. - * When the counter is '1' and the resync bits are clear, the on-disk - * bit can be cleared as well, thus setting the counter to 0. - * When we set a bit, or in the counter (to start a write), if the fields is - * 0, we first set the disk bit and set the counter to 1. - * - * If the counter is 0, the on-disk bit is clear and the stripe is clean - * Anything that dirties the stripe pushes the counter to 2 (at least) - * and sets the on-disk bit (lazily). - * If a periodic sweep find the counter at 2, it is decremented to 1. - * If the sweep find the counter at 1, the on-disk bit is cleared and the - * counter goes to zero. - * - * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block - * counters as a fallback when "page" memory cannot be allocated: - * - * Normal case (page memory allocated): - * - * page pointer (32-bit) - * - * [ ] ------+ - * | - * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) - * c1 c2 c2048 - * - * Hijacked case (page memory allocation failed): - * - * hijacked page pointer (32-bit) - * - * [ ][ ] (no page memory allocated) - * counter #1 (16-bit) counter #2 (16-bit) - * - */ - -#ifdef __KERNEL__ - -#define PAGE_BITS (PAGE_SIZE << 3) -#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) +#define BITMAP_MAGIC 0x6d746962 typedef __u16 bitmap_counter_t; #define COUNTER_BITS 16 @@ -91,26 +17,6 @@ typedef __u16 bitmap_counter_t; #define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) #define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) #define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) -#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) -#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) -#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) - -/* how many counters per page? */ -#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) -/* same, except a shift value for more efficient bitops */ -#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) -/* same, except a mask value for more efficient bitops */ -#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) - -#define BITMAP_BLOCK_SHIFT 9 - -#endif - -/* - * bitmap structures: - */ - -#define BITMAP_MAGIC 0x6d746962 /* use these for bitmap->flags and bitmap->sb->state bit-fields */ enum bitmap_state { @@ -152,136 +58,58 @@ typedef struct bitmap_super_s { * devices. For raid10 it is the size of the array. */ -#ifdef __KERNEL__ +struct md_bitmap_stats { + u64 events_cleared; + int behind_writes; + bool behind_wait; -/* the in-memory bitmap is represented by bitmap_pages */ -struct bitmap_page { - /* - * map points to the actual memory page - */ - char *map; - /* - * in emergencies (when map cannot be alloced), hijack the map - * pointer and use it as two counters itself - */ - unsigned int hijacked:1; - /* - * If any counter in this page is '1' or '2' - and so could be - * cleared then that page is marked as 'pending' - */ - unsigned int pending:1; - /* - * count of dirty bits on the page - */ - unsigned int count:30; + unsigned long missing_pages; + unsigned long file_pages; + unsigned long sync_size; + unsigned long pages; + struct file *file; }; -/* the main bitmap structure - one per mddev */ -struct bitmap { - - struct bitmap_counts { - spinlock_t lock; - struct bitmap_page *bp; - unsigned long pages; /* total number of pages - * in the bitmap */ - unsigned long missing_pages; /* number of pages - * not yet allocated */ - unsigned long chunkshift; /* chunksize = 2^chunkshift - * (for bitops) */ - unsigned long chunks; /* Total number of data - * chunks for the array */ - } counts; - - struct mddev *mddev; /* the md device that the bitmap is for */ - - __u64 events_cleared; - int need_sync; - - struct bitmap_storage { - struct file *file; /* backing disk file */ - struct page *sb_page; /* cached copy of the bitmap - * file superblock */ - unsigned long sb_index; - struct page **filemap; /* list of cache pages for - * the file */ - unsigned long *filemap_attr; /* attributes associated - * w/ filemap pages */ - unsigned long file_pages; /* number of pages in the file*/ - unsigned long bytes; /* total bytes in the bitmap */ - } storage; - - unsigned long flags; - - int allclean; - - atomic_t behind_writes; - unsigned long behind_writes_used; /* highest actual value at runtime */ - - /* - * the bitmap daemon - periodically wakes up and sweeps the bitmap - * file, cleaning up bits and flushing out pages to disk as necessary - */ - unsigned long daemon_lastrun; /* jiffies of last run */ - unsigned long last_end_sync; /* when we lasted called end_sync to - * update bitmap with resync progress */ - - atomic_t pending_writes; /* pending writes to the bitmap file */ - wait_queue_head_t write_wait; - wait_queue_head_t overflow_wait; - wait_queue_head_t behind_wait; - - struct kernfs_node *sysfs_can_clear; - int cluster_slot; /* Slot offset for clustered env */ +struct bitmap_operations { + bool (*enabled)(struct mddev *mddev); + int (*create)(struct mddev *mddev, int slot); + int (*resize)(struct mddev *mddev, sector_t blocks, int chunksize, + bool init); + + int (*load)(struct mddev *mddev); + void (*destroy)(struct mddev *mddev); + void (*flush)(struct mddev *mddev); + void (*write_all)(struct mddev *mddev); + void (*dirty_bits)(struct mddev *mddev, unsigned long s, + unsigned long e); + void (*unplug)(struct mddev *mddev, bool sync); + void (*daemon_work)(struct mddev *mddev); + void (*wait_behind_writes)(struct mddev *mddev); + + int (*startwrite)(struct mddev *mddev, sector_t offset, + unsigned long sectors, bool behind); + void (*endwrite)(struct mddev *mddev, sector_t offset, + unsigned long sectors, bool success, bool behind); + bool (*start_sync)(struct mddev *mddev, sector_t offset, + sector_t *blocks, bool degraded); + void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks); + void (*cond_end_sync)(struct mddev *mddev, sector_t sector, bool force); + void (*close_sync)(struct mddev *mddev); + + void (*update_sb)(void *data); + int (*get_stats)(void *data, struct md_bitmap_stats *stats); + + void (*sync_with_cluster)(struct mddev *mddev, + sector_t old_lo, sector_t old_hi, + sector_t new_lo, sector_t new_hi); + void *(*get_from_slot)(struct mddev *mddev, int slot); + int (*copy_from_slot)(struct mddev *mddev, int slot, sector_t *lo, + sector_t *hi, bool clear_bits); + void (*set_pages)(void *data, unsigned long pages); + void (*free)(void *data); }; /* the bitmap API */ - -/* these are used only by md/bitmap */ -struct bitmap *md_bitmap_create(struct mddev *mddev, int slot); -int md_bitmap_load(struct mddev *mddev); -void md_bitmap_flush(struct mddev *mddev); -void md_bitmap_destroy(struct mddev *mddev); - -void md_bitmap_print_sb(struct bitmap *bitmap); -void md_bitmap_update_sb(struct bitmap *bitmap); -void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap); - -int md_bitmap_setallbits(struct bitmap *bitmap); -void md_bitmap_write_all(struct bitmap *bitmap); - -void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e); - -/* these are exported */ -int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int behind); -void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int success, int behind); -int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); -void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); -void md_bitmap_close_sync(struct bitmap *bitmap); -void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force); -void md_bitmap_sync_with_cluster(struct mddev *mddev, - sector_t old_lo, sector_t old_hi, - sector_t new_lo, sector_t new_hi); - -void md_bitmap_unplug(struct bitmap *bitmap); -void md_bitmap_unplug_async(struct bitmap *bitmap); -void md_bitmap_daemon_work(struct mddev *mddev); - -int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, - int chunksize, int init); -struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot); -int md_bitmap_copy_from_slot(struct mddev *mddev, int slot, - sector_t *lo, sector_t *hi, bool clear_bits); -void md_bitmap_free(struct bitmap *bitmap); -void md_bitmap_wait_behind_writes(struct mddev *mddev); - -static inline bool md_bitmap_enabled(struct bitmap *bitmap) -{ - return bitmap && bitmap->storage.filemap && - !test_bit(BITMAP_STALE, &bitmap->flags); -} - -#endif +void mddev_set_bitmap_ops(struct mddev *mddev); #endif diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 1d0db62f03514..6595f89becdb7 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -317,7 +317,7 @@ static void recover_bitmaps(struct md_thread *thread) str, ret); goto clear_bit; } - ret = md_bitmap_copy_from_slot(mddev, slot, &lo, &hi, true); + ret = mddev->bitmap_ops->copy_from_slot(mddev, slot, &lo, &hi, true); if (ret) { pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); goto clear_bit; @@ -497,8 +497,8 @@ static void process_suspend_info(struct mddev *mddev, * we don't want to trigger lots of WARN. */ if (sb && !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) - md_bitmap_sync_with_cluster(mddev, cinfo->sync_low, - cinfo->sync_hi, lo, hi); + mddev->bitmap_ops->sync_with_cluster(mddev, cinfo->sync_low, + cinfo->sync_hi, lo, hi); cinfo->sync_low = lo; cinfo->sync_hi = hi; @@ -628,8 +628,9 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) break; case BITMAP_RESIZE: if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0)) - ret = md_bitmap_resize(mddev->bitmap, - le64_to_cpu(msg->high), 0, 0); + ret = mddev->bitmap_ops->resize(mddev, + le64_to_cpu(msg->high), + 0, false); break; default: ret = -1; @@ -856,7 +857,7 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) } /* Read the disk bitmap sb and check if it needs recovery */ - ret = md_bitmap_copy_from_slot(mddev, i, &lo, &hi, false); + ret = mddev->bitmap_ops->copy_from_slot(mddev, i, &lo, &hi, false); if (ret) { pr_warn("md-cluster: Could not gather bitmaps from slot %d", i); lockres_free(bm_lockres); @@ -1143,13 +1144,16 @@ static int update_bitmap_size(struct mddev *mddev, sector_t size) static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize) { - struct bitmap_counts *counts; - char str[64]; - struct dlm_lock_resource *bm_lockres; - struct bitmap *bitmap = mddev->bitmap; - unsigned long my_pages = bitmap->counts.pages; + void *bitmap = mddev->bitmap; + struct md_bitmap_stats stats; + unsigned long my_pages; int i, rv; + rv = mddev->bitmap_ops->get_stats(bitmap, &stats); + if (rv) + return rv; + + my_pages = stats.pages; /* * We need to ensure all the nodes can grow to a larger * bitmap size before make the reshaping. @@ -1159,17 +1163,22 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz return rv; for (i = 0; i < mddev->bitmap_info.nodes; i++) { + struct dlm_lock_resource *bm_lockres; + char str[64]; + if (i == md_cluster_ops->slot_number(mddev)) continue; - bitmap = get_bitmap_from_slot(mddev, i); + bitmap = mddev->bitmap_ops->get_from_slot(mddev, i); if (IS_ERR(bitmap)) { pr_err("can't get bitmap from slot %d\n", i); bitmap = NULL; goto out; } - counts = &bitmap->counts; + rv = mddev->bitmap_ops->get_stats(bitmap, &stats); + if (rv) + goto out; /* * If we can hold the bitmap lock of one node then * the slot is not occupied, update the pages. @@ -1183,21 +1192,21 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz bm_lockres->flags |= DLM_LKF_NOQUEUE; rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); if (!rv) - counts->pages = my_pages; + mddev->bitmap_ops->set_pages(bitmap, my_pages); lockres_free(bm_lockres); - if (my_pages != counts->pages) + if (my_pages != stats.pages) /* * Let's revert the bitmap size if one node * can't resize bitmap */ goto out; - md_bitmap_free(bitmap); + mddev->bitmap_ops->free(bitmap); } return 0; out: - md_bitmap_free(bitmap); + mddev->bitmap_ops->free(bitmap); update_bitmap_size(mddev, oldsize); return -1; } @@ -1207,24 +1216,27 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz */ static int cluster_check_sync_size(struct mddev *mddev) { - int i, rv; - bitmap_super_t *sb; - unsigned long my_sync_size, sync_size = 0; - int node_num = mddev->bitmap_info.nodes; int current_slot = md_cluster_ops->slot_number(mddev); - struct bitmap *bitmap = mddev->bitmap; - char str[64]; + int node_num = mddev->bitmap_info.nodes; struct dlm_lock_resource *bm_lockres; + struct md_bitmap_stats stats; + void *bitmap = mddev->bitmap; + unsigned long sync_size = 0; + unsigned long my_sync_size; + char str[64]; + int i, rv; - sb = kmap_atomic(bitmap->storage.sb_page); - my_sync_size = sb->sync_size; - kunmap_atomic(sb); + rv = mddev->bitmap_ops->get_stats(bitmap, &stats); + if (rv) + return rv; + + my_sync_size = stats.sync_size; for (i = 0; i < node_num; i++) { if (i == current_slot) continue; - bitmap = get_bitmap_from_slot(mddev, i); + bitmap = mddev->bitmap_ops->get_from_slot(mddev, i); if (IS_ERR(bitmap)) { pr_err("can't get bitmap from slot %d\n", i); return -1; @@ -1238,25 +1250,28 @@ static int cluster_check_sync_size(struct mddev *mddev) bm_lockres = lockres_init(mddev, str, NULL, 1); if (!bm_lockres) { pr_err("md-cluster: Cannot initialize %s\n", str); - md_bitmap_free(bitmap); + mddev->bitmap_ops->free(bitmap); return -1; } bm_lockres->flags |= DLM_LKF_NOQUEUE; rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); if (!rv) - md_bitmap_update_sb(bitmap); + mddev->bitmap_ops->update_sb(bitmap); lockres_free(bm_lockres); - sb = kmap_atomic(bitmap->storage.sb_page); - if (sync_size == 0) - sync_size = sb->sync_size; - else if (sync_size != sb->sync_size) { - kunmap_atomic(sb); - md_bitmap_free(bitmap); + rv = mddev->bitmap_ops->get_stats(bitmap, &stats); + if (rv) { + mddev->bitmap_ops->free(bitmap); + return rv; + } + + if (sync_size == 0) { + sync_size = stats.sync_size; + } else if (sync_size != stats.sync_size) { + mddev->bitmap_ops->free(bitmap); return -1; } - kunmap_atomic(sb); - md_bitmap_free(bitmap); + mddev->bitmap_ops->free(bitmap); } return (my_sync_size == sync_size) ? 0 : -1; @@ -1585,7 +1600,7 @@ static int gather_bitmaps(struct md_rdev *rdev) for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) { if (sn == (cinfo->slot_number - 1)) continue; - err = md_bitmap_copy_from_slot(mddev, sn, &lo, &hi, false); + err = mddev->bitmap_ops->copy_from_slot(mddev, sn, &lo, &hi, false); if (err) { pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn); goto out; diff --git a/drivers/md/md.c b/drivers/md/md.c index ce5d8be138219..4141461114253 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -664,6 +664,7 @@ int mddev_init(struct mddev *mddev) mddev->resync_min = 0; mddev->resync_max = MaxSector; mddev->level = LEVEL_NONE; + mddev_set_bitmap_ops(mddev); INIT_WORK(&mddev->sync_work, md_start_sync); INIT_WORK(&mddev->del_work, mddev_delayed_delete); @@ -1264,6 +1265,18 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor return ret; } +static u64 md_bitmap_events_cleared(struct mddev *mddev) +{ + struct md_bitmap_stats stats; + int err; + + err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (err) + return 0; + + return stats.events_cleared; +} + /* * validate_super for 0.90.0 * note: we are not using "freshest" for 0.9 superblock @@ -1356,7 +1369,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru /* if adding to array with a bitmap, then we can accept an * older device ... but not too old. */ - if (ev1 < mddev->bitmap->events_cleared) + if (ev1 < md_bitmap_events_cleared(mddev)) return 0; if (ev1 < mddev->events) set_bit(Bitmap_sync, &rdev->flags); @@ -1883,7 +1896,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc /* If adding to array with a bitmap, then we can accept an * older device, but not too old. */ - if (ev1 < mddev->bitmap->events_cleared) + if (ev1 < md_bitmap_events_cleared(mddev)) return 0; if (ev1 < mddev->events) set_bit(Bitmap_sync, &rdev->flags); @@ -2215,7 +2228,6 @@ super_1_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset) { /* All necessary checks on new >= old have been done */ - struct bitmap *bitmap; if (new_offset >= rdev->data_offset) return 1; @@ -2232,11 +2244,18 @@ super_1_allow_new_offset(struct md_rdev *rdev, */ if (rdev->sb_start + (32+4)*2 > new_offset) return 0; - bitmap = rdev->mddev->bitmap; - if (bitmap && !rdev->mddev->bitmap_info.file && - rdev->sb_start + rdev->mddev->bitmap_info.offset + - bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset) - return 0; + + if (!rdev->mddev->bitmap_info.file) { + struct mddev *mddev = rdev->mddev; + struct md_bitmap_stats stats; + int err; + + err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (!err && rdev->sb_start + mddev->bitmap_info.offset + + stats.file_pages * (PAGE_SIZE >> 9) > new_offset) + return 0; + } + if (rdev->badblocks.sector + rdev->badblocks.size > new_offset) return 0; @@ -2712,7 +2731,7 @@ void md_update_sb(struct mddev *mddev, int force_change) mddev_add_trace_msg(mddev, "md md_update_sb"); rewrite: - md_bitmap_update_sb(mddev->bitmap); + mddev->bitmap_ops->update_sb(mddev->bitmap); rdev_for_each(rdev, mddev) { if (rdev->sb_loaded != 1) continue; /* no noise on spare devices */ @@ -4572,17 +4591,23 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len) /* buf should be ... or - ... (range) */ while (*buf) { chunk = end_chunk = simple_strtoul(buf, &end, 0); - if (buf == end) break; + if (buf == end) + break; + if (*end == '-') { /* range */ buf = end + 1; end_chunk = simple_strtoul(buf, &end, 0); - if (buf == end) break; + if (buf == end) + break; } - if (*end && !isspace(*end)) break; - md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); + + if (*end && !isspace(*end)) + break; + + mddev->bitmap_ops->dirty_bits(mddev, chunk, end_chunk); buf = skip_spaces(end); } - md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ + mddev->bitmap_ops->unplug(mddev, true); /* flush the bits to disk */ out: mddev_unlock(mddev); return len; @@ -6098,16 +6123,10 @@ int md_run(struct mddev *mddev) } if (err == 0 && pers->sync_request && (mddev->bitmap_info.file || mddev->bitmap_info.offset)) { - struct bitmap *bitmap; - - bitmap = md_bitmap_create(mddev, -1); - if (IS_ERR(bitmap)) { - err = PTR_ERR(bitmap); + err = mddev->bitmap_ops->create(mddev, -1); + if (err) pr_warn("%s: failed to create bitmap (%d)\n", mdname(mddev), err); - } else - mddev->bitmap = bitmap; - } if (err) goto bitmap_abort; @@ -6177,7 +6196,7 @@ int md_run(struct mddev *mddev) pers->free(mddev, mddev->private); mddev->private = NULL; module_put(pers->owner); - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); abort: bioset_exit(&mddev->io_clone_set); exit_sync_set: @@ -6196,9 +6215,10 @@ int do_md_run(struct mddev *mddev) err = md_run(mddev); if (err) goto out; - err = md_bitmap_load(mddev); + + err = mddev->bitmap_ops->load(mddev); if (err) { - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); goto out; } @@ -6342,7 +6362,8 @@ static void __md_stop_writes(struct mddev *mddev) mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); } - md_bitmap_flush(mddev); + + mddev->bitmap_ops->flush(mddev); if (md_is_rdwr(mddev) && ((!mddev->in_sync && !mddev_is_clustered(mddev)) || @@ -6369,7 +6390,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void mddev_detach(struct mddev *mddev) { - md_bitmap_wait_behind_writes(mddev); + mddev->bitmap_ops->wait_behind_writes(mddev); if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) { mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); @@ -6384,7 +6405,8 @@ static void mddev_detach(struct mddev *mddev) static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; - md_bitmap_destroy(mddev); + + mddev->bitmap_ops->destroy(mddev); mddev_detach(mddev); spin_lock(&mddev->lock); mddev->pers = NULL; @@ -7162,22 +7184,19 @@ static int set_bitmap_file(struct mddev *mddev, int fd) err = 0; if (mddev->pers) { if (fd >= 0) { - struct bitmap *bitmap; + err = mddev->bitmap_ops->create(mddev, -1); + if (!err) + err = mddev->bitmap_ops->load(mddev); - bitmap = md_bitmap_create(mddev, -1); - if (!IS_ERR(bitmap)) { - mddev->bitmap = bitmap; - err = md_bitmap_load(mddev); - } else - err = PTR_ERR(bitmap); if (err) { - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); fd = -1; } } else if (fd < 0) { - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); } } + if (fd < 0) { struct file *f = mddev->bitmap_info.file; if (f) { @@ -7446,7 +7465,6 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) goto err; } if (info->state & (1<bitmap) { rv = -EEXIST; @@ -7460,24 +7478,24 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.default_offset; mddev->bitmap_info.space = mddev->bitmap_info.default_space; - bitmap = md_bitmap_create(mddev, -1); - if (!IS_ERR(bitmap)) { - mddev->bitmap = bitmap; - rv = md_bitmap_load(mddev); - } else - rv = PTR_ERR(bitmap); + rv = mddev->bitmap_ops->create(mddev, -1); + if (!rv) + rv = mddev->bitmap_ops->load(mddev); + if (rv) - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); } else { - /* remove the bitmap */ - if (!mddev->bitmap) { - rv = -ENOENT; + struct md_bitmap_stats stats; + + rv = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (rv) goto err; - } - if (mddev->bitmap->storage.file) { + + if (stats.file) { rv = -EINVAL; goto err; } + if (mddev->bitmap_info.nodes) { /* hold PW on all the bitmap lock */ if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) { @@ -7492,7 +7510,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) module_put(md_cluster_mod); mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY; } - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); mddev->bitmap_info.offset = 0; } } @@ -8262,6 +8280,33 @@ static void md_seq_stop(struct seq_file *seq, void *v) spin_unlock(&all_mddevs_lock); } +static void md_bitmap_status(struct seq_file *seq, struct mddev *mddev) +{ + struct md_bitmap_stats stats; + unsigned long used_pages; + unsigned long chunk_kb; + int err; + + err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (err) + return; + + chunk_kb = mddev->bitmap_info.chunksize >> 10; + used_pages = stats.pages - stats.missing_pages; + + seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], %lu%s chunk", + used_pages, stats.pages, used_pages << (PAGE_SHIFT - 10), + chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize, + chunk_kb ? "KB" : "B"); + + if (stats.file) { + seq_puts(seq, ", file: "); + seq_file_path(seq, stats.file, " \t\n"); + } + + seq_putc(seq, '\n'); +} + static int md_seq_show(struct seq_file *seq, void *v) { struct mddev *mddev; @@ -8345,7 +8390,7 @@ static int md_seq_show(struct seq_file *seq, void *v) } else seq_printf(seq, "\n "); - md_bitmap_status(seq, mddev->bitmap); + md_bitmap_status(seq, mddev); seq_printf(seq, "\n"); } @@ -9397,7 +9442,7 @@ static void md_start_sync(struct work_struct *ws) * stored on all devices. So make sure all bitmap pages get written. */ if (spares) - md_bitmap_write_all(mddev->bitmap); + mddev->bitmap_ops->write_all(mddev); name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ? "reshape" : "resync"; @@ -9485,7 +9530,7 @@ static void unregister_sync_thread(struct mddev *mddev) void md_check_recovery(struct mddev *mddev) { if (mddev->bitmap) - md_bitmap_daemon_work(mddev); + mddev->bitmap_ops->daemon_work(mddev); if (signal_pending(current)) { if (mddev->pers->sync_request && !mddev->external) { @@ -9856,7 +9901,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) if (ret) pr_info("md-cluster: resize failed\n"); else - md_bitmap_update_sb(mddev->bitmap); + mddev->bitmap_ops->update_sb(mddev->bitmap); } /* Check for change of roles in the active devices */ diff --git a/drivers/md/md.h b/drivers/md/md.h index bc684d503ab66..5d2e6bd58e4da 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -535,7 +535,8 @@ struct mddev { struct percpu_ref writes_pending; int sync_checkers; /* # of threads checking writes_pending */ - struct bitmap *bitmap; /* the bitmap for the device */ + void *bitmap; /* the bitmap for the device */ + struct bitmap_operations *bitmap_ops; struct { struct file *file; /* the bitmap file */ loff_t offset; /* offset from superblock of diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 2ea1710a3b705..4378d3250bd75 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -140,7 +140,7 @@ static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio, * If bitmap is not enabled, it's safe to submit the io directly, and * this can get optimal performance. */ - if (!md_bitmap_enabled(mddev->bitmap)) { + if (!mddev->bitmap_ops->enabled(mddev)) { raid1_submit_write(bio); return true; } @@ -166,12 +166,9 @@ static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio, * while current io submission must wait for bitmap io to be done. In order to * avoid such deadlock, submit bitmap io asynchronously. */ -static inline void raid1_prepare_flush_writes(struct bitmap *bitmap) +static inline void raid1_prepare_flush_writes(struct mddev *mddev) { - if (current->bio_list) - md_bitmap_unplug_async(bitmap); - else - md_bitmap_unplug(bitmap); + mddev->bitmap_ops->unplug(mddev, current->bio_list == NULL); } /* diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 531ddfc6efbdd..f55c8e67d0593 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -411,18 +411,20 @@ static void raid1_end_read_request(struct bio *bio) static void close_write(struct r1bio *r1_bio) { + struct mddev *mddev = r1_bio->mddev; + /* it really is the end of this request */ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { bio_free_pages(r1_bio->behind_master_bio); bio_put(r1_bio->behind_master_bio); r1_bio->behind_master_bio = NULL; } + /* clear the bitmap if all writes complete successfully */ - md_bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - test_bit(R1BIO_BehindIO, &r1_bio->state)); - md_write_end(r1_bio->mddev); + mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors, + !test_bit(R1BIO_Degraded, &r1_bio->state), + test_bit(R1BIO_BehindIO, &r1_bio->state)); + md_write_end(mddev); } static void r1_bio_write_done(struct r1bio *r1_bio) @@ -894,7 +896,7 @@ static void wake_up_barrier(struct r1conf *conf) static void flush_bio_list(struct r1conf *conf, struct bio *bio) { /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - raid1_prepare_flush_writes(conf->mddev->bitmap); + raid1_prepare_flush_writes(conf->mddev); wake_up_barrier(conf); while (bio) { /* submit pending writes */ @@ -1311,7 +1313,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, struct r1conf *conf = mddev->private; struct raid1_info *mirror; struct bio *read_bio; - struct bitmap *bitmap = mddev->bitmap; const enum req_op op = bio_op(bio); const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; int max_sectors; @@ -1364,15 +1365,13 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, (unsigned long long)r1_bio->sector, mirror->rdev->bdev); - if (test_bit(WriteMostly, &mirror->rdev->flags) && - bitmap) { + if (test_bit(WriteMostly, &mirror->rdev->flags)) { /* * Reading from a write-mostly device must take care not to * over-take any writes that are 'behind' */ mddev_add_trace_msg(mddev, "raid1 wait behind writes"); - wait_event(bitmap->behind_wait, - atomic_read(&bitmap->behind_writes) == 0); + mddev->bitmap_ops->wait_behind_writes(mddev); } if (max_sectors < bio_sectors(bio)) { @@ -1413,7 +1412,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, struct r1conf *conf = mddev->private; struct r1bio *r1_bio; int i, disks; - struct bitmap *bitmap = mddev->bitmap; unsigned long flags; struct md_rdev *blocked_rdev; int first_clone; @@ -1566,7 +1564,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, * at a time and thus needs a new bio that can fit the whole payload * this bio in page sized chunks. */ - if (write_behind && bitmap) + if (write_behind && mddev->bitmap) max_sectors = min_t(int, max_sectors, BIO_MAX_VECS * (PAGE_SIZE >> 9)); if (max_sectors < bio_sectors(bio)) { @@ -1593,19 +1591,23 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, continue; if (first_clone) { + unsigned long max_write_behind = + mddev->bitmap_info.max_write_behind; + struct md_bitmap_stats stats; + int err; + /* do behind I/O ? * Not if there are too many, or cannot * allocate memory, or a reader on WriteMostly * is waiting for behind writes to flush */ - if (bitmap && write_behind && - (atomic_read(&bitmap->behind_writes) - < mddev->bitmap_info.max_write_behind) && - !waitqueue_active(&bitmap->behind_wait)) { + err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (!err && write_behind && !stats.behind_wait && + stats.behind_writes < max_write_behind) alloc_behind_master_bio(r1_bio, bio); - } - md_bitmap_startwrite(bitmap, r1_bio->sector, r1_bio->sectors, - test_bit(R1BIO_BehindIO, &r1_bio->state)); + mddev->bitmap_ops->startwrite( + mddev, r1_bio->sector, r1_bio->sectors, + test_bit(R1BIO_BehindIO, &r1_bio->state)); first_clone = 0; } @@ -2023,7 +2025,7 @@ static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio) /* make sure these bits don't get cleared. */ do { - md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); + mddev->bitmap_ops->end_sync(mddev, s, &sync_blocks); s += sync_blocks; sectors_to_go -= sync_blocks; } while (sectors_to_go > 0); @@ -2752,7 +2754,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, int wonly = -1; int write_targets = 0, read_targets = 0; sector_t sync_blocks; - int still_degraded = 0; + bool still_degraded = false; int good_sectors = RESYNC_SECTORS; int min_bad = 0; /* number of sectors that are bad in all devices */ int idx = sector_to_idx(sector_nr); @@ -2769,12 +2771,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, * We can find the current addess in mddev->curr_resync */ if (mddev->curr_resync < max_sector) /* aborted */ - md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); + mddev->bitmap_ops->end_sync(mddev, mddev->curr_resync, + &sync_blocks); else /* completed sync */ conf->fullsync = 0; - md_bitmap_close_sync(mddev->bitmap); + mddev->bitmap_ops->close_sync(mddev); close_sync(conf); if (mddev_is_clustered(mddev)) { @@ -2794,7 +2796,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, /* before building a request, check if we can skip these blocks.. * This call the bitmap_start_sync doesn't actually record anything */ - if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && + if (!mddev->bitmap_ops->start_sync(mddev, sector_nr, &sync_blocks, true) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* We can skip this block, and probably several more */ *skipped = 1; @@ -2812,9 +2814,9 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, * sector_nr + two times RESYNC_SECTORS */ - md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, - mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); - + mddev->bitmap_ops->cond_end_sync(mddev, sector_nr, + mddev_is_clustered(mddev) && + (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); if (raise_barrier(conf, sector_nr)) return 0; @@ -2845,7 +2847,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { if (i < conf->raid_disks) - still_degraded = 1; + still_degraded = true; } else if (!test_bit(In_sync, &rdev->flags)) { bio->bi_opf = REQ_OP_WRITE; bio->bi_end_io = end_sync_write; @@ -2969,8 +2971,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (len == 0) break; if (sync_blocks == 0) { - if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, - &sync_blocks, still_degraded) && + if (!mddev->bitmap_ops->start_sync(mddev, sector_nr, + &sync_blocks, still_degraded) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) break; @@ -3294,14 +3296,16 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) * worth it. */ sector_t newsize = raid1_size(mddev, sectors, 0); + int ret; + if (mddev->external_size && mddev->array_sectors > newsize) return -EINVAL; - if (mddev->bitmap) { - int ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); - if (ret) - return ret; - } + + ret = mddev->bitmap_ops->resize(mddev, newsize, 0, false); + if (ret) + return ret; + md_set_array_sectors(mddev, newsize); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e55e020b55711..f3bf1116794a4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -426,12 +426,13 @@ static void raid10_end_read_request(struct bio *bio) static void close_write(struct r10bio *r10_bio) { + struct mddev *mddev = r10_bio->mddev; + /* clear the bitmap if all writes complete successfully */ - md_bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, - r10_bio->sectors, - !test_bit(R10BIO_Degraded, &r10_bio->state), - 0); - md_write_end(r10_bio->mddev); + mddev->bitmap_ops->endwrite(mddev, r10_bio->sector, r10_bio->sectors, + !test_bit(R10BIO_Degraded, &r10_bio->state), + false); + md_write_end(mddev); } static void one_write_done(struct r10bio *r10_bio) @@ -884,7 +885,7 @@ static void flush_pending_writes(struct r10conf *conf) __set_current_state(TASK_RUNNING); blk_start_plug(&plug); - raid1_prepare_flush_writes(conf->mddev->bitmap); + raid1_prepare_flush_writes(conf->mddev); wake_up(&conf->wait_barrier); while (bio) { /* submit pending writes */ @@ -1100,7 +1101,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) /* we aren't scheduling, so we can do the write-out directly. */ bio = bio_list_get(&plug->pending); - raid1_prepare_flush_writes(mddev->bitmap); + raid1_prepare_flush_writes(mddev); wake_up_barrier(conf); while (bio) { /* submit pending writes */ @@ -1492,7 +1493,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, md_account_bio(mddev, &bio); r10_bio->master_bio = bio; atomic_set(&r10_bio->remaining, 1); - md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); + mddev->bitmap_ops->startwrite(mddev, r10_bio->sector, r10_bio->sectors, + false); for (i = 0; i < conf->copies; i++) { if (r10_bio->devs[i].bio) @@ -3192,13 +3194,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (mddev->curr_resync < max_sector) { /* aborted */ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) - md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); + mddev->bitmap_ops->end_sync(mddev, + mddev->curr_resync, + &sync_blocks); else for (i = 0; i < conf->geo.raid_disks; i++) { sector_t sect = raid10_find_virt(conf, mddev->curr_resync, i); - md_bitmap_end_sync(mddev->bitmap, sect, - &sync_blocks, 1); + + mddev->bitmap_ops->end_sync(mddev, sect, + &sync_blocks); } } else { /* completed sync */ @@ -3218,7 +3222,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, } conf->fullsync = 0; } - md_bitmap_close_sync(mddev->bitmap); + mddev->bitmap_ops->close_sync(mddev); close_sync(conf); *skipped = 1; return sectors_skipped; @@ -3287,10 +3291,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio = NULL; for (i = 0 ; i < conf->geo.raid_disks; i++) { - int still_degraded; + bool still_degraded; struct r10bio *rb2; sector_t sect; - int must_sync; + bool must_sync; int any_working; struct raid10_info *mirror = &conf->mirrors[i]; struct md_rdev *mrdev, *mreplace; @@ -3307,7 +3311,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (!mrdev && !mreplace) continue; - still_degraded = 0; + still_degraded = false; /* want to reconstruct this device */ rb2 = r10_bio; sect = raid10_find_virt(conf, sector_nr, i); @@ -3320,8 +3324,9 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, * we only need to recover the block if it is set in * the bitmap */ - must_sync = md_bitmap_start_sync(mddev->bitmap, sect, - &sync_blocks, 1); + must_sync = mddev->bitmap_ops->start_sync(mddev, sect, + &sync_blocks, + true); if (sync_blocks < max_sync) max_sync = sync_blocks; if (!must_sync && @@ -3359,13 +3364,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, struct md_rdev *rdev = conf->mirrors[j].rdev; if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { - still_degraded = 1; + still_degraded = false; break; } } - must_sync = md_bitmap_start_sync(mddev->bitmap, sect, - &sync_blocks, still_degraded); + must_sync = mddev->bitmap_ops->start_sync(mddev, sect, + &sync_blocks, still_degraded); any_working = 0; for (j=0; jcopies;j++) { @@ -3538,12 +3543,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, * safety reason, which ensures curr_resync_completed is * updated in bitmap_cond_end_sync. */ - md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, + mddev->bitmap_ops->cond_end_sync(mddev, sector_nr, mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); - if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, - &sync_blocks, mddev->degraded) && + if (!mddev->bitmap_ops->start_sync(mddev, sector_nr, + &sync_blocks, + mddev->degraded) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* We can skip this block */ @@ -4190,6 +4196,7 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) */ struct r10conf *conf = mddev->private; sector_t oldsize, size; + int ret; if (mddev->reshape_position != MaxSector) return -EBUSY; @@ -4202,11 +4209,11 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) if (mddev->external_size && mddev->array_sectors > size) return -EINVAL; - if (mddev->bitmap) { - int ret = md_bitmap_resize(mddev->bitmap, size, 0, 0); - if (ret) - return ret; - } + + ret = mddev->bitmap_ops->resize(mddev, size, 0, false); + if (ret) + return ret; + md_set_array_sectors(mddev, size); if (sectors > mddev->dev_sectors && mddev->recovery_cp > oldsize) { @@ -4472,7 +4479,7 @@ static int raid10_start_reshape(struct mddev *mddev) newsize = raid10_size(mddev, 0, conf->geo.raid_disks); if (!mddev_is_clustered(mddev)) { - ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); + ret = mddev->bitmap_ops->resize(mddev, newsize, 0, false); if (ret) goto abort; else @@ -4487,20 +4494,20 @@ static int raid10_start_reshape(struct mddev *mddev) /* * some node is already performing reshape, and no need to - * call md_bitmap_resize again since it should be called when + * call bitmap_ops->resize again since it should be called when * receiving BITMAP_RESIZE msg */ if ((sb && (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize)) goto out; - ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); + ret = mddev->bitmap_ops->resize(mddev, newsize, 0, false); if (ret) goto abort; ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize); if (ret) { - md_bitmap_resize(mddev->bitmap, oldsize, 0, 0); + mddev->bitmap_ops->resize(mddev, oldsize, 0, false); goto abort; } } diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 874874fe4fa12..23f2cbcf1a6c4 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -313,10 +313,10 @@ void r5c_handle_cached_data_endio(struct r5conf *conf, if (sh->dev[i].written) { set_bit(R5_UPTODATE, &sh->dev[i].flags); r5c_return_dev_pending_writes(conf, &sh->dev[i]); - md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, - RAID5_STRIPE_SECTORS(conf), - !test_bit(STRIPE_DEGRADED, &sh->state), - 0); + conf->mddev->bitmap_ops->endwrite(conf->mddev, + sh->sector, RAID5_STRIPE_SECTORS(conf), + !test_bit(STRIPE_DEGRADED, &sh->state), + false); } } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c14cf2410365d..c84a7e0263cde 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3563,8 +3563,8 @@ static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi, */ set_bit(STRIPE_BITMAP_PENDING, &sh->state); spin_unlock_irq(&sh->stripe_lock); - md_bitmap_startwrite(conf->mddev->bitmap, sh->sector, - RAID5_STRIPE_SECTORS(conf), 0); + conf->mddev->bitmap_ops->startwrite(conf->mddev, sh->sector, + RAID5_STRIPE_SECTORS(conf), false); spin_lock_irq(&sh->stripe_lock); clear_bit(STRIPE_BITMAP_PENDING, &sh->state); if (!sh->batch_head) { @@ -3663,8 +3663,9 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, bi = nextbi; } if (bitmap_end) - md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, - RAID5_STRIPE_SECTORS(conf), 0, 0); + conf->mddev->bitmap_ops->endwrite(conf->mddev, + sh->sector, RAID5_STRIPE_SECTORS(conf), + false, false); bitmap_end = 0; /* and fail all 'written' */ bi = sh->dev[i].written; @@ -3709,8 +3710,9 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, } } if (bitmap_end) - md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, - RAID5_STRIPE_SECTORS(conf), 0, 0); + conf->mddev->bitmap_ops->endwrite(conf->mddev, + sh->sector, RAID5_STRIPE_SECTORS(conf), + false, false); /* If we were in the middle of a write the parity block might * still be locked - so just clear all R5_LOCKED flags */ @@ -4059,10 +4061,10 @@ static void handle_stripe_clean_event(struct r5conf *conf, bio_endio(wbi); wbi = wbi2; } - md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, - RAID5_STRIPE_SECTORS(conf), - !test_bit(STRIPE_DEGRADED, &sh->state), - 0); + conf->mddev->bitmap_ops->endwrite(conf->mddev, + sh->sector, RAID5_STRIPE_SECTORS(conf), + !test_bit(STRIPE_DEGRADED, &sh->state), + false); if (head_sh->batch_head) { sh = list_first_entry(&sh->batch_list, struct stripe_head, @@ -5788,13 +5790,10 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) } spin_unlock_irq(&sh->stripe_lock); if (conf->mddev->bitmap) { - for (d = 0; - d < conf->raid_disks - conf->max_degraded; + for (d = 0; d < conf->raid_disks - conf->max_degraded; d++) - md_bitmap_startwrite(mddev->bitmap, - sh->sector, - RAID5_STRIPE_SECTORS(conf), - 0); + mddev->bitmap_ops->startwrite(mddev, sh->sector, + RAID5_STRIPE_SECTORS(conf), false); sh->bm_seq = conf->seq_flush + 1; set_bit(STRIPE_BIT_DELAY, &sh->state); } @@ -6486,7 +6485,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n struct r5conf *conf = mddev->private; struct stripe_head *sh; sector_t sync_blocks; - int still_degraded = 0; + bool still_degraded = false; int i; if (sector_nr >= max_sector) { @@ -6498,11 +6497,11 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n } if (mddev->curr_resync < max_sector) /* aborted */ - md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); + mddev->bitmap_ops->end_sync(mddev, mddev->curr_resync, + &sync_blocks); else /* completed sync */ conf->fullsync = 0; - md_bitmap_close_sync(mddev->bitmap); + mddev->bitmap_ops->close_sync(mddev); return 0; } @@ -6531,7 +6530,8 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n } if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && !conf->fullsync && - !md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && + !mddev->bitmap_ops->start_sync(mddev, sector_nr, &sync_blocks, + true) && sync_blocks >= RAID5_STRIPE_SECTORS(conf)) { /* we can skip this block, and probably more */ do_div(sync_blocks, RAID5_STRIPE_SECTORS(conf)); @@ -6540,7 +6540,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n return sync_blocks * RAID5_STRIPE_SECTORS(conf); } - md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); + mddev->bitmap_ops->cond_end_sync(mddev, sector_nr, false); sh = raid5_get_active_stripe(conf, NULL, sector_nr, R5_GAS_NOBLOCK); @@ -6559,10 +6559,11 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n struct md_rdev *rdev = conf->disks[i].rdev; if (rdev == NULL || test_bit(Faulty, &rdev->flags)) - still_degraded = 1; + still_degraded = true; } - md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); + mddev->bitmap_ops->start_sync(mddev, sector_nr, &sync_blocks, + still_degraded); set_bit(STRIPE_SYNC_REQUESTED, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); @@ -6767,7 +6768,7 @@ static void raid5d(struct md_thread *thread) /* Now is a good time to flush some bitmap updates */ conf->seq_flush++; spin_unlock_irq(&conf->device_lock); - md_bitmap_unplug(mddev->bitmap); + mddev->bitmap_ops->unplug(mddev, true); spin_lock_irq(&conf->device_lock); conf->seq_write = conf->seq_flush; activate_bit_delay(conf, conf->temp_inactive_list); @@ -8312,6 +8313,7 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) */ sector_t newsize; struct r5conf *conf = mddev->private; + int ret; if (raid5_has_log(conf) || raid5_has_ppl(conf)) return -EINVAL; @@ -8320,11 +8322,11 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) if (mddev->external_size && mddev->array_sectors > newsize) return -EINVAL; - if (mddev->bitmap) { - int ret = md_bitmap_resize(mddev->bitmap, sectors, 0, 0); - if (ret) - return ret; - } + + ret = mddev->bitmap_ops->resize(mddev, sectors, 0, false); + if (ret) + return ret; + md_set_array_sectors(mddev, newsize); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) {