Skip to content

Commit

Permalink
md: Support write-intent bitmaps with externally managed metadata.
Browse files Browse the repository at this point in the history
In this case, the metadata must not be in the same
sector as the bitmap.
md will not read/write any bitmap metadata.  Config must be
done via sysfs and when a recovery makes the array non-degraded
again, writing 'true' to 'bitmap/can_clear' will allow bits in
the bitmap to be cleared again.

Signed-off-by: NeilBrown <neilb@suse.de>
  • Loading branch information
NeilBrown committed Dec 14, 2009
1 parent 624ce4f commit ece5cff
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 33 deletions.
16 changes: 16 additions & 0 deletions Documentation/md.txt
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,22 @@ All md devices contain:
'backlog' sets a limit on the number of concurrent background
writes. If there are more than this, new writes will be
synchronous.
bitmap/metadata
This can be either 'internal' or 'external'.
'internal' is the default and means the metadata for the bitmap
is stored in the first 256 bytes of the allocated space and is
managed by the md module.
'external' means that bitmap metadata is managed externally to
the kernel (i.e. by some userspace program).
bitmap/can_clear
This is either 'true' or 'false'. If 'true', then bits in the
bitmap will be cleared when the corresponding blocks are thought
to be in-sync. If 'false', bits will never be cleared.
This is automatically set to 'false' if a write happens on a
degraded array, or if the array becomes degraded during a write.
When metadata is managed externally, it should be set to true
once the array becomes non-degraded, and this fact has been
recorded in the metadata.



Expand Down
142 changes: 119 additions & 23 deletions drivers/md/bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)

if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
return;
if (bitmap->mddev->bitmap_info.external)
return;
spin_lock_irqsave(&bitmap->lock, flags);
if (!bitmap->sb_page) { /* no superblock */
spin_unlock_irqrestore(&bitmap->lock, flags);
Expand Down Expand Up @@ -676,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
* general bitmap file operations
*/

/*
* on-disk bitmap:
*
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
* file a page at a time. There's a superblock at the start of the file.
*/
/* calculate the index of the page that contains this bit */
static inline unsigned long file_page_index(unsigned long chunk)
static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
{
return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
if (!bitmap->mddev->bitmap_info.external)
chunk += sizeof(bitmap_super_t) << 3;
return chunk >> PAGE_BIT_SHIFT;
}

/* calculate the (bit) offset of this bit within a page */
static inline unsigned long file_page_offset(unsigned long chunk)
static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
{
return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
if (!bitmap->mddev->bitmap_info.external)
chunk += sizeof(bitmap_super_t) << 3;
return chunk & (PAGE_BITS - 1);
}

/*
Expand All @@ -698,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
static inline struct page *filemap_get_page(struct bitmap *bitmap,
unsigned long chunk)
{
if (file_page_index(chunk) >= bitmap->file_pages) return NULL;
return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
return bitmap->filemap[file_page_index(bitmap, chunk)
- file_page_index(bitmap, 0)];
}


Expand All @@ -722,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
spin_unlock_irqrestore(&bitmap->lock, flags);

while (pages--)
if (map[pages]->index != 0) /* 0 is sb_page, release it below */
if (map[pages] != sb_page) /* 0 is sb_page, release it below */
free_buffers(map[pages]);
kfree(map);
kfree(attr);
Expand Down Expand Up @@ -833,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)

page = filemap_get_page(bitmap, chunk);
if (!page) return;
bit = file_page_offset(chunk);
bit = file_page_offset(bitmap, chunk);

/* set the bit */
kaddr = kmap_atomic(page, KM_USER0);
Expand Down Expand Up @@ -931,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
"recovery\n", bmname(bitmap));

bytes = (chunks + 7) / 8;
if (!bitmap->mddev->bitmap_info.external)
bytes += sizeof(bitmap_super_t);

num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;

num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;

if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
if (file && i_size_read(file->f_mapping->host) < bytes) {
printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
bmname(bitmap),
(unsigned long) i_size_read(file->f_mapping->host),
bytes + sizeof(bitmap_super_t));
bytes);
goto err;
}

Expand All @@ -959,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)

for (i = 0; i < chunks; i++) {
int b;
index = file_page_index(i);
bit = file_page_offset(i);
index = file_page_index(bitmap, i);
bit = file_page_offset(bitmap, i);
if (index != oldindex) { /* this is a new page, read it in */
int count;
/* unmap the old page, we're done with it */
if (index == num_pages-1)
count = bytes + sizeof(bitmap_super_t)
- index * PAGE_SIZE;
count = bytes - index * PAGE_SIZE;
else
count = PAGE_SIZE;
if (index == 0) {
if (index == 0 && bitmap->sb_page) {
/*
* if we're here then the superblock page
* contains some bits (PAGE_SIZE != sizeof sb)
Expand Down Expand Up @@ -1164,7 +1179,8 @@ void bitmap_daemon_work(mddev_t *mddev)
/* We are possibly going to clear some bits, so make
* sure that events_cleared is up-to-date.
*/
if (bitmap->need_sync) {
if (bitmap->need_sync &&
bitmap->mddev->bitmap_info.external == 0) {
bitmap_super_t *sb;
bitmap->need_sync = 0;
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
Expand All @@ -1174,7 +1190,8 @@ void bitmap_daemon_work(mddev_t *mddev)
write_page(bitmap, bitmap->sb_page, 1);
}
spin_lock_irqsave(&bitmap->lock, flags);
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
if (!bitmap->need_sync)
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
bmc = bitmap_get_counter(bitmap,
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
Expand All @@ -1189,7 +1206,7 @@ void bitmap_daemon_work(mddev_t *mddev)
if (*bmc == 2) {
*bmc=1; /* maybe clear the bit next time */
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
} else if (*bmc == 1) {
} else if (*bmc == 1 && !bitmap->need_sync) {
/* we can clear the bit */
*bmc = 0;
bitmap_count_page(bitmap,
Expand All @@ -1199,9 +1216,11 @@ void bitmap_daemon_work(mddev_t *mddev)
/* clear the bit */
paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
clear_bit(file_page_offset(j), paddr);
clear_bit(file_page_offset(bitmap, j),
paddr);
else
ext2_clear_bit(file_page_offset(j), paddr);
ext2_clear_bit(file_page_offset(bitmap, j),
paddr);
kunmap_atomic(paddr, KM_USER0);
}
} else
Expand Down Expand Up @@ -1356,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
sysfs_notify_dirent(bitmap->sysfs_can_clear);
}

if (!success && ! (*bmc & NEEDED_MASK))
Expand Down Expand Up @@ -1613,6 +1633,9 @@ void bitmap_destroy(mddev_t *mddev)
if (mddev->thread)
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;

if (bitmap->sysfs_can_clear)
sysfs_put(bitmap->sysfs_can_clear);

bitmap_free(bitmap);
}

Expand All @@ -1629,6 +1652,7 @@ int bitmap_create(mddev_t *mddev)
struct file *file = mddev->bitmap_info.file;
int err;
sector_t start;
struct sysfs_dirent *bm;

BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);

Expand All @@ -1648,6 +1672,13 @@ int bitmap_create(mddev_t *mddev)

bitmap->mddev = mddev;

bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
if (bm) {
bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
sysfs_put(bm);
} else
bitmap->sysfs_can_clear = NULL;

bitmap->file = file;
if (file) {
get_file(file);
Expand All @@ -1658,7 +1689,16 @@ int bitmap_create(mddev_t *mddev)
vfs_fsync(file, file->f_dentry, 1);
}
/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
err = bitmap_read_sb(bitmap);
if (!mddev->bitmap_info.external)
err = bitmap_read_sb(bitmap);
else {
err = 0;
if (mddev->bitmap_info.chunksize == 0 ||
mddev->bitmap_info.daemon_sleep == 0)
/* chunksize and time_base need to be
* set first. */
err = -EINVAL;
}
if (err)
goto error;

Expand Down Expand Up @@ -1777,7 +1817,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len)
return rv;
if (offset == 0)
return -EINVAL;
if (mddev->major_version == 0 &&
if (mddev->bitmap_info.external == 0 &&
mddev->major_version == 0 &&
offset != mddev->bitmap_info.default_offset)
return -EINVAL;
mddev->bitmap_info.offset = offset;
Expand Down Expand Up @@ -1906,11 +1947,66 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len)
static struct md_sysfs_entry bitmap_chunksize =
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);

/*
 * sysfs read handler for bitmap/metadata.
 *
 * Writes "external\n" into @page when mddev->bitmap_info.external is
 * non-zero, "internal\n" otherwise, and returns the number of characters
 * written (the sprintf() result).
 */
static ssize_t metadata_show(mddev_t *mddev, char *page)
{
	const char *mode = "internal";

	if (mddev->bitmap_info.external)
		mode = "external";

	return sprintf(page, "%s\n", mode);
}

/*
 * sysfs write handler for bitmap/metadata.
 *
 * Accepts input beginning with "external" or "internal" and records the
 * choice in mddev->bitmap_info.external (1 or 0 respectively).
 *
 * Returns @len on success, -EBUSY if a bitmap already exists or a bitmap
 * file/offset has already been configured, and -EINVAL for any other
 * input.
 */
static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
{
	int external;

	/* The mode may only change while no bitmap is set up or located. */
	if (mddev->bitmap || mddev->bitmap_info.file || mddev->bitmap_info.offset)
		return -EBUSY;

	/* Only the leading 8 characters are compared. */
	if (!strncmp(buf, "external", 8))
		external = 1;
	else if (!strncmp(buf, "internal", 8))
		external = 0;
	else
		return -EINVAL;

	mddev->bitmap_info.external = external;
	return len;
}

/* sysfs attribute "metadata", served by metadata_show()/metadata_store(). */
static struct md_sysfs_entry bitmap_metadata =
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);

/*
 * sysfs read handler for bitmap/can_clear.
 *
 * With no bitmap attached, emits an empty line.  Otherwise reports the
 * inverse of bitmap->need_sync: "false\n" while need_sync is set,
 * "true\n" once it is clear.  Returns the number of characters written.
 */
static ssize_t can_clear_show(mddev_t *mddev, char *page)
{
	const char *state;

	if (!mddev->bitmap)
		return sprintf(page, "\n");

	state = "true";
	if (mddev->bitmap->need_sync)
		state = "false";

	return sprintf(page, "%s\n", state);
}

/*
 * sysfs write handler for bitmap/can_clear.
 *
 * "false" sets bitmap->need_sync; "true" clears it, but is refused while
 * the array is degraded.  Only the leading characters of @buf are
 * compared (5 for "false", 4 for "true").
 *
 * Returns @len on success, -ENOENT when there is no bitmap, -EBUSY when
 * "true" is written to a degraded array, and -EINVAL for any other input.
 */
static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
{
	if (!mddev->bitmap)
		return -ENOENT;

	if (strncmp(buf, "false", 5) == 0) {
		mddev->bitmap->need_sync = 1;
		return len;
	}

	if (strncmp(buf, "true", 4) != 0)
		return -EINVAL;

	/* Clearing is not allowed again until the array is non-degraded. */
	if (mddev->degraded)
		return -EBUSY;

	mddev->bitmap->need_sync = 0;
	return len;
}

/* sysfs attribute "can_clear", served by can_clear_show()/can_clear_store(). */
static struct md_sysfs_entry bitmap_can_clear =
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);

/*
 * NULL-terminated list of the bitmap-related md sysfs attributes,
 * including the "metadata" and "can_clear" entries.
 */
static struct attribute *md_bitmap_attrs[] = {
&bitmap_location.attr,
&bitmap_timeout.attr,
&bitmap_backlog.attr,
&bitmap_chunksize.attr,
&bitmap_metadata.attr,
&bitmap_can_clear.attr,
NULL
};
struct attribute_group md_bitmap_group = {
Expand Down
11 changes: 1 addition & 10 deletions drivers/md/bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t;
(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)

/*
* on-disk bitmap:
*
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
* file a page at a time. There's a superblock at the start of the file.
*/

/* map chunks (bits) to file pages - offset by the size of the superblock */
#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))

#endif

/*
Expand Down Expand Up @@ -250,6 +240,7 @@ struct bitmap {
wait_queue_head_t write_wait;
wait_queue_head_t overflow_wait;

struct sysfs_dirent *sysfs_can_clear;
};

/* the bitmap API */
Expand Down
1 change: 1 addition & 0 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ struct mddev_s
unsigned long chunksize;
unsigned long daemon_sleep; /* how many seconds between updates? */
unsigned long max_write_behind; /* write-behind mode */
int external;
} bitmap_info;

struct list_head all_mddevs;
Expand Down

0 comments on commit ece5cff

Please sign in to comment.