Skip to content

Commit

Permalink
drbd: al_write_transaction: skip re-scanning of bitmap page pointer a…
Browse files Browse the repository at this point in the history
…rray

For larger devices, the array of bitmap page pointers can grow very
large (8000 pointers per TB of storage).

For each activity log transaction, we need to flush the associated
bitmap pages to stable storage. Currently, we just "mark" the respective
pages while setting up the transaction, then tell the bitmap code to
write out all marked pages, but skip unchanged pages.

But one such transaction can affect only a small number of bitmap pages,
there is no need to scan the full array of several (ten-)thousand
page pointers to find the few marked ones.

Instead, remember the index numbers of the few affected pages,
and later only re-check those to skip duplicates and unchanged ones.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
  • Loading branch information
Lars Ellenberg authored and Jens Axboe committed Jun 14, 2016
1 parent 13c2088 commit 27ea1d8
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 15 deletions.
2 changes: 2 additions & 0 deletions drivers/block/drbd/drbd_actlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ static int __al_write_transaction(struct drbd_device *device, struct al_transact

i = 0;

drbd_bm_reset_al_hints(device);

/* Even though no one can start to change this list
* once we set the LC_LOCKED -- from drbd_al_begin_io(),
* lc_try_lock_for_transaction() --, someone may still
Expand Down
66 changes: 51 additions & 15 deletions drivers/block/drbd/drbd_bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ struct drbd_bitmap {
struct page **bm_pages;
spinlock_t bm_lock;

/* exclusively to be used by __al_write_transaction(),
* drbd_bm_mark_for_writeout() and
* and drbd_bm_write_hinted() -> bm_rw() called from there.
*/
unsigned int n_bitmap_hints;
unsigned int al_bitmap_hints[AL_UPDATES_PER_TRANSACTION];

/* see LIMITATIONS: above */

unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */
Expand Down Expand Up @@ -242,6 +249,11 @@ static void bm_set_page_need_writeout(struct page *page)
set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
}

void drbd_bm_reset_al_hints(struct drbd_device *device)
{
device->bitmap->n_bitmap_hints = 0;
}

/**
* drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
* @device: DRBD device.
Expand All @@ -253,14 +265,17 @@ static void bm_set_page_need_writeout(struct page *page)
*/
void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr)
{
struct drbd_bitmap *b = device->bitmap;
struct page *page;
if (page_nr >= device->bitmap->bm_number_of_pages) {
drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n",
page_nr, (int)device->bitmap->bm_number_of_pages);
return;
}
page = device->bitmap->bm_pages[page_nr];
set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
BUG_ON(b->n_bitmap_hints >= ARRAY_SIZE(b->al_bitmap_hints));
if (!test_and_set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)))
b->al_bitmap_hints[b->n_bitmap_hints++] = page_nr;
}

static int bm_test_page_unchanged(struct page *page)
Expand Down Expand Up @@ -1030,7 +1045,7 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
{
struct drbd_bm_aio_ctx *ctx;
struct drbd_bitmap *b = device->bitmap;
int num_pages, i, count = 0;
unsigned int num_pages, i, count = 0;
unsigned long now;
char ppb[10];
int err = 0;
Expand Down Expand Up @@ -1078,16 +1093,37 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
now = jiffies;

/* let the layers below us try to merge these bios... */
for (i = 0; i < num_pages; i++) {
/* ignore completely unchanged pages */
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
break;
if (!(flags & BM_AIO_READ)) {
if ((flags & BM_AIO_WRITE_HINTED) &&
!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
&page_private(b->bm_pages[i])))
continue;

if (flags & BM_AIO_READ) {
for (i = 0; i < num_pages; i++) {
atomic_inc(&ctx->in_flight);
bm_page_io_async(ctx, i);
++count;
cond_resched();
}
} else if (flags & BM_AIO_WRITE_HINTED) {
/* ASSERT: BM_AIO_WRITE_ALL_PAGES is not set. */
unsigned int hint;
for (hint = 0; hint < b->n_bitmap_hints; hint++) {
i = b->al_bitmap_hints[hint];
if (i >= num_pages) /* == -1U: no hint here. */
continue;
/* Several AL-extents may point to the same page. */
if (!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
&page_private(b->bm_pages[i])))
continue;
/* Has it even changed? */
if (bm_test_page_unchanged(b->bm_pages[i]))
continue;
atomic_inc(&ctx->in_flight);
bm_page_io_async(ctx, i);
++count;
}
} else {
for (i = 0; i < num_pages; i++) {
/* ignore completely unchanged pages */
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
break;
if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
bm_test_page_unchanged(b->bm_pages[i])) {
dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
Expand All @@ -1100,11 +1136,11 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i);
continue;
}
atomic_inc(&ctx->in_flight);
bm_page_io_async(ctx, i);
++count;
cond_resched();
}
atomic_inc(&ctx->in_flight);
bm_page_io_async(ctx, i);
++count;
cond_resched();
}

/*
Expand Down
1 change: 1 addition & 0 deletions drivers/block/drbd/drbd_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,7 @@ extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
extern int drbd_bm_read(struct drbd_device *device) __must_hold(local);
extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
extern int drbd_bm_write(struct drbd_device *device) __must_hold(local);
extern void drbd_bm_reset_al_hints(struct drbd_device *device) __must_hold(local);
extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local);
extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
Expand Down

0 comments on commit 27ea1d8

Please sign in to comment.