Skip to content

Commit

Permalink
dm log userspace: allow mark requests to piggyback on flush requests
Browse files Browse the repository at this point in the history
In the cluster evironment, cluster write has poor performance because
userspace_flush() has to contact a userspace program (cmirrord) for
clear/mark/flush requests.  But both mark and flush requests require
cmirrord to communicate the message to all the cluster nodes for each
flush call.  This behaviour is really slow.

To address this we now merge mark and flush requests together to reduce
the kernel-userspace-kernel time.  We allow a new directive,
"integrated_flush" that can be used to instruct the kernel log code to
combine flush and mark requests when directed by userspace.  If not
directed by userspace (due to an older version of the userspace code
perhaps), the kernel will function as it did previously - preserving
backwards compatibility.  Additionally, flush requests are performed
lazily when only clear requests exist.

Signed-off-by: Dongmao Zhang <dmzhang@suse.com>
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
  • Loading branch information
Dongmao Zhang authored and Mike Snitzer committed Jan 22, 2014
1 parent fca0284 commit 5066a4d
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 50 deletions.
206 changes: 159 additions & 47 deletions drivers/md/dm-log-userspace-base.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
#include <linux/device-mapper.h>
#include <linux/dm-log-userspace.h>
#include <linux/module.h>
#include <linux/workqueue.h>

#include "dm-log-userspace-transfer.h"

#define DM_LOG_USERSPACE_VSN "1.1.0"
#define DM_LOG_USERSPACE_VSN "1.3.0"

struct flush_entry {
int type;
Expand Down Expand Up @@ -58,6 +59,18 @@ struct log_c {
spinlock_t flush_lock;
struct list_head mark_list;
struct list_head clear_list;

/*
* Workqueue for flush of clear region requests.
*/
struct workqueue_struct *dmlog_wq;
struct delayed_work flush_log_work;
atomic_t sched_flush;

/*
* Combine userspace flush and mark requests for efficiency.
*/
uint32_t integrated_flush;
};

static mempool_t *flush_entry_pool;
Expand Down Expand Up @@ -122,6 +135,9 @@ static int build_constructor_string(struct dm_target *ti,

*ctr_str = NULL;

/*
* Determine overall size of the string.
*/
for (i = 0, str_size = 0; i < argc; i++)
str_size += strlen(argv[i]) + 1; /* +1 for space between args */

Expand All @@ -141,18 +157,39 @@ static int build_constructor_string(struct dm_target *ti,
return str_size;
}

static void do_flush(struct work_struct *work)
{
int r;
struct log_c *lc = container_of(work, struct log_c, flush_log_work.work);

atomic_set(&lc->sched_flush, 0);

r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL);

if (r)
dm_table_event(lc->ti->table);
}

/*
* userspace_ctr
*
* argv contains:
* <UUID> <other args>
* Where 'other args' is the userspace implementation specific log
* arguments. An example might be:
* <UUID> clustered-disk <arg count> <log dev> <region_size> [[no]sync]
* <UUID> [integrated_flush] <other args>
* Where 'other args' are the userspace implementation-specific log
* arguments.
*
* Example:
* <UUID> [integrated_flush] clustered-disk <arg count> <log dev>
* <region_size> [[no]sync]
*
* This module strips off the <UUID> and uses it for identification
* purposes when communicating with userspace about a log.
*
* So, this module will strip off the <UUID> for identification purposes
* when communicating with userspace about a log; but will pass on everything
* else.
* If integrated_flush is defined, the kernel combines flush
* and mark requests.
*
* The rest of the line, beginning with 'clustered-disk', is passed
* to the userspace ctr function.
*/
static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
unsigned argc, char **argv)
Expand Down Expand Up @@ -188,12 +225,22 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
return -EINVAL;
}

lc->usr_argc = argc;

strncpy(lc->uuid, argv[0], DM_UUID_LEN);
argc--;
argv++;
spin_lock_init(&lc->flush_lock);
INIT_LIST_HEAD(&lc->mark_list);
INIT_LIST_HEAD(&lc->clear_list);

str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
if (!strcasecmp(argv[0], "integrated_flush")) {
lc->integrated_flush = 1;
argc--;
argv++;
}

str_size = build_constructor_string(ti, argc, argv, &ctr_str);
if (str_size < 0) {
kfree(lc);
return str_size;
Expand Down Expand Up @@ -246,14 +293,26 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
DMERR("Failed to register %s with device-mapper",
devices_rdata);
}

if (lc->integrated_flush) {
lc->dmlog_wq = alloc_workqueue("dmlogd", WQ_MEM_RECLAIM, 0);
if (!lc->dmlog_wq) {
DMERR("couldn't start dmlogd");
r = -ENOMEM;
goto out;
}

INIT_DELAYED_WORK(&lc->flush_log_work, do_flush);
atomic_set(&lc->sched_flush, 0);
}

out:
kfree(devices_rdata);
if (r) {
kfree(lc);
kfree(ctr_str);
} else {
lc->usr_argv_str = ctr_str;
lc->usr_argc = argc;
log->context = lc;
}

Expand All @@ -264,9 +323,16 @@ static void userspace_dtr(struct dm_dirty_log *log)
{
struct log_c *lc = log->context;

if (lc->integrated_flush) {
/* flush workqueue */
if (atomic_read(&lc->sched_flush))
flush_delayed_work(&lc->flush_log_work);

destroy_workqueue(lc->dmlog_wq);
}

(void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
NULL, 0,
NULL, NULL);
NULL, 0, NULL, NULL);

if (lc->log_dev)
dm_put_device(lc->ti, lc->log_dev);
Expand All @@ -283,8 +349,7 @@ static int userspace_presuspend(struct dm_dirty_log *log)
struct log_c *lc = log->context;

r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
NULL, 0,
NULL, NULL);
NULL, 0, NULL, NULL);

return r;
}
Expand All @@ -294,9 +359,14 @@ static int userspace_postsuspend(struct dm_dirty_log *log)
int r;
struct log_c *lc = log->context;

/*
* Run planned flush earlier.
*/
if (lc->integrated_flush && atomic_read(&lc->sched_flush))
flush_delayed_work(&lc->flush_log_work);

r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
NULL, 0,
NULL, NULL);
NULL, 0, NULL, NULL);

return r;
}
Expand All @@ -308,8 +378,7 @@ static int userspace_resume(struct dm_dirty_log *log)

lc->in_sync_hint = 0;
r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
NULL, 0,
NULL, NULL);
NULL, 0, NULL, NULL);

return r;
}
Expand Down Expand Up @@ -405,7 +474,8 @@ static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
return r;
}

static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
static int flush_by_group(struct log_c *lc, struct list_head *flush_list,
int flush_with_payload)
{
int r = 0;
int count;
Expand All @@ -431,15 +501,29 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
break;
}

r = userspace_do_request(lc, lc->uuid, type,
(char *)(group),
count * sizeof(uint64_t),
NULL, NULL);
if (r) {
/* Group send failed. Attempt one-by-one. */
list_splice_init(&tmp_list, flush_list);
r = flush_one_by_one(lc, flush_list);
break;
if (flush_with_payload) {
r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
(char *)(group),
count * sizeof(uint64_t),
NULL, NULL);
/*
* Integrated flush failed.
*/
if (r)
break;
} else {
r = userspace_do_request(lc, lc->uuid, type,
(char *)(group),
count * sizeof(uint64_t),
NULL, NULL);
if (r) {
/*
* Group send failed. Attempt one-by-one.
*/
list_splice_init(&tmp_list, flush_list);
r = flush_one_by_one(lc, flush_list);
break;
}
}
}

Expand Down Expand Up @@ -476,30 +560,60 @@ static int userspace_flush(struct dm_dirty_log *log)
struct log_c *lc = log->context;
LIST_HEAD(mark_list);
LIST_HEAD(clear_list);
int mark_list_is_empty;
int clear_list_is_empty;
struct flush_entry *fe, *tmp_fe;

spin_lock_irqsave(&lc->flush_lock, flags);
list_splice_init(&lc->mark_list, &mark_list);
list_splice_init(&lc->clear_list, &clear_list);
spin_unlock_irqrestore(&lc->flush_lock, flags);

if (list_empty(&mark_list) && list_empty(&clear_list))
mark_list_is_empty = list_empty(&mark_list);
clear_list_is_empty = list_empty(&clear_list);

if (mark_list_is_empty && clear_list_is_empty)
return 0;

r = flush_by_group(lc, &mark_list);
r = flush_by_group(lc, &clear_list, 0);
if (r)
goto fail;
goto out;

r = flush_by_group(lc, &clear_list);
if (!lc->integrated_flush) {
r = flush_by_group(lc, &mark_list, 0);
if (r)
goto out;
r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
NULL, 0, NULL, NULL);
goto out;
}

/*
* Send integrated flush request with mark_list as payload.
*/
r = flush_by_group(lc, &mark_list, 1);
if (r)
goto fail;
goto out;

r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
NULL, 0, NULL, NULL);
if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) {
/*
* When there are only clear region requests,
* we schedule a flush in the future.
*/
queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ);
atomic_set(&lc->sched_flush, 1);
} else {
/*
* Cancel pending flush because we
* have already flushed in mark_region.
*/
cancel_delayed_work(&lc->flush_log_work);
atomic_set(&lc->sched_flush, 0);
}

fail:
out:
/*
* We can safely remove these entries, even if failure.
* We can safely remove these entries, even after failure.
* Calling code will receive an error and will know that
* the log facility has failed.
*/
Expand Down Expand Up @@ -603,8 +717,7 @@ static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)

rdata_size = sizeof(pkg);
r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
NULL, 0,
(char *)&pkg, &rdata_size);
NULL, 0, (char *)&pkg, &rdata_size);

*region = pkg.r;
return (r) ? r : (int)pkg.i;
Expand All @@ -630,8 +743,7 @@ static void userspace_set_region_sync(struct dm_dirty_log *log,
pkg.i = (int64_t)in_sync;

r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
(char *)&pkg, sizeof(pkg),
NULL, NULL);
(char *)&pkg, sizeof(pkg), NULL, NULL);

/*
* It would be nice to be able to report failures.
Expand All @@ -657,8 +769,7 @@ static region_t userspace_get_sync_count(struct dm_dirty_log *log)

rdata_size = sizeof(sync_count);
r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
NULL, 0,
(char *)&sync_count, &rdata_size);
NULL, 0, (char *)&sync_count, &rdata_size);

if (r)
return 0;
Expand All @@ -685,8 +796,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
switch (status_type) {
case STATUSTYPE_INFO:
r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
NULL, 0,
result, &sz);
NULL, 0, result, &sz);

if (r) {
sz = 0;
Expand All @@ -699,8 +809,10 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
BUG_ON(!table_args); /* There will always be a ' ' */
table_args++;

DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
lc->uuid, table_args);
DMEMIT("%s %u %s ", log->type->name, lc->usr_argc, lc->uuid);
if (lc->integrated_flush)
DMEMIT("integrated_flush ");
DMEMIT("%s ", table_args);
break;
}
return (r) ? 0 : (int)sz;
Expand Down
Loading

0 comments on commit 5066a4d

Please sign in to comment.