Skip to content

Commit

Permalink
Merge tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Add a dm-crypt optional "high_priority" flag that enables the crypt
   workqueues to use WQ_HIGHPRI.

 - Export dm-crypt workqueues via sysfs (by enabling WQ_SYSFS) to allow
   for improved visibility and controls over IO and crypt workqueues.

 - Fix dm-crypt to no longer constrain max_segment_size to PAGE_SIZE.
   This limit isn't needed given that the block core provides late bio
   splitting if bio exceeds underlying limits (e.g. max_segment_size).

 - Fix dm-crypt crypt_queue's use of WQ_UNBOUND to not use
   WQ_CPU_INTENSIVE because it is meaningless with WQ_UNBOUND.

 - Fix various issues with dm-delay target (ranging from a resource
   teardown fix, a fix for hung task when using kthread mode, and other
   improvements that followed from code inspection).

* tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm-delay: remove timer_lock
  dm-delay: change locking to avoid contention
  dm-delay: fix max_delay calculations
  dm-delay: fix hung task introduced by kthread mode
  dm-delay: fix workqueue delay_timer race
  dm-crypt: don't set WQ_CPU_INTENSIVE for WQ_UNBOUND crypt_queue
  dm: use queue_limits_set
  dm-crypt: stop constraining max_segment_size to PAGE_SIZE
  dm-crypt: export sysfs of all workqueues
  dm-crypt: add the optional "high_priority" flag
  • Loading branch information
Linus Torvalds committed May 15, 2024
2 parents 113d1dd + 8b21ac8 commit 4f8b6f2
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 68 deletions.
5 changes: 5 additions & 0 deletions Documentation/admin-guide/device-mapper/dm-crypt.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ same_cpu_crypt
The default is to use an unbound workqueue so that encryption work
is automatically balanced between available CPUs.

high_priority
Set dm-crypt workqueues and the writer thread to high priority. This
improves throughput and latency of dm-crypt while degrading general
responsiveness of the system.

submit_from_crypt_cpus
Disable offloading writes to a separate thread after encryption.
There are some situations where offloading write bios from the
Expand Down
73 changes: 49 additions & 24 deletions drivers/md/dm-crypt.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@

#define DM_MSG_PREFIX "crypt"

static DEFINE_IDA(workqueue_ida);

/*
* context holding the current state of a multi-part conversion
*/
Expand Down Expand Up @@ -137,9 +139,9 @@ struct iv_elephant_private {
* and encrypts / decrypts at the same time.
*/
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE,
DM_CRYPT_WRITE_INLINE };
DM_CRYPT_SAME_CPU, DM_CRYPT_HIGH_PRIORITY,
DM_CRYPT_NO_OFFLOAD, DM_CRYPT_NO_READ_WORKQUEUE,
DM_CRYPT_NO_WRITE_WORKQUEUE, DM_CRYPT_WRITE_INLINE };

enum cipher_flags {
CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cipher */
Expand Down Expand Up @@ -184,6 +186,7 @@ struct crypt_config {
struct crypto_aead **tfms_aead;
} cipher_tfm;
unsigned int tfms_count;
int workqueue_id;
unsigned long cipher_flags;

/*
Expand Down Expand Up @@ -1653,8 +1656,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);

/*
* Generate a new unfragmented bio with the given size
* This should never violate the device limitations (but only because
* max_segment_size is being constrained to PAGE_SIZE).
* This should never violate the device limitations (but if it did then block
* core should split the bio as needed).
*
* This function may be called concurrently. If we allocate from the mempool
* concurrently, there is a possibility of deadlock. For example, if we have
Expand Down Expand Up @@ -2771,6 +2774,9 @@ static void crypt_dtr(struct dm_target *ti)
if (cc->crypt_queue)
destroy_workqueue(cc->crypt_queue);

if (cc->workqueue_id)
ida_free(&workqueue_ida, cc->workqueue_id);

crypt_free_tfms(cc);

bioset_exit(&cc->bs);
Expand Down Expand Up @@ -3134,7 +3140,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
struct crypt_config *cc = ti->private;
struct dm_arg_set as;
static const struct dm_arg _args[] = {
{0, 8, "Invalid number of feature args"},
{0, 9, "Invalid number of feature args"},
};
unsigned int opt_params, val;
const char *opt_string, *sval;
Expand All @@ -3161,6 +3167,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar

else if (!strcasecmp(opt_string, "same_cpu_crypt"))
set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
else if (!strcasecmp(opt_string, "high_priority"))
set_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);

else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
Expand Down Expand Up @@ -3230,8 +3238,9 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct crypt_config *cc;
const char *devname = dm_table_device_name(ti->table);
int key_size;
int key_size, wq_id;
unsigned int align_mask;
unsigned int common_wq_flags;
unsigned long long tmpll;
int ret;
size_t iv_size_padding, additional_req_size;
Expand Down Expand Up @@ -3398,20 +3407,38 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->tag_pool_max_sectors <<= cc->sector_shift;
}

wq_id = ida_alloc_min(&workqueue_ida, 1, GFP_KERNEL);
if (wq_id < 0) {
ti->error = "Couldn't get workqueue id";
ret = wq_id;
goto bad;
}
cc->workqueue_id = wq_id;

ret = -ENOMEM;
cc->io_queue = alloc_workqueue("kcryptd_io/%s", WQ_MEM_RECLAIM, 1, devname);
common_wq_flags = WQ_MEM_RECLAIM | WQ_SYSFS;
if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
common_wq_flags |= WQ_HIGHPRI;

cc->io_queue = alloc_workqueue("kcryptd_io-%s-%d", common_wq_flags, 1, devname, wq_id);
if (!cc->io_queue) {
ti->error = "Couldn't create kcryptd io queue";
goto bad;
}

if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
1, devname);
else
cc->crypt_queue = alloc_workqueue("kcryptd/%s",
WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
num_online_cpus(), devname);
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) {
cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
common_wq_flags | WQ_CPU_INTENSIVE,
1, devname, wq_id);
} else {
/*
* While crypt_queue is certainly CPU intensive, the use of
* WQ_CPU_INTENSIVE is meaningless with WQ_UNBOUND.
*/
cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
common_wq_flags | WQ_UNBOUND,
num_online_cpus(), devname, wq_id);
}
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
goto bad;
Expand All @@ -3427,6 +3454,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->error = "Couldn't spawn write thread";
goto bad;
}
if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
set_user_nice(cc->write_thread, MIN_NICE);

ti->num_flush_bios = 1;
ti->limit_swap_bios = true;
Expand Down Expand Up @@ -3547,6 +3576,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,

num_feature_args += !!ti->num_discard_bios;
num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
Expand All @@ -3560,6 +3590,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT(" allow_discards");
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
DMEMIT(" same_cpu_crypt");
if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
DMEMIT(" high_priority");
if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
DMEMIT(" submit_from_crypt_cpus");
if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags))
Expand All @@ -3579,6 +3611,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT_TARGET_NAME_VERSION(ti->type);
DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n');
DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n');
DMEMIT(",high_priority=%c", test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags) ? 'y' : 'n');
DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ?
'y' : 'n');
DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ?
Expand Down Expand Up @@ -3688,14 +3721,6 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct crypt_config *cc = ti->private;

/*
* Unfortunate constraint that is required to avoid the potential
* for exceeding underlying device's max_segments limits -- due to
* crypt_alloc_buffer() possibly allocating pages for the encryption
* bio that are not as physically contiguous as the original bio.
*/
limits->max_segment_size = PAGE_SIZE;

limits->logical_block_size =
max_t(unsigned int, limits->logical_block_size, cc->sector_size);
limits->physical_block_size =
Expand All @@ -3706,7 +3731,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)

static struct target_type crypt_target = {
.name = "crypt",
.version = {1, 25, 0},
.version = {1, 26, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
Expand Down
60 changes: 31 additions & 29 deletions drivers/md/dm-delay.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ struct delay_class {

struct delay_c {
struct timer_list delay_timer;
struct mutex timer_lock;
struct mutex process_bios_lock; /* hold while removing bios to be processed from list */
spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */
struct workqueue_struct *kdelayd_wq;
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
Expand All @@ -49,8 +50,6 @@ struct dm_delay_info {
unsigned long expires;
};

static DEFINE_MUTEX(delayed_bios_lock);

static void handle_delayed_timer(struct timer_list *t)
{
struct delay_c *dc = from_timer(dc, t, delay_timer);
Expand All @@ -60,12 +59,7 @@ static void handle_delayed_timer(struct timer_list *t)

static void queue_timeout(struct delay_c *dc, unsigned long expires)
{
mutex_lock(&dc->timer_lock);

if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
mod_timer(&dc->delay_timer, expires);

mutex_unlock(&dc->timer_lock);
timer_reduce(&dc->delay_timer, expires);
}

static inline bool delay_is_fast(struct delay_c *dc)
Expand All @@ -89,12 +83,16 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
struct bio_list flush_bio_list;
LIST_HEAD(local_list);
unsigned long next_expires = 0;
bool start_timer = false;
bio_list_init(&flush_bio_list);

mutex_lock(&delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
mutex_lock(&dc->process_bios_lock);
spin_lock(&dc->delayed_bios_lock);
list_replace_init(&dc->delayed_bios, &local_list);
spin_unlock(&dc->delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &local_list, list) {
cond_resched();
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
struct bio *bio = dm_bio_from_per_bio_data(delayed,
Expand All @@ -114,7 +112,10 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
}
}
}
mutex_unlock(&delayed_bios_lock);
spin_lock(&dc->delayed_bios_lock);
list_splice(&local_list, &dc->delayed_bios);
spin_unlock(&dc->delayed_bios_lock);
mutex_unlock(&dc->process_bios_lock);

if (start_timer)
queue_timeout(dc, next_expires);
Expand All @@ -128,13 +129,13 @@ static int flush_worker_fn(void *data)

while (!kthread_should_stop()) {
flush_delayed_bios(dc, false);
mutex_lock(&delayed_bios_lock);
spin_lock(&dc->delayed_bios_lock);
if (unlikely(list_empty(&dc->delayed_bios))) {
set_current_state(TASK_INTERRUPTIBLE);
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);
schedule();
} else {
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);
cond_resched();
}
}
Expand All @@ -154,8 +155,10 @@ static void delay_dtr(struct dm_target *ti)
{
struct delay_c *dc = ti->private;

if (dc->kdelayd_wq)
if (dc->kdelayd_wq) {
timer_shutdown_sync(&dc->delay_timer);
destroy_workqueue(dc->kdelayd_wq);
}

if (dc->read.dev)
dm_put_device(ti, dc->read.dev);
Expand All @@ -166,7 +169,7 @@ static void delay_dtr(struct dm_target *ti)
if (dc->worker)
kthread_stop(dc->worker);

mutex_destroy(&dc->timer_lock);
mutex_destroy(&dc->process_bios_lock);

kfree(dc);
}
Expand Down Expand Up @@ -224,7 +227,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)

ti->private = dc;
INIT_LIST_HEAD(&dc->delayed_bios);
mutex_init(&dc->timer_lock);
mutex_init(&dc->process_bios_lock);
spin_lock_init(&dc->delayed_bios_lock);
dc->may_delay = true;
dc->argc = argc;

Expand All @@ -240,19 +244,18 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ret = delay_class_ctr(ti, &dc->flush, argv);
if (ret)
goto bad;
max_delay = max(max_delay, dc->write.delay);
max_delay = max(max_delay, dc->flush.delay);
goto out;
}

ret = delay_class_ctr(ti, &dc->write, argv + 3);
if (ret)
goto bad;
max_delay = max(max_delay, dc->write.delay);

if (argc == 6) {
ret = delay_class_ctr(ti, &dc->flush, argv + 3);
if (ret)
goto bad;
max_delay = max(max_delay, dc->flush.delay);
goto out;
}

Expand All @@ -267,8 +270,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
* In case of small requested delays, use kthread instead of
* timers and workqueue to achieve better latency.
*/
dc->worker = kthread_create(&flush_worker_fn, dc,
"dm-delay-flush-worker");
dc->worker = kthread_run(&flush_worker_fn, dc, "dm-delay-flush-worker");
if (IS_ERR(dc->worker)) {
ret = PTR_ERR(dc->worker);
dc->worker = NULL;
Expand Down Expand Up @@ -309,14 +311,14 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
delayed->context = dc;
delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);

mutex_lock(&delayed_bios_lock);
spin_lock(&dc->delayed_bios_lock);
if (unlikely(!dc->may_delay)) {
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);
return DM_MAPIO_REMAPPED;
}
c->ops++;
list_add_tail(&delayed->list, &dc->delayed_bios);
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);

if (delay_is_fast(dc))
wake_up_process(dc->worker);
Expand All @@ -330,12 +332,12 @@ static void delay_presuspend(struct dm_target *ti)
{
struct delay_c *dc = ti->private;

mutex_lock(&delayed_bios_lock);
spin_lock(&dc->delayed_bios_lock);
dc->may_delay = false;
mutex_unlock(&delayed_bios_lock);
spin_unlock(&dc->delayed_bios_lock);

if (!delay_is_fast(dc))
del_timer_sync(&dc->delay_timer);
timer_delete(&dc->delay_timer);
flush_delayed_bios(dc, true);
}

Expand Down
Loading

0 comments on commit 4f8b6f2

Please sign in to comment.