Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 - fix for a memory leak on certain unplug events
 - a collection of bcache fixes from Kent and Nicolas
 - a few null_blk fixes and updates from Matias
 - marking of functions as static in the stec pci-e driver

* 'for-linus' of git://git.kernel.dk/linux-block:
  null_blk: support submit_queues on use_per_node_hctx
  null_blk: set use_per_node_hctx param to false
  null_blk: corrections to documentation
  null_blk: warning on ignored submit_queues param
  null_blk: refactor init and init errors code paths
  null_blk: documentation
  null_blk: mem garbage on NUMA systems during init
  drivers: block: Mark the functions as static in skd_main.c
  bcache: New writeback PD controller
  bcache: bugfix for race between moving_gc and bucket_invalidate
  bcache: fix for gc and writeback race
  bcache: bugfix - moving_gc now moves only correct buckets
  bcache: fix for gc crashing when no sectors are used
  bcache: Fix heap_peek() macro
  bcache: Fix for can_attach_cache()
  bcache: Fix dirty_data accounting
  bcache: Use uninterruptible sleep in writeback
  bcache: kthread don't set writeback task to INTERUPTIBLE
  block: fix memory leaks on unplugging block device
  bcache: fix sparse non static symbol warning
Linus Torvalds committed Dec 24, 2013
2 parents 70e672f + fc1bc35 commit c5fdd53
Showing 13 changed files with 274 additions and 94 deletions.
72 changes: 72 additions & 0 deletions Documentation/block/null_blk.txt
@@ -0,0 +1,72 @@
Null block device driver
================================================================================

I. Overview

The null block device (/dev/nullb*) is used for benchmarking the various
block-layer implementations. It emulates a block device of X gigabytes in size.
The following instances are possible:

Single-queue block-layer
- Request-based.
- Single submission queue per device.
- Implements IO scheduling algorithms (CFQ, Deadline, noop).
Multi-queue block-layer
- Request-based.
- Configurable submission queues per device.
No block-layer (Known as bio-based)
- Bio-based. IO requests are submitted directly to the device driver.
- Directly accepts bio data structure and returns them.

All of them have a completion queue for each core in the system.

II. Module parameters applicable for all instances:

queue_mode=[0-2]: Default: 2-Multi-queue
Selects which block-layer the module should instantiate with.

0: Bio-based.
1: Single-queue.
2: Multi-queue.

home_node=[0--nr_nodes]: Default: NUMA_NO_NODE
Selects what CPU node the data structures are allocated from.

gb=[Size in GB]: Default: 250GB
The size of the device reported to the system.

bs=[Block size (in bytes)]: Default: 512 bytes
The block size reported to the system.

nr_devices=[Number of devices]: Default: 2
Number of block devices instantiated. They are instantiated as /dev/nullb0,
etc.

irq_mode=[0-2]: Default: 1-Soft-irq
The completion mode used for completing IOs to the block-layer.

0: None.
1: Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
when IOs are issued from another CPU node than the home the device is
connected to.
2: Timer: Waits a specific period (completion_nsec) for each IO before
completion.

completion_nsec=[ns]: Default: 10.000ns
Combined with irq_mode=2 (timer). The time each completion event must wait.

submit_queues=[0..nr_cpus]:
The number of submission queues attached to the device driver. If unset, it
defaults to 1 on single-queue and bio-based instances. For multi-queue,
it is ignored when use_per_node_hctx module parameter is 1.

hw_queue_depth=[0..qdepth]: Default: 64
The hardware queue depth of the device.

III: Multi-queue specific parameters

use_per_node_hctx=[0/1]: Default: 0
0: The number of submit queues are set to the value of the submit_queues
parameter.
1: The multi-queue block layer is instantiated with a hardware dispatch
queue for each CPU node in the system.
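
As an illustration (values are arbitrary; this example is not part of the committed file), a single multi-queue instance with four submission queues and a 4096-byte block size could be requested with:

  modprobe null_blk queue_mode=2 submit_queues=4 bs=4096 nr_devices=1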
13 changes: 13 additions & 0 deletions block/blk-mq-sysfs.c
@@ -335,9 +335,22 @@ static struct kobj_type blk_mq_hw_ktype = {
void blk_mq_unregister_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
int i, j;

queue_for_each_hw_ctx(q, hctx, i) {
hctx_for_each_ctx(hctx, ctx, j) {
kobject_del(&ctx->kobj);
kobject_put(&ctx->kobj);
}
kobject_del(&hctx->kobj);
kobject_put(&hctx->kobj);
}

kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
kobject_del(&q->mq_kobj);
kobject_put(&q->mq_kobj);

kobject_put(&disk_to_dev(disk)->kobj);
}
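
The leak plugged here follows the standard kobject lifetime rule: kobject_del() only removes the sysfs entry, while the object itself is freed only when the final kobject_put() drops the reference taken at registration and the ktype's release() runs. A minimal sketch of the del/put pairing (illustrative, using the stock kobject API; not code from this commit):

#include <linux/kobject.h>
#include <linux/slab.h>

static void demo_release(struct kobject *kobj)
{
	kfree(kobj);		/* runs only once the refcount hits zero */
}

/* paired with kobject_init_and_add(kobj, &demo_ktype, parent, "name") */
static struct kobj_type demo_ktype = {
	.release	= demo_release,
};

static void demo_unregister(struct kobject *kobj)
{
	kobject_del(kobj);	/* unlink from sysfs */
	kobject_put(kobj);	/* drop the registration reference */
}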
102 changes: 76 additions & 26 deletions drivers/block/null_blk.c
@@ -1,4 +1,5 @@
#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
@@ -65,7 +66,7 @@ enum {
NULL_Q_MQ = 2,
};

static int submit_queues = 1;
static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

@@ -101,9 +102,9 @@ static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = true;
static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true");
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");

static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
@@ -346,25 +347,62 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)

static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
{
return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
hctx_index);
int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes);
int tip = (reg->nr_hw_queues % nr_online_nodes);
int node = 0, i, n;

/*
* Split submit queues evenly wrt to the number of nodes. If uneven,
* fill the first buckets with one extra, until the rest is filled with
* no extra.
*/
for (i = 0, n = 1; i < hctx_index; i++, n++) {
if (n % b_size == 0) {
n = 0;
node++;

tip--;
if (!tip)
b_size = reg->nr_hw_queues / nr_online_nodes;
}
}

/*
* A node might not be online, therefore map the relative node id to the
* real node id.
*/
for_each_online_node(n) {
if (!node)
break;
node--;
}

return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n);
}
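
/*
 * Worked example: with reg->nr_hw_queues = 6 spread over 4 online nodes,
 * b_size = DIV_ROUND_UP(6, 4) = 2 and tip = 2, so hctx indexes 0-1 map to
 * node 0, 2-3 to node 1, 4 to node 2 and 5 to node 3: the first "tip"
 * nodes take one extra queue each.
 */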

static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
{
kfree(hctx);
}

static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
BUG_ON(!nullb);
BUG_ON(!nq);

init_waitqueue_head(&nq->wait);
nq->queue_depth = nullb->queue_depth;
}

static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int index)
{
struct nullb *nullb = data;
struct nullb_queue *nq = &nullb->queues[index];

init_waitqueue_head(&nq->wait);
nq->queue_depth = nullb->queue_depth;
nullb->nr_queues++;
hctx->driver_data = nq;
null_init_queue(nullb, nq);
nullb->nr_queues++;

return 0;
}
@@ -417,13 +455,13 @@ static int setup_commands(struct nullb_queue *nq)

nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
if (!nq->cmds)
return 1;
return -ENOMEM;

tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
if (!nq->tag_map) {
kfree(nq->cmds);
return 1;
return -ENOMEM;
}

for (i = 0; i < nq->queue_depth; i++) {
@@ -454,33 +492,37 @@ static void cleanup_queues(struct nullb *nullb)

static int setup_queues(struct nullb *nullb)
{
struct nullb_queue *nq;
int i;

nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL);
nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
GFP_KERNEL);
if (!nullb->queues)
return 1;
return -ENOMEM;

nullb->nr_queues = 0;
nullb->queue_depth = hw_queue_depth;

if (queue_mode == NULL_Q_MQ)
return 0;
return 0;
}

static int init_driver_queues(struct nullb *nullb)
{
struct nullb_queue *nq;
int i, ret = 0;

for (i = 0; i < submit_queues; i++) {
nq = &nullb->queues[i];
init_waitqueue_head(&nq->wait);
nq->queue_depth = hw_queue_depth;
if (setup_commands(nq))
break;

null_init_queue(nullb, nq);

ret = setup_commands(nq);
if (ret)
goto err_queue;
nullb->nr_queues++;
}

if (i == submit_queues)
return 0;

return 0;
err_queue:
cleanup_queues(nullb);
return 1;
return ret;
}

static int null_add_dev(void)
@@ -518,11 +560,13 @@ static int null_add_dev(void)
} else if (queue_mode == NULL_Q_BIO) {
nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
blk_queue_make_request(nullb->q, null_queue_bio);
init_driver_queues(nullb);
} else {
nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
if (nullb->q)
blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
init_driver_queues(nullb);
}

if (!nullb->q)
@@ -579,7 +623,13 @@ static int __init null_init(void)
}
#endif

if (submit_queues > nr_cpu_ids)
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
pr_warn("null_blk: submit_queues param is set to %u.",
nr_online_nodes);
submit_queues = nr_online_nodes;
}
} else if (submit_queues > nr_cpu_ids)
submit_queues = nr_cpu_ids;
else if (!submit_queues)
submit_queues = 1;
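
In the per-node case this forces at least one submission queue per node: loading with use_per_node_hctx=1 and submit_queues=1 on a two-node machine, for example, triggers the warning above and runs with two queues.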
4 changes: 2 additions & 2 deletions drivers/block/skd_main.c
@@ -5269,7 +5269,7 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
}
}

const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
{
switch (state) {
case SKD_MSG_STATE_IDLE:
@@ -5281,7 +5281,7 @@ const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
}
}

const char *skd_skreq_state_to_str(enum skd_req_state state)
static const char *skd_skreq_state_to_str(enum skd_req_state state)
{
switch (state) {
case SKD_REQ_STATE_IDLE:
2 changes: 2 additions & 0 deletions drivers/md/bcache/alloc.c
@@ -421,9 +421,11 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)

if (watermark <= WATERMARK_METADATA) {
SET_GC_MARK(b, GC_MARK_METADATA);
SET_GC_MOVE(b, 0);
b->prio = BTREE_PRIO;
} else {
SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
SET_GC_MOVE(b, 0);
b->prio = INITIAL_PRIO;
}
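
/*
 * Clearing GC_MOVE on every fresh allocation keeps moving GC from still
 * treating a just-reallocated bucket as a move candidate; this is the
 * moving_gc vs. bucket_invalidate race addressed in this series.
 */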

12 changes: 6 additions & 6 deletions drivers/md/bcache/bcache.h
@@ -197,7 +197,7 @@ struct bucket {
uint8_t disk_gen;
uint8_t last_gc; /* Most out of date gen in the btree */
uint8_t gc_gen;
uint16_t gc_mark;
uint16_t gc_mark; /* Bitfield used by GC. See below for field */
};

@@ -209,7 +209,8 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE 0
#define GC_MARK_DIRTY 1
#define GC_MARK_METADATA 2
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14);
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13);
BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
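
/* Resulting gc_mark layout: bits 0-1 GC_MARK, bits 2-14 GC_SECTORS_USED,
 * bit 15 GC_MOVE. */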

#include "journal.h"
#include "stats.h"
@@ -372,14 +373,14 @@ struct cached_dev {
unsigned char writeback_percent;
unsigned writeback_delay;

int writeback_rate_change;
int64_t writeback_rate_derivative;
uint64_t writeback_rate_target;
int64_t writeback_rate_proportional;
int64_t writeback_rate_derivative;
int64_t writeback_rate_change;

unsigned writeback_rate_update_seconds;
unsigned writeback_rate_d_term;
unsigned writeback_rate_p_term_inverse;
unsigned writeback_rate_d_smooth;
};
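
/*
 * Illustrative sketch, not code from this commit: the renamed fields form a
 * proportional-derivative (PD) controller. Each update turns the distance
 * from the dirty-data target into a proportional term and the change in
 * that error into a derivative term, and their sum becomes
 * writeback_rate_change. The authoritative arithmetic is in bcache's
 * __update_writeback_rate(); div_s64() is used here only as the usual
 * kernel idiom for signed 64-bit division.
 */
static int64_t pd_rate_change_sketch(struct cached_dev *dc,
				     int64_t dirty, int64_t last_error)
{
	int64_t error = dirty - (int64_t) dc->writeback_rate_target;

	dc->writeback_rate_proportional =
		div_s64(error, dc->writeback_rate_p_term_inverse);
	dc->writeback_rate_derivative =
		div_s64(error - last_error, dc->writeback_rate_d_term);
	dc->writeback_rate_change = dc->writeback_rate_proportional +
		dc->writeback_rate_derivative;
	return dc->writeback_rate_change;
}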

enum alloc_watermarks {
@@ -445,7 +446,6 @@ struct cache {
* call prio_write() to keep gens from wrapping.
*/
uint8_t need_save_prio;
unsigned gc_move_threshold;

/*
* If nonzero, we know we aren't going to find any buckets to invalidate