Merge branch 'block'

James Bottomley authored and committed Dec 23, 2010
2 parents 5f7bb3a + 047b7bd commit ce82ba8

Showing 50 changed files with 1,206 additions and 1,038 deletions.
27 changes: 27 additions & 0 deletions Documentation/cgroups/blkio-controller.txt
@@ -89,6 +89,33 @@ Throttling/Upper Limit policy

 Limits for writes can be put using blkio.write_bps_device file.
 
+Hierarchical Cgroups
+====================
+- Currently none of the IO control policies supports hierarchical groups.
+  But the cgroup interface does allow creation of hierarchical cgroups,
+  and internally IO policies treat them as a flat hierarchy.
+
+  So this patch will allow creation of a cgroup hierarchy, but at the
+  backend everything will be treated as flat. So if somebody creates a
+  hierarchy like the following:
+
+			root
+			/  \
+		     test1 test2
+			|
+		     test3
+
+  CFQ and throttling will practically treat all groups at the same level.
+
+				pivot
+			     /  |  \  \
+			root  test1 test2 test3
+
+  Down the line we can implement hierarchical accounting/control support
+  and also introduce a new cgroup file "use_hierarchy", which will control
+  whether the cgroup hierarchy is viewed as flat or hierarchical by the
+  policy. This is how the memory controller has implemented it as well.
+
 Various user visible config options
 ===================================
 CONFIG_BLK_CGROUP
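
For readers trying the new behaviour: below is a minimal userspace sketch that builds the hierarchy from the documentation above, which becomes possible once the two-level check in blkiocg_create() (next hunk) is gone. The /cgroup mount point and the weight values are assumptions for illustration; blkio.weight is the CFQ group weight file this controller already exposes.

/* Hedged sketch: build the root/test1, root/test2, test1/test3 hierarchy
 * from the documentation above. Assumes the blkio controller is already
 * mounted at /cgroup (path and weights are hypothetical). */
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

static void set_weight(const char *cgroup, const char *weight)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/blkio.weight", cgroup);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		exit(EXIT_FAILURE);
	}
	fprintf(f, "%s\n", weight);
	fclose(f);
}

int main(void)
{
	/* Before this merge, the nested mkdir failed with EPERM. */
	mkdir("/cgroup/test1", 0755);
	mkdir("/cgroup/test2", 0755);
	mkdir("/cgroup/test1/test3", 0755);

	/* CFQ treats the groups as flat: test3 competes with test1
	 * and test2 as a peer, not as a child of test1. */
	set_weight("/cgroup/test1", "500");
	set_weight("/cgroup/test2", "500");
	set_weight("/cgroup/test1/test3", "500");
	return 0;
}
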
4 changes: 0 additions & 4 deletions block/blk-cgroup.c
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 		goto done;
 	}
 
-	/* Currently we do not support hierarchy deeper than two level (0,1) */
-	if (parent != cgroup->top_cgroup)
-		return ERR_PTR(-EPERM);
-
 	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
 	if (!blkcg)
 		return ERR_PTR(-ENOMEM);
10 changes: 5 additions & 5 deletions block/blk-core.c
@@ -33,7 +33,7 @@
 
 #include "blk.h"
 
-EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
@@ -1329,9 +1329,9 @@ static inline void blk_partition_remap(struct bio *bio)
 		bio->bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
 
-		trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-				  bdev->bd_dev,
-				  bio->bi_sector - p->start_sect);
+		trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
+				      bdev->bd_dev,
+				      bio->bi_sector - p->start_sect);
 	}
 }
 
@@ -1500,7 +1500,7 @@ static inline void __generic_make_request(struct bio *bio)
			goto end_io;
 
 		if (old_sector != -1)
-			trace_block_remap(q, bio, old_dev, old_sector);
+			trace_block_bio_remap(q, bio, old_dev, old_sector);
 
 		old_sector = bio->bi_sector;
 		old_dev = bio->bi_bdev->bd_dev;
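
The three blk-core.c hunks above rename the bio-level tracepoint from block_remap to block_bio_remap, so its name lines up with block_rq_remap. Below is a hedged sketch of an out-of-tree probe module for the renamed event; the probe and message names are invented for illustration, and the signature mirrors the event's TP_PROTO on a tree containing this merge.

/* Hedged sketch (not part of this merge): attach a probe to the renamed
 * block_bio_remap tracepoint and log each bio remap. */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <trace/events/block.h>

static void probe_bio_remap(void *ignore, struct request_queue *q,
			    struct bio *bio, dev_t dev, sector_t from)
{
	pr_info("bio remap: %u:%u sector %llu -> %llu\n",
		MAJOR(dev), MINOR(dev),
		(unsigned long long)from,
		(unsigned long long)bio->bi_sector);
}

static int __init bio_remap_probe_init(void)
{
	return register_trace_block_bio_remap(probe_bio_remap, NULL);
}

static void __exit bio_remap_probe_exit(void)
{
	unregister_trace_block_bio_remap(probe_bio_remap, NULL);
	tracepoint_synchronize_unregister();
}

module_init(bio_remap_probe_init);
module_exit(bio_remap_probe_exit);
MODULE_LICENSE("GPL");
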
62 changes: 30 additions & 32 deletions block/cfq-iosched.c
@@ -87,7 +87,6 @@ struct cfq_rb_root {
 	unsigned count;
 	unsigned total_weight;
 	u64 min_vdisktime;
-	struct rb_node *active;
 };
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
 			.count = 0, .min_vdisktime = 0, }
@@ -180,7 +179,6 @@ struct cfq_group {
 	/* group service_tree key */
 	u64 vdisktime;
 	unsigned int weight;
-	bool on_st;
 
 	/* number of cfqq currently on this group */
 	int nr_cfqq;
@@ -563,11 +561,6 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
 	u64 vdisktime = st->min_vdisktime;
 	struct cfq_group *cfqg;
 
-	if (st->active) {
-		cfqg = rb_entry_cfqg(st->active);
-		vdisktime = cfqg->vdisktime;
-	}
-
 	if (st->left) {
 		cfqg = rb_entry_cfqg(st->left);
 		vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
@@ -646,11 +639,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static inline bool cfq_slice_used(struct cfq_queue *cfqq)
 {
 	if (cfq_cfqq_slice_new(cfqq))
-		return 0;
+		return false;
 	if (time_before(jiffies, cfqq->slice_end))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 /*
@@ -869,7 +862,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	struct rb_node *n;
 
 	cfqg->nr_cfqq++;
-	if (cfqg->on_st)
+	if (!RB_EMPTY_NODE(&cfqg->rb_node))
 		return;
 
 	/*
@@ -885,7 +878,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	cfqg->vdisktime = st->min_vdisktime;
 
 	__cfq_group_service_tree_add(st, cfqg);
-	cfqg->on_st = true;
 	st->total_weight += cfqg->weight;
 }
 
@@ -894,9 +886,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
-	if (st->active == &cfqg->rb_node)
-		st->active = NULL;
-
 	BUG_ON(cfqg->nr_cfqq < 1);
 	cfqg->nr_cfqq--;
 
@@ -905,7 +894,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		return;
 
 	cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
-	cfqg->on_st = false;
 	st->total_weight -= cfqg->weight;
 	if (!RB_EMPTY_NODE(&cfqg->rb_node))
 		cfq_rb_erase(&cfqg->rb_node, st);
@@ -1095,7 +1083,7 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
 	if (!atomic_dec_and_test(&cfqg->ref))
 		return;
 	for_each_cfqg_st(cfqg, i, j, st)
-		BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
 	kfree(cfqg);
 }
 
@@ -1687,9 +1675,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	if (cfqq == cfqd->active_queue)
 		cfqd->active_queue = NULL;
 
-	if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
-		cfqd->grp_service_tree.active = NULL;
-
 	if (cfqd->active_cic) {
 		put_io_context(cfqd->active_cic->ioc);
 		cfqd->active_cic = NULL;
@@ -1901,10 +1886,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * in their service tree.
 	 */
 	if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
-		return 1;
+		return true;
 	cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
 			service_tree->count);
-	return 0;
+	return false;
 }
 
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -2116,12 +2101,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	unsigned count;
 	struct cfq_rb_root *st;
 	unsigned group_slice;
-
-	if (!cfqg) {
-		cfqd->serving_prio = IDLE_WORKLOAD;
-		cfqd->workload_expires = jiffies + 1;
-		return;
-	}
+	enum wl_prio_t original_prio = cfqd->serving_prio;
 
 	/* Choose next priority. RT > BE > IDLE */
 	if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -2134,6 +2114,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		return;
 	}
 
+	if (original_prio != cfqd->serving_prio)
+		goto new_workload;
+
 	/*
 	 * For RT and BE, we have to choose also the type
 	 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
@@ -2148,6 +2131,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	if (count && !time_after(jiffies, cfqd->workload_expires))
 		return;
 
+new_workload:
 	/* otherwise select new workload type */
 	cfqd->serving_type =
 		cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
@@ -2199,7 +2183,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
 	if (RB_EMPTY_ROOT(&st->rb))
 		return NULL;
 	cfqg = cfq_rb_first_group(st);
-	st->active = &cfqg->rb_node;
 	update_min_vdisktime(st);
 	return cfqg;
 }
@@ -2293,6 +2276,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 		goto keep_queue;
 	}
 
+	/*
+	 * This is a deep seek queue, but the device is much faster than
+	 * the queue can deliver, don't idle
+	 **/
+	if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
+	    (cfq_cfqq_slice_new(cfqq) ||
+	    (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+		cfq_clear_cfqq_deep(cfqq);
+		cfq_clear_cfqq_idle_window(cfqq);
+	}
+
 	if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
 		cfqq = NULL;
 		goto keep_queue;
@@ -2367,12 +2361,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
 {
 	/* the queue hasn't finished any request, can't estimate */
 	if (cfq_cfqq_slice_new(cfqq))
-		return 1;
+		return true;
 	if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
 		cfqq->slice_end))
-		return 1;
+		return true;
 
-	return 0;
+	return false;
 }
 
 static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -3265,6 +3259,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 	if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
 		return true;
 
+	/* An idle queue should not be idle now for some reason */
+	if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
+		return true;
+
 	if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
 		return false;
 
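
A note on the deep-seek hunk in cfq_select_queue() above: the test "cfqq->slice_end - jiffies > jiffies - cfqq->slice_start" is true exactly when more slice time remains than has been consumed, so a seeky queue only loses its idle window while still in the first half of its slice (or when the slice is new). A standalone restatement follows, with hypothetical tick values and ignoring jiffies wraparound.

/* Standalone restatement of the half-slice test used by the new
 * deep-seek heuristic; the kernel code uses jiffies values here. */
#include <stdbool.h>
#include <stdio.h>

static bool in_first_half(unsigned long slice_start,
			  unsigned long slice_end,
			  unsigned long now)
{
	/* more time left in the slice than already consumed */
	return slice_end - now > now - slice_start;
}

int main(void)
{
	/* slice runs from tick 100 to tick 200 */
	printf("%d\n", in_first_half(100, 200, 120));	/* 1: 80 left, 20 used */
	printf("%d\n", in_first_half(100, 200, 180));	/* 0: 20 left, 80 used */
	return 0;
}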