Skip to content

Commit

Permalink
Merge branch 'for-2.6.40/core' of git://git.kernel.dk/linux-2.6-block
Browse files Browse the repository at this point in the history
* 'for-2.6.40/core' of git://git.kernel.dk/linux-2.6-block: (40 commits)
  cfq-iosched: free cic_index if cfqd allocation fails
  cfq-iosched: remove unused 'group_changed' in cfq_service_tree_add()
  cfq-iosched: reduce bit operations in cfq_choose_req()
  cfq-iosched: algebraic simplification in cfq_prio_to_maxrq()
  blk-cgroup: Initialize ioc->cgroup_changed at ioc creation time
  block: move bd_set_size() above rescan_partitions() in __blkdev_get()
  block: call elv_bio_merged() when merged
  cfq-iosched: Make IO merge related stats per cpu
  cfq-iosched: Fix a memory leak of per cpu stats for root group
  backing-dev: Kill set but not used var in  bdi_debug_stats_show()
  block: get rid of on-stack plugging debug checks
  blk-throttle: Make no throttling rule group processing lockless
  blk-cgroup: Make cgroup stat reset path blkg->lock free for dispatch stats
  blk-cgroup: Make 64bit per cpu stats safe on 32bit arch
  blk-throttle: Make dispatch stats per cpu
  blk-throttle: Free up a group only after one rcu grace period
  blk-throttle: Use helper function to add root throtl group to lists
  blk-throttle: Introduce a helper function to fill in device details
  blk-throttle: Dynamically allocate root group
  blk-cgroup: Allow sleeping while dynamically allocating a group
  ...
  • Loading branch information
Linus Torvalds committed May 25, 2011
2 parents 22e12bb + 1547010 commit 798ce8f
Show file tree
Hide file tree
Showing 25 changed files with 785 additions and 317 deletions.
64 changes: 64 additions & 0 deletions Documentation/ABI/testing/sysfs-block
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,67 @@ Description:
with the previous I/O request are enabled. When set to 2,
all merge tries are disabled. The default value is 0 -
which enables all types of merge tries.

What: /sys/block/<disk>/discard_alignment
Date: May 2011
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Devices that support discard functionality may
internally allocate space in units that are bigger than
the exported logical block size. The discard_alignment
parameter indicates how many bytes the beginning of the
device is offset from the internal allocation unit's
natural alignment.

What: /sys/block/<disk>/<partition>/discard_alignment
Date: May 2011
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Devices that support discard functionality may
internally allocate space in units that are bigger than
the exported logical block size. The discard_alignment
parameter indicates how many bytes the beginning of the
partition is offset from the internal allocation unit's
natural alignment.

What: /sys/block/<disk>/queue/discard_granularity
Date: May 2011
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Devices that support discard functionality may
internally allocate space using units that are bigger
than the logical block size. The discard_granularity
parameter indicates the size of the internal allocation
unit in bytes if reported by the device. Otherwise the
discard_granularity will be set to match the device's
physical block size. A discard_granularity of 0 means
that the device does not support discard functionality.

What: /sys/block/<disk>/queue/discard_max_bytes
Date: May 2011
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Devices that support discard functionality may have
internal limits on the number of bytes that can be
trimmed or unmapped in a single operation. Some storage
protocols also have inherent limits on the number of
blocks that can be described in a single command. The
discard_max_bytes parameter is set by the device driver
to the maximum number of bytes that can be discarded in
a single operation. Discard requests issued to the
device must not exceed this limit. A discard_max_bytes
value of 0 means that the device does not support
discard functionality.

What: /sys/block/<disk>/queue/discard_zeroes_data
Date: May 2011
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Devices that support discard functionality may return
stale or random data when a previously discarded block
is read back. This can cause problems if the filesystem
expects discarded blocks to be explicitly cleared. If a
device reports that it deterministically returns zeroes
when a discarded area is read the discard_zeroes_data
parameter will be set to one. Otherwise it will be 0 and
the result of reading a discarded area is undefined.
200 changes: 162 additions & 38 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -385,25 +385,40 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,

spin_lock_irqsave(&blkg->stats_lock, flags);
blkg->stats.time += time;
#ifdef CONFIG_DEBUG_BLK_CGROUP
blkg->stats.unaccounted_time += unaccounted_time;
#endif
spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);

/*
* should be called under rcu read lock or queue lock to make sure blkg pointer
* is valid.
*/
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
uint64_t bytes, bool direction, bool sync)
{
struct blkio_group_stats *stats;
struct blkio_group_stats_cpu *stats_cpu;
unsigned long flags;

spin_lock_irqsave(&blkg->stats_lock, flags);
stats = &blkg->stats;
stats->sectors += bytes >> 9;
blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
sync);
blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
direction, sync);
spin_unlock_irqrestore(&blkg->stats_lock, flags);
/*
* Disabling interrupts to provide mutual exclusion between two
* writes on same cpu. It probably is not needed for 64bit. Not
* optimizing that case yet.
*/
local_irq_save(flags);

stats_cpu = this_cpu_ptr(blkg->stats_cpu);

u64_stats_update_begin(&stats_cpu->syncp);
stats_cpu->sectors += bytes >> 9;
blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
1, direction, sync);
blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
bytes, direction, sync);
u64_stats_update_end(&stats_cpu->syncp);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);

Expand All @@ -426,18 +441,44 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg,
}
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);

/* Merged stats are per cpu. */
void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
bool sync)
{
struct blkio_group_stats_cpu *stats_cpu;
unsigned long flags;

spin_lock_irqsave(&blkg->stats_lock, flags);
blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
sync);
spin_unlock_irqrestore(&blkg->stats_lock, flags);
/*
* Disabling interrupts to provide mutual exclusion between two
* writes on same cpu. It probably is not needed for 64bit. Not
* optimizing that case yet.
*/
local_irq_save(flags);

stats_cpu = this_cpu_ptr(blkg->stats_cpu);

u64_stats_update_begin(&stats_cpu->syncp);
blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1,
direction, sync);
u64_stats_update_end(&stats_cpu->syncp);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);

/*
* This function allocates the per cpu stats for blkio_group. Should be called
* from sleepable context as alloc_per_cpu() requires that.
*/
int blkio_alloc_blkg_stats(struct blkio_group *blkg)
{
/* Allocate memory for per cpu stats */
blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
if (!blkg->stats_cpu)
return -ENOMEM;
return 0;
}
EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);

void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, void *key, dev_t dev,
enum blkio_policy_id plid)
Expand Down Expand Up @@ -508,6 +549,30 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
}
EXPORT_SYMBOL_GPL(blkiocg_lookup_group);

static void blkio_reset_stats_cpu(struct blkio_group *blkg)
{
struct blkio_group_stats_cpu *stats_cpu;
int i, j, k;
/*
* Note: On 64 bit arch this should not be an issue. This has the
* possibility of returning some inconsistent value on 32bit arch
* as 64bit update on 32bit is non atomic. Taking care of this
* corner case makes code very complicated, like sending IPIs to
* cpus, taking care of stats of offline cpus etc.
*
* reset stats is anyway more of a debug feature and this sounds a
* corner case. So I am not complicating the code yet until and
* unless this becomes a real issue.
*/
for_each_possible_cpu(i) {
stats_cpu = per_cpu_ptr(blkg->stats_cpu, i);
stats_cpu->sectors = 0;
for(j = 0; j < BLKIO_STAT_CPU_NR; j++)
for (k = 0; k < BLKIO_STAT_TOTAL; k++)
stats_cpu->stat_arr_cpu[j][k] = 0;
}
}

static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
Expand Down Expand Up @@ -552,7 +617,11 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
}
#endif
spin_unlock(&blkg->stats_lock);

/* Reset Per cpu stats which don't take blkg->stats_lock */
blkio_reset_stats_cpu(blkg);
}

spin_unlock_irq(&blkcg->lock);
return 0;
}
Expand Down Expand Up @@ -598,6 +667,59 @@ static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
return val;
}


static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg,
enum stat_type_cpu type, enum stat_sub_type sub_type)
{
int cpu;
struct blkio_group_stats_cpu *stats_cpu;
u64 val = 0, tval;

for_each_possible_cpu(cpu) {
unsigned int start;
stats_cpu = per_cpu_ptr(blkg->stats_cpu, cpu);

do {
start = u64_stats_fetch_begin(&stats_cpu->syncp);
if (type == BLKIO_STAT_CPU_SECTORS)
tval = stats_cpu->sectors;
else
tval = stats_cpu->stat_arr_cpu[type][sub_type];
} while(u64_stats_fetch_retry(&stats_cpu->syncp, start));

val += tval;
}

return val;
}

static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg,
struct cgroup_map_cb *cb, dev_t dev, enum stat_type_cpu type)
{
uint64_t disk_total, val;
char key_str[MAX_KEY_LEN];
enum stat_sub_type sub_type;

if (type == BLKIO_STAT_CPU_SECTORS) {
val = blkio_read_stat_cpu(blkg, type, 0);
return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, dev);
}

for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
sub_type++) {
blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
val = blkio_read_stat_cpu(blkg, type, sub_type);
cb->fill(cb, key_str, val);
}

disk_total = blkio_read_stat_cpu(blkg, type, BLKIO_STAT_READ) +
blkio_read_stat_cpu(blkg, type, BLKIO_STAT_WRITE);

blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
cb->fill(cb, key_str, disk_total);
return disk_total;
}

/* This should be called with blkg->stats_lock held */
static uint64_t blkio_get_stat(struct blkio_group *blkg,
struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
Expand All @@ -609,9 +731,6 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
if (type == BLKIO_STAT_TIME)
return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
blkg->stats.time, cb, dev);
if (type == BLKIO_STAT_SECTORS)
return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
blkg->stats.sectors, cb, dev);
#ifdef CONFIG_DEBUG_BLK_CGROUP
if (type == BLKIO_STAT_UNACCOUNTED_TIME)
return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
Expand Down Expand Up @@ -1075,8 +1194,8 @@ static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
}

static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
bool show_total)
struct cftype *cft, struct cgroup_map_cb *cb,
enum stat_type type, bool show_total, bool pcpu)
{
struct blkio_group *blkg;
struct hlist_node *n;
Expand All @@ -1087,10 +1206,15 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
if (blkg->dev) {
if (!cftype_blkg_same_policy(cft, blkg))
continue;
spin_lock_irq(&blkg->stats_lock);
cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
type);
spin_unlock_irq(&blkg->stats_lock);
if (pcpu)
cgroup_total += blkio_get_stat_cpu(blkg, cb,
blkg->dev, type);
else {
spin_lock_irq(&blkg->stats_lock);
cgroup_total += blkio_get_stat(blkg, cb,
blkg->dev, type);
spin_unlock_irq(&blkg->stats_lock);
}
}
}
if (show_total)
Expand All @@ -1114,47 +1238,47 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
switch(name) {
case BLKIO_PROP_time:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_TIME, 0);
BLKIO_STAT_TIME, 0, 0);
case BLKIO_PROP_sectors:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_SECTORS, 0);
BLKIO_STAT_CPU_SECTORS, 0, 1);
case BLKIO_PROP_io_service_bytes:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_SERVICE_BYTES, 1);
BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
case BLKIO_PROP_io_serviced:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_SERVICED, 1);
BLKIO_STAT_CPU_SERVICED, 1, 1);
case BLKIO_PROP_io_service_time:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_SERVICE_TIME, 1);
BLKIO_STAT_SERVICE_TIME, 1, 0);
case BLKIO_PROP_io_wait_time:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_WAIT_TIME, 1);
BLKIO_STAT_WAIT_TIME, 1, 0);
case BLKIO_PROP_io_merged:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_MERGED, 1);
BLKIO_STAT_CPU_MERGED, 1, 1);
case BLKIO_PROP_io_queued:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_QUEUED, 1);
BLKIO_STAT_QUEUED, 1, 0);
#ifdef CONFIG_DEBUG_BLK_CGROUP
case BLKIO_PROP_unaccounted_time:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_UNACCOUNTED_TIME, 0);
BLKIO_STAT_UNACCOUNTED_TIME, 0, 0);
case BLKIO_PROP_dequeue:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_DEQUEUE, 0);
BLKIO_STAT_DEQUEUE, 0, 0);
case BLKIO_PROP_avg_queue_size:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_AVG_QUEUE_SIZE, 0);
BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0);
case BLKIO_PROP_group_wait_time:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_GROUP_WAIT_TIME, 0);
BLKIO_STAT_GROUP_WAIT_TIME, 0, 0);
case BLKIO_PROP_idle_time:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_IDLE_TIME, 0);
BLKIO_STAT_IDLE_TIME, 0, 0);
case BLKIO_PROP_empty_time:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_EMPTY_TIME, 0);
BLKIO_STAT_EMPTY_TIME, 0, 0);
#endif
default:
BUG();
Expand All @@ -1164,10 +1288,10 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
switch(name){
case BLKIO_THROTL_io_service_bytes:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_SERVICE_BYTES, 1);
BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
case BLKIO_THROTL_io_serviced:
return blkio_read_blkg_stats(blkcg, cft, cb,
BLKIO_STAT_SERVICED, 1);
BLKIO_STAT_CPU_SERVICED, 1, 1);
default:
BUG();
}
Expand Down
Loading

0 comments on commit 798ce8f

Please sign in to comment.