Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  cfq-iosched: remove redundant check for NULL cfqq in cfq_set_request()
  blocK: Restore barrier support for md and probably other virtual devices.
  block: get rid of queue-private command filter
  block: Create bip slabs with embedded integrity vectors
  cfq-iosched: get rid of the need for __GFP_NOFAIL in cfq_find_alloc_queue()
  cfq-iosched: move cfqq initialization out of cfq_find_alloc_queue()
  Trivial typo fixes in Documentation/block/data-integrity.txt.
Linus Torvalds committed Jul 1, 2009
2 parents 544ae5f + b706f64 commit 2027bd9
Showing 13 changed files with 294 additions and 406 deletions.
4 changes: 2 additions & 2 deletions Documentation/block/data-integrity.txt
@@ -50,7 +50,7 @@ encouraged them to allow separation of the data and integrity metadata
scatter-gather lists.

The controller will interleave the buffers on write and split them on
read. This means that the Linux can DMA the data buffers to and from
read. This means that Linux can DMA the data buffers to and from
host memory without changes to the page cache.

Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
@@ -66,7 +66,7 @@ software RAID5).

The IP checksum is weaker than the CRC in terms of detecting bit
errors. However, the strength is really in the separation of the data
buffers and the integrity metadata. These two distinct buffers much
buffers and the integrity metadata. These two distinct buffers must
match up for an I/O to complete.

The separation of the data and integrity metadata buffers as well as
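The hunk above sits in the section of data-integrity.txt that contrasts the 16-bit CRC mandated by the SCSI and SATA specs with the lighter-weight IP checksum Linux can generate instead. As an illustration only (not part of this commit; names invented), this is roughly what a 16-bit one's-complement "IP checksum" over a buffer looks like in plain C:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Illustrative only: the classic 16-bit one's-complement checksum
 * (RFC 1071 style) that the document calls the "IP checksum". Cheaper
 * per byte than a CRC, but weaker at catching some bit-error patterns. */
static uint16_t ip_checksum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* odd trailing byte */
		sum += (uint32_t)p[0] << 8;

	while (sum >> 16)		/* fold carries back into 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}

int main(void)
{
	unsigned char sector[512];

	memset(sector, 0xab, sizeof(sector));
	printf("checksum of one sector: 0x%04x\n",
	       ip_checksum(sector, sizeof(sector)));
	return 0;
}

The simple one's-complement sum is the cheap-but-weaker trade-off the documentation describes; the separation of data and integrity buffers is what actually gives the scheme its strength.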
2 changes: 1 addition & 1 deletion block/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
ioctl.o genhd.o scsi_ioctl.o

obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
14 changes: 6 additions & 8 deletions block/blk-core.c
@@ -595,8 +595,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)

q->sg_reserved_size = INT_MAX;

blk_set_cmd_filter_defaults(&q->cmd_filter);

/*
* all done
*/
@@ -1172,6 +1170,11 @@ static int __make_request(struct request_queue *q, struct bio *bio)
const int unplug = bio_unplug(bio);
int rw_flags;

if (bio_barrier(bio) && bio_has_data(bio) &&
(q->next_ordered == QUEUE_ORDERED_NONE)) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -1472,11 +1475,6 @@ static inline void __generic_make_request(struct bio *bio)
err = -EOPNOTSUPP;
goto end_io;
}
if (bio_barrier(bio) && bio_has_data(bio) &&
(q->next_ordered == QUEUE_ORDERED_NONE)) {
err = -EOPNOTSUPP;
goto end_io;
}

ret = q->make_request_fn(q, bio);
} while (ret);
@@ -2365,7 +2363,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
__bio_clone(bio, bio_src);

if (bio_integrity(bio_src) &&
bio_integrity_clone(bio, bio_src, gfp_mask))
bio_integrity_clone(bio, bio_src, gfp_mask, bs))
goto free_and_out;

if (bio_ctr && bio_ctr(bio, bio_src, data))
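Taken together, the two blk-core.c hunks above move the check that fails data-carrying barriers on a QUEUE_ORDERED_NONE queue out of __generic_make_request() and into __make_request(). Only queues using the default request-based path now reject such barriers; stacked or virtual devices (md and similar) that install their own make_request_fn receive the barrier bio and can handle it themselves, which is what "Restore barrier support for md" refers to. A minimal userspace sketch of that dispatch decision, with invented types and a deliberately simplified queue model, might look like this:

#include <stdio.h>

/* Hypothetical, simplified model of the control flow -- not kernel code. */
enum ordered { QUEUE_ORDERED_NONE, QUEUE_ORDERED_DRAIN };

struct bio { int is_barrier; int has_data; };

struct queue {
	enum ordered next_ordered;
	int (*make_request_fn)(struct queue *q, struct bio *bio);
};

/* Default request-based path (__make_request): the capability check
 * now lives only here. */
static int default_make_request(struct queue *q, struct bio *bio)
{
	if (bio->is_barrier && bio->has_data &&
	    q->next_ordered == QUEUE_ORDERED_NONE)
		return -95;		/* stands in for -EOPNOTSUPP */
	return 0;			/* queued normally */
}

/* A stacking driver such as md installs its own hook and can forward
 * the barrier to its member devices instead of failing it. */
static int md_make_request(struct queue *q, struct bio *bio)
{
	(void)q; (void)bio;
	return 0;
}

/* Generic entry point (__generic_make_request): after this merge it no
 * longer rejects data-carrying barriers itself; it only dispatches. */
static int generic_make_request(struct queue *q, struct bio *bio)
{
	return q->make_request_fn(q, bio);
}

int main(void)
{
	struct bio barrier = { 1, 1 };
	struct queue sd_q  = { QUEUE_ORDERED_NONE, default_make_request };
	struct queue md_q  = { QUEUE_ORDERED_NONE, md_make_request };

	printf("request-based queue: %d\n", generic_make_request(&sd_q, &barrier));
	printf("stacked md queue:    %d\n", generic_make_request(&md_q, &barrier));
	return 0;
}

Here -95 stands in for -EOPNOTSUPP; in the kernel the failure is delivered through bio_endio() rather than a plain return value, as the added hunk shows.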
2 changes: 1 addition & 1 deletion block/bsg.c
@@ -186,7 +186,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
return -EFAULT;

if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
if (blk_verify_command(&q->cmd_filter, rq->cmd, has_write_perm))
if (blk_verify_command(rq->cmd, has_write_perm))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
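The bsg.c hunk above drops the per-queue cmd_filter argument: after this merge, blk_verify_command() consults a single global opcode filter rather than a copy attached to each request_queue. A rough sketch of the shape of such an opcode filter, with invented names and only a few example SCSI opcodes, is:

#include <stdio.h>

/* Sketch with invented names: one global table of "safe to read" and
 * "safe to write" SCSI opcodes, instead of a copy on every queue. */
struct cmd_filter {
	unsigned char read_ok[256];
	unsigned char write_ok[256];
};

static struct cmd_filter default_filter;	/* single global instance */

static void filter_init(void)
{
	default_filter.read_ok[0x12] = 1;	/* INQUIRY */
	default_filter.read_ok[0x28] = 1;	/* READ(10) */
	default_filter.write_ok[0x2a] = 1;	/* WRITE(10) */
}

/* 0 = allowed; -1 = rejected (the kernel returns -EPERM) */
static int verify_command(const unsigned char *cmd, int has_write_perm)
{
	unsigned char opcode = cmd[0];

	if (default_filter.read_ok[opcode])
		return 0;
	if (has_write_perm && default_filter.write_ok[opcode])
		return 0;
	return -1;
}

int main(void)
{
	unsigned char inquiry[6] = { 0x12 };
	unsigned char write10[10] = { 0x2a };

	filter_init();
	printf("INQUIRY,   no write perm: %d\n", verify_command(inquiry, 0));
	printf("WRITE(10), no write perm: %d\n", verify_command(write10, 0));
	printf("WRITE(10), write perm:    %d\n", verify_command(write10, 1));
	return 0;
}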
176 changes: 93 additions & 83 deletions block/cfq-iosched.c
@@ -70,6 +70,51 @@ struct cfq_rb_root {
};
#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, }

/*
* Per process-grouping structure
*/
struct cfq_queue {
/* reference count */
atomic_t ref;
/* various state flags, see below */
unsigned int flags;
/* parent cfq_data */
struct cfq_data *cfqd;
/* service_tree member */
struct rb_node rb_node;
/* service_tree key */
unsigned long rb_key;
/* prio tree member */
struct rb_node p_node;
/* prio tree root we belong to, if any */
struct rb_root *p_root;
/* sorted list of pending requests */
struct rb_root sort_list;
/* if fifo isn't expired, next request to serve */
struct request *next_rq;
/* requests queued in sort_list */
int queued[2];
/* currently allocated requests */
int allocated[2];
/* fifo list of requests in sort_list */
struct list_head fifo;

unsigned long slice_end;
long slice_resid;
unsigned int slice_dispatch;

/* pending metadata requests */
int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;

/* io prio of this group */
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;

pid_t pid;
};

/*
* Per block device queue structure
*/
@@ -135,51 +180,11 @@ struct cfq_data {
unsigned int cfq_slice_idle;

struct list_head cic_list;
};

/*
* Per process-grouping structure
*/
struct cfq_queue {
/* reference count */
atomic_t ref;
/* various state flags, see below */
unsigned int flags;
/* parent cfq_data */
struct cfq_data *cfqd;
/* service_tree member */
struct rb_node rb_node;
/* service_tree key */
unsigned long rb_key;
/* prio tree member */
struct rb_node p_node;
/* prio tree root we belong to, if any */
struct rb_root *p_root;
/* sorted list of pending requests */
struct rb_root sort_list;
/* if fifo isn't expired, next request to serve */
struct request *next_rq;
/* requests queued in sort_list */
int queued[2];
/* currently allocated requests */
int allocated[2];
/* fifo list of requests in sort_list */
struct list_head fifo;

unsigned long slice_end;
long slice_resid;
unsigned int slice_dispatch;

/* pending metadata requests */
int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;

/* io prio of this group */
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;

pid_t pid;
/*
* Fallback dummy cfqq for extreme OOM conditions
*/
struct cfq_queue oom_cfqq;
};

enum cfqq_state_flags {
@@ -1641,6 +1646,26 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
ioc->ioprio_changed = 0;
}

static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
pid_t pid, int is_sync)
{
RB_CLEAR_NODE(&cfqq->rb_node);
RB_CLEAR_NODE(&cfqq->p_node);
INIT_LIST_HEAD(&cfqq->fifo);

atomic_set(&cfqq->ref, 0);
cfqq->cfqd = cfqd;

cfq_mark_cfqq_prio_changed(cfqq);

if (is_sync) {
if (!cfq_class_idle(cfqq))
cfq_mark_cfqq_idle_window(cfqq);
cfq_mark_cfqq_sync(cfqq);
}
cfqq->pid = pid;
}

static struct cfq_queue *
cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
struct io_context *ioc, gfp_t gfp_mask)
@@ -1653,56 +1678,40 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
/* cic always exists here */
cfqq = cic_to_cfqq(cic, is_sync);

if (!cfqq) {
/*
* Always try a new alloc if we fell back to the OOM cfqq
* originally, since it should just be a temporary situation.
*/
if (!cfqq || cfqq == &cfqd->oom_cfqq) {
cfqq = NULL;
if (new_cfqq) {
cfqq = new_cfqq;
new_cfqq = NULL;
} else if (gfp_mask & __GFP_WAIT) {
/*
* Inform the allocator of the fact that we will
* just repeat this allocation if it fails, to allow
* the allocator to do whatever it needs to attempt to
* free memory.
*/
spin_unlock_irq(cfqd->queue->queue_lock);
new_cfqq = kmem_cache_alloc_node(cfq_pool,
gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
gfp_mask | __GFP_ZERO,
cfqd->queue->node);
spin_lock_irq(cfqd->queue->queue_lock);
goto retry;
if (new_cfqq)
goto retry;
} else {
cfqq = kmem_cache_alloc_node(cfq_pool,
gfp_mask | __GFP_ZERO,
cfqd->queue->node);
if (!cfqq)
goto out;
}

RB_CLEAR_NODE(&cfqq->rb_node);
RB_CLEAR_NODE(&cfqq->p_node);
INIT_LIST_HEAD(&cfqq->fifo);

atomic_set(&cfqq->ref, 0);
cfqq->cfqd = cfqd;

cfq_mark_cfqq_prio_changed(cfqq);

cfq_init_prio_data(cfqq, ioc);

if (is_sync) {
if (!cfq_class_idle(cfqq))
cfq_mark_cfqq_idle_window(cfqq);
cfq_mark_cfqq_sync(cfqq);
}
cfqq->pid = current->pid;
cfq_log_cfqq(cfqd, cfqq, "alloced");
if (cfqq) {
cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
cfq_init_prio_data(cfqq, ioc);
cfq_log_cfqq(cfqd, cfqq, "alloced");
} else
cfqq = &cfqd->oom_cfqq;
}

if (new_cfqq)
kmem_cache_free(cfq_pool, new_cfqq);

out:
WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
return cfqq;
}

@@ -1735,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
cfqq = *async_cfqq;
}

if (!cfqq) {
if (!cfqq)
cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
if (!cfqq)
return NULL;
}

/*
* pin the queue now that it's allocated, scheduler exit will prune it
@@ -2307,10 +2313,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
cfqq = cic_to_cfqq(cic, is_sync);
if (!cfqq) {
cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);

if (!cfqq)
goto queue_fail;

cic_set_cfqq(cic, cfqq, is_sync);
}

@@ -2465,6 +2467,14 @@ static void *cfq_init_queue(struct request_queue *q)
for (i = 0; i < CFQ_PRIO_LISTS; i++)
cfqd->prio_trees[i] = RB_ROOT;

/*
* Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
* Grab a permanent reference to it, so that the normal code flow
* will not attempt to free it.
*/
cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
atomic_inc(&cfqd->oom_cfqq.ref);

INIT_LIST_HEAD(&cfqd->cic_list);

cfqd->queue = q;
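The cfq-iosched.c changes above replace the __GFP_NOFAIL allocation with an embedded fallback queue: cfq_init_queue() initializes cfqd->oom_cfqq once and takes a permanent reference so it is never freed, and cfq_find_alloc_queue() hands it out whenever kmem_cache_alloc_node() fails, retrying a real allocation on the next request. A minimal standalone sketch of that embedded-fallback pattern, with invented names, is:

#include <stdio.h>
#include <stdlib.h>

struct item { int refcount; };

struct pool {
	struct item oom_item;	/* embedded in the parent, never freed */
};

static void pool_init(struct pool *p)
{
	/* Permanent reference: the put path below can never drop it to zero. */
	p->oom_item.refcount = 1;
}

static struct item *item_get(struct pool *p, int simulate_oom)
{
	struct item *it = simulate_oom ? NULL : calloc(1, sizeof(*it));

	if (!it)
		it = &p->oom_item;	/* degrade gracefully; retry next time */
	it->refcount++;
	return it;
}

static void item_put(struct item *it)
{
	if (--it->refcount == 0)
		free(it);
}

int main(void)
{
	struct pool p;
	struct item *a, *b;

	pool_init(&p);
	a = item_get(&p, 1);	/* allocation "fails", fallback handed out */
	b = item_get(&p, 0);	/* normal allocation */

	printf("a is the fallback: %d\n", a == &p.oom_item);
	printf("b is the fallback: %d\n", b == &p.oom_item);

	item_put(a);		/* fallback survives: count stays >= 1 */
	item_put(b);		/* real item is freed */
	return 0;
}

The permanent reference taken at init time plays the same role as atomic_inc(&cfqd->oom_cfqq.ref) in the hunk above: the shared fallback can be handed out and released any number of times without its count ever reaching zero.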
