Skip to content

Commit

Permalink
ore: Fix wrong math in allocation of per device BIO
Browse files Browse the repository at this point in the history
At IO preparation we calculate the max pages at each device and
allocate a BIO per device of that size. The calculation was wrong
on some unaligned corner cases offset/length combination and would
make prepare return with -ENOMEM. This would be bad for pnfs-objects
that would in that case IO through MDS. And fatal for exofs were it
would fail writes with EIO.

Fix it by doing the proper math, that will work in all cases. (I
ran a test with all possible offset/length combinations this time
round).

Also when reading we do not need to allocate for the parity units
since we jump over them.

Also lower the max_io_length to take into account the parity pages
so not to allocate BIOs bigger than PAGE_SIZE

CC: Stable Kernel <stable@vger.kernel.org>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
  • Loading branch information
Boaz Harrosh committed Jan 23, 2014
1 parent 5e01dc7 commit aad560b
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 12 deletions.
37 changes: 25 additions & 12 deletions fs/exofs/ore.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)

layout->max_io_length =
(BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
layout->group_width;
(layout->group_width - layout->parity);
if (layout->parity) {
unsigned stripe_length =
(layout->group_width - layout->parity) *
Expand Down Expand Up @@ -286,7 +286,8 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
if (length) {
ore_calc_stripe_info(layout, offset, length, &ios->si);
ios->length = ios->si.length;
ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
ios->nr_pages = ((ios->offset & (PAGE_SIZE - 1)) +
ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
if (layout->parity)
_ore_post_alloc_raid_stuff(ios);
}
Expand Down Expand Up @@ -536,6 +537,7 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
u64 H = LmodS - G * T;

u32 N = div_u64(H, U);
u32 Nlast;

/* "H - (N * U)" is just "H % U" so it's bound to u32 */
u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width;
Expand Down Expand Up @@ -568,6 +570,10 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
si->length = T - H;
if (si->length > length)
si->length = length;

Nlast = div_u64(H + si->length + U - 1, U);
si->maxdevUnits = Nlast - N;

si->M = M;
}
EXPORT_SYMBOL(ore_calc_stripe_info);
Expand All @@ -583,13 +589,16 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
int ret;

if (per_dev->bio == NULL) {
unsigned pages_in_stripe = ios->layout->group_width *
(ios->layout->stripe_unit / PAGE_SIZE);
unsigned nr_pages = ios->nr_pages * ios->layout->group_width /
(ios->layout->group_width -
ios->layout->parity);
unsigned bio_size = (nr_pages + pages_in_stripe) /
ios->layout->group_width;
unsigned bio_size;

if (!ios->reading) {
bio_size = ios->si.maxdevUnits;
} else {
bio_size = (ios->si.maxdevUnits + 1) *
(ios->layout->group_width - ios->layout->parity) /
ios->layout->group_width;
}
bio_size *= (ios->layout->stripe_unit / PAGE_SIZE);

per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
if (unlikely(!per_dev->bio)) {
Expand All @@ -609,8 +618,12 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
pglen, pgbase);
if (unlikely(pglen != added_len)) {
ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n",
per_dev->bio->bi_vcnt);
/* If bi_vcnt == bi_max then this is a SW BUG */
ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=0x%x "
"bi_max=0x%x BIO_MAX=0x%x cur_len=0x%x\n",
per_dev->bio->bi_vcnt,
per_dev->bio->bi_max_vecs,
BIO_MAX_PAGES_KMALLOC, cur_len);
ret = -ENOMEM;
goto out;
}
Expand Down Expand Up @@ -1098,7 +1111,7 @@ int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
size_attr->attr = g_attr_logical_length;
size_attr->attr.val_ptr = &size_attr->newsize;

ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
ORE_DBGMSG2("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
_LLU(oc->comps->obj.id), _LLU(obj_size), i);
ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
&size_attr->attr);
Expand Down
1 change: 1 addition & 0 deletions include/scsi/osd_ore.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ struct ore_striping_info {
unsigned unit_off;
unsigned cur_pg;
unsigned cur_comp;
unsigned maxdevUnits;
};

struct ore_io_state;
Expand Down

0 comments on commit aad560b

Please sign in to comment.