Skip to content

Commit

Permalink
dm btree: introduce cursor api
Browse files Browse the repository at this point in the history
This uses prefetching to speed up iteration through a btree.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
  • Loading branch information
Joe Thornber authored and Mike Snitzer committed Sep 22, 2016
1 parent 9d1b404 commit 7d111c8
Show file tree
Hide file tree
Showing 2 changed files with 197 additions and 0 deletions.
162 changes: 162 additions & 0 deletions drivers/md/persistent-data/dm-btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -994,3 +994,165 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
return walk_node(info, root, fn, context);
}
EXPORT_SYMBOL_GPL(dm_btree_walk);

/*----------------------------------------------------------------*/

/*
 * Issue a prefetch for every block referenced from the node on top of the
 * cursor stack.  Each value in that node is read as a little-endian 64-bit
 * block number, so the tree's value size must be exactly that wide.
 */
static void prefetch_values(struct dm_btree_cursor *c)
{
	struct cursor_node *top = &c->nodes[c->depth - 1];
	struct btree_node *node = dm_block_data(top->b);
	struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm);
	unsigned nr_entries, idx;
	__le64 block_le;

	BUG_ON(c->info->value_type.size != sizeof(block_le));

	nr_entries = le32_to_cpu(node->header.nr_entries);
	for (idx = 0; idx < nr_entries; idx++) {
		/* Copy out rather than dereference in place. */
		memcpy(&block_le, value_ptr(node, idx), sizeof(block_le));
		dm_bm_prefetch(bm, le64_to_cpu(block_le));
	}
}

static bool leaf_node(struct dm_btree_cursor *c)
{
struct cursor_node *n = c->nodes + c->depth - 1;
struct btree_node *bn = dm_block_data(n->b);

return le32_to_cpu(bn->header.flags) & LEAF_NODE;
}

/*
 * Lock block @b and push it onto the cursor stack with its index reset to
 * the first entry.  After the push, the blocks referenced by the new node
 * are prefetched if it is an internal node, or if it is a leaf and the
 * cursor was created with prefetch_leaves set.
 *
 * Returns 0 on success, -EINVAL if the stack is considered full, or the
 * error from bn_read_lock().
 *
 * NOTE(review): the depth check refuses a push once depth reaches
 * DM_BTREE_CURSOR_MAX_DEPTH - 1, so the final slot of c->nodes is never
 * used.
 */
static int push_node(struct dm_btree_cursor *c, dm_block_t b)
{
	struct cursor_node *slot;
	int r;

	if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) {
		DMERR("couldn't push cursor node, stack depth too high");
		return -EINVAL;
	}

	slot = &c->nodes[c->depth];
	r = bn_read_lock(c->info, b, &slot->b);
	if (r)
		return r;

	slot->index = 0;
	c->depth++;

	/* Leaves are only prefetched from when explicitly requested. */
	if (!c->prefetch_leaves && leaf_node(c))
		return 0;

	prefetch_values(c);
	return 0;
}

static void pop_node(struct dm_btree_cursor *c)
{
c->depth--;
unlock_block(c->info, c->nodes[c->depth].b);
}

/*
 * Advance the index of the node on top of the stack.  If that runs past
 * the node's last entry, the node is popped and the same step is retried
 * on its parent.
 *
 * Returns 0 once some node on the stack has a valid index, or -ENODATA
 * when the stack has been emptied.
 */
static int inc_or_backtrack(struct dm_btree_cursor *c)
{
	while (c->depth) {
		struct cursor_node *top = &c->nodes[c->depth - 1];
		struct btree_node *node = dm_block_data(top->b);

		if (++top->index < le32_to_cpu(node->header.nr_entries))
			return 0;

		/* This node is exhausted; continue with its parent. */
		pop_node(c);
	}

	return -ENODATA;
}

/*
 * Starting from the node on top of the stack, keep pushing the child
 * selected by the current index until a node flagged LEAF_NODE is on top.
 * The cursor must hold at least one node on entry.
 *
 * Returns 0 when a leaf with at least one entry is on top, -ENODATA when
 * the leaf reached has no entries, or the error from push_node().
 */
static int find_leaf(struct dm_btree_cursor *c)
{
	struct cursor_node *top;
	struct btree_node *node;
	__le64 child_le;
	int r;

	for (;;) {
		top = &c->nodes[c->depth - 1];
		node = dm_block_data(top->b);

		if (le32_to_cpu(node->header.flags) & LEAF_NODE)
			break;

		/* Internal node: its values are child block numbers. */
		memcpy(&child_le, value_ptr(node, top->index), sizeof(child_le));
		r = push_node(c, le64_to_cpu(child_le));
		if (r) {
			DMERR("push_node failed");
			return r;
		}
	}

	if (le32_to_cpu(node->header.nr_entries) == 0)
		return -ENODATA;

	return 0;
}

/*
 * Initialise cursor @c over the btree rooted at @root and position it on
 * the first entry of the first leaf.
 *
 * @info:            description of the btree being walked
 * @root:            block number of the tree's root node
 * @prefetch_leaves: if set, leaf values are treated as block numbers and
 *                   prefetched as each leaf is reached
 * @c:               caller-provided cursor storage; fully reinitialised
 *
 * Returns 0 on success.  On failure a negative errno is returned
 * (-ENODATA if the first leaf reached has no entries) and every node
 * pushed so far is unlocked again, so the cursor is left empty.  Calling
 * dm_btree_cursor_end() after a failed begin is harmless but not needed.
 */
int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
			  bool prefetch_leaves, struct dm_btree_cursor *c)
{
	int r;

	c->info = info;
	c->root = root;
	c->depth = 0;
	c->prefetch_leaves = prefetch_leaves;

	r = push_node(c, root);
	if (r)
		return r;

	r = find_leaf(c);
	if (r) {
		/*
		 * The root (and possibly some descendants) are already
		 * locked.  Release them here so that a caller which simply
		 * bails out on error does not leak the locks.
		 */
		while (c->depth)
			pop_node(c);
	}

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_begin);

/*
 * Release the cursor: pop and unlock every node still on its stack.
 * Does nothing if the stack is already empty.
 */
void dm_btree_cursor_end(struct dm_btree_cursor *c)
{
	while (c->depth > 0)
		pop_node(c);
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_end);

/*
 * Move the cursor to the next entry, descending to a new leaf if needed.
 *
 * Returns 0 on success, -ENODATA when inc_or_backtrack() has run off the
 * end of the tree, or the error reported by find_leaf().
 */
int dm_btree_cursor_next(struct dm_btree_cursor *c)
{
	int r;

	r = inc_or_backtrack(c);
	if (r)
		return r;

	r = find_leaf(c);
	if (r)
		DMERR("find_leaf failed");

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_next);

/*
 * Fetch the entry the cursor currently points at.
 *
 * @key receives the entry's key converted to CPU byte order.  @value_le
 * receives value_type.size bytes copied verbatim from the node, with no
 * byte-order conversion.
 *
 * Returns 0 on success, -ENODATA if the cursor holds no nodes, or -EINVAL
 * if the node on top of the stack is flagged INTERNAL_NODE.
 */
int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le)
{
	struct cursor_node *top;
	struct btree_node *node;

	if (!c->depth)
		return -ENODATA;

	top = &c->nodes[c->depth - 1];
	node = dm_block_data(top->b);

	if (le32_to_cpu(node->header.flags) & INTERNAL_NODE)
		return -EINVAL;

	*key = le64_to_cpu(*key_ptr(node, top->index));
	memcpy(value_le, value_ptr(node, top->index), c->info->value_type.size);

	return 0;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value);
35 changes: 35 additions & 0 deletions drivers/md/persistent-data/dm-btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,4 +176,39 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
int (*fn)(void *context, uint64_t *keys, void *leaf),
void *context);


/*----------------------------------------------------------------*/

/*
 * Cursor API. This does not follow the rolling lock convention. Since we
 * know the order in which values are required, we can issue prefetches to
 * speed up iteration. Use on a single-level btree only.
 */
#define DM_BTREE_CURSOR_MAX_DEPTH 16

/* One level of the cursor's stack: a held btree node and a position in it. */
struct cursor_node {
/* Block holding the btree node for this level. */
struct dm_block *b;
/* Index of the entry currently selected within that node. */
unsigned index;
};

/* Iteration state for walking a btree; set up by dm_btree_cursor_begin(). */
struct dm_btree_cursor {
/* Description of the btree being walked, as passed to begin. */
struct dm_btree_info *info;
/* Root block of the btree, as passed to begin. */
dm_block_t root;

/* If set, leaf values are also treated as block numbers and prefetched. */
bool prefetch_leaves;
/* Number of entries of nodes[] in use; nodes[depth - 1] is the current node. */
unsigned depth;
/* Stack of held nodes from the root down to the current node. */
struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH];
};

/*
* Creates a fresh cursor. If prefetch_leaves is set then it is assumed
* the btree contains block indexes that will be prefetched. The cursor is
* quite large, so you probably don't want to put it on the stack.
*/
int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
bool prefetch_leaves, struct dm_btree_cursor *c);
/* Unlocks every node the cursor still holds, leaving it empty. */
void dm_btree_cursor_end(struct dm_btree_cursor *c);
/*
 * Advances to the next entry. Returns -ENODATA once the cursor has run
 * off the end of the tree, or another non-zero error if descending to
 * the next leaf fails.
 */
int dm_btree_cursor_next(struct dm_btree_cursor *c);
/*
 * Reads the current entry: *key gets the key in CPU byte order, value_le
 * gets value_type.size bytes copied unconverted from the node. Returns
 * -ENODATA if the cursor holds no nodes, -EINVAL if the current node is
 * an internal node.
 */
int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le);

#endif /* _LINUX_DM_BTREE_H */

0 comments on commit 7d111c8

Please sign in to comment.