Skip to content

Commit

Permalink
maple_tree: remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_al…
Browse files Browse the repository at this point in the history
…loc_bulk()

Preallocations are common in the VMA code to avoid allocating under
certain locking conditions.  The preallocations must also cover the
worst-case scenario.  Removing the GFP_ZERO flag from the
kmem_cache_alloc() (and bulk variant) calls will reduce the amount of time
spent zeroing memory that may not be used.  Only zero out the necessary
area to keep track of the allocations in the maple state.  Zero the entire
node prior to using it in the tree.

This required internal changes to node counting on allocation, so the test
code is also updated.

This restores some micro-benchmark performance: up to +9% in mmtests mmap1
by my testing +10% to +20% in mmap, mmapaddr, mmapmany tests reported by
Red Hat

Link: https://bugzilla.redhat.com/show_bug.cgi?id=2149636
Link: https://lkml.kernel.org/r/20230105160427.2988454-1-Liam.Howlett@oracle.com
Signed-off-by: Liam Howlett <Liam.Howlett@oracle.com>
Reported-by: Jirka Hladky <jhladky@redhat.com>
Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
  • Loading branch information
Liam Howlett authored and Andrew Morton committed Jan 19, 2023
1 parent fc57448 commit 541e06b
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 46 deletions.
80 changes: 43 additions & 37 deletions lib/maple_tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,12 @@ struct maple_subtree_state {
/* Functions */
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
{
return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
return kmem_cache_alloc(maple_node_cache, gfp);
}

static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
{
return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
nodes);
return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
}

static inline void mt_free_bulk(size_t size, void __rcu **nodes)
Expand Down Expand Up @@ -1125,9 +1124,10 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
{
struct maple_alloc *ret, *node = mas->alloc;
unsigned long total = mas_allocated(mas);
unsigned int req = mas_alloc_req(mas);

/* nothing or a request pending. */
if (unlikely(!total))
if (WARN_ON(!total))
return NULL;

if (total == 1) {
Expand All @@ -1137,27 +1137,25 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
goto single_node;
}

if (!node->node_count) {
if (node->node_count == 1) {
/* Single allocation in this node. */
mas->alloc = node->slot[0];
node->slot[0] = NULL;
mas->alloc->total = node->total - 1;
ret = node;
goto new_head;
}

node->total--;
ret = node->slot[node->node_count];
node->slot[node->node_count--] = NULL;
ret = node->slot[--node->node_count];
node->slot[node->node_count] = NULL;

single_node:
new_head:
ret->total = 0;
ret->node_count = 0;
if (ret->request_count) {
mas_set_alloc_req(mas, ret->request_count + 1);
ret->request_count = 0;
if (req) {
req++;
mas_set_alloc_req(mas, req);
}

memset(ret, 0, sizeof(*ret));
return (struct maple_node *)ret;
}

Expand All @@ -1176,21 +1174,20 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
unsigned long count;
unsigned int requested = mas_alloc_req(mas);

memset(reuse, 0, sizeof(*reuse));
count = mas_allocated(mas);

if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) {
if (head->slot[0])
head->node_count++;
head->slot[head->node_count] = reuse;
reuse->request_count = 0;
reuse->node_count = 0;
if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) {
head->slot[head->node_count++] = reuse;
head->total++;
goto done;
}

reuse->total = 1;
if ((head) && !((unsigned long)head & 0x1)) {
head->request_count = 0;
reuse->slot[0] = head;
reuse->node_count = 1;
reuse->total += head->total;
}

Expand All @@ -1209,7 +1206,6 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
{
struct maple_alloc *node;
unsigned long allocated = mas_allocated(mas);
unsigned long success = allocated;
unsigned int requested = mas_alloc_req(mas);
unsigned int count;
void **slots = NULL;
Expand All @@ -1225,24 +1221,29 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
WARN_ON(!allocated);
}

if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) {
if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
node = (struct maple_alloc *)mt_alloc_one(gfp);
if (!node)
goto nomem_one;

if (allocated)
if (allocated) {
node->slot[0] = mas->alloc;
node->node_count = 1;
} else {
node->node_count = 0;
}

success++;
mas->alloc = node;
node->total = ++allocated;
requested--;
}

node = mas->alloc;
node->request_count = 0;
while (requested) {
max_req = MAPLE_ALLOC_SLOTS;
if (node->slot[0]) {
unsigned int offset = node->node_count + 1;
if (node->node_count) {
unsigned int offset = node->node_count;

slots = (void **)&node->slot[offset];
max_req -= offset;
Expand All @@ -1256,15 +1257,13 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
goto nomem_bulk;

node->node_count += count;
/* zero indexed. */
if (slots == (void **)&node->slot)
node->node_count--;

success += count;
allocated += count;
node = node->slot[0];
node->node_count = 0;
node->request_count = 0;
requested -= count;
}
mas->alloc->total = success;
mas->alloc->total = allocated;
return;

nomem_bulk:
Expand All @@ -1273,7 +1272,7 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
nomem_one:
mas_set_alloc_req(mas, requested);
if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
mas->alloc->total = success;
mas->alloc->total = allocated;
mas_set_err(mas, -ENOMEM);
}

Expand Down Expand Up @@ -5734,6 +5733,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
void mas_destroy(struct ma_state *mas)
{
struct maple_alloc *node;
unsigned long total;

/*
* When using mas_for_each() to insert an expected number of elements,
Expand All @@ -5756,14 +5756,20 @@ void mas_destroy(struct ma_state *mas)
}
mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);

while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) {
total = mas_allocated(mas);
while (total) {
node = mas->alloc;
mas->alloc = node->slot[0];
if (node->node_count > 0)
mt_free_bulk(node->node_count,
(void __rcu **)&node->slot[1]);
if (node->node_count > 1) {
size_t count = node->node_count - 1;

mt_free_bulk(count, (void __rcu **)&node->slot[1]);
total -= count;
}
kmem_cache_free(maple_node_cache, node);
total--;
}

mas->alloc = NULL;
}
EXPORT_SYMBOL_GPL(mas_destroy);
Expand Down
18 changes: 9 additions & 9 deletions tools/testing/radix-tree/maple.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,11 @@ static noinline void check_new_node(struct maple_tree *mt)

if (!MAPLE_32BIT) {
if (i >= 35)
e = i - 35;
e = i - 34;
else if (i >= 5)
e = i - 5;
e = i - 4;
else if (i >= 2)
e = i - 2;
e = i - 1;
} else {
if (i >= 4)
e = i - 4;
Expand Down Expand Up @@ -305,32 +305,32 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);

mn = mas_pop_node(&mas); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 2);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);

mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);

/* Check the limit of pop/push/pop */
mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */
MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
MT_BUG_ON(mt, mas_alloc_req(&mas));
MT_BUG_ON(mt, mas.alloc->node_count);
MT_BUG_ON(mt, mas.alloc->node_count != 1);
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas.alloc->node_count);
MT_BUG_ON(mt, mas.alloc->node_count != 1);
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
Expand Down

0 comments on commit 541e06b

Please sign in to comment.