
Commit b676ac4

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

 - Followup fixes for resilient spinlock (Kumar Kartikeya Dwivedi):
     - Make res_spin_lock test less verbose, since it was spamming BPF
       CI on failure, and make the check for AA deadlock stronger
     - Fix rebasing mistake and use architecture provided
       res_smp_cond_load_acquire
     - Convert BPF maps (queue_stack and ringbuf) to resilient spinlock
       to address long standing syzbot reports (the shared locking
       pattern is sketched below, just before the per-file diffs)

 - Make sure that classic BPF load instructions from SKF_[NET|LL]_OFF
   offsets work when the skb is fragmented (Willem de Bruijn); an example
   filter using these offsets follows the net/core/filter.c diff below

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Convert ringbuf map to rqspinlock
  bpf: Convert queue_stack map to rqspinlock
  bpf: Use architecture provided res_smp_cond_load_acquire
  selftests/bpf: Make res_spin_lock AA test condition stronger
  selftests/net: test sk_filter support for SKF_NET_OFF on frags
  bpf: support SKF_NET_OFF and SKF_LL_OFF on skb frags
  selftests/bpf: Make res_spin_lock test less verbose
Linus Torvalds committed Apr 12, 2025
2 parents ecd5d67 + a650d38 commit b676ac4
Showing 11 changed files with 354 additions and 76 deletions.
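The queue_stack and ringbuf conversions below share one pattern: the map's lock field becomes an rqspinlock_t, and the old in_nmi() trylock special case is replaced by raw_res_spin_lock_irqsave(), which fails instead of spinning forever when the resilient lock detects an AA deadlock or times out; queue_stack then returns -EBUSY and ringbuf returns NULL, the same results the old NMI trylock path produced. A condensed sketch of that pattern (the struct and function here are placeholders; only the rqspinlock type and calls are taken from the diff):

/* Condensed, illustrative sketch of the locking pattern introduced by the
 * map conversions in this merge. Only the rqspinlock_t type and the
 * raw_res_spin_lock_* calls are real API; the struct and function are
 * placeholders standing in for the BPF map code.
 */
#include <asm/rqspinlock.h>

struct example_map {
	rqspinlock_t lock;		/* was: raw_spinlock_t */
	/* ... map state ... */
};

static long example_map_update(struct example_map *m)
{
	unsigned long flags;
	long err = 0;

	/* Acquisition can now fail (AA deadlock or timeout), so the
	 * caller reports -EBUSY instead of special-casing in_nmi().
	 */
	if (raw_res_spin_lock_irqsave(&m->lock, flags))
		return -EBUSY;

	/* ... critical section ... */

	raw_res_spin_unlock_irqrestore(&m->lock, flags);
	return err;
}
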
2 changes: 1 addition & 1 deletion arch/arm64/include/asm/rqspinlock.h
@@ -86,7 +86,7 @@

#endif

#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)

#include <asm-generic/rqspinlock.h>

35 changes: 12 additions & 23 deletions kernel/bpf/queue_stack_maps.c
@@ -9,13 +9,14 @@
#include <linux/slab.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#include <asm/rqspinlock.h>

#define QUEUE_STACK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

struct bpf_queue_stack {
struct bpf_map map;
raw_spinlock_t lock;
rqspinlock_t lock;
u32 head, tail;
u32 size; /* max_entries + 1 */

@@ -78,7 +79,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)

qs->size = size;

raw_spin_lock_init(&qs->lock);
raw_res_spin_lock_init(&qs->lock);

return &qs->map;
}
@@ -98,12 +99,8 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
int err = 0;
void *ptr;

if (in_nmi()) {
if (!raw_spin_trylock_irqsave(&qs->lock, flags))
return -EBUSY;
} else {
raw_spin_lock_irqsave(&qs->lock, flags);
}
if (raw_res_spin_lock_irqsave(&qs->lock, flags))
return -EBUSY;

if (queue_stack_map_is_empty(qs)) {
memset(value, 0, qs->map.value_size);
@@ -120,7 +117,7 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
}

out:
raw_spin_unlock_irqrestore(&qs->lock, flags);
raw_res_spin_unlock_irqrestore(&qs->lock, flags);
return err;
}

@@ -133,12 +130,8 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
void *ptr;
u32 index;

if (in_nmi()) {
if (!raw_spin_trylock_irqsave(&qs->lock, flags))
return -EBUSY;
} else {
raw_spin_lock_irqsave(&qs->lock, flags);
}
if (raw_res_spin_lock_irqsave(&qs->lock, flags))
return -EBUSY;

if (queue_stack_map_is_empty(qs)) {
memset(value, 0, qs->map.value_size);
@@ -157,7 +150,7 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
qs->head = index;

out:
raw_spin_unlock_irqrestore(&qs->lock, flags);
raw_res_spin_unlock_irqrestore(&qs->lock, flags);
return err;
}

@@ -203,12 +196,8 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
if (flags & BPF_NOEXIST || flags > BPF_EXIST)
return -EINVAL;

if (in_nmi()) {
if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
return -EBUSY;
} else {
raw_spin_lock_irqsave(&qs->lock, irq_flags);
}
if (raw_res_spin_lock_irqsave(&qs->lock, irq_flags))
return -EBUSY;

if (queue_stack_map_is_full(qs)) {
if (!replace) {
@@ -227,7 +216,7 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
qs->head = 0;

out:
raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
raw_res_spin_unlock_irqrestore(&qs->lock, irq_flags);
return err;
}

17 changes: 7 additions & 10 deletions kernel/bpf/ringbuf.c
@@ -11,6 +11,7 @@
#include <linux/kmemleak.h>
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
#include <asm/rqspinlock.h>

#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)

@@ -29,7 +30,7 @@ struct bpf_ringbuf {
u64 mask;
struct page **pages;
int nr_pages;
raw_spinlock_t spinlock ____cacheline_aligned_in_smp;
rqspinlock_t spinlock ____cacheline_aligned_in_smp;
/* For user-space producer ring buffers, an atomic_t busy bit is used
* to synchronize access to the ring buffers in the kernel, rather than
* the spinlock that is used for kernel-producer ring buffers. This is
@@ -173,7 +174,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
if (!rb)
return NULL;

raw_spin_lock_init(&rb->spinlock);
raw_res_spin_lock_init(&rb->spinlock);
atomic_set(&rb->busy, 0);
init_waitqueue_head(&rb->waitq);
init_irq_work(&rb->work, bpf_ringbuf_notify);
@@ -416,12 +417,8 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)

cons_pos = smp_load_acquire(&rb->consumer_pos);

if (in_nmi()) {
if (!raw_spin_trylock_irqsave(&rb->spinlock, flags))
return NULL;
} else {
raw_spin_lock_irqsave(&rb->spinlock, flags);
}
if (raw_res_spin_lock_irqsave(&rb->spinlock, flags))
return NULL;

pend_pos = rb->pending_pos;
prod_pos = rb->producer_pos;
@@ -446,7 +443,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
*/
if (new_prod_pos - cons_pos > rb->mask ||
new_prod_pos - pend_pos > rb->mask) {
raw_spin_unlock_irqrestore(&rb->spinlock, flags);
raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
return NULL;
}

@@ -458,7 +455,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
/* pairs with consumer's smp_load_acquire() */
smp_store_release(&rb->producer_pos, new_prod_pos);

raw_spin_unlock_irqrestore(&rb->spinlock, flags);
raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);

return (void *)hdr + BPF_RINGBUF_HDR_SZ;
}
2 changes: 1 addition & 1 deletion kernel/bpf/rqspinlock.c
@@ -253,7 +253,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
})
#else
#define RES_CHECK_TIMEOUT(ts, ret, mask) \
({ (ret) = check_timeout(&(ts)); })
({ (ret) = check_timeout((lock), (mask), &(ts)); })
#endif

/*
80 changes: 44 additions & 36 deletions net/core/filter.c
@@ -218,24 +218,36 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
return 0;
}

static int bpf_skb_load_helper_convert_offset(const struct sk_buff *skb, int offset)
{
if (likely(offset >= 0))
return offset;

if (offset >= SKF_NET_OFF)
return offset - SKF_NET_OFF + skb_network_offset(skb);

if (offset >= SKF_LL_OFF && skb_mac_header_was_set(skb))
return offset - SKF_LL_OFF + skb_mac_offset(skb);

return INT_MIN;
}

BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
u8 tmp, *ptr;
u8 tmp;
const int len = sizeof(tmp);

if (offset >= 0) {
if (headlen - offset >= len)
return *(u8 *)(data + offset);
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
return tmp;
} else {
ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
if (likely(ptr))
return *(u8 *)ptr;
}
offset = bpf_skb_load_helper_convert_offset(skb, offset);
if (offset == INT_MIN)
return -EFAULT;

return -EFAULT;
if (headlen - offset >= len)
return *(u8 *)(data + offset);
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
return tmp;
else
return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
@@ -248,21 +260,19 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
__be16 tmp, *ptr;
__be16 tmp;
const int len = sizeof(tmp);

if (offset >= 0) {
if (headlen - offset >= len)
return get_unaligned_be16(data + offset);
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
return be16_to_cpu(tmp);
} else {
ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
if (likely(ptr))
return get_unaligned_be16(ptr);
}
offset = bpf_skb_load_helper_convert_offset(skb, offset);
if (offset == INT_MIN)
return -EFAULT;

return -EFAULT;
if (headlen - offset >= len)
return get_unaligned_be16(data + offset);
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
return be16_to_cpu(tmp);
else
return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
@@ -275,21 +285,19 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
__be32 tmp, *ptr;
__be32 tmp;
const int len = sizeof(tmp);

if (likely(offset >= 0)) {
if (headlen - offset >= len)
return get_unaligned_be32(data + offset);
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
return be32_to_cpu(tmp);
} else {
ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
if (likely(ptr))
return get_unaligned_be32(ptr);
}
offset = bpf_skb_load_helper_convert_offset(skb, offset);
if (offset == INT_MIN)
return -EFAULT;

return -EFAULT;
if (headlen - offset >= len)
return get_unaligned_be32(data + offset);
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
return be32_to_cpu(tmp);
else
return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
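For reference, the negative offsets handled above come from classic BPF socket filters: SKF_NET_OFF addresses data relative to the network header and SKF_LL_OFF relative to the link-layer header. A minimal userspace sketch, not part of this commit, of a filter that reads the IPv4 protocol byte via SKF_NET_OFF (error handling omitted):

/* Illustrative sketch: a classic BPF socket filter loading a byte relative
 * to SKF_NET_OFF, i.e. the load path reworked above. Keeps UDP, drops the
 * rest; assumes IPv4 traffic on the socket.
 */
#include <linux/filter.h>
#include <netinet/in.h>
#include <sys/socket.h>

static int attach_udp_only_filter(int fd)
{
	struct sock_filter insns[] = {
		/* A = byte at network header + 9 (IPv4 protocol field) */
		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF + 9),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, IPPROTO_UDP, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, 0xffff),	/* keep packet */
		BPF_STMT(BPF_RET | BPF_K, 0),		/* drop packet */
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
}

The skf_net_off selftest added under tools/testing/selftests/net exercises these offsets against fragmented skbs.
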
7 changes: 5 additions & 2 deletions tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -25,8 +25,11 @@ static void *spin_lock_thread(void *arg)

while (!READ_ONCE(skip)) {
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
ASSERT_OK(topts.retval, "test_run retval");
if (err || topts.retval) {
ASSERT_OK(err, "test_run");
ASSERT_OK(topts.retval, "test_run retval");
break;
}
}
pthread_exit(arg);
}
10 changes: 7 additions & 3 deletions tools/testing/selftests/bpf/progs/res_spin_lock.c
@@ -38,13 +38,14 @@ int res_spin_lock_test(struct __sk_buff *ctx)
r = bpf_res_spin_lock(&elem1->lock);
if (r)
return r;
if (!bpf_res_spin_lock(&elem2->lock)) {
r = bpf_res_spin_lock(&elem2->lock);
if (!r) {
bpf_res_spin_unlock(&elem2->lock);
bpf_res_spin_unlock(&elem1->lock);
return -1;
}
bpf_res_spin_unlock(&elem1->lock);
return 0;
return r != -EDEADLK;
}

SEC("tc")
@@ -124,12 +125,15 @@ int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx)
/* Trigger AA, after exhausting entries in the held lock table. This
* time, only the timeout can save us, as AA detection won't succeed.
*/
if (!bpf_res_spin_lock(locks[34])) {
ret = bpf_res_spin_lock(locks[34]);
if (!ret) {
bpf_res_spin_unlock(locks[34]);
ret = 1;
goto end;
}

ret = ret != -ETIMEDOUT ? 2 : 0;

end:
for (i = i - 1; i >= 0; i--)
bpf_res_spin_unlock(locks[i]);
1 change: 1 addition & 0 deletions tools/testing/selftests/net/.gitignore
@@ -39,6 +39,7 @@ scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
sk_so_peek_off
skf_net_off
socket
so_incoming_cpu
so_netns_cookie
2 changes: 2 additions & 0 deletions tools/testing/selftests/net/Makefile
@@ -106,6 +106,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh
TEST_PROGS += busy_poll_test.sh
TEST_GEN_PROGS += proc_net_pktgen
TEST_PROGS += lwt_dst_cache_ref_loop.sh
TEST_PROGS += skf_net_off.sh
TEST_GEN_FILES += skf_net_off

# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps