Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
pull-request: bpf 2021-09-14

The following pull-request contains BPF updates for your *net* tree.

We've added 7 non-merge commits during the last 13 day(s) which contain
a total of 18 files changed, 334 insertions(+), 193 deletions(-).

The main changes are:

1) Fix mmap_lock lockdep splat in BPF stack map's build_id lookup, from Yonghong Song.

2) Fix BPF cgroup v2 program bypass upon net_cls/prio activation, from Daniel Borkmann.

3) Fix kvcalloc() BTF line info splat on oversized allocation attempts, from Bixuan Cui.

4) Fix BPF selftest build of task_pt_regs test for arm64/s390, from Jean-Philippe Brucker.

5) Fix BPF's disasm.{c,h} to dual-license so that it is aligned with bpftool given the former
   is a build dependency for the latter, from Daniel Borkmann with ACKs from contributors.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Sep 14, 2021
2 parents 550ac9c + 43d2b88 commit 2865ba8
Show file tree
Hide file tree
Showing 18 changed files with 334 additions and 193 deletions.
107 changes: 27 additions & 80 deletions include/linux/cgroup-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
* sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
* per-socket cgroup information except for memcg association.
*
* On legacy hierarchies, net_prio and net_cls controllers directly set
* attributes on each sock which can then be tested by the network layer.
* On the default hierarchy, each sock is associated with the cgroup it was
* created in and the networking layer can match the cgroup directly.
*
* To avoid carrying all three cgroup related fields separately in sock,
* sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
* On boot, sock_cgroup_data records the cgroup that the sock was created
* in so that cgroup2 matches can be made; however, once either net_prio or
* net_cls starts being used, the area is overridden to carry prioidx and/or
* classid. The two modes are distinguished by whether the lowest bit is
* set. Clear bit indicates cgroup pointer while set bit prioidx and
* classid.
*
* While userland may start using net_prio or net_cls at any time, once
* either is used, cgroup2 matching no longer works. There is no reason to
* mix the two and this is in line with how legacy and v2 compatibility is
* handled. On mode switch, cgroup references which are already being
* pointed to by socks may be leaked. While this can be remedied by adding
* synchronization around sock_cgroup_data, given that the number of leaked
* cgroups is bound and highly unlikely to be high, this seems to be the
* better trade-off.
* On legacy hierarchies, net_prio and net_cls controllers directly
* set attributes on each sock which can then be tested by the network
* layer. On the default hierarchy, each sock is associated with the
* cgroup it was created in and the networking layer can match the
* cgroup directly.
*/
struct sock_cgroup_data {
union {
#ifdef __LITTLE_ENDIAN
struct {
u8 is_data : 1;
u8 no_refcnt : 1;
u8 unused : 6;
u8 padding;
u16 prioidx;
u32 classid;
} __packed;
#else
struct {
u32 classid;
u16 prioidx;
u8 padding;
u8 unused : 6;
u8 no_refcnt : 1;
u8 is_data : 1;
} __packed;
struct cgroup *cgroup; /* v2 */
#ifdef CONFIG_CGROUP_NET_CLASSID
u32 classid; /* v1 */
#endif
#ifdef CONFIG_CGROUP_NET_PRIO
u16 prioidx; /* v1 */
#endif
u64 val;
};
};

/*
* There's a theoretical window where the following accessors race with
* updaters and return part of the previous pointer as the prioidx or
* classid. Such races are short-lived and the result isn't critical.
*/
static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
{
/* fallback to 1 which is always the ID of the root cgroup */
return (skcd->is_data & 1) ? skcd->prioidx : 1;
#ifdef CONFIG_CGROUP_NET_PRIO
return READ_ONCE(skcd->prioidx);
#else
return 1;
#endif
}

static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
{
/* fallback to 0 which is the unconfigured default classid */
return (skcd->is_data & 1) ? skcd->classid : 0;
#ifdef CONFIG_CGROUP_NET_CLASSID
return READ_ONCE(skcd->classid);
#else
return 0;
#endif
}

/*
* If invoked concurrently, the updaters may clobber each other. The
* caller is responsible for synchronization.
*/
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
u16 prioidx)
{
struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};

if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
return;

if (!(skcd_buf.is_data & 1)) {
skcd_buf.val = 0;
skcd_buf.is_data = 1;
}

skcd_buf.prioidx = prioidx;
WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
#ifdef CONFIG_CGROUP_NET_PRIO
WRITE_ONCE(skcd->prioidx, prioidx);
#endif
}

static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
u32 classid)
{
struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};

if (sock_cgroup_classid(&skcd_buf) == classid)
return;

if (!(skcd_buf.is_data & 1)) {
skcd_buf.val = 0;
skcd_buf.is_data = 1;
}

skcd_buf.classid = classid;
WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
#ifdef CONFIG_CGROUP_NET_CLASSID
WRITE_ONCE(skcd->classid, classid);
#endif
}

#else /* CONFIG_SOCK_CGROUP_DATA */
Expand Down
22 changes: 1 addition & 21 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
*/
#ifdef CONFIG_SOCK_CGROUP_DATA

#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
extern spinlock_t cgroup_sk_update_lock;
#endif

void cgroup_sk_alloc_disable(void);
void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
void cgroup_sk_clone(struct sock_cgroup_data *skcd);
void cgroup_sk_free(struct sock_cgroup_data *skcd);

static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
{
#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
unsigned long v;

/*
* @skcd->val is 64bit but the following is safe on 32bit too as we
* just need the lower ulong to be written and read atomically.
*/
v = READ_ONCE(skcd->val);

if (v & 3)
return &cgrp_dfl_root.cgrp;

return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
#else
return (struct cgroup *)(unsigned long)skcd->val;
#endif
return skcd->cgroup;
}

#else /* CONFIG_CGROUP_DATA */
Expand Down
9 changes: 0 additions & 9 deletions include/linux/mmap_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
__mmap_lock_trace_released(mm, false);
}

static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
{
if (mmap_read_trylock(mm)) {
rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
return true;
}
return false;
}

static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
up_read_non_owner(&mm->mmap_lock);
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/disasm.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook
*/
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/disasm.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook
*/
Expand Down
10 changes: 8 additions & 2 deletions kernel/bpf/stackmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
* with build_id.
*/
if (!user || !current || !current->mm || irq_work_busy ||
!mmap_read_trylock_non_owner(current->mm)) {
!mmap_read_trylock(current->mm)) {
/* cannot access current->mm, fall back to ips */
for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
Expand All @@ -204,9 +204,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
}

if (!work) {
mmap_read_unlock_non_owner(current->mm);
mmap_read_unlock(current->mm);
} else {
work->mm = current->mm;

/* The lock will be released once we're out of interrupt
* context. Tell lockdep that we've released it now so
* it doesn't complain that we forgot to release it.
*/
rwsem_release(&current->mm->mmap_lock.dep_map, _RET_IP_);
irq_work_queue(&work->irq_work);
}
}
Expand Down
2 changes: 2 additions & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -9912,6 +9912,8 @@ static int check_btf_line(struct bpf_verifier_env *env,
nr_linfo = attr->line_info_cnt;
if (!nr_linfo)
return 0;
if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
return -EINVAL;

rec_size = attr->line_info_rec_size;
if (rec_size < MIN_BPF_LINEINFO_SIZE ||
Expand Down
50 changes: 10 additions & 40 deletions kernel/cgroup/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -6572,74 +6572,44 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
*/
#ifdef CONFIG_SOCK_CGROUP_DATA

#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)

DEFINE_SPINLOCK(cgroup_sk_update_lock);
static bool cgroup_sk_alloc_disabled __read_mostly;

void cgroup_sk_alloc_disable(void)
{
if (cgroup_sk_alloc_disabled)
return;
pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
cgroup_sk_alloc_disabled = true;
}

#else

#define cgroup_sk_alloc_disabled false

#endif

void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
{
if (cgroup_sk_alloc_disabled) {
skcd->no_refcnt = 1;
return;
}

/* Don't associate the sock with unrelated interrupted task's cgroup. */
if (in_interrupt())
return;

rcu_read_lock();

while (true) {
struct css_set *cset;

cset = task_css_set(current);
if (likely(cgroup_tryget(cset->dfl_cgrp))) {
skcd->val = (unsigned long)cset->dfl_cgrp;
skcd->cgroup = cset->dfl_cgrp;
cgroup_bpf_get(cset->dfl_cgrp);
break;
}
cpu_relax();
}

rcu_read_unlock();
}

void cgroup_sk_clone(struct sock_cgroup_data *skcd)
{
if (skcd->val) {
if (skcd->no_refcnt)
return;
/*
* We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd.
* Don't use cgroup_get_live().
*/
cgroup_get(sock_cgroup_ptr(skcd));
cgroup_bpf_get(sock_cgroup_ptr(skcd));
}
struct cgroup *cgrp = sock_cgroup_ptr(skcd);

/*
* We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd.
* Don't use cgroup_get_live().
*/
cgroup_get(cgrp);
cgroup_bpf_get(cgrp);
}

void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
struct cgroup *cgrp = sock_cgroup_ptr(skcd);

if (skcd->no_refcnt)
return;
cgroup_bpf_put(cgrp);
cgroup_put(cgrp);
}
Expand Down
7 changes: 1 addition & 6 deletions net/core/netclassid_cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file);

if (sock) {
spin_lock(&cgroup_sk_update_lock);
if (sock)
sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
spin_unlock(&cgroup_sk_update_lock);
}
if (--ctx->batch == 0) {
ctx->batch = UPDATE_CLASSID_BATCH;
return n + 1;
Expand Down Expand Up @@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
struct css_task_iter it;
struct task_struct *p;

cgroup_sk_alloc_disable();

cs->classid = (u32)value;

css_task_iter_start(css, 0, &it);
Expand Down
10 changes: 2 additions & 8 deletions net/core/netprio_cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
if (!dev)
return -ENODEV;

cgroup_sk_alloc_disable();

rtnl_lock();

ret = netprio_set_prio(of_css(of), dev, prio);
Expand All @@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
static int update_netprio(const void *v, struct file *file, unsigned n)
{
struct socket *sock = sock_from_file(file);
if (sock) {
spin_lock(&cgroup_sk_update_lock);

if (sock)
sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
(unsigned long)v);
spin_unlock(&cgroup_sk_update_lock);
}
return 0;
}

Expand All @@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset)
struct task_struct *p;
struct cgroup_subsys_state *css;

cgroup_sk_alloc_disable();

cgroup_taskset_for_each(p, css, tset) {
void *v = (void *)(unsigned long)css->id;

Expand Down
Loading

0 comments on commit 2865ba8

Please sign in to comment.