Skip to content

Commit

Permalink
Merge branch 'af_xdp-fixes'
Browse files Browse the repository at this point in the history
Björn Töpel says:

====================
William found two bugs, when doing socket teardown within the same
process.

The first issue was an invalid munmap call, and the second one was an
invalid XSKMAP cleanup. Both resulted in that the process kept
references to the socket, which was not correctly cleaned up. When a
new socket was created, the bind() call would fail, since the old
socket was still lingering, refusing to give up the queue on the
netdev.

More details can be found in the individual commits.

Thanks,
Björn
====================

Reviewed-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed May 5, 2019
2 parents 6cea337 + 5750902 commit ec1c8fa
Showing 1 changed file with 100 additions and 92 deletions.
192 changes: 100 additions & 92 deletions tools/lib/bpf/xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
return 0;

out_mmap:
munmap(umem->fill,
off.fr.desc + umem->config.fill_size * sizeof(__u64));
munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket:
close(umem->fd);
out_umem_alloc:
Expand Down Expand Up @@ -388,21 +387,17 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
close(xsk->qidconf_map_fd);
close(xsk->xsks_map_fd);
xsk->qidconf_map_fd = -1;
xsk->xsks_map_fd = -1;
}

static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
int xsks_value)
static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
{
bool qidconf_map_updated = false, xsks_map_updated = false;
__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
__u32 map_len = sizeof(struct bpf_map_info);
struct bpf_prog_info prog_info = {};
__u32 prog_len = sizeof(prog_info);
struct bpf_map_info map_info;
__u32 map_len = sizeof(map_info);
__u32 *map_ids;
int reset_value = 0;
__u32 num_maps;
unsigned int i;
int err;
int fd, err;

err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
if (err)
Expand All @@ -423,66 +418,71 @@ static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
goto out_map_ids;

for (i = 0; i < prog_info.nr_map_ids; i++) {
int fd;
if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
break;

fd = bpf_map_get_fd_by_id(map_ids[i]);
if (fd < 0) {
err = -errno;
goto out_maps;
}
if (fd < 0)
continue;

err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
if (err)
goto out_maps;
if (err) {
close(fd);
continue;
}

if (!strcmp(map_info.name, "qidconf_map")) {
err = bpf_map_update_elem(fd, &xsk->queue_id,
&qidconf_value, 0);
if (err)
goto out_maps;
qidconf_map_updated = true;
xsk->qidconf_map_fd = fd;
} else if (!strcmp(map_info.name, "xsks_map")) {
err = bpf_map_update_elem(fd, &xsk->queue_id,
&xsks_value, 0);
if (err)
goto out_maps;
xsks_map_updated = true;
continue;
}

if (!strcmp(map_info.name, "xsks_map")) {
xsk->xsks_map_fd = fd;
continue;
}

if (qidconf_map_updated && xsks_map_updated)
break;
close(fd);
}

if (!(qidconf_map_updated && xsks_map_updated)) {
err = 0;
if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
err = -ENOENT;
goto out_maps;
xsk_delete_bpf_maps(xsk);
}

err = 0;
goto out_success;

out_maps:
if (qidconf_map_updated)
(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id,
&reset_value, 0);
if (xsks_map_updated)
(void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
&reset_value, 0);
out_success:
if (qidconf_map_updated)
close(xsk->qidconf_map_fd);
if (xsks_map_updated)
close(xsk->xsks_map_fd);
out_map_ids:
free(map_ids);
return err;
}

static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
{
int qid = false;

(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
(void)bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
}

static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
int qid = true, fd = xsk->fd, err;

err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
if (err)
goto out;

err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
if (err)
goto out;

return 0;
out:
xsk_clear_bpf_maps(xsk);
return err;
}

static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
bool prog_attached = false;
__u32 prog_id = 0;
int err;

Expand All @@ -492,7 +492,6 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
return err;

if (!prog_id) {
prog_attached = true;
err = xsk_create_bpf_maps(xsk);
if (err)
return err;
Expand All @@ -502,20 +501,21 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
goto out_maps;
} else {
xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
err = xsk_lookup_bpf_maps(xsk);
if (err)
goto out_load;
}

err = xsk_update_bpf_maps(xsk, true, xsk->fd);
err = xsk_set_bpf_maps(xsk);
if (err)
goto out_load;

return 0;

out_load:
if (prog_attached)
close(xsk->prog_fd);
close(xsk->prog_fd);
out_maps:
if (prog_attached)
xsk_delete_bpf_maps(xsk);
xsk_delete_bpf_maps(xsk);
return err;
}

Expand All @@ -524,11 +524,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
const struct xsk_socket_config *usr_config)
{
void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
struct xsk_socket *xsk;
socklen_t optlen;
void *map;
int err;

if (!umem || !xsk_ptr || !rx || !tx)
Expand Down Expand Up @@ -594,40 +594,40 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
}

if (rx) {
map = xsk_mmap(NULL, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE,
xsk->fd, XDP_PGOFF_RX_RING);
if (map == MAP_FAILED) {
rx_map = xsk_mmap(NULL, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE,
xsk->fd, XDP_PGOFF_RX_RING);
if (rx_map == MAP_FAILED) {
err = -errno;
goto out_socket;
}

rx->mask = xsk->config.rx_size - 1;
rx->size = xsk->config.rx_size;
rx->producer = map + off.rx.producer;
rx->consumer = map + off.rx.consumer;
rx->ring = map + off.rx.desc;
rx->producer = rx_map + off.rx.producer;
rx->consumer = rx_map + off.rx.consumer;
rx->ring = rx_map + off.rx.desc;
}
xsk->rx = rx;

if (tx) {
map = xsk_mmap(NULL, off.tx.desc +
xsk->config.tx_size * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE,
xsk->fd, XDP_PGOFF_TX_RING);
if (map == MAP_FAILED) {
tx_map = xsk_mmap(NULL, off.tx.desc +
xsk->config.tx_size * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE,
xsk->fd, XDP_PGOFF_TX_RING);
if (tx_map == MAP_FAILED) {
err = -errno;
goto out_mmap_rx;
}

tx->mask = xsk->config.tx_size - 1;
tx->size = xsk->config.tx_size;
tx->producer = map + off.tx.producer;
tx->consumer = map + off.tx.consumer;
tx->ring = map + off.tx.desc;
tx->producer = tx_map + off.tx.producer;
tx->consumer = tx_map + off.tx.consumer;
tx->ring = tx_map + off.tx.desc;
tx->cached_cons = xsk->config.tx_size;
}
xsk->tx = tx;
Expand All @@ -643,6 +643,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
goto out_mmap_tx;
}

xsk->qidconf_map_fd = -1;
xsk->xsks_map_fd = -1;

if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk);
if (err)
Expand All @@ -654,13 +657,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,

out_mmap_tx:
if (tx)
munmap(xsk->tx,
off.tx.desc +
munmap(tx_map, off.tx.desc +
xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
if (rx)
munmap(xsk->rx,
off.rx.desc +
munmap(rx_map, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc));
out_socket:
if (--umem->refcount)
Expand All @@ -685,10 +686,12 @@ int xsk_umem__delete(struct xsk_umem *umem)
optlen = sizeof(off);
err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
if (!err) {
munmap(umem->fill->ring,
off.fr.desc + umem->config.fill_size * sizeof(__u64));
munmap(umem->comp->ring,
off.cr.desc + umem->config.comp_size * sizeof(__u64));
(void)munmap(umem->fill->ring - off.fr.desc,
off.fr.desc +
umem->config.fill_size * sizeof(__u64));
(void)munmap(umem->comp->ring - off.cr.desc,
off.cr.desc +
umem->config.comp_size * sizeof(__u64));
}

close(umem->fd);
Expand All @@ -699,26 +702,31 @@ int xsk_umem__delete(struct xsk_umem *umem)

void xsk_socket__delete(struct xsk_socket *xsk)
{
size_t desc_sz = sizeof(struct xdp_desc);
struct xdp_mmap_offsets off;
socklen_t optlen;
int err;

if (!xsk)
return;

(void)xsk_update_bpf_maps(xsk, 0, 0);
xsk_clear_bpf_maps(xsk);
xsk_delete_bpf_maps(xsk);

optlen = sizeof(off);
err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
if (!err) {
if (xsk->rx)
munmap(xsk->rx->ring,
off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc));
if (xsk->tx)
munmap(xsk->tx->ring,
off.tx.desc +
xsk->config.tx_size * sizeof(struct xdp_desc));
if (xsk->rx) {
(void)munmap(xsk->rx->ring - off.rx.desc,
off.rx.desc +
xsk->config.rx_size * desc_sz);
}
if (xsk->tx) {
(void)munmap(xsk->tx->ring - off.tx.desc,
off.tx.desc +
xsk->config.tx_size * desc_sz);
}

}

xsk->umem->refcount--;
Expand Down

0 comments on commit ec1c8fa

Please sign in to comment.