Skip to content

Commit

Permalink
xdp: fix hang while unregistering device bound to xdp socket
Browse files Browse the repository at this point in the history
Device that bound to XDP socket will not have zero refcount until the
userspace application will not close it. This leads to hang inside
'netdev_wait_allrefs()' if device unregistering requested:

  # ip link del p1
  < hang on recvmsg on netlink socket >

  # ps -x | grep ip
  5126  pts/0    D+   0:00 ip link del p1

  # journalctl -b

  Jun 05 07:19:16 kernel:
  unregister_netdevice: waiting for p1 to become free. Usage count = 1

  Jun 05 07:19:27 kernel:
  unregister_netdevice: waiting for p1 to become free. Usage count = 1
  ...

Fix that by implementing NETDEV_UNREGISTER event notification handler
to properly clean up all the resources and unref device.

This should also allow socket killing via ss(8) utility.

Fixes: 965a990 ("xsk: add support for bind for Rx")
Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
  • Loading branch information
Ilya Maximets authored and Daniel Borkmann committed Jul 3, 2019
1 parent 162c820 commit 455302d
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 16 deletions.
5 changes: 5 additions & 0 deletions include/net/xdp_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ struct xdp_sock {
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head list;
bool zc;
enum {
XSK_READY = 0,
XSK_BOUND,
XSK_UNBOUND,
} state;
/* Protects multiple processes in the control path */
struct mutex mutex;
/* Mutual exclusion of NAPI TX thread and sendmsg error paths
Expand Down
10 changes: 5 additions & 5 deletions net/xdp/xdp_umem.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,13 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
return err;
}

static void xdp_umem_clear_dev(struct xdp_umem *umem)
void xdp_umem_clear_dev(struct xdp_umem *umem)
{
struct netdev_bpf bpf;
int err;

ASSERT_RTNL();

if (!umem->dev)
return;

Expand All @@ -153,17 +155,13 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)
bpf.xsk.umem = NULL;
bpf.xsk.queue_id = umem->queue_id;

rtnl_lock();
err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
rtnl_unlock();

if (err)
WARN(1, "failed to disable umem!\n");
}

rtnl_lock();
xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
rtnl_unlock();

dev_put(umem->dev);
umem->dev = NULL;
Expand Down Expand Up @@ -195,7 +193,9 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem)

static void xdp_umem_release(struct xdp_umem *umem)
{
rtnl_lock();
xdp_umem_clear_dev(umem);
rtnl_unlock();

ida_simple_remove(&umem_ida, umem->id);

Expand Down
1 change: 1 addition & 0 deletions net/xdp/xdp_umem.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
u16 queue_id, u16 flags);
void xdp_umem_clear_dev(struct xdp_umem *umem);
bool xdp_umem_validate_queues(struct xdp_umem *umem);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
Expand Down
87 changes: 76 additions & 11 deletions net/xdp/xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,22 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
return 0;
}

static void xsk_unbind_dev(struct xdp_sock *xs)
{
struct net_device *dev = xs->dev;

if (!dev || xs->state != XSK_BOUND)
return;

xs->state = XSK_UNBOUND;

/* Wait for driver to stop using the xdp socket. */
xdp_del_sk_umem(xs->umem, xs);
xs->dev = NULL;
synchronize_net();
dev_put(dev);
}

static int xsk_release(struct socket *sock)
{
struct sock *sk = sock->sk;
Expand All @@ -354,15 +370,7 @@ static int xsk_release(struct socket *sock)
sock_prot_inuse_add(net, sk->sk_prot, -1);
local_bh_enable();

if (xs->dev) {
struct net_device *dev = xs->dev;

/* Wait for driver to stop using the xdp socket. */
xdp_del_sk_umem(xs->umem, xs);
xs->dev = NULL;
synchronize_net();
dev_put(dev);
}
xsk_unbind_dev(xs);

xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
Expand Down Expand Up @@ -412,7 +420,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL;

mutex_lock(&xs->mutex);
if (xs->dev) {
if (xs->state != XSK_READY) {
err = -EBUSY;
goto out_release;
}
Expand Down Expand Up @@ -492,6 +500,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
out_unlock:
if (err)
dev_put(dev);
else
xs->state = XSK_BOUND;
out_release:
mutex_unlock(&xs->mutex);
return err;
Expand Down Expand Up @@ -520,6 +530,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;

mutex_lock(&xs->mutex);
if (xs->state != XSK_READY) {
mutex_unlock(&xs->mutex);
return -EBUSY;
}
q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
err = xsk_init_queue(entries, q, false);
mutex_unlock(&xs->mutex);
Expand All @@ -534,7 +548,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;

mutex_lock(&xs->mutex);
if (xs->umem) {
if (xs->state != XSK_READY || xs->umem) {
mutex_unlock(&xs->mutex);
return -EBUSY;
}
Expand All @@ -561,6 +575,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;

mutex_lock(&xs->mutex);
if (xs->state != XSK_READY) {
mutex_unlock(&xs->mutex);
return -EBUSY;
}
if (!xs->umem) {
mutex_unlock(&xs->mutex);
return -EINVAL;
Expand Down Expand Up @@ -662,6 +680,9 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long pfn;
struct page *qpg;

if (xs->state != XSK_READY)
return -EBUSY;

if (offset == XDP_PGOFF_RX_RING) {
q = READ_ONCE(xs->rx);
} else if (offset == XDP_PGOFF_TX_RING) {
Expand Down Expand Up @@ -693,6 +714,38 @@ static int xsk_mmap(struct file *file, struct socket *sock,
size, vma->vm_page_prot);
}

static int xsk_notifier(struct notifier_block *this,
unsigned long msg, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct sock *sk;

switch (msg) {
case NETDEV_UNREGISTER:
mutex_lock(&net->xdp.lock);
sk_for_each(sk, &net->xdp.list) {
struct xdp_sock *xs = xdp_sk(sk);

mutex_lock(&xs->mutex);
if (xs->dev == dev) {
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_error_report(sk);

xsk_unbind_dev(xs);

/* Clear device references in umem. */
xdp_umem_clear_dev(xs->umem);
}
mutex_unlock(&xs->mutex);
}
mutex_unlock(&net->xdp.lock);
break;
}
return NOTIFY_DONE;
}

static struct proto xsk_proto = {
.name = "XDP",
.owner = THIS_MODULE,
Expand Down Expand Up @@ -764,6 +817,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sock_set_flag(sk, SOCK_RCU_FREE);

xs = xdp_sk(sk);
xs->state = XSK_READY;
mutex_init(&xs->mutex);
spin_lock_init(&xs->tx_completion_lock);

Expand All @@ -784,6 +838,10 @@ static const struct net_proto_family xsk_family_ops = {
.owner = THIS_MODULE,
};

static struct notifier_block xsk_netdev_notifier = {
.notifier_call = xsk_notifier,
};

static int __net_init xsk_net_init(struct net *net)
{
mutex_init(&net->xdp.lock);
Expand Down Expand Up @@ -816,8 +874,15 @@ static int __init xsk_init(void)
err = register_pernet_subsys(&xsk_net_ops);
if (err)
goto out_sk;

err = register_netdevice_notifier(&xsk_netdev_notifier);
if (err)
goto out_pernet;

return 0;

out_pernet:
unregister_pernet_subsys(&xsk_net_ops);
out_sk:
sock_unregister(PF_XDP);
out_proto:
Expand Down

0 comments on commit 455302d

Please sign in to comment.