From 27e9c1de529919d8dd7d072415d3bcae77709300 Mon Sep 17 00:00:00 2001
From: Alexander Egorenkov
Date: Thu, 28 Jan 2021 12:41:04 +0100
Subject: [PATCH 1/5] net/af_iucv: remove WARN_ONCE on malformed RX packets

syzbot reported the following finding:

AF_IUCV failed to receive skb, len=0
WARNING: CPU: 0 PID: 522 at net/iucv/af_iucv.c:2039 afiucv_hs_rcv+0x174/0x190 net/iucv/af_iucv.c:2039
CPU: 0 PID: 522 Comm: syz-executor091 Not tainted 5.10.0-rc1-syzkaller-07082-g55027a88ec9f #0
Hardware name: IBM 3906 M04 701 (KVM/Linux)
Call Trace:
 [<00000000b87ea538>] afiucv_hs_rcv+0x178/0x190 net/iucv/af_iucv.c:2039
([<00000000b87ea534>] afiucv_hs_rcv+0x174/0x190 net/iucv/af_iucv.c:2039)
 [<00000000b796533e>] __netif_receive_skb_one_core+0x13e/0x188 net/core/dev.c:5315
 [<00000000b79653ce>] __netif_receive_skb+0x46/0x1c0 net/core/dev.c:5429
 [<00000000b79655fe>] netif_receive_skb_internal+0xb6/0x220 net/core/dev.c:5534
 [<00000000b796ac3a>] netif_receive_skb+0x42/0x318 net/core/dev.c:5593
 [<00000000b6fd45f4>] tun_rx_batched.isra.0+0x6fc/0x860 drivers/net/tun.c:1485
 [<00000000b6fddc4e>] tun_get_user+0x1c26/0x27f0 drivers/net/tun.c:1939
 [<00000000b6fe0f00>] tun_chr_write_iter+0x158/0x248 drivers/net/tun.c:1968
 [<00000000b4f22bfa>] call_write_iter include/linux/fs.h:1887 [inline]
 [<00000000b4f22bfa>] new_sync_write+0x442/0x648 fs/read_write.c:518
 [<00000000b4f238fe>] vfs_write.part.0+0x36e/0x5d8 fs/read_write.c:605
 [<00000000b4f2984e>] vfs_write+0x10e/0x148 fs/read_write.c:615
 [<00000000b4f29d0e>] ksys_write+0x166/0x290 fs/read_write.c:658
 [<00000000b8dc4ab4>] system_call+0xe0/0x28c arch/s390/kernel/entry.S:415
Last Breaking-Event-Address:
 [<00000000b8dc64d4>] __s390_indirect_jump_r14+0x0/0xc

Malformed RX packets shouldn't generate any warnings because debugging
info already flows to dropmon via the kfree_skb().

Signed-off-by: Alexander Egorenkov
Reviewed-by: Julian Wiedmann
Signed-off-by: Julian Wiedmann
Acked-by: Willem de Bruijn
Signed-off-by: Jakub Kicinski
---
 net/iucv/af_iucv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 882f028992c38..427a1abce0a8a 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -2036,7 +2036,6 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 	char nullstring[8];
 
 	if (!pskb_may_pull(skb, sizeof(*trans_hdr))) {
-		WARN_ONCE(1, "AF_IUCV failed to receive skb, len=%u", skb->len);
 		kfree_skb(skb);
 		return NET_RX_SUCCESS;
 	}

From c464444fa2ca41255817e2bdcfc47a658ec20645 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann
Date: Thu, 28 Jan 2021 12:41:05 +0100
Subject: [PATCH 2/5] net/af_iucv: don't lookup the socket on TX notification

Whoever called iucv_sk(sk)->sk_txnotify() must already know that they're
dealing with an af_iucv socket.
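
For illustration only (the helper below is hypothetical and not part of
the patch): struct iucv_sock embeds struct sock as its first member, so
iucv_sk() is a plain pointer cast and a caller that already holds the
af_iucv socket needs no lookup in iucv_sk_list.

  #include <net/iucv/af_iucv.h>

  /* Hypothetical helper, only to show that the conversion is a cast
   * rather than a search through the global socket list.
   */
  static struct iucv_sock *example_to_iucv_sock(struct sock *sk)
  {
  	return iucv_sk(sk);
  }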

Signed-off-by: Julian Wiedmann
Acked-by: Willem de Bruijn
Signed-off-by: Jakub Kicinski
---
 net/iucv/af_iucv.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 427a1abce0a8a..8683b6939f459 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -2134,23 +2134,14 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
 					enum iucv_tx_notify n)
 {
-	struct sock *isk = skb->sk;
-	struct sock *sk = NULL;
-	struct iucv_sock *iucv = NULL;
+	struct iucv_sock *iucv = iucv_sk(skb->sk);
+	struct sock *sk = skb->sk;
 	struct sk_buff_head *list;
 	struct sk_buff *list_skb;
 	struct sk_buff *nskb;
 	unsigned long flags;
 
-	read_lock_irqsave(&iucv_sk_list.lock, flags);
-	sk_for_each(sk, &iucv_sk_list.head)
-		if (sk == isk) {
-			iucv = iucv_sk(sk);
-			break;
-		}
-	read_unlock_irqrestore(&iucv_sk_list.lock, flags);
-
-	if (!iucv || sock_flag(sk, SOCK_ZAPPED))
+	if (sock_flag(sk, SOCK_ZAPPED))
 		return;
 
 	list = &iucv->send_skb_q;

From ef6af7bdb9e6c14eae8dc5fe852aefe1e089c85c Mon Sep 17 00:00:00 2001
From: Julian Wiedmann
Date: Thu, 28 Jan 2021 12:41:06 +0100
Subject: [PATCH 3/5] net/af_iucv: count packets in the xmit path

The TX code keeps track of all skbs that are in-flight but haven't
actually been sent out yet. For native IUCV sockets that's not a huge
deal, but with TRANS_HIPER sockets it would be much better if we didn't
need to maintain a list of skb clones.

Note that we actually only care about the _count_ of skbs in this stage
of the TX pipeline. So as prep work for removing the skb tracking on
TRANS_HIPER sockets, keep track of the skb count in a separate variable
and pair any list {enqueue, unlink} with a count {increment, decrement}.

Then replace all occurrences where we currently look at the skb list's
fill level.
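
As a rough sketch of that pairing rule (the wrapper names below are
hypothetical and only illustrate the idea; the patch itself open-codes
each increment/decrement next to the corresponding queue operation):

  #include <linux/atomic.h>
  #include <linux/skbuff.h>
  #include <linux/types.h>
  #include <net/iucv/af_iucv.h>

  /* Every enqueue on send_skb_q is mirrored by an increment of
   * skbs_in_xmit, and every unlink by a decrement, so the TX fill level
   * can be read from the counter without looking at the list.
   */
  static void example_tx_enqueue(struct iucv_sock *iucv, struct sk_buff *skb)
  {
  	skb_queue_tail(&iucv->send_skb_q, skb);
  	atomic_inc(&iucv->skbs_in_xmit);
  }

  static void example_tx_unlink(struct iucv_sock *iucv, struct sk_buff *skb)
  {
  	atomic_dec(&iucv->skbs_in_xmit);
  	skb_unlink(skb, &iucv->send_skb_q);
  }

  /* Checks like "is the TX pipeline drained?" then become: */
  static bool example_tx_idle(struct iucv_sock *iucv)
  {
  	return atomic_read(&iucv->skbs_in_xmit) == 0;
  }

Keeping the count in its own variable is what later allows the list of
skb clones to be dropped for TRANS_HIPER sockets while the fill-level
checks keep working.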

Signed-off-by: Julian Wiedmann
Acked-by: Willem de Bruijn
Signed-off-by: Jakub Kicinski
---
 include/net/iucv/af_iucv.h |  1 +
 net/iucv/af_iucv.c         | 30 ++++++++++++++++++++++++------
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 9259ce2b22f3e..0816bcd44dd17 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -128,6 +128,7 @@ struct iucv_sock {
 	u8			flags;
 	u16			msglimit;
 	u16			msglimit_peer;
+	atomic_t		skbs_in_xmit;
 	atomic_t		msg_sent;
 	atomic_t		msg_recv;
 	atomic_t		pendings;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 8683b6939f459..e487f472027a9 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -182,7 +182,7 @@ static inline int iucv_below_msglim(struct sock *sk)
 	if (sk->sk_state != IUCV_CONNECTED)
 		return 1;
 	if (iucv->transport == AF_IUCV_TRANS_IUCV)
-		return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
+		return (atomic_read(&iucv->skbs_in_xmit) < iucv->path->msglim);
 	else
 		return ((atomic_read(&iucv->msg_sent) < iucv->msglimit_peer) &&
 			(atomic_read(&iucv->pendings) <= 0));
@@ -269,8 +269,10 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
 	}
 
 	skb_queue_tail(&iucv->send_skb_q, nskb);
+	atomic_inc(&iucv->skbs_in_xmit);
 	err = dev_queue_xmit(skb);
 	if (net_xmit_eval(err)) {
+		atomic_dec(&iucv->skbs_in_xmit);
 		skb_unlink(nskb, &iucv->send_skb_q);
 		kfree_skb(nskb);
 	} else {
@@ -424,7 +426,7 @@ static void iucv_sock_close(struct sock *sk)
 		sk->sk_state = IUCV_CLOSING;
 		sk->sk_state_change(sk);
 
-		if (!err && !skb_queue_empty(&iucv->send_skb_q)) {
+		if (!err && atomic_read(&iucv->skbs_in_xmit) > 0) {
 			if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
 				timeo = sk->sk_lingertime;
 			else
@@ -491,6 +493,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio,
 	atomic_set(&iucv->pendings, 0);
 	iucv->flags = 0;
 	iucv->msglimit = 0;
+	atomic_set(&iucv->skbs_in_xmit, 0);
 	atomic_set(&iucv->msg_sent, 0);
 	atomic_set(&iucv->msg_recv, 0);
 	iucv->path = NULL;
@@ -1055,6 +1058,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 		}
 	} else { /* Classic VM IUCV transport */
 		skb_queue_tail(&iucv->send_skb_q, skb);
+		atomic_inc(&iucv->skbs_in_xmit);
 
 		if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) &&
 		    skb->len <= 7) {
@@ -1063,6 +1067,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 			/* on success: there is no message_complete callback */
 			/* for an IPRMDATA msg; remove skb from send queue   */
 			if (err == 0) {
+				atomic_dec(&iucv->skbs_in_xmit);
 				skb_unlink(skb, &iucv->send_skb_q);
 				kfree_skb(skb);
 			}
@@ -1071,6 +1076,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 			/* IUCV_IPRMDATA path flag is set... sever path */
 			if (err == 0x15) {
 				pr_iucv->path_sever(iucv->path, NULL);
+				atomic_dec(&iucv->skbs_in_xmit);
 				skb_unlink(skb, &iucv->send_skb_q);
 				err = -EPIPE;
 				goto fail;
@@ -1109,6 +1115,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 		} else {
 			err = -EPIPE;
 		}
+
+		atomic_dec(&iucv->skbs_in_xmit);
 		skb_unlink(skb, &iucv->send_skb_q);
 		goto fail;
 	}
@@ -1748,10 +1756,14 @@ static void iucv_callback_txdone(struct iucv_path *path,
 {
 	struct sock *sk = path->private;
 	struct sk_buff *this = NULL;
-	struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q;
+	struct sk_buff_head *list;
 	struct sk_buff *list_skb;
+	struct iucv_sock *iucv;
 	unsigned long flags;
 
+	iucv = iucv_sk(sk);
+	list = &iucv->send_skb_q;
+
 	bh_lock_sock(sk);
 	spin_lock_irqsave(&list->lock, flags);
 
@@ -1761,8 +1773,11 @@ static void iucv_callback_txdone(struct iucv_path *path,
 			break;
 		}
 	}
-	if (this)
+	if (this) {
+		atomic_dec(&iucv->skbs_in_xmit);
 		__skb_unlink(this, list);
+	}
+
 	spin_unlock_irqrestore(&list->lock, flags);
 
 	if (this) {
@@ -1772,7 +1787,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
 	}
 	if (sk->sk_state == IUCV_CLOSING) {
-		if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) {
+		if (atomic_read(&iucv->skbs_in_xmit) == 0) {
 			sk->sk_state = IUCV_CLOSED;
 			sk->sk_state_change(sk);
 		}
 	}
@@ -2150,6 +2165,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
 		if (skb_shinfo(list_skb) == skb_shinfo(skb)) {
 			switch (n) {
 			case TX_NOTIFY_OK:
+				atomic_dec(&iucv->skbs_in_xmit);
 				__skb_unlink(list_skb, list);
 				kfree_skb(list_skb);
 				iucv_sock_wake_msglim(sk);
@@ -2158,6 +2174,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
 				atomic_inc(&iucv->pendings);
 				break;
 			case TX_NOTIFY_DELAYED_OK:
+				atomic_dec(&iucv->skbs_in_xmit);
 				__skb_unlink(list_skb, list);
 				atomic_dec(&iucv->pendings);
 				if (atomic_read(&iucv->pendings) <= 0)
@@ -2169,6 +2186,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
 			case TX_NOTIFY_UNREACHABLE:
 			case TX_NOTIFY_DELAYED_UNREACHABLE:
 			case TX_NOTIFY_TPQFULL: /* not yet used */
 			case TX_NOTIFY_GENERALERROR:
 			case TX_NOTIFY_DELAYED_GENERALERROR:
+				atomic_dec(&iucv->skbs_in_xmit);
 				__skb_unlink(list_skb, list);
 				kfree_skb(list_skb);
 				if (sk->sk_state == IUCV_CONNECTED) {
@@ -2183,7 +2201,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
 	spin_unlock_irqrestore(&list->lock, flags);
 
 	if (sk->sk_state == IUCV_CLOSING) {
-		if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) {
+		if (atomic_read(&iucv->skbs_in_xmit) == 0) {
 			sk->sk_state = IUCV_CLOSED;
 			sk->sk_state_change(sk);
 		}

From 80bc97aa0aaab974bbbfb99a78d7515414004616 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann
Date: Thu, 28 Jan 2021 12:41:07 +0100
Subject: [PATCH 4/5] net/af_iucv: don't track individual TX skbs for TRANS_HIPER sockets

Stop maintaining the send_skb_q list for TRANS_HIPER sockets. Not only
is it extra overhead, but keeping around a list of skb clones means that
we later also have to match the ->sk_txnotify() calls against these
clones and free them accordingly.

The current matching logic (comparing the skbs' shinfo location) is
frustratingly fragile, and breaks if the skb's head is mangled in any
sort of way while passing from dev_queue_xmit() to the device's HW
queue.

Also adjust the interface for ->sk_txnotify(), to make clear that we
don't actually care about any skb internals.
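
For illustration only (the real driver-side change is the qeth hunk in
the diff below; this handler is hypothetical): with the adjusted
signature, a lower-layer driver reports TX completion against the owning
socket and never hands an skb back to af_iucv.

  #include <linux/skbuff.h>
  #include <net/sock.h>
  #include <net/iucv/af_iucv.h>

  /* Hypothetical TX-completion handler in a lower-layer driver. The skb
   * is only used to find its owning socket; the notification itself
   * carries no skb, so it keeps working even if the skb's head was
   * reallocated on its way to the HW queue.
   */
  static void example_tx_complete(struct sk_buff *skb, enum iucv_tx_notify n)
  {
  	struct sock *sk = skb->sk;

  	if (sk && sk->sk_family == PF_IUCV)
  		iucv_sk(sk)->sk_txnotify(sk, n);
  }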

Signed-off-by: Julian Wiedmann
Acked-by: Willem de Bruijn
Signed-off-by: Jakub Kicinski
---
 drivers/s390/net/qeth_core_main.c |  6 ++-
 include/net/iucv/af_iucv.h        |  2 +-
 net/iucv/af_iucv.c                | 80 ++++++++-----------------------
 3 files changed, 26 insertions(+), 62 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index ea2e139cd592d..89b223885b0c5 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1409,10 +1409,12 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *q,
 	struct sk_buff *skb;
 
 	skb_queue_walk(&buf->skb_list, skb) {
+		struct sock *sk = skb->sk;
+
 		QETH_CARD_TEXT_(q->card, 5, "skbn%d", notification);
 		QETH_CARD_TEXT_(q->card, 5, "%lx", (long) skb);
-		if (skb->sk && skb->sk->sk_family == PF_IUCV)
-			iucv_sk(skb->sk)->sk_txnotify(skb, notification);
+		if (sk && sk->sk_family == PF_IUCV)
+			iucv_sk(sk)->sk_txnotify(sk, notification);
 	}
 }
 
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 0816bcd44dd17..ff06246dbbb94 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -133,7 +133,7 @@ struct iucv_sock {
 	atomic_t		msg_recv;
 	atomic_t		pendings;
 	int			transport;
-	void			(*sk_txnotify)(struct sk_buff *skb,
+	void			(*sk_txnotify)(struct sock *sk,
 					       enum iucv_tx_notify n);
 };
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index e487f472027a9..0e0656db4ae71 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -89,7 +89,7 @@ static struct sock *iucv_accept_dequeue(struct sock *parent,
 static void iucv_sock_kill(struct sock *sk);
 static void iucv_sock_close(struct sock *sk);
 
-static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify);
+static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify);
 
 /* Call Back functions */
 static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
@@ -211,7 +211,6 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
 {
 	struct iucv_sock *iucv = iucv_sk(sock);
 	struct af_iucv_trans_hdr *phs_hdr;
-	struct sk_buff *nskb;
 	int err, confirm_recv = 0;
 
 	phs_hdr = skb_push(skb, sizeof(*phs_hdr));
@@ -261,20 +260,10 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
 	}
 
 	skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
-	__skb_header_release(skb);
-	nskb = skb_clone(skb, GFP_ATOMIC);
-	if (!nskb) {
-		err = -ENOMEM;
-		goto err_free;
-	}
-
-	skb_queue_tail(&iucv->send_skb_q, nskb);
+
 	atomic_inc(&iucv->skbs_in_xmit);
 	err = dev_queue_xmit(skb);
 	if (net_xmit_eval(err)) {
 		atomic_dec(&iucv->skbs_in_xmit);
-		skb_unlink(nskb, &iucv->send_skb_q);
-		kfree_skb(nskb);
 	} else {
 		atomic_sub(confirm_recv, &iucv->msg_recv);
 		WARN_ON(atomic_read(&iucv->msg_recv) < 0);
@@ -2146,59 +2135,33 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
  * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets
  * transport
  **/
-static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
-					enum iucv_tx_notify n)
+static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify n)
 {
-	struct iucv_sock *iucv = iucv_sk(skb->sk);
-	struct sock *sk = skb->sk;
-	struct sk_buff_head *list;
-	struct sk_buff *list_skb;
-	struct sk_buff *nskb;
-	unsigned long flags;
+	struct iucv_sock *iucv = iucv_sk(sk);
 
 	if (sock_flag(sk, SOCK_ZAPPED))
 		return;
 
-	list = &iucv->send_skb_q;
-	spin_lock_irqsave(&list->lock, flags);
-	skb_queue_walk_safe(list, list_skb, nskb) {
-		if (skb_shinfo(list_skb) == skb_shinfo(skb)) {
-			switch (n) {
-			case TX_NOTIFY_OK:
-				atomic_dec(&iucv->skbs_in_xmit);
-				__skb_unlink(list_skb, list);
-				kfree_skb(list_skb);
-				iucv_sock_wake_msglim(sk);
-				break;
-			case TX_NOTIFY_PENDING:
-				atomic_inc(&iucv->pendings);
-				break;
-			case TX_NOTIFY_DELAYED_OK:
-				atomic_dec(&iucv->skbs_in_xmit);
-				__skb_unlink(list_skb, list);
-				atomic_dec(&iucv->pendings);
-				if (atomic_read(&iucv->pendings) <= 0)
-					iucv_sock_wake_msglim(sk);
-				kfree_skb(list_skb);
-				break;
-			case TX_NOTIFY_UNREACHABLE:
-			case TX_NOTIFY_DELAYED_UNREACHABLE:
-			case TX_NOTIFY_TPQFULL: /* not yet used */
-			case TX_NOTIFY_GENERALERROR:
-			case TX_NOTIFY_DELAYED_GENERALERROR:
-				atomic_dec(&iucv->skbs_in_xmit);
-				__skb_unlink(list_skb, list);
-				kfree_skb(list_skb);
-				if (sk->sk_state == IUCV_CONNECTED) {
-					sk->sk_state = IUCV_DISCONN;
-					sk->sk_state_change(sk);
-				}
-				break;
-			}
-			break;
+	switch (n) {
+	case TX_NOTIFY_OK:
+		atomic_dec(&iucv->skbs_in_xmit);
+		iucv_sock_wake_msglim(sk);
+		break;
+	case TX_NOTIFY_PENDING:
+		atomic_inc(&iucv->pendings);
+		break;
+	case TX_NOTIFY_DELAYED_OK:
+		atomic_dec(&iucv->skbs_in_xmit);
+		if (atomic_dec_return(&iucv->pendings) <= 0)
+			iucv_sock_wake_msglim(sk);
+		break;
+	default:
+		atomic_dec(&iucv->skbs_in_xmit);
+		if (sk->sk_state == IUCV_CONNECTED) {
+			sk->sk_state = IUCV_DISCONN;
+			sk->sk_state_change(sk);
 		}
 	}
-	spin_unlock_irqrestore(&list->lock, flags);
 
 	if (sk->sk_state == IUCV_CLOSING) {
 		if (atomic_read(&iucv->skbs_in_xmit) == 0) {
@@ -2206,7 +2169,6 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
 			sk->sk_state_change(sk);
 		}
 	}
-
 }

From 2c3b4456c812681f963ef67502c5b8e8f8e2933f Mon Sep 17 00:00:00 2001
From: Julian Wiedmann
Date: Thu, 28 Jan 2021 12:41:08 +0100
Subject: [PATCH 5/5] net/af_iucv: build SG skbs for TRANS_HIPER sockets

The TX path no longer falls apart when some of its SG skbs are later
linearized by lower layers of the stack. So enable the use of SG skbs
in iucv_sock_sendmsg() again.

This effectively reverts commit dc5367bcc556
("net/af_iucv: don't use paged skbs for TX on HiperSockets").

Signed-off-by: Julian Wiedmann
Acked-by: Willem de Bruijn
Signed-off-by: Jakub Kicinski
---
 net/iucv/af_iucv.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 0e0656db4ae71..6092d5cb71687 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -256,7 +256,9 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
 			err = -EMSGSIZE;
 			goto err_free;
 		}
-		skb_trim(skb, skb->dev->mtu);
+		err = pskb_trim(skb, skb->dev->mtu);
+		if (err)
+			goto err_free;
 	}
 
 	skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
@@ -996,7 +998,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 	if (iucv->transport == AF_IUCV_TRANS_HIPER) {
 		headroom = sizeof(struct af_iucv_trans_hdr) +
 			   LL_RESERVED_SPACE(iucv->hs_dev);
-		linear = len;
+		linear = min(len, PAGE_SIZE - headroom);
 	} else {
 		if (len < PAGE_SIZE) {
 			linear = len;