From b9d83ab8a708f23a4001d60e9d8d0b3be3d9f607 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:06 +0000 Subject: [PATCH 1/9] net/packet: annotate accesses to po->xmit po->xmit can be set from setsockopt(PACKET_QDISC_BYPASS), while read locklessly. Use READ_ONCE()/WRITE_ONCE() to avoid potential load/store tearing issues. Fixes: d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d4e76e2ae153e..d25dd9f63cc4f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -307,7 +307,8 @@ static void packet_cached_dev_reset(struct packet_sock *po) static bool packet_use_direct_xmit(const struct packet_sock *po) { - return po->xmit == packet_direct_xmit; + /* Paired with WRITE_ONCE() in packet_setsockopt() */ + return READ_ONCE(po->xmit) == packet_direct_xmit; } static u16 packet_pick_tx_queue(struct sk_buff *skb) @@ -2867,7 +2868,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; - err = po->xmit(skb); + /* Paired with WRITE_ONCE() in packet_setsockopt() */ + err = READ_ONCE(po->xmit)(skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); @@ -3070,7 +3072,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) virtio_net_hdr_set_proto(skb, &vnet_hdr); } - err = po->xmit(skb); + /* Paired with WRITE_ONCE() in packet_setsockopt() */ + err = READ_ONCE(po->xmit)(skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); @@ -4007,7 +4010,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - po->xmit = val ? packet_direct_xmit : dev_queue_xmit; + /* Paired with all lockless reads of po->xmit */ + WRITE_ONCE(po->xmit, val ? packet_direct_xmit : dev_queue_xmit); return 0; } default: From ee5675ecdf7a4e713ed21d98a70c2871d6ebed01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:07 +0000 Subject: [PATCH 2/9] net/packet: convert po->origdev to an atomic flag syzbot/KCAN reported that po->origdev can be read while another thread is changing its value. We can avoid this splat by converting this field to an actual bit. Following patches will convert remaining 1bit fields. Fixes: 80feaacb8a64 ("[AF_PACKET]: Add option to return orig_dev to userspace.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/packet/af_packet.c | 10 ++++------ net/packet/diag.c | 2 +- net/packet/internal.h | 22 +++++++++++++++++++++- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d25dd9f63cc4f..af7c44169b869 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2184,7 +2184,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_hatype = dev->type; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -2461,7 +2461,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -3914,9 +3914,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->origdev = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); return 0; } case PACKET_VNET_HDR: @@ -4065,7 +4063,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->auxdata; break; case PACKET_ORIGDEV: - val = po->origdev; + val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: val = po->has_vnet_hdr; diff --git a/net/packet/diag.c b/net/packet/diag.c index 07812ae5ca073..e1ac9bb375b31 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -25,7 +25,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_flags |= PDI_RUNNING; if (po->auxdata) pinfo.pdi_flags |= PDI_AUXDATA; - if (po->origdev) + if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV)) pinfo.pdi_flags |= PDI_ORIGDEV; if (po->has_vnet_hdr) pinfo.pdi_flags |= PDI_VNETHDR; diff --git a/net/packet/internal.h b/net/packet/internal.h index 48af35b1aed25..178cd1852238d 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -116,9 +116,9 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; + unsigned long flags; unsigned int running; /* bind_lock must be held */ unsigned int auxdata:1, /* writer must hold sock lock */ - origdev:1, has_vnet_hdr:1, tp_loss:1, tp_tx_has_off:1; @@ -144,4 +144,24 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) return (struct packet_sock *)sk; } +enum packet_sock_flags { + PACKET_SOCK_ORIGDEV, +}; + +static inline void packet_sock_flag_set(struct packet_sock *po, + enum packet_sock_flags flag, + bool val) +{ + if (val) + set_bit(flag, &po->flags); + else + clear_bit(flag, &po->flags); +} + +static inline bool packet_sock_flag(const struct packet_sock *po, + enum packet_sock_flags flag) +{ + return test_bit(flag, &po->flags); +} + #endif From fd53c297aa7b077ae98a3d3d2d3aa278a1686ba6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:08 +0000 Subject: [PATCH 3/9] net/packet: convert po->auxdata to an atomic flag po->auxdata can be read while another thread is changing its value, potentially raising KCSAN splat. Convert it to PACKET_SOCK_AUXDATA flag. Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 8 +++----- net/packet/diag.c | 2 +- net/packet/internal.h | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index af7c44169b869..ecd9fc27e360c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3514,7 +3514,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } - if (pkt_sk(sk)->auxdata) { + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { struct tpacket_auxdata aux; aux.tp_status = TP_STATUS_USER; @@ -3900,9 +3900,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->auxdata = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); return 0; } case PACKET_ORIGDEV: @@ -4060,7 +4058,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, break; case PACKET_AUXDATA: - val = po->auxdata; + val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); break; case PACKET_ORIGDEV: val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); diff --git a/net/packet/diag.c b/net/packet/diag.c index e1ac9bb375b31..d704c7bf51b20 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -23,7 +23,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_flags = 0; if (po->running) pinfo.pdi_flags |= PDI_RUNNING; - if (po->auxdata) + if (packet_sock_flag(po, PACKET_SOCK_AUXDATA)) pinfo.pdi_flags |= PDI_AUXDATA; if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV)) pinfo.pdi_flags |= PDI_ORIGDEV; diff --git a/net/packet/internal.h b/net/packet/internal.h index 178cd1852238d..3bae8ea7a36f5 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,8 +118,7 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int auxdata:1, /* writer must hold sock lock */ - has_vnet_hdr:1, + unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ tp_loss:1, tp_tx_has_off:1; int pressure; @@ -146,6 +145,7 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, + PACKET_SOCK_AUXDATA, }; static inline void packet_sock_flag_set(struct packet_sock *po, From 1051ce4ab64db91f7b62369ddc321ba8747f8c84 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:09 +0000 Subject: [PATCH 4/9] net/packet: annotate accesses to po->tp_tstamp tp_tstamp is read locklessly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 9 +++++---- net/packet/diag.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ecd9fc27e360c..a27a811fa2b0d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -474,7 +474,7 @@ static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, struct timespec64 ts; __u32 ts_status; - if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp)))) return 0; h.raw = frame; @@ -2403,7 +2403,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, * closer to the time of capture. */ ts_status = tpacket_get_timestamp(skb, &ts, - po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE); + READ_ONCE(po->tp_tstamp) | + SOF_TIMESTAMPING_SOFTWARE); if (!ts_status) ktime_get_real_ts64(&ts); @@ -3945,7 +3946,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - po->tp_tstamp = val; + WRITE_ONCE(po->tp_tstamp, val); return 0; } case PACKET_FANOUT: @@ -4097,7 +4098,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_loss; break; case PACKET_TIMESTAMP: - val = po->tp_tstamp; + val = READ_ONCE(po->tp_tstamp); break; case PACKET_FANOUT: val = (po->fanout ? diff --git a/net/packet/diag.c b/net/packet/diag.c index d704c7bf51b20..0abca32e2f878 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -18,7 +18,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_version = po->tp_version; pinfo.pdi_reserve = po->tp_reserve; pinfo.pdi_copy_thresh = po->copy_thresh; - pinfo.pdi_tstamp = po->tp_tstamp; + pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp); pinfo.pdi_flags = 0; if (po->running) From 7438344660fa55b33b8234c1797c886eb73667a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:10 +0000 Subject: [PATCH 5/9] net/packet: convert po->tp_tx_has_off to an atomic flag This is to use existing space in po->flags, and reclaim the storage used by the non atomic bit fields. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 6 +++--- net/packet/internal.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a27a811fa2b0d..7800dc622ff37 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2672,7 +2672,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, return -EMSGSIZE; } - if (unlikely(po->tp_tx_has_off)) { + if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) { int off_min, off_max; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); @@ -3993,7 +3993,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, lock_sock(sk); if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) - po->tp_tx_has_off = !!val; + packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val); release_sock(sk); return 0; @@ -4120,7 +4120,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: - val = po->tp_tx_has_off; + val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF); break; case PACKET_QDISC_BYPASS: val = packet_use_direct_xmit(po); diff --git a/net/packet/internal.h b/net/packet/internal.h index 3bae8ea7a36f5..0d16a581e2713 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -119,8 +119,7 @@ struct packet_sock { unsigned long flags; unsigned int running; /* bind_lock must be held */ unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ - tp_loss:1, - tp_tx_has_off:1; + tp_loss:1; int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, PACKET_SOCK_AUXDATA, + PACKET_SOCK_TX_HAS_OFF, }; static inline void packet_sock_flag_set(struct packet_sock *po, From 164bddace2e03f6005e650cb88f101a66ebdc05a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:11 +0000 Subject: [PATCH 6/9] net/packet: convert po->tp_loss to an atomic flag tp_loss can be read locklessly. Convert it to an atomic flag to avoid races. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 6 +++--- net/packet/diag.c | 2 +- net/packet/internal.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7800dc622ff37..119063c8a1c59 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2843,7 +2843,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(tp_len < 0)) { tpacket_error: - if (po->tp_loss) { + if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) { __packet_set_status(po, ph, TP_STATUS_AVAILABLE); packet_increment_head(&po->tx_ring); @@ -3886,7 +3886,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { - po->tp_loss = !!val; + packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val); ret = 0; } release_sock(sk); @@ -4095,7 +4095,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_reserve; break; case PACKET_LOSS: - val = po->tp_loss; + val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS); break; case PACKET_TIMESTAMP: val = READ_ONCE(po->tp_tstamp); diff --git a/net/packet/diag.c b/net/packet/diag.c index 0abca32e2f878..8bb4ce6a8e617 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -29,7 +29,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_flags |= PDI_ORIGDEV; if (po->has_vnet_hdr) pinfo.pdi_flags |= PDI_VNETHDR; - if (po->tp_loss) + if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) pinfo.pdi_flags |= PDI_LOSS; return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo); diff --git a/net/packet/internal.h b/net/packet/internal.h index 0d16a581e2713..9d406a92ede8e 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,8 +118,7 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ - tp_loss:1; + unsigned int has_vnet_hdr:1; /* writer must hold sock lock */ int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_ORIGDEV, PACKET_SOCK_AUXDATA, PACKET_SOCK_TX_HAS_OFF, + PACKET_SOCK_TP_LOSS, }; static inline void packet_sock_flag_set(struct packet_sock *po, From 50d935eafee292fc432d5ac8c8715a6492961abc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:12 +0000 Subject: [PATCH 7/9] net/packet: convert po->has_vnet_hdr to an atomic flag po->has_vnet_hdr can be read locklessly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 19 ++++++++++--------- net/packet/diag.c | 2 +- net/packet/internal.h | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 119063c8a1c59..5a6b05e17ca21 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2309,7 +2309,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { netoff += sizeof(struct virtio_net_hdr); do_vnet = true; } @@ -2780,7 +2780,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) + if ((size_max > dev->mtu + reserve + VLAN_HLEN) && + !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) size_max = dev->mtu + reserve + VLAN_HLEN; reinit_completion(&po->skb_completion); @@ -2809,7 +2810,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { vnet_hdr = data; data += sizeof(*vnet_hdr); tp_len -= sizeof(*vnet_hdr); @@ -2837,7 +2838,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && - !po->has_vnet_hdr && + !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR) && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; @@ -2856,7 +2857,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { tp_len = -EINVAL; goto tpacket_error; @@ -2991,7 +2992,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); if (err) goto out_unlock; @@ -3451,7 +3452,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, packet_rcv_try_clear_pressure(pkt_sk(sk)); - if (pkt_sk(sk)->has_vnet_hdr) { + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_HAS_VNET_HDR)) { err = packet_rcv_vnet(msg, skb, &len); if (err) goto out_free; @@ -3931,7 +3932,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { - po->has_vnet_hdr = !!val; + packet_sock_flag_set(po, PACKET_SOCK_HAS_VNET_HDR, val); ret = 0; } release_sock(sk); @@ -4065,7 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: - val = po->has_vnet_hdr; + val = packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR); break; case PACKET_VERSION: val = po->tp_version; diff --git a/net/packet/diag.c b/net/packet/diag.c index 8bb4ce6a8e617..56240aaf032b2 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -27,7 +27,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_flags |= PDI_AUXDATA; if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV)) pinfo.pdi_flags |= PDI_ORIGDEV; - if (po->has_vnet_hdr) + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) pinfo.pdi_flags |= PDI_VNETHDR; if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) pinfo.pdi_flags |= PDI_LOSS; diff --git a/net/packet/internal.h b/net/packet/internal.h index 9d406a92ede8e..2521176807f4f 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,7 +118,6 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int has_vnet_hdr:1; /* writer must hold sock lock */ int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_AUXDATA, PACKET_SOCK_TX_HAS_OFF, PACKET_SOCK_TP_LOSS, + PACKET_SOCK_HAS_VNET_HDR, }; static inline void packet_sock_flag_set(struct packet_sock *po, From 61edf479818e63978cabd243b82ca80f8948a313 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:13 +0000 Subject: [PATCH 8/9] net/packet: convert po->running to an atomic flag Instead of consuming 32 bits for po->running, use one available bit in po->flags. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 20 ++++++++++---------- net/packet/diag.c | 2 +- net/packet/internal.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5a6b05e17ca21..ec446452bbe8d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -340,14 +340,14 @@ static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); - if (!po->running) { + if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); sock_hold(sk); - po->running = 1; + packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1); } } @@ -369,7 +369,7 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) lockdep_assert_held_once(&po->bind_lock); - po->running = 0; + packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0); if (po->fanout) __fanout_unlink(sk, po); @@ -389,7 +389,7 @@ static void unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); - if (po->running) + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) __unregister_prot_hook(sk, sync); } @@ -1782,7 +1782,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -EINVAL; spin_lock(&po->bind_lock); - if (po->running && + if (packet_sock_flag(po, PACKET_SOCK_RUNNING) && match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { @@ -3222,7 +3222,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, if (need_rehook) { dev_hold(dev); - if (po->running) { + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { rcu_read_unlock(); /* prevents packet_notifier() from calling * register_prot_hook() @@ -3235,7 +3235,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, dev->ifindex); } - BUG_ON(po->running); + BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING)); WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; @@ -4159,7 +4159,7 @@ static int packet_notifier(struct notifier_block *this, case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); - if (po->running) { + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { __unregister_prot_hook(sk, false); sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) @@ -4470,7 +4470,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Detach socket from network */ spin_lock(&po->bind_lock); - was_running = po->running; + was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); num = po->num; if (was_running) { WRITE_ONCE(po->num, 0); @@ -4681,7 +4681,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) s->sk_type, ntohs(READ_ONCE(po->num)), READ_ONCE(po->ifindex), - po->running, + packet_sock_flag(po, PACKET_SOCK_RUNNING), atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); diff --git a/net/packet/diag.c b/net/packet/diag.c index 56240aaf032b2..de4ced5cf3e8c 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -21,7 +21,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp); pinfo.pdi_flags = 0; - if (po->running) + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) pinfo.pdi_flags |= PDI_RUNNING; if (packet_sock_flag(po, PACKET_SOCK_AUXDATA)) pinfo.pdi_flags |= PDI_AUXDATA; diff --git a/net/packet/internal.h b/net/packet/internal.h index 2521176807f4f..58f042c631723 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -117,7 +117,6 @@ struct packet_sock { spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned long flags; - unsigned int running; /* bind_lock must be held */ int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_TX_HAS_OFF, PACKET_SOCK_TP_LOSS, PACKET_SOCK_HAS_VNET_HDR, + PACKET_SOCK_RUNNING, }; static inline void packet_sock_flag_set(struct packet_sock *po, From 791a3e9f1a86fe8eb09173c9788493b8b5c957f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:14 +0000 Subject: [PATCH 9/9] net/packet: convert po->pressure to an atomic flag Not only this removes some READ_ONCE()/WRITE_ONCE(), this also removes one integer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 14 ++++++++------ net/packet/internal.h | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ec446452bbe8d..7b9367b233d34 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1307,22 +1307,23 @@ static int __packet_rcv_has_room(const struct packet_sock *po, static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { - int pressure, ret; + bool pressure; + int ret; ret = __packet_rcv_has_room(po, skb); pressure = ret != ROOM_NORMAL; - if (READ_ONCE(po->pressure) != pressure) - WRITE_ONCE(po->pressure, pressure); + if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure) + packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure); return ret; } static void packet_rcv_try_clear_pressure(struct packet_sock *po) { - if (READ_ONCE(po->pressure) && + if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) - WRITE_ONCE(po->pressure, 0); + packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false); } static void packet_sock_destruct(struct sock *sk) @@ -1409,7 +1410,8 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, i = j = min_t(int, po->rollover->sock, num - 1); do { po_next = pkt_sk(rcu_dereference(f->arr[i])); - if (po_next != po_skip && !READ_ONCE(po_next->pressure) && + if (po_next != po_skip && + !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) && packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) po->rollover->sock = i; diff --git a/net/packet/internal.h b/net/packet/internal.h index 58f042c631723..680703dbce5e0 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -117,7 +117,6 @@ struct packet_sock { spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned long flags; - int pressure; int ifindex; /* bound device */ __be16 num; struct packet_rollover *rollover; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_TP_LOSS, PACKET_SOCK_HAS_VNET_HDR, PACKET_SOCK_RUNNING, + PACKET_SOCK_PRESSURE, }; static inline void packet_sock_flag_set(struct packet_sock *po,