From 9d027e3a83f39b819e908e4e09084277a2e45e95 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 5 Nov 2019 14:11:49 -0800
Subject: [PATCH 1/6] net: neigh: use long type to store jiffies delta

A difference of two unsigned long values needs long storage.

Fixes: c7fb64db001f ("[NETLINK]: Neighbour table configuration and statistics via rtnetlink")
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/neighbour.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5480edff0c868..8c82e95f75394 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2052,8 +2052,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
 		goto nla_put_failure;
 	{
 		unsigned long now = jiffies;
-		unsigned int flush_delta = now - tbl->last_flush;
-		unsigned int rand_delta = now - tbl->last_rand;
+		long flush_delta = now - tbl->last_flush;
+		long rand_delta = now - tbl->last_rand;
 		struct neigh_hash_table *nht;
 		struct ndt_config ndc = {
 			.ndtc_key_len		= tbl->key_len,
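A note on the fix above: jiffies wraps, so the subtraction happens in modular
unsigned arithmetic, and only a signed type preserves a negative delta. A
minimal user-space sketch (illustrative values, not kernel code; it assumes
64-bit longs and the two's-complement conversion Linux relies on):

	#include <stdio.h>

	int main(void)
	{
		unsigned long now  = 100;	/* "jiffies" just after a wrap */
		unsigned long last = 200;	/* timestamp taken before the wrap */

		unsigned int bad  = now - last;	/* truncated to a huge bogus delta */
		long         good = now - last;	/* -100: the sign survives */

		printf("bad=%u good=%ld\n", bad, good);
		return 0;
	}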
From 3828a93f5cfdf5d8a4ff9dead741e9a2871ff57b Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 5 Nov 2019 14:11:50 -0800
Subject: [PATCH 2/6] inet_diag: use jiffies_delta_to_msecs()

Use jiffies_delta_to_msecs() to avoid reporting 'infinite' timeouts
and to clean up the code.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/ipv4/inet_diag.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7dc79b973e6ed..af154977904c0 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -226,17 +226,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 		r->idiag_timer = 1;
 		r->idiag_retrans = icsk->icsk_retransmits;
 		r->idiag_expires =
-			jiffies_to_msecs(icsk->icsk_timeout - jiffies);
+			jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
 		r->idiag_timer = 4;
 		r->idiag_retrans = icsk->icsk_probes_out;
 		r->idiag_expires =
-			jiffies_to_msecs(icsk->icsk_timeout - jiffies);
+			jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
 	} else if (timer_pending(&sk->sk_timer)) {
 		r->idiag_timer = 2;
 		r->idiag_retrans = icsk->icsk_probes_out;
 		r->idiag_expires =
-			jiffies_to_msecs(sk->sk_timer.expires - jiffies);
+			jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
 	} else {
 		r->idiag_timer = 0;
 		r->idiag_expires = 0;
@@ -342,16 +342,13 @@ static int inet_twsk_diag_fill(struct sock *sk,
 	r = nlmsg_data(nlh);
 	BUG_ON(tw->tw_state != TCP_TIME_WAIT);

-	tmo = tw->tw_timer.expires - jiffies;
-	if (tmo < 0)
-		tmo = 0;
-
 	inet_diag_msg_common_fill(r, sk);
 	r->idiag_retrans = 0;

 	r->idiag_state	= tw->tw_substate;
 	r->idiag_timer	= 3;
-	r->idiag_expires = jiffies_to_msecs(tmo);
+	tmo = tw->tw_timer.expires - jiffies;
+	r->idiag_expires = jiffies_delta_to_msecs(tmo);
 	r->idiag_rqueue	= 0;
 	r->idiag_wqueue	= 0;
 	r->idiag_uid	= 0;
@@ -385,7 +382,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
 			      offsetof(struct sock, sk_cookie));

 	tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
-	r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
+	r->idiag_expires = jiffies_delta_to_msecs(tmo);
 	r->idiag_rqueue	= 0;
 	r->idiag_wqueue	= 0;
 	r->idiag_uid	= 0;
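For reference, jiffies_delta_to_msecs() lives in include/linux/jiffies.h and
clamps a negative delta to zero before converting, so an already-expired timer
reports 0 instead of a near-infinite timeout. Its definition is essentially
(paraphrased from the kernel header, not quoted from this series):

	static inline unsigned int jiffies_delta_to_msecs(long delta)
	{
		return jiffies_to_msecs(max(0L, delta));
	}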
From 25c7a6d1f90e208ec27ca854b1381ed39842ec57 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 5 Nov 2019 14:11:51 -0800
Subject: [PATCH 3/6] net: avoid potential false sharing in neighbor related code

There are common instances of the following construct:

	if (n->confirmed != now)
		n->confirmed = now;

A C compiler could legally remove the conditional.

Use READ_ONCE()/WRITE_ONCE() to avoid this problem.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/net/arp.h   |  4 ++--
 include/net/ndisc.h |  8 ++++----
 include/net/sock.h  | 12 ++++++------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/net/arp.h b/include/net/arp.h
index c8f580a0e6b1f..4950191f6b2bf 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -57,8 +57,8 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
 		unsigned long now = jiffies;

 		/* avoid dirtying neighbour */
-		if (n->confirmed != now)
-			n->confirmed = now;
+		if (READ_ONCE(n->confirmed) != now)
+			WRITE_ONCE(n->confirmed, now);
 	}
 	rcu_read_unlock_bh();
 }
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index b2f715ca05672..b5ebeb3b0de0e 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -414,8 +414,8 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
 		unsigned long now = jiffies;

 		/* avoid dirtying neighbour */
-		if (n->confirmed != now)
-			n->confirmed = now;
+		if (READ_ONCE(n->confirmed) != now)
+			WRITE_ONCE(n->confirmed, now);
 	}
 	rcu_read_unlock_bh();
 }
@@ -431,8 +431,8 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
 		unsigned long now = jiffies;

 		/* avoid dirtying neighbour */
-		if (n->confirmed != now)
-			n->confirmed = now;
+		if (READ_ONCE(n->confirmed) != now)
+			WRITE_ONCE(n->confirmed, now);
 	}
 	rcu_read_unlock_bh();
 }
diff --git a/include/net/sock.h b/include/net/sock.h
index ac6042d0af328..f2f853439b657 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1939,8 +1939,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);

 static inline void sk_dst_confirm(struct sock *sk)
 {
-	if (!sk->sk_dst_pending_confirm)
-		sk->sk_dst_pending_confirm = 1;
+	if (!READ_ONCE(sk->sk_dst_pending_confirm))
+		WRITE_ONCE(sk->sk_dst_pending_confirm, 1);
 }

 static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
@@ -1950,10 +1950,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
 		unsigned long now = jiffies;

 		/* avoid dirtying neighbour */
-		if (n->confirmed != now)
-			n->confirmed = now;
-		if (sk && sk->sk_dst_pending_confirm)
-			sk->sk_dst_pending_confirm = 0;
+		if (READ_ONCE(n->confirmed) != now)
+			WRITE_ONCE(n->confirmed, now);
+		if (sk && READ_ONCE(sk->sk_dst_pending_confirm))
+			WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
 	}
 }
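The conditional-store idiom above is worth spelling out. A user-space analogue
(READ_ONCE/WRITE_ONCE below are simplified stand-ins for the kernel macros,
and struct neigh is reduced to the one field that matters):

	#include <stdio.h>

	#define READ_ONCE(x)	 (*(const volatile typeof(x) *)&(x))
	#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&(x) = (v))

	struct neigh { unsigned long confirmed; };

	static void confirm(struct neigh *n, unsigned long now)
	{
		/* Store only when the value changes, so a hot shared cache
		 * line is not dirtied on every packet; the volatile accesses
		 * keep the compiler from collapsing the test into an
		 * unconditional store.
		 */
		if (READ_ONCE(n->confirmed) != now)
			WRITE_ONCE(n->confirmed, now);
	}

	int main(void)
	{
		struct neigh n = { .confirmed = 0 };

		confirm(&n, 42);
		confirm(&n, 42);	/* second call reads but never stores */
		printf("confirmed=%lu\n", n.confirmed);
		return 0;
	}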
From 7976a11b30929871a4c84c3c406d7681a3dbcc10 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 5 Nov 2019 14:11:52 -0800
Subject: [PATCH 4/6] net: use helpers to change sk_ack_backlog

Writers hold a lock, but many readers do not.

The following patch will add appropriate barriers in
sk_acceptq_removed() and sk_acceptq_added().

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/atm/signaling.c                     | 2 +-
 net/atm/svc.c                           | 2 +-
 net/ax25/af_ax25.c                      | 2 +-
 net/ax25/ax25_in.c                      | 2 +-
 net/bluetooth/af_bluetooth.c            | 4 ++--
 net/decnet/af_decnet.c                  | 2 +-
 net/decnet/dn_nsp_in.c                  | 2 +-
 net/llc/af_llc.c                        | 2 +-
 net/rose/af_rose.c                      | 4 ++--
 net/sctp/associola.c                    | 4 ++--
 net/sctp/endpointola.c                  | 2 +-
 net/vmw_vsock/af_vsock.c                | 4 ++--
 net/vmw_vsock/hyperv_transport.c        | 2 +-
 net/vmw_vsock/virtio_transport_common.c | 2 +-
 net/vmw_vsock/vmci_transport.c          | 2 +-
 net/x25/af_x25.c                        | 4 ++--
 16 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 6c11cdf4dd4ce..fbd0c5e7b2999 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -109,7 +109,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
 			dev_kfree_skb(skb);
 			goto as_indicate_complete;
 		}
-		sk->sk_ack_backlog++;
+		sk_acceptq_added(sk);
 		skb_queue_tail(&sk->sk_receive_queue, skb);
 		pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk));
 		sk->sk_state_change(sk);
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 908cbb8654f53..ba144d035e3d4 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -381,7 +381,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
 			 msg->pvc.sap_addr.vpi,
 			 msg->pvc.sap_addr.vci);
 		dev_kfree_skb(skb);
-		sk->sk_ack_backlog--;
+		sk_acceptq_removed(sk);
 		if (error) {
 			sigd_enq2(NULL, as_reject, old_vcc, NULL, NULL,
 				  &old_vcc->qos, error);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index bb222b882b677..324306d6fde02 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1384,7 +1384,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
 	/* Now attach up the new socket */
 	kfree_skb(skb);
-	sk->sk_ack_backlog--;
+	sk_acceptq_removed(sk);
 	newsock->state = SS_CONNECTED;

 out:
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index dcdbaeeb2358a..cd6afe895db99 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -356,7 +356,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,

 		make->sk_state = TCP_ESTABLISHED;

-		sk->sk_ack_backlog++;
+		sk_acceptq_added(sk);
 		bh_unlock_sock(sk);
 	} else {
 		if (!mine)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 5f508c50649d0..3fd124927d4d1 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -173,7 +173,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
 	else
 		release_sock(sk);

-	parent->sk_ack_backlog++;
+	sk_acceptq_added(parent);
 }
 EXPORT_SYMBOL(bt_accept_enqueue);

@@ -185,7 +185,7 @@ void bt_accept_unlink(struct sock *sk)
 	BT_DBG("sk %p state %d", sk, sk->sk_state);

 	list_del_init(&bt_sk(sk)->accept_q);
-	bt_sk(sk)->parent->sk_ack_backlog--;
+	sk_acceptq_removed(bt_sk(sk)->parent);
 	bt_sk(sk)->parent = NULL;
 	sock_put(sk);
 }
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 3349ea81f9016..e19a92a62e142 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1091,7 +1091,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
 	}

 	cb = DN_SKB_CB(skb);
-	sk->sk_ack_backlog--;
+	sk_acceptq_removed(sk);
 	newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
 	if (newsk == NULL) {
 		release_sock(sk);
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index e4161e0c86aa1..c68503a180259 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -328,7 +328,7 @@ static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb)
 		return;
 	}

-	sk->sk_ack_backlog++;
+	sk_acceptq_added(sk);
 	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_state_change(sk);
 }
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c74f44dfaa22a..50d2c9749db36 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -705,7 +705,7 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
 	/* put original socket back into a clean listen state. */
 	sk->sk_state = TCP_LISTEN;
-	sk->sk_ack_backlog--;
+	sk_acceptq_removed(sk);
 	dprintk("%s: ok success on %02X, client on %02X\n", __func__,
 		llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap);
 frees:
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 6a0df7c8a939e..46b8ff24020d7 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -906,7 +906,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
 	/* Now attach up the new socket */
 	skb->sk = NULL;
 	kfree_skb(skb);
-	sk->sk_ack_backlog--;
+	sk_acceptq_removed(sk);

 out_release:
 	release_sock(sk);
@@ -1011,7 +1011,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
 	make_rose->va        = 0;
 	make_rose->vr        = 0;
 	make_rose->vl        = 0;
-	sk->sk_ack_backlog++;
+	sk_acceptq_added(sk);

 	rose_insert_socket(make);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 1ba893b85dad7..1b9809ad77252 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -324,7 +324,7 @@ void sctp_association_free(struct sctp_association *asoc)
 		 * socket.
 		 */
 		if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
-			sk->sk_ack_backlog--;
+			sk_acceptq_removed(sk);
 	}

 	/* Mark as dead, so other users can know this structure is
@@ -1073,7 +1073,7 @@ void sctp_assoc_migrate(struct sctp_association *assoc, struct sock *newsk)

 	/* Decrement the backlog value for a TCP-style socket. */
 	if (sctp_style(oldsk, TCP))
-		oldsk->sk_ack_backlog--;
+		sk_acceptq_removed(oldsk);

 	/* Release references to the old endpoint and the sock. */
 	sctp_endpoint_put(assoc->ep);
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index ea53049d1db66..9d05b2e7bce24 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -164,7 +164,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,

 	/* Increment the backlog value for a TCP-style listening socket. */
 	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
-		sk->sk_ack_backlog++;
+		sk_acceptq_added(sk);
 }

 /* Free the endpoint structure. Delay cleanup until
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c0856e74f44f4..1f4fde4711b61 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -439,7 +439,7 @@ static void vsock_pending_work(struct work_struct *work)

 	if (vsock_is_pending(sk)) {
 		vsock_remove_pending(listener, sk);
-		listener->sk_ack_backlog--;
+		sk_acceptq_removed(listener);
 	} else if (!vsk->rejected) {
 		/* We are not on the pending list and accept() did not reject
 		 * us, so we must have been accepted by our user process.  We
@@ -1299,7 +1299,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
 	err = -listener->sk_err;

 	if (connected) {
-		listener->sk_ack_backlog--;
+		sk_acceptq_removed(listener);

 		lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
 		vconnected = vsock_sk(connected);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index bef8772116ec8..7fa09c5e4625c 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -428,7 +428,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)

 	if (conn_from_host) {
 		new->sk_state = TCP_ESTABLISHED;
-		sk->sk_ack_backlog++;
+		sk_acceptq_added(sk);

 		hvs_addr_init(&vnew->local_addr, if_type);
 		hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index d02c9b41a768e..193f959e51efe 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1066,7 +1066,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
 		return -ENOMEM;
 	}

-	sk->sk_ack_backlog++;
+	sk_acceptq_added(sk);

 	lock_sock_nested(child, SINGLE_DEPTH_NESTING);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 8c9c4ed90fa70..6ba98a1efe2e0 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1098,7 +1098,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
 	}

 	vsock_add_pending(sk, pending);
-	sk->sk_ack_backlog++;
+	sk_acceptq_added(sk);

 	pending->sk_state = TCP_SYN_SENT;
 	vmci_trans(vpending)->produce_size =
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 6aee9f5e8e715..c34f7d0776046 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -891,7 +891,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
 	/* Now attach up the new socket */
 	skb->sk = NULL;
 	kfree_skb(skb);
-	sk->sk_ack_backlog--;
+	sk_acceptq_removed(sk);
 	newsock->state = SS_CONNECTED;
 	rc = 0;
 out2:
@@ -1062,7 +1062,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
 	makex25->calluserdata.cudlength = skb->len;

-	sk->sk_ack_backlog++;
+	sk_acceptq_added(sk);

 	x25_insert_socket(make);
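The point of the conversion is purely mechanical: once every open-coded ++/--
goes through the sock.h helpers, the next patch can annotate one place instead
of 21 call sites. At this stage the helpers are still the plain versions
(quoted from include/net/sock.h as it stands before the following patch):

	static inline void sk_acceptq_removed(struct sock *sk)
	{
		sk->sk_ack_backlog--;
	}

	static inline void sk_acceptq_added(struct sock *sk)
	{
		sk->sk_ack_backlog++;
	}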
From 288efe8606b62d0753ba6722b36ef241877251fd Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 5 Nov 2019 14:11:53 -0800
Subject: [PATCH 5/6] net: annotate lockless accesses to sk->sk_ack_backlog

sk->sk_ack_backlog can be read without any lock being held.
We need to use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing
and/or potential KCSAN warnings.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/net/sock.h  | 6 +++---
 net/ipv4/tcp.c      | 2 +-
 net/ipv4/tcp_diag.c | 2 +-
 net/ipv4/tcp_ipv4.c | 2 +-
 net/ipv6/tcp_ipv6.c | 2 +-
 net/sched/em_meta.c | 2 +-
 net/sctp/diag.c     | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index f2f853439b657..a126784aa7d9b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -859,17 +859,17 @@ static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)

 static inline void sk_acceptq_removed(struct sock *sk)
 {
-	sk->sk_ack_backlog--;
+	WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1);
 }

 static inline void sk_acceptq_added(struct sock *sk)
 {
-	sk->sk_ack_backlog++;
+	WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1);
 }

 static inline bool sk_acceptq_is_full(const struct sock *sk)
 {
-	return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
+	return READ_ONCE(sk->sk_ack_backlog) > sk->sk_max_ack_backlog;
 }

 /*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1dd25189d83f2..68375f7ffdce1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3225,7 +3225,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 		 * tcpi_unacked -> Number of children ready for accept()
 		 * tcpi_sacked -> max backlog
 		 */
-		info->tcpi_unacked = sk->sk_ack_backlog;
+		info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
 		info->tcpi_sacked = sk->sk_max_ack_backlog;
 		return;
 	}
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 549506162ddec..edfbab54c46f4 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,7 +21,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 	struct tcp_info *info = _info;

 	if (inet_sk_state_load(sk) == TCP_LISTEN) {
-		r->idiag_rqueue = sk->sk_ack_backlog;
+		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
 		r->idiag_wqueue = sk->sk_max_ack_backlog;
 	} else if (sk->sk_type == SOCK_STREAM) {
 		const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 899e100a68e6a..92282f98dc822 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2451,7 +2451,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)

 	state = inet_sk_state_load(sk);
 	if (state == TCP_LISTEN)
-		rx_queue = sk->sk_ack_backlog;
+		rx_queue = READ_ONCE(sk->sk_ack_backlog);
 	else
 		/* Because we don't lock the socket,
 		 * we might find a transient negative value.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4804b6dc5e651..81f51335e326f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1891,7 +1891,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)

 	state = inet_sk_state_load(sp);
 	if (state == TCP_LISTEN)
-		rx_queue = sp->sk_ack_backlog;
+		rx_queue = READ_ONCE(sp->sk_ack_backlog);
 	else
 		/* Because we don't lock the socket,
 		 * we might find a transient negative value.
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3177dcb173161..ebb6e2430861d 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -521,7 +521,7 @@ META_COLLECTOR(int_sk_ack_bl)
 		*err = -1;
 		return;
 	}
-	dst->value = sk->sk_ack_backlog;
+	dst->value = READ_ONCE(sk->sk_ack_backlog);
 }

 META_COLLECTOR(int_sk_max_ack_bl)
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 0851166b91759..f873f15407de4 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -425,7 +425,7 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 		r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc);
 		r->idiag_wqueue = infox->asoc->sndbuf_used;
 	} else {
-		r->idiag_rqueue = sk->sk_ack_backlog;
+		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
 		r->idiag_wqueue = sk->sk_max_ack_backlog;
 	}
 	if (infox->sctpinfo)
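One caveat: WRITE_ONCE(x, x + 1) is not an atomic increment, it only rules out
store tearing. Writers still serialize on the listener socket lock; the
annotation exists for the lockless readers. A user-space sketch of what the
annotation does and does not buy (macros simplified as before, names
illustrative):

	#include <stdio.h>

	#define READ_ONCE(x)	 (*(const volatile typeof(x) *)&(x))
	#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&(x) = (v))

	static unsigned int ack_backlog;

	static void acceptq_added(void)
	{
		/* One untorn store, but still a plain read-modify-write:
		 * concurrent writers would need an external lock.
		 */
		WRITE_ONCE(ack_backlog, ack_backlog + 1);
	}

	int main(void)
	{
		acceptq_added();
		printf("backlog=%u\n", READ_ONCE(ack_backlog));
		return 0;
	}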
From 099ecf59f05b5f30f42ebac0ab8cb94f9b18c90c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 5 Nov 2019 14:11:54 -0800
Subject: [PATCH 6/6] net: annotate lockless accesses to sk->sk_max_ack_backlog

sk->sk_max_ack_backlog can be read without any lock being held,
at least in TCP/DCCP cases.

We need to use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing
and/or potential KCSAN warnings.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/net/sock.h              | 2 +-
 net/dccp/proto.c                | 2 +-
 net/ipv4/af_inet.c              | 2 +-
 net/ipv4/inet_connection_sock.c | 2 +-
 net/ipv4/tcp.c                  | 2 +-
 net/ipv4/tcp_diag.c             | 2 +-
 net/sched/em_meta.c             | 2 +-
 net/sctp/diag.c                 | 2 +-
 net/sctp/socket.c               | 4 ++--
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index a126784aa7d9b..d4d3ef5ba0490 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -869,7 +869,7 @@ static inline void sk_acceptq_added(struct sock *sk)

 static inline bool sk_acceptq_is_full(const struct sock *sk)
 {
-	return READ_ONCE(sk->sk_ack_backlog) > sk->sk_max_ack_backlog;
+	return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog);
 }

 /*
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 5bad08dc43161..a52e8ba1ced04 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -944,7 +944,7 @@ int inet_dccp_listen(struct socket *sock, int backlog)
 	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
 		goto out;

-	sk->sk_max_ack_backlog = backlog;
+	WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
 	/* Really, if the socket is already in listen state
 	 * we can only allow the backlog to be adjusted.
 	 */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70f92aaca4110..53de8e00990e2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -208,7 +208,7 @@ int inet_listen(struct socket *sock, int backlog)
 	if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
 		goto out;

-	sk->sk_max_ack_backlog = backlog;
+	WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
 	/* Really, if the socket is already in listen state
 	 * we can only allow the backlog to be adjusted.
 	 */
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index eb30fc1770def..e4c6e8b404906 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -716,7 +716,7 @@ static void reqsk_timer_handler(struct timer_list *t)
 	 * ones are about to clog our table.
 	 */
 	qlen = reqsk_queue_len(queue);
-	if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
+	if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
 		int young = reqsk_queue_len_young(queue) << 1;

 		while (thresh > 2) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 68375f7ffdce1..fb1666440e106 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3226,7 +3226,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 		 * tcpi_sacked -> max backlog
 		 */
 		info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
-		info->tcpi_sacked = sk->sk_max_ack_backlog;
+		info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog);
 		return;
 	}
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index edfbab54c46f4..0d08f9e2d8d03 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -22,7 +22,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,

 	if (inet_sk_state_load(sk) == TCP_LISTEN) {
 		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
-		r->idiag_wqueue = sk->sk_max_ack_backlog;
+		r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
 	} else if (sk->sk_type == SOCK_STREAM) {
 		const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index ebb6e2430861d..d99966a55c84f 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -532,7 +532,7 @@ META_COLLECTOR(int_sk_max_ack_bl)
 		*err = -1;
 		return;
 	}
-	dst->value = sk->sk_max_ack_backlog;
+	dst->value = READ_ONCE(sk->sk_max_ack_backlog);
 }

 META_COLLECTOR(int_sk_prio)
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index f873f15407de4..8a15146faaebd 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -426,7 +426,7 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 		r->idiag_wqueue = infox->asoc->sndbuf_used;
 	} else {
 		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
-		r->idiag_wqueue = sk->sk_max_ack_backlog;
+		r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
 	}
 	if (infox->sctpinfo)
 		sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ffd3262b7a41e..53abb97e0061c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -8376,7 +8376,7 @@ static int sctp_listen_start(struct sock *sk, int backlog)
 		}
 	}

-	sk->sk_max_ack_backlog = backlog;
+	WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
 	return sctp_hash_endpoint(ep);
 }

@@ -8430,7 +8430,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)

 	/* If we are already listening, just update the backlog */
 	if (sctp_sstate(sk, LISTENING))
-		sk->sk_max_ack_backlog = backlog;
+		WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
 	else {
 		err = sctp_listen_start(sk, backlog);
 		if (err)
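Taken together with the previous patch, both sides of the accept-queue check
are now annotated: listen() writes the limit under the socket lock, while SYN
processing on another CPU compares it locklessly. A user-space sketch of the
pairing (struct sock reduced to the two fields involved; field types and
function names illustrative, macros simplified as before):

	#include <stdbool.h>
	#include <stdio.h>

	#define READ_ONCE(x)	 (*(const volatile typeof(x) *)&(x))
	#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&(x) = (v))

	struct sock { unsigned int sk_ack_backlog, sk_max_ack_backlog; };

	/* listen() path: runs with the socket lock held. */
	static void set_max_backlog(struct sock *sk, unsigned int backlog)
	{
		WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
	}

	/* SYN processing: may run on another CPU without the lock. */
	static bool acceptq_is_full(const struct sock *sk)
	{
		return READ_ONCE(sk->sk_ack_backlog) >
		       READ_ONCE(sk->sk_max_ack_backlog);
	}

	int main(void)
	{
		struct sock sk = { .sk_ack_backlog = 3 };

		set_max_backlog(&sk, 128);
		printf("full=%d\n", acceptq_is_full(&sk));
		return 0;
	}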