diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ef4a69865737c..c383630d3f065 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -282,7 +282,6 @@ struct ipv6_pinfo { __be32 rcv_flowinfo; __u32 dst_cookie; - __u32 rx_dst_cookie; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 40296ed976a97..4202c609bb0b0 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -130,7 +130,8 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - WRITE_ONCE(sk->sk_napi_id, skb->napi_id); + if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id)) + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif sk_rx_queue_set(sk, skb); } diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 89163ef8cf4be..9e1111f5915bd 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -207,11 +207,10 @@ struct inet_sock { __be32 inet_saddr; __s16 uc_ttl; __u16 cmsg_flags; + struct ip_options_rcu __rcu *inet_opt; __be16 inet_sport; __u16 inet_id; - struct ip_options_rcu __rcu *inet_opt; - int rx_dst_ifindex; __u8 tos; __u8 min_ttl; __u8 mc_ttl; diff --git a/include/net/ip.h b/include/net/ip.h index cf229a5311942..b71e88507c4a0 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -750,6 +751,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); +DECLARE_STATIC_KEY_FALSE(ip4_min_ttl); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f2d0ecc257bb2..c19bf51ded1d0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1092,6 +1092,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, /* * socket options (ipv6_sockglue.c) */ +DECLARE_STATIC_KEY_FALSE(ip6_min_hopcount); int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); diff --git a/include/net/sock.h b/include/net/sock.h index 596ba85611bc7..b76be30674efc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -259,6 +259,8 @@ struct bpf_local_storage; * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux + * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst + * @sk_rx_dst_cookie: cookie for @sk_rx_dst * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy @@ -430,6 +432,9 @@ struct sock { struct xfrm_policy __rcu *sk_policy[2]; #endif struct dst_entry *sk_rx_dst; + int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; + struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; int sk_sndbuf; @@ -1911,10 +1916,8 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) if (skb_rx_queue_recorded(skb)) { u16 rx_queue = skb_get_rx_queue(skb); - if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING)) - return; - - sk->sk_rx_queue_mapping = rx_queue; + if (unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue)) + WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue); } #endif } @@ -1922,15 +1925,19 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) static inline void sk_rx_queue_clear(struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING - sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING; + WRITE_ONCE(sk->sk_rx_queue_mapping, NO_QUEUE_MAPPING); #endif } static inline int sk_rx_queue_get(const struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING - if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING) - return sk->sk_rx_queue_mapping; + if (sk) { + int res = READ_ONCE(sk->sk_rx_queue_mapping); + + if (res != NO_QUEUE_MAPPING) + return res; + } #endif return -1; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b297bb28556ec..38d29b175ca66 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -886,6 +886,8 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname, return ip_mc_leave_group(sk, &mreq); } +DEFINE_STATIC_KEY_FALSE(ip4_min_ttl); + static int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -1352,7 +1354,14 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, goto e_inval; if (val < 0 || val > 255) goto e_inval; - inet->min_ttl = val; + + if (val) + static_branch_enable(&ip4_min_ttl); + + /* tcp_v4_err() and tcp_v4_rcv() might read min_ttl + * while we are changint it. + */ + WRITE_ONCE(inet->min_ttl, val); break; default: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5e6d8cb82ce5d..13d868c432845 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -508,9 +508,12 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; - if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto out; + if (static_branch_unlikely(&ip4_min_ttl)) { + /* min_ttl can be changed concurrently from do_ip_setsockopt() */ + if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } } tp = tcp_sk(sk); @@ -1703,7 +1706,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); if (dst) { - if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + if (sk->sk_rx_dst_ifindex != skb->skb_iif || !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check, dst, 0)) { dst_release(dst); @@ -1788,7 +1791,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) if (dst) dst = dst_check(dst, 0); if (dst && - inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) + sk->sk_rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } @@ -2068,9 +2071,13 @@ int tcp_v4_rcv(struct sk_buff *skb) return 0; } } - if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto discard_and_relse; + + if (static_branch_unlikely(&ip4_min_ttl)) { + /* min_ttl can be changed concurrently from do_ip_setsockopt() */ + if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; + } } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) @@ -2195,7 +2202,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) if (dst && dst_hold_safe(dst)) { sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + sk->sk_rx_dst_ifindex = skb->skb_iif; } } EXPORT_SYMBOL(inet_sk_rx_dst_set); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index e4bdb09c55867..41efca817db42 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -55,6 +55,8 @@ struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); +DEFINE_STATIC_KEY_FALSE(ip6_min_hopcount); + int ip6_ra_control(struct sock *sk, int sel) { struct ip6_ra_chain *ra, *new_ra, **rap; @@ -950,7 +952,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, goto e_inval; if (val < 0 || val > 255) goto e_inval; - np->min_hopcount = val; + + if (val) + static_branch_enable(&ip6_min_hopcount); + + /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount + * while we are changing it. + */ + WRITE_ONCE(np->min_hopcount, val); retv = 0; break; case IPV6_DONTFRAG: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1ef27cd7dbff7..c678e778c1fb8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -108,8 +108,8 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) const struct rt6_info *rt = (const struct rt6_info *)dst; sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + sk->sk_rx_dst_ifindex = skb->skb_iif; + sk->sk_rx_dst_cookie = rt6_get_cookie(rt); } } @@ -414,9 +414,12 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_CLOSE) goto out; - if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto out; + if (static_branch_unlikely(&ip6_min_hopcount)) { + /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ + if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } } tp = tcp_sk(sk); @@ -569,7 +572,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, static void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree(inet_rsk(req)->ipv6_opt); - kfree_skb(inet_rsk(req)->pktopts); + consume_skb(inet_rsk(req)->pktopts); } #ifdef CONFIG_TCP_MD5SIG @@ -1509,9 +1512,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); if (dst) { - if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + if (sk->sk_rx_dst_ifindex != skb->skb_iif || INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, - dst, np->rx_dst_cookie) == NULL) { + dst, sk->sk_rx_dst_cookie) == NULL) { dst_release(dst); sk->sk_rx_dst = NULL; } @@ -1591,7 +1594,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } - kfree_skb(opt_skb); + consume_skb(opt_skb); return 0; } @@ -1726,9 +1729,13 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) return 0; } } - if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto discard_and_relse; + + if (static_branch_unlikely(&ip6_min_hopcount)) { + /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ + if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; + } } if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) @@ -1872,9 +1879,9 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) - dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie); + dst = dst_check(dst, sk->sk_rx_dst_cookie); if (dst && - inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) + sk->sk_rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8d785232b4796..14a94cddcf0bc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -884,7 +884,7 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) if (udp_sk_rx_dst_set(sk, dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + sk->sk_rx_dst_cookie = rt6_get_cookie(rt); } } @@ -1073,7 +1073,7 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) dst = READ_ONCE(sk->sk_rx_dst); if (dst) - dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + dst = dst_check(dst, sk->sk_rx_dst_cookie); if (dst) { /* set noref for now. * any place which wants to hold dst has to call