Skip to content

Commit

Permalink
Merge branch 'inet-more-data-race-fixes'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
inet: more data-race fixes

This series fixes some existing data-races on inet fields:

inet->mc_ttl, inet->pmtudisc, inet->tos, inet->uc_index,
inet->mc_index and inet->mc_addr.

While fixing them, we convert eight socket options
to lockless implementations.

v2: Addressed David Ahern's feedback on ("inet: implement lockless IP_TOS").
    Added David's Reviewed-by: tag on other patches.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Oct 1, 2023
2 parents 2be825e + 0271592 commit fbff653
Show file tree
Hide file tree
Showing 15 changed files with 150 additions and 157 deletions.
16 changes: 9 additions & 7 deletions include/net/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,

/*
 * get_rttos - select the TOS value and mask it with RT_TOS().
 *
 * Returns RT_TOS(ipc->tos) when ipc->tos is not -1; otherwise falls back to
 * RT_TOS() of the socket's inet->tos.
 *
 * NOTE(review): this is a rendered diff hunk without +/- markers, so two
 * return statements appear. The first presumably is the removed line and the
 * second (using READ_ONCE()) the added replacement - confirm against the tree.
 */
static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
{
/* plain read of inet->tos */
return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos);
/* same expression, with inet->tos read through READ_ONCE() */
return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos));
}

/* datagram.c */
Expand Down Expand Up @@ -434,19 +434,22 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)

/*
 * ip_sk_accept_pmtu - test the socket's pmtudisc setting.
 *
 * Returns true unless inet_sk(sk)->pmtudisc is IP_PMTUDISC_INTERFACE or
 * IP_PMTUDISC_OMIT.
 *
 * NOTE(review): rendered diff hunk without +/- markers. The first return
 * (two lines) presumably is the removed code and the READ_ONCE() version
 * after it the added replacement - confirm against the tree.
 */
static inline bool ip_sk_accept_pmtu(const struct sock *sk)
{
/* reads inet_sk(sk)->pmtudisc twice, without READ_ONCE() */
return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE &&
inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT;
/* one READ_ONCE() snapshot, so both comparisons test the same value */
u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc);

return pmtudisc != IP_PMTUDISC_INTERFACE &&
pmtudisc != IP_PMTUDISC_OMIT;
}

/*
 * ip_sk_use_pmtu - test the socket's pmtudisc setting.
 *
 * Returns true when inet_sk(sk)->pmtudisc compares below IP_PMTUDISC_PROBE.
 *
 * NOTE(review): rendered diff hunk without +/- markers. The first return
 * presumably is the removed line and the READ_ONCE() one the added
 * replacement - confirm against the tree.
 */
static inline bool ip_sk_use_pmtu(const struct sock *sk)
{
/* plain read of pmtudisc */
return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
/* same comparison, with pmtudisc read through READ_ONCE() */
return READ_ONCE(inet_sk(sk)->pmtudisc) < IP_PMTUDISC_PROBE;
}

/*
 * ip_sk_ignore_df - test the socket's pmtudisc setting.
 *
 * Returns true when inet_sk(sk)->pmtudisc is below IP_PMTUDISC_DO or equal
 * to IP_PMTUDISC_OMIT.
 *
 * NOTE(review): rendered diff hunk without +/- markers. The first return
 * (two lines) presumably is the removed code and the READ_ONCE() version
 * after it the added replacement - confirm against the tree.
 */
static inline bool ip_sk_ignore_df(const struct sock *sk)
{
/* reads inet_sk(sk)->pmtudisc twice, without READ_ONCE() */
return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO ||
inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT;
/* one READ_ONCE() snapshot, so both comparisons test the same value */
u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc);

return pmtudisc < IP_PMTUDISC_DO || pmtudisc == IP_PMTUDISC_OMIT;
}

static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
Expand Down Expand Up @@ -807,6 +810,5 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val);
void ip_sock_set_pktinfo(struct sock *sk);
void ip_sock_set_recverr(struct sock *sk);
void ip_sock_set_tos(struct sock *sk, int val);
void __ip_sock_set_tos(struct sock *sk, int val);

#endif /* _IP_H */
4 changes: 2 additions & 2 deletions include/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

#define RTO_ONLINK 0x01

/*
 * RT_CONN_FLAGS(sk): RT_TOS() of the socket's inet tos field, OR-ed with
 * sock_flag(sk, SOCK_LOCALROUTE).
 *
 * NOTE(review): rendered diff hunk without +/- markers, so the macro appears
 * twice. The first definition presumably is the removed line and the second
 * (READ_ONCE() around the tos read) the added replacement - confirm against
 * the tree.
 */
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE))
/* Same combination, but with a caller-supplied tos instead of the socket's. */
#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))

static inline __u8 ip_sock_rt_scope(const struct sock *sk)
Expand All @@ -50,7 +50,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk)

/*
 * ip_sock_rt_tos - return RT_TOS() applied to the socket's inet tos field.
 *
 * NOTE(review): rendered diff hunk without +/- markers. The first return
 * presumably is the removed line and the READ_ONCE() one the added
 * replacement - confirm against the tree.
 */
static inline __u8 ip_sock_rt_tos(const struct sock *sk)
{
/* plain read of inet_sk(sk)->tos */
return RT_TOS(inet_sk(sk)->tos);
/* same value, with tos read through READ_ONCE() */
return RT_TOS(READ_ONCE(inet_sk(sk)->tos));
}

struct ip_tunnel_info;
Expand Down
2 changes: 1 addition & 1 deletion net/dccp/ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
rcu_dereference(ireq->ireq_opt),
inet_sk(sk)->tos);
READ_ONCE(inet_sk(sk)->tos));
rcu_read_unlock();
err = net_xmit_eval(err);
}
Expand Down
6 changes: 3 additions & 3 deletions net/ipv4/datagram.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
saddr = inet->inet_saddr;
if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
if (!oif || netif_index_is_l3_master(sock_net(sk), oif))
oif = inet->mc_index;
oif = READ_ONCE(inet->mc_index);
if (!saddr)
saddr = inet->mc_addr;
saddr = READ_ONCE(inet->mc_addr);
} else if (!oif) {
oif = inet->uc_index;
oif = READ_ONCE(inet->uc_index);
}
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif,
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/inet_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
* hence this needs to be included regardless of socket family.
*/
if (ext & (1 << (INET_DIAG_TOS - 1)))
if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0)
goto errout;

#if IS_ENABLED(CONFIG_IPV6)
Expand Down
13 changes: 7 additions & 6 deletions net/ipv4/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ EXPORT_SYMBOL(__ip_queue_xmit);

/*
 * ip_queue_xmit - wrapper that forwards sk, skb and fl to __ip_queue_xmit(),
 * passing the socket's inet tos field as the fourth argument, and returns
 * its result.
 *
 * NOTE(review): rendered diff hunk without +/- markers. The first return
 * presumably is the removed line and the READ_ONCE() one the added
 * replacement - confirm against the tree.
 */
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
{
/* plain read of inet_sk(sk)->tos */
return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
/* same call, with tos read through READ_ONCE() */
return __ip_queue_xmit(sk, skb, fl, READ_ONCE(inet_sk(sk)->tos));
}
EXPORT_SYMBOL(ip_queue_xmit);

Expand Down Expand Up @@ -1387,8 +1387,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
struct ip_options *opt = NULL;
struct rtable *rt = (struct rtable *)cork->dst;
struct iphdr *iph;
u8 pmtudisc, ttl;
__be16 df = 0;
__u8 ttl;

skb = __skb_dequeue(queue);
if (!skb)
Expand Down Expand Up @@ -1418,8 +1418,9 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
/* DF bit is set when we want to see DF on outgoing frames.
 * If ignore_df is set too, we still allow this frame to be fragmented
 * locally. */
if (inet->pmtudisc == IP_PMTUDISC_DO ||
inet->pmtudisc == IP_PMTUDISC_PROBE ||
pmtudisc = READ_ONCE(inet->pmtudisc);
if (pmtudisc == IP_PMTUDISC_DO ||
pmtudisc == IP_PMTUDISC_PROBE ||
(skb->len <= dst_mtu(&rt->dst) &&
ip_dont_fragment(sk, &rt->dst)))
df = htons(IP_DF);
Expand All @@ -1430,14 +1431,14 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
if (cork->ttl != 0)
ttl = cork->ttl;
else if (rt->rt_type == RTN_MULTICAST)
ttl = inet->mc_ttl;
ttl = READ_ONCE(inet->mc_ttl);
else
ttl = ip_select_ttl(inet, &rt->dst);

iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
iph->tos = (cork->tos != -1) ? cork->tos : inet->tos;
iph->tos = (cork->tos != -1) ? cork->tos : READ_ONCE(inet->tos);
iph->frag_off = df;
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
Expand Down
Loading

0 comments on commit fbff653

Please sign in to comment.