Skip to content

Commit

Permalink
ipv6: make lookups simpler and faster
Browse files Browse the repository at this point in the history
TCP listener refactoring, part 4 :

To speed up inet lookups, we moved IPv4 addresses from inet to struct
sock_common.

Now it is time to do the same for IPv6, because it permits us to have fast
lookups for all kinds of sockets, including the upcoming SYN_RECV ones.

Getting IPv6 addresses in TCP lookups currently requires two extra cache
lines, plus a dereference (and memory stall).

inet6_sk(sk) does the dereference of inet_sk(__sk)->pinet6

This patch is way bigger than its IPv4 counterpart, because for IPv4
we could add aliases (inet_daddr, inet_rcv_saddr), while for IPv6
that is not easily doable.

inet6_sk(sk)->daddr becomes sk->sk_v6_daddr
inet6_sk(sk)->rcv_saddr becomes sk->sk_v6_rcv_saddr

Timewait sockets also have tw->tw_v6_daddr & tw->tw_v6_rcv_saddr
at the same offsets.

We get rid of INET6_TW_MATCH() as INET6_MATCH() is now the generic
macro.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eric Dumazet authored and David S. Miller committed Oct 9, 2013
1 parent 05dbc7b commit efe4208
Show file tree
Hide file tree
Showing 35 changed files with 213 additions and 288 deletions.
46 changes: 6 additions & 40 deletions include/linux/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@ struct ipv6_fl_socklist;
*/
struct ipv6_pinfo {
struct in6_addr saddr;
struct in6_addr rcv_saddr;
struct in6_addr daddr;
struct in6_pktinfo sticky_pktinfo;
const struct in6_addr *daddr_cache;
#ifdef CONFIG_IPV6_SUBTREES
Expand Down Expand Up @@ -256,22 +254,10 @@ struct tcp6_sock {

extern int inet6_sk_rebuild_header(struct sock *sk);

struct inet6_timewait_sock {
struct in6_addr tw_v6_daddr;
struct in6_addr tw_v6_rcv_saddr;
};

struct tcp6_timewait_sock {
struct tcp_timewait_sock tcp6tw_tcp;
struct inet6_timewait_sock tcp6tw_inet6;
};

static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk)
{
return (struct inet6_timewait_sock *)(((u8 *)sk) +
inet_twsk(sk)->tw_ipv6_offset);
}

#if IS_ENABLED(CONFIG_IPV6)
static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
{
Expand Down Expand Up @@ -321,21 +307,11 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
#define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only)
#define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk))

static inline u16 inet6_tw_offset(const struct proto *prot)
{
return prot->twsk_prot->twsk_obj_size -
sizeof(struct inet6_timewait_sock);
}

static inline struct in6_addr *__inet6_rcv_saddr(const struct sock *sk)
static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk)
{
return likely(sk->sk_state != TCP_TIME_WAIT) ?
&inet6_sk(sk)->rcv_saddr : &inet6_twsk(sk)->tw_v6_rcv_saddr;
}

static inline struct in6_addr *inet6_rcv_saddr(const struct sock *sk)
{
return sk->sk_family == AF_INET6 ? __inet6_rcv_saddr(sk) : NULL;
if (sk->sk_family == AF_INET6)
return &sk->sk_v6_rcv_saddr;
return NULL;
}

static inline int inet_v6_ipv6only(const struct sock *sk)
Expand Down Expand Up @@ -363,7 +339,6 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
return NULL;
}

#define __inet6_rcv_saddr(__sk) NULL
#define inet6_rcv_saddr(__sk) NULL
#define tcp_twsk_ipv6only(__sk) 0
#define inet_v6_ipv6only(__sk) 0
Expand All @@ -372,19 +347,10 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_family == AF_INET6) && \
ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \
ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \
ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \
(!(__sk)->sk_bound_dev_if || \
((__sk)->sk_bound_dev_if == (__dif))) && \
net_eq(sock_net(__sk), (__net)))

#define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_family == AF_INET6) && \
ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr)) && \
ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \
(!(__sk)->sk_bound_dev_if || \
((__sk)->sk_bound_dev_if == (__dif))) && \
net_eq(sock_net(__sk), (__net)))

#endif /* _IPV6_H */
5 changes: 2 additions & 3 deletions include/net/inet6_hashtables.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@ static inline unsigned int inet6_ehashfn(struct net *net,
static inline int inet6_sk_ehashfn(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
const struct in6_addr *laddr = &np->rcv_saddr;
const struct in6_addr *faddr = &np->daddr;
const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
const struct in6_addr *faddr = &sk->sk_v6_daddr;
const __u16 lport = inet->inet_num;
const __be16 fport = inet->inet_dport;
struct net *net = sock_net(sk);
Expand Down
4 changes: 3 additions & 1 deletion include/net/inet_timewait_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ struct inet_timewait_sock {
#define tw_prot __tw_common.skc_prot
#define tw_net __tw_common.skc_net
#define tw_daddr __tw_common.skc_daddr
#define tw_v6_daddr __tw_common.skc_v6_daddr
#define tw_rcv_saddr __tw_common.skc_rcv_saddr
#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
#define tw_dport __tw_common.skc_dport
#define tw_num __tw_common.skc_num

Expand All @@ -133,7 +135,7 @@ struct inet_timewait_sock {
tw_transparent : 1,
tw_pad : 6, /* 6 bits hole */
tw_tos : 8,
tw_ipv6_offset : 16;
tw_pad2 : 16 /* 16 bits hole */
kmemcheck_bitfield_end(flags);
u32 tw_ttd;
struct inet_bind_bucket *tw_tb;
Expand Down
2 changes: 1 addition & 1 deletion include/net/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
struct ipv6_pinfo *np = inet6_sk(sk);

memset(&np->saddr, 0, sizeof(np->saddr));
memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr));
}
#endif
}
Expand Down
2 changes: 1 addition & 1 deletion include/net/ip6_checksum.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);

__tcp_v6_send_check(skb, &np->saddr, &np->daddr);
__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
}

int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto);
Expand Down
9 changes: 9 additions & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,12 @@ struct sock_common {
#ifdef CONFIG_NET_NS
struct net *skc_net;
#endif

#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr skc_v6_daddr;
struct in6_addr skc_v6_rcv_saddr;
#endif

/*
* fields between dontcopy_begin/dontcopy_end
* are not copied in sock_copy()
Expand Down Expand Up @@ -314,6 +320,9 @@ struct sock {
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
#define sk_net __sk_common.skc_net
#define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr

socket_lock_t sk_lock;
struct sk_buff_head sk_receive_queue;
/*
Expand Down
24 changes: 12 additions & 12 deletions net/dccp/ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
struct dccp_hdr *dh = dccp_hdr(skb);

dccp_csum_outgoing(skb);
dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
}

static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
Expand Down Expand Up @@ -467,11 +467,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,

memcpy(newnp, np, sizeof(struct ipv6_pinfo));

ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);

ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);

newnp->rcv_saddr = newnp->saddr;
newsk->sk_v6_rcv_saddr = newnp->saddr;

inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
newsk->sk_backlog_rcv = dccp_v4_do_rcv;
Expand Down Expand Up @@ -538,9 +538,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,

memcpy(newnp, np, sizeof(struct ipv6_pinfo));

newnp->daddr = ireq6->rmt_addr;
newsk->sk_v6_daddr = ireq6->rmt_addr;
newnp->saddr = ireq6->loc_addr;
newnp->rcv_saddr = ireq6->loc_addr;
newsk->sk_v6_rcv_saddr = ireq6->loc_addr;
newsk->sk_bound_dev_if = ireq6->iif;

/* Now IPv6 options...
Expand Down Expand Up @@ -885,7 +885,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
return -EINVAL;
}

np->daddr = usin->sin6_addr;
sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;

/*
Expand Down Expand Up @@ -915,16 +915,16 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
goto failure;
}
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr);
ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr);

return err;
}

if (!ipv6_addr_any(&np->rcv_saddr))
saddr = &np->rcv_saddr;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
saddr = &sk->sk_v6_rcv_saddr;

fl6.flowi6_proto = IPPROTO_DCCP;
fl6.daddr = np->daddr;
fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = saddr ? *saddr : np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.fl6_dport = usin->sin6_port;
Expand All @@ -941,7 +941,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,

if (saddr == NULL) {
saddr = &fl6.saddr;
np->rcv_saddr = *saddr;
sk->sk_v6_rcv_saddr = *saddr;
}

/* set the source address */
Expand All @@ -963,7 +963,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
goto late_failure;

dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
np->daddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport);
err = dccp_connect(sk);
Expand Down
1 change: 0 additions & 1 deletion net/dccp/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ struct dccp6_request_sock {

struct dccp6_timewait_sock {
struct inet_timewait_sock inet;
struct inet6_timewait_sock tw6;
};

#endif /* _DCCP_IPV6_H */
7 changes: 2 additions & 5 deletions net/dccp/minisocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
struct inet6_timewait_sock *tw6;

tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
tw6 = inet6_twsk((struct sock *)tw);
tw6->tw_v6_daddr = np->daddr;
tw6->tw_v6_rcv_saddr = np->rcv_saddr;
tw->tw_v6_daddr = sk->sk_v6_daddr;
tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_ipv6only = np->ipv6only;
}
#endif
Expand Down
35 changes: 15 additions & 20 deletions net/ipv4/inet_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,

#if IS_ENABLED(CONFIG_IPV6)
if (r->idiag_family == AF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);

*(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
*(struct in6_addr *)r->id.idiag_dst = np->daddr;
*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;

if (ext & (1 << (INET_DIAG_TCLASS - 1)))
if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0)
if (nla_put_u8(skb, INET_DIAG_TCLASS,
inet6_sk(sk)->tclass) < 0)
goto errout;
}
#endif
Expand Down Expand Up @@ -255,11 +255,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
r->idiag_inode = 0;
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
const struct inet6_timewait_sock *tw6 =
inet6_twsk((struct sock *)tw);

*(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
*(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
*(struct in6_addr *)r->id.idiag_src = tw->tw_v6_rcv_saddr;
*(struct in6_addr *)r->id.idiag_dst = tw->tw_v6_daddr;
}
#endif

Expand All @@ -273,10 +270,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
const struct nlmsghdr *unlh)
{
if (sk->sk_state == TCP_TIME_WAIT)
return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
skb, r, portid, seq, nlmsg_flags,
unlh);
return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh);
return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq,
nlmsg_flags, unlh);

return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
nlmsg_flags, unlh);
}

int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
Expand Down Expand Up @@ -489,10 +487,9 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
if (entry.family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);

entry.saddr = np->rcv_saddr.s6_addr32;
entry.daddr = np->daddr.s6_addr32;
entry.saddr = sk->sk_v6_rcv_saddr.s6_addr32;
entry.daddr = sk->sk_v6_daddr.s6_addr32;
} else
#endif
{
Expand Down Expand Up @@ -649,10 +646,8 @@ static int inet_twsk_diag_dump(struct sock *sk,
entry.family = tw->tw_family;
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
struct inet6_timewait_sock *tw6 =
inet6_twsk((struct sock *)tw);
entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
entry.daddr = tw6->tw_v6_daddr.s6_addr32;
entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32;
entry.daddr = tw->tw_v6_daddr.s6_addr32;
} else
#endif
{
Expand Down
15 changes: 7 additions & 8 deletions net/ipv4/ping.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,15 +202,14 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6) &&
sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);

pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
(int) isk->inet_num,
&inet6_sk(sk)->rcv_saddr,
&sk->sk_v6_rcv_saddr,
sk->sk_bound_dev_if);

if (!ipv6_addr_any(&np->rcv_saddr) &&
!ipv6_addr_equal(&np->rcv_saddr,
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
!ipv6_addr_equal(&sk->sk_v6_rcv_saddr,
&ipv6_hdr(skb)->daddr))
continue;
#endif
Expand Down Expand Up @@ -362,7 +361,7 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
} else if (saddr->sa_family == AF_INET6) {
struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
struct ipv6_pinfo *np = inet6_sk(sk);
np->rcv_saddr = np->saddr = addr->sin6_addr;
sk->sk_v6_rcv_saddr = np->saddr = addr->sin6_addr;
#endif
}
}
Expand All @@ -376,7 +375,7 @@ static void ping_clear_saddr(struct sock *sk, int dif)
#if IS_ENABLED(CONFIG_IPV6)
} else if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr));
memset(&np->saddr, 0, sizeof(np->saddr));
#endif
}
Expand Down Expand Up @@ -418,7 +417,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
err = 0;
if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) ||
(sk->sk_family == AF_INET6 &&
!ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)))
!ipv6_addr_any(&sk->sk_v6_rcv_saddr)))
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;

if (snum)
Expand All @@ -429,7 +428,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)

#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr));
memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
#endif

sk_dst_reset(sk);
Expand Down
Loading

0 comments on commit efe4208

Please sign in to comment.