Skip to content

Commit

Permalink
Merge branch 'net-l3mdev-Support-for-sockets-bound-to-enslaved-device'
Browse files Browse the repository at this point in the history
David Ahern says:

====================
net: l3mdev: Support for sockets bound to enslaved device

A missing piece to the VRF puzzle is the ability to bind sockets to
devices enslaved to a VRF. This patch set adds the enslaved device
index, sdif, to IPv4 and IPv6 socket lookups. The end result for users
is the following scope options for services:

1. "global" services - sockets not bound to any device

   Allows 1 service to work across all network interfaces with
   connected sockets bound to the VRF the connection originates
   (Requires net.ipv4.tcp_l3mdev_accept=1 for TCP and
    net.ipv4.udp_l3mdev_accept=1 for UDP)

2. "VRF" local services - sockets bound to a VRF

   Sockets work across all network interfaces enslaved to a VRF but
   are limited to just the one VRF.

3. "device" services - sockets bound to a specific network interface

   Service works only through the one specific interface.

v3
- convert __inet_lookup_established in dccp_v4_err; missed in v2

v2
- remove sk_lookup struct and add sdif as an argument to existing
  functions

Changes since RFC:
- no significant logic changes; mainly whitespace cleanups
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 7, 2017
2 parents 46d4b68 + 5108ab4 commit 9bcb5a5
Show file tree
Hide file tree
Showing 23 changed files with 228 additions and 135 deletions.
3 changes: 2 additions & 1 deletion include/linux/igmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
struct ip_msfilter __user *optval, int __user *optlen);
extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
struct group_filter __user *optval, int __user *optlen);
extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif);
extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt,
int dif, int sdif);
extern void ip_mc_init_dev(struct in_device *);
extern void ip_mc_destroy_dev(struct in_device *);
extern void ip_mc_up(struct in_device *);
Expand Down
10 changes: 10 additions & 0 deletions include/linux/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,16 @@ static inline bool inet6_is_jumbogram(const struct sk_buff *skb)
return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM);
}

/* can not be used in TCP layer after tcp_v6_fill_cb */
static inline int inet6_sdif(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
if (skb && ipv6_l3mdev_skb(IP6CB(skb)->flags))
return IP6CB(skb)->iif;
#endif
return 0;
}

/* can not be used in TCP layer after tcp_v6_fill_cb */
static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
{
Expand Down
22 changes: 13 additions & 9 deletions include/net/inet6_hashtables.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,17 @@ struct sock *__inet6_lookup_established(struct net *net,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum, const int dif);
const u16 hnum, const int dif,
const int sdif);

struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
const unsigned short hnum, const int dif);
const unsigned short hnum,
const int dif, const int sdif);

static inline struct sock *__inet6_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
Expand All @@ -66,24 +68,25 @@ static inline struct sock *__inet6_lookup(struct net *net,
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum,
const int dif,
const int dif, const int sdif,
bool *refcounted)
{
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
sport, daddr, hnum, dif);
sport, daddr, hnum,
dif, sdif);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
daddr, hnum, dif);
daddr, hnum, dif, sdif);
}

static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be16 sport,
const __be16 dport,
int iif,
int iif, int sdif,
bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
Expand All @@ -95,7 +98,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
doff, &ipv6_hdr(skb)->saddr, sport,
&ipv6_hdr(skb)->daddr, ntohs(dport),
iif, refcounted);
iif, sdif, refcounted);
}

struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
Expand All @@ -107,13 +110,14 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
int inet6_hash(struct sock *sk);
#endif /* IS_ENABLED(CONFIG_IPV6) */

#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \
#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_family == AF_INET6) && \
ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \
ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \
(!(__sk)->sk_bound_dev_if || \
((__sk)->sk_bound_dev_if == (__dif))) && \
((__sk)->sk_bound_dev_if == (__dif)) || \
((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))

#endif /* _INET6_HASHTABLES_H */
31 changes: 17 additions & 14 deletions include/net/inet_hashtables.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,16 +221,16 @@ struct sock *__inet_lookup_listener(struct net *net,
const __be32 saddr, const __be16 sport,
const __be32 daddr,
const unsigned short hnum,
const int dif);
const int dif, const int sdif);

static inline struct sock *inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
__be32 daddr, __be16 dport, int dif, int sdif)
{
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
daddr, ntohs(dport), dif);
daddr, ntohs(dport), dif, sdif);
}

/* Socket demux engine toys. */
Expand Down Expand Up @@ -262,22 +262,24 @@ static inline struct sock *inet_lookup_listener(struct net *net,
(((__force __u64)(__be32)(__daddr)) << 32) | \
((__force __u64)(__be32)(__saddr)))
#endif /* __BIG_ENDIAN */
#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_addrpair == (__cookie)) && \
(!(__sk)->sk_bound_dev_if || \
((__sk)->sk_bound_dev_if == (__dif))) && \
((__sk)->sk_bound_dev_if == (__dif)) || \
((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#else /* 32-bit arch */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
const int __name __deprecated __attribute__((unused))

#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_daddr == (__saddr)) && \
((__sk)->sk_rcv_saddr == (__daddr)) && \
(!(__sk)->sk_bound_dev_if || \
((__sk)->sk_bound_dev_if == (__dif))) && \
((__sk)->sk_bound_dev_if == (__dif)) || \
((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#endif /* 64-bit arch */

Expand All @@ -288,7 +290,7 @@ struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
const int dif);
const int dif, const int sdif);

static inline struct sock *
inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
Expand All @@ -297,28 +299,28 @@ static inline struct sock *
const int dif)
{
return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
ntohs(dport), dif);
ntohs(dport), dif, 0);
}

static inline struct sock *__inet_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
const int dif,
const int dif, const int sdif,
bool *refcounted)
{
u16 hnum = ntohs(dport);
struct sock *sk;

sk = __inet_lookup_established(net, hashinfo, saddr, sport,
daddr, hnum, dif);
daddr, hnum, dif, sdif);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
sport, daddr, hnum, dif);
sport, daddr, hnum, dif, sdif);
}

static inline struct sock *inet_lookup(struct net *net,
Expand All @@ -332,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net,
bool refcounted;

sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
dport, dif, &refcounted);
dport, dif, 0, &refcounted);

if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
Expand All @@ -344,6 +346,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
int doff,
const __be16 sport,
const __be16 dport,
const int sdif,
bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
Expand All @@ -355,7 +358,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,

return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
iph->daddr, dport, inet_iif(skb), sdif,
refcounted);
}

Expand Down
10 changes: 10 additions & 0 deletions include/net/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ struct ipcm_cookie {
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
#define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))

/* return enslaved device index if relevant */
static inline int inet_sdif(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
return IPCB(skb)->iif;
#endif
return 0;
}

struct ip_ra_chain {
struct ip_ra_chain __rcu *next;
struct sock *sk;
Expand Down
2 changes: 1 addition & 1 deletion include/net/raw.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ extern struct proto raw_prot;
extern struct raw_hashinfo raw_v4_hashinfo;
struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
unsigned short num, __be32 raddr,
__be32 laddr, int dif);
__be32 laddr, int dif, int sdif);

int raw_abort(struct sock *sk, int err);
void raw_icmp_error(struct sk_buff *, int, u32);
Expand Down
2 changes: 1 addition & 1 deletion include/net/rawv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
extern struct raw_hashinfo raw_v6_hashinfo;
struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
unsigned short num, const struct in6_addr *loc_addr,
const struct in6_addr *rmt_addr, int dif);
const struct in6_addr *rmt_addr, int dif, int sdif);

int raw_abort(struct sock *sk, int err);

Expand Down
20 changes: 20 additions & 0 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,16 @@ static inline int tcp_v6_iif(const struct sk_buff *skb)

return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
}

/* TCP_SKB_CB reference means this can not be used from early demux */
static inline int tcp_v6_sdif(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags))
return TCP_SKB_CB(skb)->header.h6.iif;
#endif
return 0;
}
#endif

/* TCP_SKB_CB reference means this can not be used from early demux */
Expand All @@ -840,6 +850,16 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
return false;
}

/* TCP_SKB_CB reference means this can not be used from early demux */
static inline int tcp_v4_sdif(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
return TCP_SKB_CB(skb)->header.h4.iif;
#endif
return 0;
}

/* Due to TSO, an SKB can be composed of multiple actual
* packets. To keep these tracked properly, we use this.
*/
Expand Down
4 changes: 2 additions & 2 deletions include/net/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif);
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif,
__be32 daddr, __be16 dport, int dif, int sdif,
struct udp_table *tbl, struct sk_buff *skb);
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport);
Expand All @@ -298,7 +298,7 @@ struct sock *udp6_lib_lookup(struct net *net,
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, struct udp_table *tbl,
int dif, int sdif, struct udp_table *tbl,
struct sk_buff *skb);
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport);
Expand Down
4 changes: 2 additions & 2 deletions net/dccp/ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
sk = __inet_lookup_established(net, &dccp_hashinfo,
iph->daddr, dh->dccph_dport,
iph->saddr, ntohs(dh->dccph_sport),
inet_iif(skb));
inet_iif(skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
Expand Down Expand Up @@ -804,7 +804,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)

lookup:
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport, &refcounted);
dh->dccph_sport, dh->dccph_dport, 0, &refcounted);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
Expand Down
4 changes: 2 additions & 2 deletions net/dccp/ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __inet6_lookup_established(net, &dccp_hashinfo,
&hdr->daddr, dh->dccph_dport,
&hdr->saddr, ntohs(dh->dccph_sport),
inet6_iif(skb));
inet6_iif(skb), 0);

if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
Expand Down Expand Up @@ -687,7 +687,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
lookup:
sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport,
inet6_iif(skb), &refcounted);
inet6_iif(skb), 0, &refcounted);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
Expand Down
6 changes: 4 additions & 2 deletions net/ipv4/igmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2549,7 +2549,8 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
/*
* check if a multicast source filter allows delivery for a given <src,dst,intf>
*/
int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
int dif, int sdif)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *pmc;
Expand All @@ -2564,7 +2565,8 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
rcu_read_lock();
for_each_pmc_rcu(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
pmc->multi.imr_ifindex == dif)
(pmc->multi.imr_ifindex == dif ||
(sdif && pmc->multi.imr_ifindex == sdif)))
break;
}
ret = inet->mc_all;
Expand Down
Loading

0 comments on commit 9bcb5a5

Please sign in to comment.