Skip to content

Commit

Permalink
Merge branch 'net-prefer-listeners-bound-to-an-address'
Browse files Browse the repository at this point in the history
Peter Oskolkov says:

====================
net: prefer listeners bound to an address

A relatively common use case is to have several IPs configured
on a host, and have different listeners for each of them. We would
like to add a "catch all" listener on addr_any, to match incoming
connections not served by any of the listeners bound to a specific
address.

However, if the SO_REUSEPORT flag is set, port-only lookups can match
addr_any sockets even when sockets listening on specific addresses are
present. This patchset eliminates lookups into the port-only hashtable,
as lookups by (addr, port) tuple are easily available.

In a future patchset I plan to explore whether it is possible
to remove port-only hashtables completely: additional refactoring
will be required, as some non-lookup code uses the hashtables.
====================

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Dec 14, 2018
2 parents 8e2ea53 + 6254e5c commit b9948e1
Show file tree
Hide file tree
Showing 8 changed files with 325 additions and 217 deletions.
60 changes: 8 additions & 52 deletions net/ipv4/inet_hashtables.c
Original file line number Diff line number Diff line change
Expand Up @@ -234,24 +234,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
const int dif, const int sdif, bool exact_dif)
{
int score = -1;
struct inet_sock *inet = inet_sk(sk);
bool dev_match;

if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
!ipv6_only_sock(sk)) {
__be32 rcv_saddr = inet->inet_rcv_saddr;
score = sk->sk_family == PF_INET ? 2 : 1;
if (rcv_saddr) {
if (rcv_saddr != daddr)
return -1;
score += 4;
}
dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
dif, sdif);
if (!dev_match)
if (sk->sk_rcv_saddr != daddr)
return -1;

if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
score += 4;

score = sk->sk_family == PF_INET ? 2 : 1;
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
Expand Down Expand Up @@ -307,26 +299,12 @@ struct sock *__inet_lookup_listener(struct net *net,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
bool exact_dif = inet_exact_dif_match(net, skb);
struct inet_listen_hashbucket *ilb2;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
struct sock *result = NULL;
unsigned int hash2;
u32 phash = 0;

if (ilb->count <= 10 || !hashinfo->lhash2)
goto port_lookup;

/* Too many sk in the ilb bucket (which is hashed by port alone).
* Try lhash2 (which is hashed by port and addr) instead.
*/

hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
if (ilb2->count > ilb->count)
goto port_lookup;

result = inet_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
Expand All @@ -335,34 +313,12 @@ struct sock *__inet_lookup_listener(struct net *net,
goto done;

/* Lookup lhash2 with INADDR_ANY */

hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
if (ilb2->count > ilb->count)
goto port_lookup;

result = inet_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
saddr, sport, htonl(INADDR_ANY), hnum,
dif, sdif);
goto done;

port_lookup:
sk_for_each_rcu(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
if (sk->sk_reuseport) {
phash = inet_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
goto done;
}
result = sk;
hiscore = score;
}
}
done:
if (unlikely(IS_ERR(result)))
return NULL;
Expand Down
76 changes: 19 additions & 57 deletions net/ipv4/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -380,15 +380,12 @@ static int compute_score(struct sock *sk, struct net *net,
ipv6_only_sock(sk))
return -1;

score = (sk->sk_family == PF_INET) ? 2 : 1;
inet = inet_sk(sk);
if (sk->sk_rcv_saddr != daddr)
return -1;

if (inet->inet_rcv_saddr) {
if (inet->inet_rcv_saddr != daddr)
return -1;
score += 4;
}
score = (sk->sk_family == PF_INET) ? 2 : 1;

inet = inet_sk(sk);
if (inet->inet_daddr) {
if (inet->inet_daddr != saddr)
return -1;
Expand Down Expand Up @@ -464,65 +461,30 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *sk, *result;
struct sock *result;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
bool exact_dif = udp_lib_exact_dif_match(net, skb);
int score, badness;
u32 hash = 0;

if (hslot->count > 10) {
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];

result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
exact_dif, hslot2, skb);
if (!result) {
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
goto begin;

result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
htonl(INADDR_ANY), hnum, dif, sdif,
exact_dif, hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
/* avoid searching the same slot again. */
if (unlikely(slot2 == old_slot2))
return result;

hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
goto begin;

result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
exact_dif, hslot2, skb);
}
if (unlikely(IS_ERR(result)))
return NULL;
return result;
}
begin:
result = NULL;
badness = 0;
sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
if (sk->sk_reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (unlikely(IS_ERR(result)))
return NULL;
if (result)
return result;
}
result = sk;
badness = score;
}
}
if (unlikely(IS_ERR(result)))
return NULL;
return result;
}
EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
Expand Down
54 changes: 6 additions & 48 deletions net/ipv6/inet6_hashtables.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,23 +99,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
const int dif, const int sdif, bool exact_dif)
{
int score = -1;
bool dev_match;

if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
sk->sk_family == PF_INET6) {
if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;

score = 1;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score++;
}
dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
dif, sdif);
if (!dev_match)
if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
score++;

score = 1;
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
Expand Down Expand Up @@ -164,26 +157,12 @@ struct sock *inet6_lookup_listener(struct net *net,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif, const int sdif)
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
bool exact_dif = inet6_exact_dif_match(net, skb);
struct inet_listen_hashbucket *ilb2;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
struct sock *result = NULL;
unsigned int hash2;
u32 phash = 0;

if (ilb->count <= 10 || !hashinfo->lhash2)
goto port_lookup;

/* Too many sk in the ilb bucket (which is hashed by port alone).
* Try lhash2 (which is hashed by port and addr) instead.
*/

hash2 = ipv6_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
if (ilb2->count > ilb->count)
goto port_lookup;

result = inet6_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
Expand All @@ -192,33 +171,12 @@ struct sock *inet6_lookup_listener(struct net *net,
goto done;

/* Lookup lhash2 with in6addr_any */

hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
if (ilb2->count > ilb->count)
goto port_lookup;

result = inet6_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
saddr, sport, &in6addr_any, hnum,
dif, sdif);
goto done;

port_lookup:
sk_for_each(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
if (score > hiscore) {
if (sk->sk_reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
goto done;
}
result = sk;
hiscore = score;
}
}
done:
if (unlikely(IS_ERR(result)))
return NULL;
Expand Down
79 changes: 21 additions & 58 deletions net/ipv6/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ static int compute_score(struct sock *sk, struct net *net,
sk->sk_family != PF_INET6)
return -1;

if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;

score = 0;
inet = inet_sk(sk);

Expand All @@ -134,12 +137,6 @@ static int compute_score(struct sock *sk, struct net *net,
score++;
}

if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score++;
}

if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
Expand Down Expand Up @@ -197,66 +194,32 @@ struct sock *__udp6_lib_lookup(struct net *net,
int dif, int sdif, struct udp_table *udptable,
struct sk_buff *skb)
{
struct sock *sk, *result;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result;
bool exact_dif = udp6_lib_exact_dif_match(net, skb);
int score, badness;
u32 hash = 0;

if (hslot->count > 10) {
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];

result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif,
hslot2, skb);
if (!result) {
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;

hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
goto begin;

result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif,
hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
/* avoid searching the same slot again. */
if (unlikely(slot2 == old_slot2))
return result;

hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
goto begin;

result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
exact_dif, hslot2,
skb);
}
if (unlikely(IS_ERR(result)))
return NULL;
return result;
}
begin:
result = NULL;
badness = -1;
sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
sdif, exact_dif);
if (score > badness) {
if (sk->sk_reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (unlikely(IS_ERR(result)))
return NULL;
if (result)
return result;
}
result = sk;
badness = score;
}
&in6addr_any, hnum, dif, sdif,
exact_dif, hslot2,
skb);
}
if (unlikely(IS_ERR(result)))
return NULL;
return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
Expand Down
Loading

0 comments on commit b9948e1

Please sign in to comment.