From f5af1f57a2914e290de40e2c93716da8885c4965 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 20 May 2015 10:59:01 -0700
Subject: [PATCH 1/2] inet_hashinfo: remove bsocket counter

We no longer need the bsocket atomic counter, as inet_csk_get_port()
calls bind_conflict() regardless of its value, after commit
2b05ad33e1e624e ("tcp: bind() fix autoselection to share ports").

This patch removes the overhead of maintaining this counter and the
double inet_csk_get_port() calls under pressure.

Signed-off-by: Eric Dumazet
Cc: Marcelo Ricardo Leitner
Cc: Flavio Leitner
Acked-by: Flavio Leitner
Signed-off-by: David S. Miller
---
 include/net/inet_hashtables.h   | 2 --
 net/ipv4/inet_connection_sock.c | 5 -----
 net/ipv4/inet_hashtables.c      | 7 -------
 3 files changed, 14 deletions(-)

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 73fe0f9525d92..774d24151d4a5 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -148,8 +148,6 @@ struct inet_hashinfo {
 	 */
 	struct inet_listen_hashbucket	listening_hash[INET_LHTABLE_SIZE]
 					____cacheline_aligned_in_smp;
-
-	atomic_t			bsockets;
 };
 
 static inline struct inet_ehash_bucket *inet_ehash_bucket(
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8976ca423a074..b95fb263a13f8 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -127,11 +127,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 					    (tb->num_owners < smallest_size || smallest_size == -1)) {
 						smallest_size = tb->num_owners;
 						smallest_rover = rover;
-						if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
-						    !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
-							snum = smallest_rover;
-							goto tb_found;
-						}
 					}
 					if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
 						snum = rover;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c6fb80bd5826e..3766bddb3e8a7 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -90,10 +90,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
 void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 		    const unsigned short snum)
 {
-	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-
-	atomic_inc(&hashinfo->bsockets);
-
 	inet_sk(sk)->inet_num = snum;
 	sk_add_bind_node(sk, &tb->owners);
 	tb->num_owners++;
@@ -111,8 +107,6 @@ static void __inet_put_port(struct sock *sk)
 	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
 	struct inet_bind_bucket *tb;
 
-	atomic_dec(&hashinfo->bsockets);
-
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
 	__sk_del_bind_node(sk);
@@ -608,7 +602,6 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
 {
 	int i;
 
-	atomic_set(&h->bsockets, 0);
 	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
 		spin_lock_init(&h->listening_hash[i].lock);
 		INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,

From 946f9eb226c296da0d6c630bdad282ca11d77f60 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 20 May 2015 10:59:02 -0700
Subject: [PATCH 2/2] tcp: improve REUSEADDR/NOREUSEADDR cohabitation

The inet_csk_get_port() randomization effort tends to spread sockets
across the whole available range (ip_local_port_range).

This is unfortunate because SO_REUSEADDR sockets have weaker
requirements than non-SO_REUSEADDR ones.

If an application uses the SO_REUSEADDR hint, it is trying to allow
source ports to be shared.

So instead of picking a random port number anywhere in
ip_local_port_range, let's try the first half of the range first.

This gives the sockets with stronger requirements (those not using
SO_REUSEADDR) a better chance of finding a port in the upper half of
the range.

Note that this patch does not add a new sysctl; it only changes the way
we try to pick a port number.

Signed-off-by: Eric Dumazet
Cc: Marcelo Ricardo Leitner
Cc: Flavio Leitner
Acked-by: Flavio Leitner
Signed-off-by: David S. Miller
---
 net/ipv4/inet_connection_sock.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b95fb263a13f8..60021d0d9326a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -99,6 +99,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 	struct net *net = sock_net(sk);
 	int smallest_size = -1, smallest_rover;
 	kuid_t uid = sock_i_uid(sk);
+	int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
 
 	local_bh_disable();
 	if (!snum) {
@@ -106,6 +107,14 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 
 again:
 		inet_get_local_port_range(net, &low, &high);
+		if (attempt_half) {
+			int half = low + ((high - low) >> 1);
+
+			if (attempt_half == 1)
+				high = half;
+			else
+				low = half;
+		}
 		remaining = (high - low) + 1;
 		smallest_rover = rover = prandom_u32() % remaining + low;
 
@@ -154,6 +163,11 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 				snum = smallest_rover;
 				goto have_snum;
 			}
+			if (attempt_half == 1) {
+				/* OK we now try the upper half of the range */
+				attempt_half = 2;
+				goto again;
+			}
 			goto fail;
 		}
 		/* OK, here is the one we will use.  HEAD is
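
To make the port-range halving in the second patch concrete, here is a small
standalone userspace C sketch (not kernel code) of the same arithmetic:
attempt 1 restricts the search to the lower half of the local port range, and
attempt 2 falls back to the upper half. The struct port_range type, the
pick_search_range() helper and the hard-coded 32768..60999 range are
assumptions made for this example only; they do not appear in the patch.

#include <stdio.h>

/* Illustrative mirror of the attempt_half logic added to inet_csk_get_port():
 * attempt_half == 1 keeps the lower half of [low, high], attempt_half == 2
 * keeps the upper half, and 0 leaves the range untouched.
 */
struct port_range {
	int low;
	int high;
};

static struct port_range pick_search_range(int low, int high, int attempt_half)
{
	struct port_range r = { low, high };

	if (attempt_half) {
		int half = low + ((high - low) >> 1);

		if (attempt_half == 1)
			r.high = half;	/* first pass: lower half */
		else
			r.low = half;	/* fallback pass: upper half */
	}
	return r;
}

int main(void)
{
	/* 32768..60999 is a common ip_local_port_range default (illustration only) */
	struct port_range first = pick_search_range(32768, 60999, 1);
	struct port_range retry = pick_search_range(32768, 60999, 2);

	printf("SO_REUSEADDR first pass : %d-%d\n", first.low, first.high);
	printf("SO_REUSEADDR second pass: %d-%d\n", retry.low, retry.high);
	return 0;
}

Running this prints 32768-46883 followed by 46883-60999. A socket that does
not set SO_REUSEADDR starts with attempt_half == 0 in the kernel code above
and keeps searching the full range, so its behaviour is unchanged; the point
of the patch is simply that REUSEADDR sockets tend to cluster in the lower
half, leaving the upper half less crowded.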