Skip to content

Commit

Permalink
Merge branch 'ns-tcp-sysctls'
Browse files Browse the repository at this point in the history
Nikolay Borisov says:

====================
Namespaceify more of the tcp sysctl knobs

This patch series continues making more of the tcp-related
sysctl knobs be per net-namespace. Most of these apply per
socket and have global defaults so should be safe and I
don't expect any breakages.

Having those per net-namespace is useful when multiple
containers are hosted and it is required to tune the
tcp settings for each independently of the host node.

I've split the patches to be per-sysctl but after
the review if the outcome is positive I'm happy
to either send it in one big blob or just.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Feb 7, 2016
2 parents 9d1eb21 + 4979f2d commit 7158ce8
Show file tree
Hide file tree
Showing 13 changed files with 130 additions and 126 deletions.
10 changes: 10 additions & 0 deletions include/net/netns/ipv4.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ struct netns_ipv4 {
int sysctl_tcp_keepalive_probes;
int sysctl_tcp_keepalive_intvl;

int sysctl_tcp_syn_retries;
int sysctl_tcp_synack_retries;
int sysctl_tcp_syncookies;
int sysctl_tcp_reordering;
int sysctl_tcp_retries1;
int sysctl_tcp_retries2;
int sysctl_tcp_orphan_retries;
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;

struct ping_group_range ping_group_range;

atomic_t dev_addr_genid;
Expand Down
17 changes: 6 additions & 11 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,6 @@ extern struct inet_timewait_death_row tcp_death_row;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_syn_retries;
extern int sysctl_tcp_synack_retries;
extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2;
extern int sysctl_tcp_orphan_retries;
extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
Expand Down Expand Up @@ -274,7 +267,6 @@ extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_early_retrans;
extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;
extern unsigned int sysctl_tcp_notsent_lowat;
extern int sysctl_tcp_min_tso_segs;
extern int sysctl_tcp_min_rtt_wlen;
extern int sysctl_tcp_autocorking;
Expand Down Expand Up @@ -964,9 +956,11 @@ static inline void tcp_enable_fack(struct tcp_sock *tp)
*/
static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);

tp->do_early_retrans = sysctl_tcp_early_retrans &&
sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
sysctl_tcp_reordering == 3;
net->ipv4.sysctl_tcp_reordering == 3;
}

static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
Expand Down Expand Up @@ -1253,7 +1247,7 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)

static inline int tcp_fin_time(const struct sock *sk)
{
int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
const int rto = inet_csk(sk)->icsk_rto;

if (fin_timeout < (rto << 2) - (rto >> 1))
Expand Down Expand Up @@ -1687,7 +1681,8 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);

static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
return tp->notsent_lowat ?: sysctl_tcp_notsent_lowat;
struct net *net = sock_net((struct sock *)tp);
return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
}

static inline bool tcp_stream_memory_free(const struct sock *sk)
Expand Down
7 changes: 2 additions & 5 deletions net/ipv4/inet_connection_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -482,10 +482,6 @@ EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
#define AF_INET_FAMILY(fam) true
#endif

/* Only thing we need from tcp.h */
extern int sysctl_tcp_synack_retries;


/* Decide when to expire the request and when to resend SYN-ACK */
static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
const int max_retries,
Expand Down Expand Up @@ -557,6 +553,7 @@ static void reqsk_timer_handler(unsigned long data)
{
struct request_sock *req = (struct request_sock *)data;
struct sock *sk_listener = req->rsk_listener;
struct net *net = sock_net(sk_listener);
struct inet_connection_sock *icsk = inet_csk(sk_listener);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
int qlen, expire = 0, resend = 0;
Expand All @@ -566,7 +563,7 @@ static void reqsk_timer_handler(unsigned long data)
if (sk_state_load(sk_listener) != TCP_LISTEN)
goto drop;

max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
thresh = max_retries;
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
Expand Down
4 changes: 1 addition & 3 deletions net/ipv4/syncookies.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
#include <net/tcp.h>
#include <net/route.h>

extern int sysctl_tcp_syncookies;

static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;

#define COOKIEBITS 24 /* Upper bits store count */
Expand Down Expand Up @@ -307,7 +305,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
__u8 rcv_wscale;
struct flowi4 fl4;

if (!sysctl_tcp_syncookies || !th->ack || th->rst)
if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;

if (tcp_synq_no_recent_overflow(sk))
Expand Down
136 changes: 68 additions & 68 deletions net/ipv4/sysctl_net_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,22 +291,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &ip_ttl_min,
.extra2 = &ip_ttl_max,
},
{
.procname = "tcp_syn_retries",
.data = &sysctl_tcp_syn_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &tcp_syn_retries_min,
.extra2 = &tcp_syn_retries_max
},
{
.procname = "tcp_synack_retries",
.data = &sysctl_tcp_synack_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_max_orphans",
.data = &sysctl_tcp_max_orphans,
Expand Down Expand Up @@ -335,37 +319,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_retries1",
.data = &sysctl_tcp_retries1,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra2 = &tcp_retr1_max
},
{
.procname = "tcp_retries2",
.data = &sysctl_tcp_retries2,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_fin_timeout",
.data = &sysctl_tcp_fin_timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
#ifdef CONFIG_SYN_COOKIES
{
.procname = "tcp_syncookies",
.data = &sysctl_tcp_syncookies,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#endif
{
.procname = "tcp_fastopen",
.data = &sysctl_tcp_fastopen,
Expand Down Expand Up @@ -459,13 +412,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "tcp_orphan_retries",
.data = &sysctl_tcp_orphan_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_fack",
.data = &sysctl_tcp_fack,
Expand All @@ -480,13 +426,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_reordering",
.data = &sysctl_tcp_reordering,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_max_reordering",
.data = &sysctl_tcp_max_reordering,
Expand Down Expand Up @@ -516,13 +455,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{
.procname = "tcp_notsent_lowat",
.data = &sysctl_tcp_notsent_lowat,
.maxlen = sizeof(sysctl_tcp_notsent_lowat),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_rmem",
.data = &sysctl_tcp_rmem,
Expand Down Expand Up @@ -960,6 +892,74 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "tcp_syn_retries",
.data = &init_net.ipv4.sysctl_tcp_syn_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &tcp_syn_retries_min,
.extra2 = &tcp_syn_retries_max
},
{
.procname = "tcp_synack_retries",
.data = &init_net.ipv4.sysctl_tcp_synack_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#ifdef CONFIG_SYN_COOKIES
{
.procname = "tcp_syncookies",
.data = &init_net.ipv4.sysctl_tcp_syncookies,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#endif
{
.procname = "tcp_reordering",
.data = &init_net.ipv4.sysctl_tcp_reordering,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_retries1",
.data = &init_net.ipv4.sysctl_tcp_retries1,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra2 = &tcp_retr1_max
},
{
.procname = "tcp_retries2",
.data = &init_net.ipv4.sysctl_tcp_retries2,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_orphan_retries",
.data = &init_net.ipv4.sysctl_tcp_orphan_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_fin_timeout",
.data = &init_net.ipv4.sysctl_tcp_fin_timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "tcp_notsent_lowat",
.data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
};

Expand Down
12 changes: 6 additions & 6 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,6 @@
#include <asm/unaligned.h>
#include <net/busy_poll.h>

int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;

int sysctl_tcp_min_tso_segs __read_mostly = 2;

int sysctl_tcp_autocorking __read_mostly = 1;
Expand Down Expand Up @@ -406,7 +404,7 @@ void tcp_init_sock(struct sock *sk)
tp->mss_cache = TCP_MSS_DEFAULT;
u64_stats_init(&tp->syncp);

tp->reordering = sysctl_tcp_reordering;
tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
tcp_enable_early_retrans(tp);
tcp_assign_congestion_control(sk);

Expand Down Expand Up @@ -2330,6 +2328,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
int val;
int err = 0;

Expand Down Expand Up @@ -2526,7 +2525,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_LINGER2:
if (val < 0)
tp->linger2 = -1;
else if (val > sysctl_tcp_fin_timeout / HZ)
else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
tp->linger2 = 0;
else
tp->linger2 = val * HZ;
Expand Down Expand Up @@ -2731,6 +2730,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
int val, len;

if (get_user(len, optlen))
Expand Down Expand Up @@ -2765,12 +2765,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
break;
case TCP_LINGER2:
val = tp->linger2;
if (val >= 0)
val = (val ? : sysctl_tcp_fin_timeout) / HZ;
val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
break;
case TCP_DEFER_ACCEPT:
val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
Expand Down
Loading

0 comments on commit 7158ce8

Please sign in to comment.