Skip to content

Commit

Permalink
ipv4: Namespecify TCP PMTU mechanism
Browse files Browse the repository at this point in the history
Packetization Layer Path MTU Discovery works separately beside
Path MTU Discovery at IP level, different net namespace has
various requirements on which one to chose, e.g., a virutalized
container instance would require TCP PMTU to probe an usable
effective mtu for underlying tunnel, while the host would
employ classical ICMP based PMTU to function.

Hence making TCP PMTU mechanism per net namespace to decouple
two functionality. Furthermore the probe base MSS should also
be configured separately for each namespace.

Signed-off-by: Fan Du <fan.du@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Fan Du authored and David S. Miller committed Feb 10, 2015
1 parent f217d6c commit b0f9ca5
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 23 deletions.
2 changes: 2 additions & 0 deletions include/net/netns/ipv4.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ struct netns_ipv4 {

int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
int sysctl_tcp_mtu_probing;
int sysctl_tcp_base_mss;

struct ping_group_range ping_group_range;

Expand Down
2 changes: 0 additions & 2 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,6 @@ extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_mtu_probing;
extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_thin_linear_timeouts;
Expand Down
28 changes: 14 additions & 14 deletions net/ipv4/sysctl_net_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -603,20 +603,6 @@ static struct ctl_table ipv4_table[] = {
.maxlen = TCP_CA_NAME_MAX,
.proc_handler = proc_tcp_congestion_control,
},
{
.procname = "tcp_mtu_probing",
.data = &sysctl_tcp_mtu_probing,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_base_mss",
.data = &sysctl_tcp_base_mss,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_workaround_signed_windows",
.data = &sysctl_tcp_workaround_signed_windows,
Expand Down Expand Up @@ -883,6 +869,20 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_mtu_probing",
.data = &init_net.ipv4.sysctl_tcp_mtu_probing,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_base_mss",
.data = &init_net.ipv4.sysctl_tcp_base_mss,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
};

Expand Down
1 change: 1 addition & 0 deletions net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -2459,6 +2459,7 @@ static int __net_init tcp_sk_init(struct net *net)
*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
}
net->ipv4.sysctl_tcp_ecn = 2;
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
return 0;

fail:
Expand Down
8 changes: 3 additions & 5 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
*/
int sysctl_tcp_tso_win_divisor __read_mostly = 3;

int sysctl_tcp_mtu_probing __read_mostly = 0;
int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;

/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

Expand Down Expand Up @@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);

icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
icsk->icsk_mtup.probe_size = 0;
}
EXPORT_SYMBOL(tcp_mtup_init);
Expand Down
7 changes: 5 additions & 2 deletions net/ipv4/tcp_timer.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,20 @@ static int tcp_orphan_retries(struct sock *sk, int alive)

static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
{
struct net *net = sock_net(sk);

/* Black hole detection */
if (sysctl_tcp_mtu_probing) {
if (net->ipv4.sysctl_tcp_mtu_probing) {
if (!icsk->icsk_mtup.enabled) {
icsk->icsk_mtup.enabled = 1;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
struct net *net = sock_net(sk);
struct tcp_sock *tp = tcp_sk(sk);
int mss;

mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
mss = min(sysctl_tcp_base_mss, mss);
mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
mss = max(mss, 68 - tp->tcp_header_len);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
Expand Down

0 comments on commit b0f9ca5

Please sign in to comment.