Skip to content

Commit

Permalink
tcp: remove per-destination timestamp cache
Browse files Browse the repository at this point in the history
Commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets for each connection)
randomizes TCP timestamps per connection. After this commit,
there is no guarantee that the timestamps received from the
same destination are monotonically increasing. As a result,
the per-destination timestamp cache in TCP metrics (i.e., tcpm_ts
in struct tcp_metrics_block) is broken and cannot be relied upon.

Remove the per-destination timestamp cache and all related code
paths.

Note that this cache was already broken for caching timestamps of
multiple machines behind a NAT sharing the same address.

Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Cc: Lutz Vieweg <lvml@5t9.de>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Soheil Hassas Yeganeh authored and David S. Miller committed Mar 17, 2017
1 parent 8b705f5 commit d82bae1
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 179 deletions.
6 changes: 1 addition & 5 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,11 +406,7 @@ void tcp_clear_retrans(struct tcp_sock *tp);
void tcp_update_metrics(struct sock *sk);
void tcp_init_metrics(struct sock *sk);
void tcp_metrics_init(void);
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
bool paws_check, bool timestamps);
bool tcp_remember_stamp(struct sock *sk);
bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_disable_fack(struct tcp_sock *tp);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
Expand Down
6 changes: 2 additions & 4 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -6342,8 +6342,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
dst = af_ops->route_req(sk, &fl, req, &strict);

if (dst && strict &&
!tcp_peer_is_proven(req, dst, true,
tmp_opt.saw_tstamp)) {
!tcp_peer_is_proven(req, dst)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
Expand All @@ -6352,8 +6351,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
else if (!net->ipv4.sysctl_tcp_syncookies &&
(net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(net->ipv4.sysctl_max_syn_backlog >> 2)) &&
!tcp_peer_is_proven(req, dst, false,
tmp_opt.saw_tstamp)) {
!tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
Expand Down
4 changes: 0 additions & 4 deletions net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->write_seq = 0;
}

if (tcp_death_row->sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
tcp_fetch_timewait_stamp(sk, &rt->dst);

inet->inet_dport = usin->sin_port;
sk_daddr_set(sk, daddr);

Expand Down
147 changes: 4 additions & 143 deletions net/ipv4/tcp_metrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ struct tcp_metrics_block {
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
u32 tcpm_ts;
u32 tcpm_ts_stamp;
u32 tcpm_lock;
u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
struct tcp_fastopen_metrics tcpm_fastopen;
Expand Down Expand Up @@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
tm->tcpm_ts = 0;
tm->tcpm_ts_stamp = 0;
if (fastopen_clear) {
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
Expand Down Expand Up @@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
return tm;
}

/* Look up the metrics entry whose (source, destination, netns) triple
 * matches the given timewait socket.
 *
 * The address pair is taken from the IPv4 fields for AF_INET sockets and
 * for AF_INET6 sockets whose destination is a v4-mapped address, and from
 * the IPv6 fields otherwise.  Any other family yields NULL.
 *
 * Returns the matching entry, or NULL when the hash chain holds none.
 * The chain is walked with rcu_dereference(), so the caller is presumably
 * expected to be inside an RCU read-side critical section.
 */
static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
{
	struct inetpeer_addr saddr, daddr;
	struct tcp_metrics_block *tm;
	unsigned int hash;
	struct net *net;

	switch (tw->tw_family) {
	case AF_INET:
		inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
		inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
		hash = ipv4_addr_hash(tw->tw_daddr);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		if (!ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
			inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr);
			inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr);
			hash = ipv6_addr_hash(&tw->tw_v6_daddr);
		} else {
			/* v4-mapped peer: key the lookup on the IPv4 pair. */
			inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
			inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
			hash = ipv4_addr_hash(tw->tw_daddr);
		}
		break;
#endif
	default:
		return NULL;
	}

	/* Mix in the namespace, then reduce to a bucket index. */
	net = twsk_net(tw);
	hash = hash_32(hash ^ net_hash_mix(net), tcp_metrics_hash_log);

	tm = rcu_dereference(tcp_metrics_hash[hash].chain);
	while (tm) {
		if (addr_same(&tm->tcpm_saddr, &saddr) &&
		    addr_same(&tm->tcpm_daddr, &daddr) &&
		    net_eq(tm_net(tm), net))
			return tm;
		tm = rcu_dereference(tm->tcpm_next);
	}
	return NULL;
}

static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
struct dst_entry *dst,
bool create)
Expand Down Expand Up @@ -573,8 +527,7 @@ void tcp_init_metrics(struct sock *sk)
tp->snd_cwnd_stamp = tcp_time_stamp;
}

bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
bool paws_check, bool timestamps)
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
{
struct tcp_metrics_block *tm;
bool ret;
Expand All @@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,

rcu_read_lock();
tm = __tcp_get_metrics_req(req, dst);
if (paws_check) {
if (tm &&
(u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
!timestamps))
ret = false;
else
ret = true;
} else {
if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
ret = true;
else
ret = false;
}
rcu_read_unlock();

return ret;
}

/* Seed a socket's receive-side timestamp state from the cached metrics
 * entry for its destination.
 *
 * The entry is obtained via tcp_get_metrics() with create == true.  Its
 * tcpm_ts_stamp / tcpm_ts values are copied into tp->rx_opt only when the
 * cached stamp's age, measured against get_seconds(), does not exceed
 * TCP_PAWS_MSL.  Otherwise the socket is left untouched.
 */
void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
{
	struct tcp_metrics_block *tm;
	struct tcp_sock *tp;

	rcu_read_lock();

	tm = tcp_get_metrics(sk, dst, true);
	if (!tm)
		goto out_unlock;

	/* Cached stamp too old to be trusted: nothing to import. */
	if ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL)
		goto out_unlock;

	tp = tcp_sk(sk);
	tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
	tp->rx_opt.ts_recent = tm->tcpm_ts;

out_unlock:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);

/* VJ's idea: remember the last timestamp seen from this destination and
 * keep it for at least the normal timewait interval, so that subsequent
 * connections can use it for duplicate segment detection before they
 * reach synchronized state.
 *
 * The cached (tcpm_ts, tcpm_ts_stamp) pair is overwritten with the
 * socket's (ts_recent, ts_recent_stamp) when either
 *   - the cached timestamp is not ahead of the socket's ts_recent, or
 *   - the cached stamp is older than TCP_PAWS_MSL and not newer than the
 *     socket's ts_recent_stamp.
 *
 * Returns true when a metrics entry was available (whether or not it was
 * updated), false when the socket has no dst or no entry could be obtained.
 */
bool tcp_remember_stamp(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_metrics_block *tm;
	struct tcp_sock *tp;
	bool have_entry = false;

	if (!dst)
		return false;

	rcu_read_lock();
	tm = tcp_get_metrics(sk, dst, true);
	if (tm) {
		tp = tcp_sk(sk);

		if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
		    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
		     tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
			tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
			tm->tcpm_ts = tp->rx_opt.ts_recent;
		}
		have_entry = true;
	}
	rcu_read_unlock();

	return have_entry;
}

bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
{
struct tcp_metrics_block *tm;
bool ret = false;

rcu_read_lock();
tm = __tcp_get_metrics_tw(tw);
if (tm) {
const struct tcp_timewait_sock *tcptw;
struct sock *sk = (struct sock *) tw;

tcptw = tcp_twsk(sk);
if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
tm->tcpm_ts = tcptw->tw_ts_recent;
}
if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
ret = true;
}
else
ret = false;
rcu_read_unlock();

return ret;
Expand Down Expand Up @@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
jiffies - tm->tcpm_stamp,
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;
if (tm->tcpm_ts_stamp) {
if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
(s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
goto nla_put_failure;
if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
tm->tcpm_ts) < 0)
goto nla_put_failure;
}

{
int n = 0;
Expand Down
22 changes: 4 additions & 18 deletions net/ipv4/tcp_minisocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;

tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
Expand Down Expand Up @@ -149,12 +148,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
}

if (tcp_death_row->sysctl_tw_recycle &&
tcptw->tw_ts_recent_stamp &&
tcp_tw_remember_stamp(tw))
inet_twsk_reschedule(tw, tw->tw_timeout);
else
inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
return TCP_TW_ACK;
}

Expand Down Expand Up @@ -259,12 +253,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct inet_timewait_sock *tw;
bool recycle_ok = false;
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
recycle_ok = tcp_remember_stamp(sk);

tw = inet_twsk_alloc(sk, tcp_death_row, state);

if (tw) {
Expand Down Expand Up @@ -317,13 +307,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (timeo < rto)
timeo = rto;

if (recycle_ok) {
tw->tw_timeout = rto;
} else {
tw->tw_timeout = TCP_TIMEWAIT_LEN;
if (state == TCP_TIME_WAIT)
timeo = TCP_TIMEWAIT_LEN;
}
tw->tw_timeout = TCP_TIMEWAIT_LEN;
if (state == TCP_TIME_WAIT)
timeo = TCP_TIMEWAIT_LEN;

inet_twsk_schedule(tw, timeo);
/* Linkage updates. */
Expand Down
5 changes: 0 additions & 5 deletions net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,11 +265,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_gso_type = SKB_GSO_TCPV6;
ip6_dst_store(sk, dst, NULL, NULL);

if (tcp_death_row->sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
tcp_fetch_timewait_stamp(sk, dst);

icsk->icsk_ext_hdr_len = 0;
if (opt)
icsk->icsk_ext_hdr_len = opt->opt_flen +
Expand Down

0 comments on commit d82bae1

Please sign in to comment.