Skip to content

Commit

Permalink
Merge branch 'tcp-drop-reason-additions'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
tcp: drop reason additions

Currently, TCP is either missing drop reasons,
or pretending that some useful packets are dropped.

This patch series makes "perf record -a -e skb:kfree_skb"
much more usable.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Apr 17, 2022
2 parents 7925c2d + 8fbf195 commit 53c33a1
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 54 deletions.
13 changes: 13 additions & 0 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,19 @@ enum skb_drop_reason {
* the ofo queue, corresponding to
* LINUX_MIB_TCPOFOMERGE
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to
* LINUX_MIB_PAWSESTABREJECTED
*/
SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */
SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */
SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */
SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */
SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */
SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */
SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */
SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */
SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */
SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */
SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */
SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by
* BPF_PROG_TYPE_CGROUP_SKB
Expand Down
14 changes: 14 additions & 0 deletions include/trace/events/skb.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,20 @@
EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \
EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \
EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \
EM(SKB_DROP_REASON_TCP_OFO_DROP, TCP_OFO_DROP) \
EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS) \
EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE, \
TCP_INVALID_SEQUENCE) \
EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET) \
EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN) \
EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE) \
EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN) \
EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK) \
EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK) \
EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, \
TCP_ACK_UNSENT_DATA) \
EM(SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, \
TCP_OFO_QUEUE_PRUNE) \
EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \
EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \
BPF_CGROUP_EGRESS) \
Expand Down
127 changes: 73 additions & 54 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -3766,7 +3766,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una - tp->max_window)) {
if (!(flag & FLAG_NO_CHALLENGE_ACK))
tcp_send_challenge_ack(sk);
return -1;
return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
}
goto old_ack;
}
Expand All @@ -3775,7 +3775,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* this segment (RFC793 Section 3.9).
*/
if (after(ack, tp->snd_nxt))
return -1;
return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;

if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
Expand Down Expand Up @@ -4674,7 +4674,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
{
bool res = tcp_try_coalesce(sk, to, from, fragstolen);

/* In case tcp_drop() is called later, update to->gso_segs */
/* In case tcp_drop_reason() is called later, update to->gso_segs */
if (res) {
u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
max_t(u16, 1, skb_shinfo(from)->gso_segs);
Expand All @@ -4691,11 +4691,6 @@ static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
kfree_skb_reason(skb, reason);
}

static void tcp_drop(struct sock *sk, struct sk_buff *skb)
{
tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED);
}

/* This one checks to see if we can put data from the
* out_of_order queue into the receive_queue.
*/
Expand Down Expand Up @@ -4723,7 +4718,7 @@ static void tcp_ofo_queue(struct sock *sk)
rb_erase(&skb->rbnode, &tp->out_of_order_queue);

if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
tcp_drop(sk, skb);
tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP);
continue;
}

Expand Down Expand Up @@ -5334,7 +5329,8 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
goal -= rb_to_skb(node)->truesize;
tcp_drop(sk, rb_to_skb(node));
tcp_drop_reason(sk, rb_to_skb(node),
SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
if (!prev || goal <= 0) {
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
Expand Down Expand Up @@ -5667,7 +5663,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, int syn_inerr)
{
struct tcp_sock *tp = tcp_sk(sk);
bool rst_seq_match = false;
SKB_DR(reason);

/* RFC1323: H1. Apply PAWS check first. */
if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
Expand All @@ -5679,6 +5675,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
LINUX_MIB_TCPACKSKIPPEDPAWS,
&tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto discard;
}
/* Reset is accepted even if it did not pass PAWS. */
Expand All @@ -5700,8 +5697,9 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
&tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
} else if (tcp_reset_check(sk, skb)) {
tcp_reset(sk, skb);
goto reset;
}
SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
goto discard;
}

Expand All @@ -5717,9 +5715,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
* Send a challenge ACK
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
tcp_reset_check(sk, skb)) {
rst_seq_match = true;
} else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
tcp_reset_check(sk, skb))
goto reset;

if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
struct tcp_sack_block *sp = &tp->selective_acks[0];
int max_sack = sp[0].end_seq;
int this_sack;
Expand All @@ -5732,21 +5731,18 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}

if (TCP_SKB_CB(skb)->seq == max_sack)
rst_seq_match = true;
goto reset;
}

if (rst_seq_match)
tcp_reset(sk, skb);
else {
/* Disable TFO if RST is out-of-order
* and no data has been received
* for current active TFO socket
*/
if (tp->syn_fastopen && !tp->data_segs_in &&
sk->sk_state == TCP_ESTABLISHED)
tcp_fastopen_active_disable(sk);
tcp_send_challenge_ack(sk);
}
/* Disable TFO if RST is out-of-order
* and no data has been received
* for current active TFO socket
*/
if (tp->syn_fastopen && !tp->data_segs_in &&
sk->sk_state == TCP_ESTABLISHED)
tcp_fastopen_active_disable(sk);
tcp_send_challenge_ack(sk);
SKB_DR_SET(reason, TCP_RESET);
goto discard;
}

Expand All @@ -5761,6 +5757,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
tcp_send_challenge_ack(sk);
SKB_DR_SET(reason, TCP_INVALID_SYN);
goto discard;
}

Expand All @@ -5769,7 +5766,12 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
return true;

discard:
tcp_drop(sk, skb);
tcp_drop_reason(sk, skb, reason);
return false;

reset:
tcp_reset(sk, skb);
__kfree_skb(skb);
return false;
}

Expand Down Expand Up @@ -5956,7 +5958,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
return;

step5:
if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
if (reason < 0)
goto discard;

tcp_rcv_rtt_measure_ts(sk, skb);
Expand Down Expand Up @@ -6136,6 +6139,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
SKB_DR(reason);

tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
Expand Down Expand Up @@ -6178,7 +6182,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,

if (th->rst) {
tcp_reset(sk, skb);
goto discard;
consume:
__kfree_skb(skb);
return 0;
}

/* rfc793:
Expand All @@ -6188,9 +6194,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* See note below!
* --ANK(990513)
*/
if (!th->syn)
if (!th->syn) {
SKB_DR_SET(reason, TCP_FLAGS);
goto discard_and_undo;

}
/* rfc793:
* "If the SYN bit is on ...
* are acceptable then ...
Expand Down Expand Up @@ -6267,13 +6274,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
tcp_drop(sk, skb);
return 0;
} else {
tcp_send_ack(sk);
goto consume;
}
tcp_send_ack(sk);
return -1;
}

Expand All @@ -6285,15 +6288,16 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
*
* Otherwise (no ACK) drop the segment and return."
*/

SKB_DR_SET(reason, TCP_RESET);
goto discard_and_undo;
}

/* PAWS check. */
if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
tcp_paws_reject(&tp->rx_opt, 0))
tcp_paws_reject(&tp->rx_opt, 0)) {
SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto discard_and_undo;

}
if (th->syn) {
/* We see SYN without ACK. It is attempt of
* simultaneous connect with crossed SYNs.
Expand Down Expand Up @@ -6342,7 +6346,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
*/
return -1;
#else
goto discard;
goto consume;
#endif
}
/* "fifth, if neither of the SYN or RST bits is set then
Expand All @@ -6352,7 +6356,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
discard_and_undo:
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
goto discard;
tcp_drop_reason(sk, skb, reason);
return 0;

reset_and_undo:
tcp_clear_options(&tp->rx_opt);
Expand Down Expand Up @@ -6407,21 +6412,26 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
struct request_sock *req;
int queued = 0;
bool acceptable;
SKB_DR(reason);

switch (sk->sk_state) {
case TCP_CLOSE:
SKB_DR_SET(reason, TCP_CLOSE);
goto discard;

case TCP_LISTEN:
if (th->ack)
return 1;

if (th->rst)
if (th->rst) {
SKB_DR_SET(reason, TCP_RESET);
goto discard;

}
if (th->syn) {
if (th->fin)
if (th->fin) {
SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
}
/* It is possible that we process SYN packets from backlog,
* so we need to make sure to disable BH and RCU right there.
*/
Expand All @@ -6436,6 +6446,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
consume_skb(skb);
return 0;
}
SKB_DR_SET(reason, TCP_FLAGS);
goto discard;

case TCP_SYN_SENT:
Expand All @@ -6462,13 +6473,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);

if (!tcp_check_req(sk, skb, req, true, &req_stolen))
if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
SKB_DR_SET(reason, TCP_FASTOPEN);
goto discard;
}
}

if (!th->ack && !th->rst && !th->syn)
if (!th->ack && !th->rst && !th->syn) {
SKB_DR_SET(reason, TCP_FLAGS);
goto discard;

}
if (!tcp_validate_incoming(sk, skb, th, 0))
return 0;

Expand All @@ -6481,6 +6495,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_SYN_RECV)
return 1; /* send one RST */
tcp_send_challenge_ack(sk);
SKB_DR_SET(reason, TCP_OLD_ACK);
goto discard;
}
switch (sk->sk_state) {
Expand Down Expand Up @@ -6574,23 +6589,23 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
inet_csk_reset_keepalive_timer(sk, tmo);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
goto discard;
goto consume;
}
break;
}

case TCP_CLOSING:
if (tp->snd_una == tp->write_seq) {
tcp_time_wait(sk, TCP_TIME_WAIT, 0);
goto discard;
goto consume;
}
break;

case TCP_LAST_ACK:
if (tp->snd_una == tp->write_seq) {
tcp_update_metrics(sk);
tcp_done(sk);
goto discard;
goto consume;
}
break;
}
Expand Down Expand Up @@ -6641,9 +6656,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)

if (!queued) {
discard:
tcp_drop(sk, skb);
tcp_drop_reason(sk, skb, reason);
}
return 0;

consume:
__kfree_skb(skb);
return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);

Expand Down

0 comments on commit 53c33a1

Please sign in to comment.