Skip to content

Commit

Permalink
Merge branch 'tcpflags'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
tcp: no longer keep around headers in input path

Looking at tcp_try_coalesce(), I was wondering why I did:

if (tcp_hdr(from)->fin)
     return false;

The answer would be to allow the aggregation, if we simply OR any FIN and PSH
flags present in @from into the @to packet. (Note that a change is also
needed in skb_try_coalesce() to avoid calling skb_put() with a len of 0.)

Then, looking at tcp_recvmsg(), I realized we access tcp_hdr(skb)->syn
(and maybe tcp_hdr(skb)->fin) for every packet we process from the socket
receive queue.

We have to understand that TCP flags are cold in CPU caches most of the time
(assuming TCP timestamps, and that the application calls recvmsg() a long
time after the incoming packet was processed), and bringing in a whole
cache line only to access one bit is not very nice.

It would make sense to use TCP_SKB_CB(skb)->tcp_flags in the TCP input path,
as we do in the output path.

This saves one cache line miss, and tcp_collapse() can avoid dealing
with the headers.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Sep 15, 2014
2 parents 13bb518 + b3d6cb9 commit 4370240
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 32 deletions.
3 changes: 2 additions & 1 deletion net/core/skbuff.c
Original file line number Diff line number Diff line change
Expand Up @@ -3936,7 +3936,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return false;

if (len <= skb_tailroom(to)) {
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
if (len)
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
*delta_truesize = 0;
return true;
}
Expand Down
18 changes: 10 additions & 8 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1510,9 +1510,9 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)

while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
offset = seq - TCP_SKB_CB(skb)->seq;
if (tcp_hdr(skb)->syn)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
offset--;
if (offset < skb->len || tcp_hdr(skb)->fin) {
if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
*off = offset;
return skb;
}
Expand Down Expand Up @@ -1585,7 +1585,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (offset + 1 != skb->len)
continue;
}
if (tcp_hdr(skb)->fin) {
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
sk_eat_skb(sk, skb, false);
++seq;
break;
Expand Down Expand Up @@ -1722,11 +1722,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
break;

offset = *seq - TCP_SKB_CB(skb)->seq;
if (tcp_hdr(skb)->syn)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
offset--;
if (offset < skb->len)
goto found_ok_skb;
if (tcp_hdr(skb)->fin)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
WARN(!(flags & MSG_PEEK),
"recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
Expand Down Expand Up @@ -1959,7 +1959,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (used + offset < skb->len)
continue;

if (tcp_hdr(skb)->fin)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
if (!(flags & MSG_PEEK)) {
sk_eat_skb(sk, skb, copied_early);
Expand Down Expand Up @@ -2160,8 +2160,10 @@ void tcp_close(struct sock *sk, long timeout)
* reader process may not have drained the data yet!
*/
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
tcp_hdr(skb)->fin;
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;

if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
len--;
data_was_unread += len;
__kfree_skb(skb);
}
Expand Down
31 changes: 8 additions & 23 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -4093,7 +4093,7 @@ static void tcp_ofo_queue(struct sock *sk)
__skb_unlink(skb, &tp->out_of_order_queue);
__skb_queue_tail(&sk->sk_receive_queue, skb);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (tcp_hdr(skb)->fin)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
}
}
Expand Down Expand Up @@ -4143,9 +4143,6 @@ static bool tcp_try_coalesce(struct sock *sk,

*fragstolen = false;

if (tcp_hdr(from)->fin)
return false;

/* Its possible this segment overlaps with prior segment in queue */
if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
return false;
Expand All @@ -4158,6 +4155,7 @@ static bool tcp_try_coalesce(struct sock *sk,
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
return true;
}

Expand Down Expand Up @@ -4513,7 +4511,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
* - bloated or contains data before "start" or
* overlaps to the next one.
*/
if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
(tcp_win_from_space(skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start))) {
end_of_skbs = false;
Expand All @@ -4532,30 +4530,18 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
/* Decided to skip this, advance start seq. */
start = TCP_SKB_CB(skb)->end_seq;
}
if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
if (end_of_skbs ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
return;

while (before(start, end)) {
int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
struct sk_buff *nskb;
unsigned int header = skb_headroom(skb);
int copy = SKB_MAX_ORDER(header, 0);

/* Too big header? This can happen with IPv6. */
if (copy < 0)
return;
if (end - start < copy)
copy = end - start;
nskb = alloc_skb(copy + header, GFP_ATOMIC);
nskb = alloc_skb(copy, GFP_ATOMIC);
if (!nskb)
return;

skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
skb_set_network_header(nskb, (skb_network_header(skb) -
skb->head));
skb_set_transport_header(nskb, (skb_transport_header(skb) -
skb->head));
skb_reserve(nskb, header);
memcpy(nskb->head, skb->head, header);
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
__skb_queue_before(list, skb, nskb);
Expand All @@ -4579,8 +4565,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
skb = tcp_collapse_one(sk, skb, list);
if (!skb ||
skb == tail ||
tcp_hdr(skb)->syn ||
tcp_hdr(skb)->fin)
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
return;
}
}
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -1638,6 +1638,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
Expand Down
1 change: 1 addition & 0 deletions net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
Expand Down

0 comments on commit 4370240

Please sign in to comment.