Skip to content

Commit

Permalink
Merge branch 'tcp-accecn'
Browse files Browse the repository at this point in the history
Chia-Yu Chang says:

====================
AccECN protocol preparation patch series

Please find the v7

v7 (03-Mar-2025)
- Move 2 new patches added in v6 to the next AccECN patch series

v6 (27-Dec-2024)
- Avoid removing the potential CA_ACK_WIN_UPDATE in ack_ev_flags of patch #1 (Eric Dumazet <edumazet@google.com>)
- Add reviewed-by tag in patches #2, #3, #4, #5, #6, #7, #8, #12, #14
- The following 2 new patches are added after patch #9 (Patch that adds SKB_GSO_TCP_ACCECN)
  * New patch #10 to replace existing SKB_GSO_TCP_ECN with SKB_GSO_TCP_ACCECN in the driver to avoid CWR flag corruption
  * New patch #11 adds AccECN for virtio by adding new negotiation flag (VIRTIO_NET_F_HOST/GUEST_ACCECN) in feature handshake and translating Accurate ECN GSO flag between virtio_net_hdr (VIRTIO_NET_HDR_GSO_ACCECN) and skb header (SKB_GSO_TCP_ACCECN)
- Add detailed changelog and comments in #13 (Eric Dumazet <edumazet@google.com>)
- Move patch #14 to the next AccECN patch series (Eric Dumazet <edumazet@google.com>)

v5 (5-Nov-2024)
- Add helper function "tcp_flags_ntohs" to preserve last 2 bytes of TCP flags of patch #4 (Paolo Abeni <pabeni@redhat.com>)
- Fix reverse Xmas tree order of patches #4, #11 (Paolo Abeni <pabeni@redhat.com>)
- Rename variable "delta" as "timestamp_delta" of patch #2 for clarity
- Remove patch #14 in this series (Paolo Abeni <pabeni@redhat.com>, Joel Granados <joel.granados@kernel.org>)

v4 (21-Oct-2024)
- Fix line length warning of patches #2, #4, #8, #10, #11, #14
- Fix spaces preferred around '|' (ctx:VxV) warning of patch #7
- Add missing CC'ed of patches #4, #12, #14

v3 (19-Oct-2024)
- Fix build error in v2

v2 (18-Oct-2024)
- Fix warning caused by NETIF_F_GSO_ACCECN_BIT in patch #9 (Jakub Kicinski <kuba@kernel.org>)

The full patch series can be found in
https://github.com/L4STeam/linux-net-next/commits/upstream_l4steam/

The Accurate ECN draft can be found in
https://datatracker.ietf.org/doc/html/draft-ietf-tcpm-accurate-ecn-28
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Mar 17, 2025
2 parents bfc6c67 + 9866884 commit 2c99b2e
Show file tree
Hide file tree
Showing 18 changed files with 226 additions and 112 deletions.
8 changes: 5 additions & 3 deletions include/linux/netdev_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ enum {
NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */
NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */
NETIF_F_GSO_FRAGLIST_BIT, /* ... Fraglist GSO */
NETIF_F_GSO_ACCECN_BIT, /* TCP AccECN w/ TSO (no clear CWR) */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
NETIF_F_GSO_FRAGLIST_BIT,
NETIF_F_GSO_ACCECN_BIT,

NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
__UNUSED_NETIF_F_37,
NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */
NETIF_F_RXHASH_BIT, /* Receive hashing offload */
NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */
Expand Down Expand Up @@ -128,6 +128,7 @@ enum {
#define NETIF_F_SG __NETIF_F(SG)
#define NETIF_F_TSO6 __NETIF_F(TSO6)
#define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN)
#define NETIF_F_GSO_ACCECN __NETIF_F(GSO_ACCECN)
#define NETIF_F_TSO __NETIF_F(TSO)
#define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED)
#define NETIF_F_RXFCS __NETIF_F(RXFCS)
Expand Down Expand Up @@ -210,7 +211,8 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID)

/* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \
NETIF_F_GSO_ACCECN | NETIF_F_GSO_SCTP | \
NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST)

/*
Expand Down
2 changes: 2 additions & 0 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -5269,6 +5269,8 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TCP_ACCECN !=
(NETIF_F_GSO_ACCECN >> NETIF_F_GSO_SHIFT));

return (features & feature) == feature;
}
Expand Down
2 changes: 2 additions & 0 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,8 @@ enum {
SKB_GSO_UDP_L4 = 1 << 17,

SKB_GSO_FRAGLIST = 1 << 18,

SKB_GSO_TCP_ACCECN = 1 << 19,
};

#if BITS_PER_LONG > 32
Expand Down
81 changes: 63 additions & 18 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <linux/kref.h>
#include <linux/ktime.h>
#include <linux/indirect_call_wrapper.h>
#include <linux/bits.h>

#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
Expand Down Expand Up @@ -373,16 +374,53 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
}
}

#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
#define TCP_ECN_MODE_RFC3168 BIT(0)
#define TCP_ECN_QUEUE_CWR BIT(1)
#define TCP_ECN_DEMAND_CWR BIT(2)
#define TCP_ECN_SEEN BIT(3)
#define TCP_ECN_MODE_ACCECN BIT(4)

#define TCP_ECN_DISABLED 0
#define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
#define TCP_ECN_MODE_ANY (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)

/* Return true when at least one of the ECN mode bits covered by
 * TCP_ECN_MODE_ANY (TCP_ECN_MODE_RFC3168 and/or TCP_ECN_MODE_ACCECN) is set
 * in tp->ecn_flags. The other ecn_flags bits are ignored.
 */
static inline bool tcp_ecn_mode_any(const struct tcp_sock *tp)
{
return tp->ecn_flags & TCP_ECN_MODE_ANY;
}

/* Return true only when, of the two ECN mode bits, exactly
 * TCP_ECN_MODE_RFC3168 is set. The result is false when both mode bits are
 * set (the TCP_ECN_MODE_PENDING value) or when neither is set.
 */
static inline bool tcp_ecn_mode_rfc3168(const struct tcp_sock *tp)
{
return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_RFC3168;
}

/* Return true only when, of the two ECN mode bits, exactly
 * TCP_ECN_MODE_ACCECN is set. The result is false when both mode bits are
 * set (the TCP_ECN_MODE_PENDING value) or when neither is set.
 */
static inline bool tcp_ecn_mode_accecn(const struct tcp_sock *tp)
{
return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_ACCECN;
}

/* Return true when neither ECN mode bit is set in tp->ecn_flags, i.e. the
 * mode bits equal TCP_ECN_DISABLED. This is the exact negation of
 * tcp_ecn_mode_any().
 */
static inline bool tcp_ecn_disabled(const struct tcp_sock *tp)
{
return !tcp_ecn_mode_any(tp);
}

/* Return true when both ECN mode bits (TCP_ECN_MODE_RFC3168 and
 * TCP_ECN_MODE_ACCECN) are set at the same time, which is the value
 * TCP_ECN_MODE_PENDING is defined as.
 * NOTE(review): presumably this marks a connection whose ECN mode has not
 * been settled yet - confirm against the callers.
 */
static inline bool tcp_ecn_mode_pending(const struct tcp_sock *tp)
{
return (tp->ecn_flags & TCP_ECN_MODE_PENDING) == TCP_ECN_MODE_PENDING;
}

/* Replace the ECN mode bits of tp->ecn_flags with @mode.
 *
 * Both mode bits are cleared first, then @mode is OR-ed in, so the remaining
 * ecn_flags bits keep their value. @mode is not masked with TCP_ECN_MODE_ANY:
 * any other bit set in it is OR-ed into ecn_flags as well.
 * NOTE(review): callers are presumably expected to pass only
 * TCP_ECN_DISABLED or one of the TCP_ECN_MODE_* values - confirm.
 */
static inline void tcp_ecn_mode_set(struct tcp_sock *tp, u8 mode)
{
tp->ecn_flags &= ~TCP_ECN_MODE_ANY;
tp->ecn_flags |= mode;
}

enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
TCP_TW_RST = 1,
TCP_TW_ACK = 2,
TCP_TW_SYN = 3
TCP_TW_SYN = 3,
TCP_TW_ACK_OOW = 4
};


Expand Down Expand Up @@ -669,7 +707,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority,
enum sk_rst_reason reason);
int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now);
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt, u16 flags);
void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk);
Expand Down Expand Up @@ -934,15 +972,22 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)

#define tcp_flag_byte(th) (((u_int8_t *)th)[13])

#define TCPHDR_FIN 0x01
#define TCPHDR_SYN 0x02
#define TCPHDR_RST 0x04
#define TCPHDR_PSH 0x08
#define TCPHDR_ACK 0x10
#define TCPHDR_URG 0x20
#define TCPHDR_ECE 0x40
#define TCPHDR_CWR 0x80

#define TCPHDR_FIN BIT(0)
#define TCPHDR_SYN BIT(1)
#define TCPHDR_RST BIT(2)
#define TCPHDR_PSH BIT(3)
#define TCPHDR_ACK BIT(4)
#define TCPHDR_URG BIT(5)
#define TCPHDR_ECE BIT(6)
#define TCPHDR_CWR BIT(7)
#define TCPHDR_AE BIT(8)
#define TCPHDR_FLAGS_MASK (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
TCPHDR_PSH | TCPHDR_ACK | TCPHDR_URG | \
TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
#define tcp_flags_ntohs(th) (ntohs(*(__be16 *)&tcp_flag_word(th)) & \
TCPHDR_FLAGS_MASK)

#define TCPHDR_ACE (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)

/* State flags for sacked in struct tcp_skb_cb */
Expand Down Expand Up @@ -977,7 +1022,7 @@ struct tcp_skb_cb {
u16 tcp_gso_size;
};
};
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
__u16 tcp_flags; /* TCP header flags (tcp[12-13])*/

__u8 sacked; /* State flags for SACK. */
__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
Expand Down Expand Up @@ -1132,9 +1177,9 @@ enum tcp_ca_ack_event_flags {
#define TCP_CA_UNSPEC 0

/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
#define TCP_CONG_NON_RESTRICTED 0x1
#define TCP_CONG_NON_RESTRICTED BIT(0)
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN 0x2
#define TCP_CONG_NEEDS_ECN BIT(1)
#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)

union tcp_cc_info;
Expand Down
9 changes: 6 additions & 3 deletions include/uapi/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ struct tcphdr {
__be32 seq;
__be32 ack_seq;
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u16 res1:4,
__u16 ae:1,
res1:3,
doff:4,
fin:1,
syn:1,
Expand All @@ -40,7 +41,8 @@ struct tcphdr {
cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u16 doff:4,
res1:4,
res1:3,
ae:1,
cwr:1,
ece:1,
urg:1,
Expand Down Expand Up @@ -70,6 +72,7 @@ union tcp_word_hdr {
#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3])

enum {
TCP_FLAG_AE = __constant_cpu_to_be32(0x01000000),
TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000),
TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000),
TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000),
Expand All @@ -78,7 +81,7 @@ enum {
TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000),
TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000),
TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000),
TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000),
TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0E000000),
TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000)
};

Expand Down
1 change: 1 addition & 0 deletions net/ethtool/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
[NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
[NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
[NETIF_F_GSO_ACCECN_BIT] = "tx-tcp-accecn-segmentation",
[NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
[NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
[NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/bpf_tcp_ca.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt)
{
/* bpf_tcp_ca prog cannot have NULL tp */
__tcp_send_ack((struct sock *)tp, rcv_nxt);
__tcp_send_ack((struct sock *)tp, rcv_nxt, 0);
return 0;
}

Expand Down
3 changes: 1 addition & 2 deletions net/ipv4/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@
#include <net/checksum.h>
#include <net/gso.h>
#include <net/inetpeer.h>
#include <net/inet_ecn.h>
#include <net/lwtunnel.h>
#include <net/inet_dscp.h>
#include <linux/bpf-cgroup.h>
Expand Down Expand Up @@ -1640,7 +1639,7 @@ void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
if (IS_ERR(rt))
return;

inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
inet_sk(sk)->tos = arg->tos;

sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -4138,7 +4138,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
}

if (tp->ecn_flags & TCP_ECN_OK)
if (tcp_ecn_mode_any(tp))
info->tcpi_options |= TCPI_OPT_ECN;
if (tp->ecn_flags & TCP_ECN_SEEN)
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_dctcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ __bpf_kfunc static void dctcp_init(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);

if ((tp->ecn_flags & TCP_ECN_OK) ||
if (tcp_ecn_mode_any(tp) ||
(sk->sk_state == TCP_LISTEN ||
sk->sk_state == TCP_CLOSE)) {
struct dctcp *ca = inet_csk_ca(sk);
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_dctcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
*/
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
dctcp_ece_ack_cwr(sk, *ce_state);
__tcp_send_ack(sk, *prior_rcv_nxt);
__tcp_send_ack(sk, *prior_rcv_nxt, 0);
}
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
}
Expand Down
Loading

0 comments on commit 2c99b2e

Please sign in to comment.