Skip to content

Commit

Permalink
Merge branch 'Add IP-TFS mode to xfrm'
Browse files Browse the repository at this point in the history
Christian Hopps says:

====================
* Summary of Changes:

This patchset adds a new xfrm mode implementing on-demand IP-TFS. IP-TFS
(AggFrag encapsulation) has been standardized in RFC9347.

  Link: https://www.rfc-editor.org/rfc/rfc9347.txt

This feature supports demand driven (i.e., non-constant send rate)
IP-TFS to take advantage of the AGGFRAG ESP payload encapsulation. This
payload type supports aggregation and fragmentation of the inner IP
packet stream which in turn yields higher small-packet bandwidth as well
as reducing MTU/PMTU issues. Congestion control is unimplementated as
the send rate is demand driven rather than constant.

In order to allow loading this functionality as a module a set of
callbacks xfrm_mode_cbs has been added to xfrm as well.

Patchset Structure:
-------------------

The first 5 commits are changes to the net and xfrm infrastructure to
support the callbacks as well as more generic IP-TFS additions that
may be used outside the actual IP-TFS implementation.

  - xfrm: config: add CONFIG_XFRM_IPTFS
  - include: uapi: protocol number and packet structs for AGGFRAG in ESP
  - xfrm: netlink: add config (netlink) options
  - xfrm: add mode_cbs module functionality
  - xfrm: add generic iptfs defines and functionality

The last 10 commits constitute the IP-TFS implementation constructed in
layers to make review easier. The first 9 commits all apply to a single
file `net/xfrm/xfrm_iptfs.c`, the last commit adds a new tracepoint
header file along with the use of these new tracepoint calls.

  - xfrm: iptfs: add new iptfs xfrm mode impl
  - xfrm: iptfs: add user packet (tunnel ingress) handling
  - xfrm: iptfs: share page fragments of inner packets
  - xfrm: iptfs: add fragmenting of larger than MTU user packets
  - xfrm: iptfs: add basic receive packet (tunnel egress) handling
  - xfrm: iptfs: handle received fragmented inner packets
  - xfrm: iptfs: add reusing received skb for the tunnel egress packet
  - xfrm: iptfs: add skb-fragment sharing code
  - xfrm: iptfs: handle reordering of received packets
  - xfrm: iptfs: add tracepoint functionality
====================

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
  • Loading branch information
Steffen Klassert committed Dec 9, 2024
2 parents 152d00a + ed58b18 commit 59af653
Show file tree
Hide file tree
Showing 21 changed files with 3,292 additions and 19 deletions.
44 changes: 44 additions & 0 deletions include/net/xfrm.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#define XFRM_PROTO_COMP 108
#define XFRM_PROTO_IPIP 4
#define XFRM_PROTO_IPV6 41
#define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG
#define XFRM_PROTO_ROUTING IPPROTO_ROUTING
#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS

Expand Down Expand Up @@ -213,6 +214,7 @@ struct xfrm_state {
u16 family;
xfrm_address_t saddr;
int header_len;
int enc_hdr_len;
int trailer_len;
u32 extra_flags;
struct xfrm_mark smark;
Expand Down Expand Up @@ -303,6 +305,9 @@ struct xfrm_state {
* interpreted by xfrm_type methods. */
void *data;
u8 dir;

const struct xfrm_mode_cbs *mode_cbs;
void *mode_data;
};

static inline struct net *xs_net(struct xfrm_state *x)
Expand Down Expand Up @@ -460,6 +465,45 @@ struct xfrm_type_offload {
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);

/**
* struct xfrm_mode_cbs - XFRM mode callbacks
* @owner: module owner or NULL
* @init_state: Add/init mode specific state in `xfrm_state *x`
* @clone_state: Copy mode specific values from `orig` to new state `x`
* @destroy_state: Cleanup mode specific state from `xfrm_state *x`
* @user_init: Process mode specific netlink attributes from user
* @copy_to_user: Add netlink attributes to `attrs` based on state in `x`
* @sa_len: Return space required to store mode specific netlink attributes
* @get_inner_mtu: Return avail payload space after removing encap overhead
* @input: Process received packet from SA using mode
* @output: Output given packet using mode
* @prepare_output: Add mode specific encapsulation to packet in skb. On return
* `transport_header` should point at ESP header, `network_header` should
* point at outer IP header and `mac_header` should point at the
* protocol/nexthdr field of the outer IP.
*
* A set of callbacks is associated with an xfrm mode number through
* xfrm_register_mode_cbs() and removed again with xfrm_unregister_mode_cbs().
*
* One should examine and understand the specific uses of these callbacks in
* xfrm for further detail on how and when these functions are called. RTSL.
*/
struct xfrm_mode_cbs {
struct module *owner;
int (*init_state)(struct xfrm_state *x);
int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig);
void (*destroy_state)(struct xfrm_state *x);
int (*user_init)(struct net *net, struct xfrm_state *x,
struct nlattr **attrs,
struct netlink_ext_ack *extack);
int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb);
unsigned int (*sa_len)(const struct xfrm_state *x);
u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu);
int (*input)(struct xfrm_state *x, struct sk_buff *skb);
int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
};

/*
* Register / unregister the callback set used for xfrm mode number `mode`.
* NOTE(review): the int return of the register call is presumably 0 on
* success and a negative errno on failure -- confirm against the
* implementation, which is not visible here.
*/
int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
void xfrm_unregister_mode_cbs(u8 mode);

static inline int xfrm_af2proto(unsigned int family)
{
switch(family) {
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/linux/in.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */
#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_SMC = 256, /* Shared Memory Communications */
Expand Down
16 changes: 16 additions & 0 deletions include/uapi/linux/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,22 @@ struct ip_beet_phdr {
__u8 reserved;
};

/*
* Header of an AGGFRAG (IP-TFS, RFC 9347) payload, basic form.
* The '*' in the subtype comment presumably marks the subtype value this
* layout corresponds to (0, basic) -- confirm against RFC 9347.
*/
struct ip_iptfs_hdr {
__u8 subtype; /* 0*: basic, 1: CC */
__u8 flags;
__be16 block_offset;
};

/*
* Header of an AGGFRAG (IP-TFS, RFC 9347) payload, congestion control (CC)
* form. The first three members have the same types and order as in
* struct ip_iptfs_hdr; the remaining members are CC specific.
* The '*' in the subtype comment presumably marks the subtype value this
* layout corresponds to (1, CC) -- confirm against RFC 9347.
*/
struct ip_iptfs_cc_hdr {
__u8 subtype; /* 0: basic, 1*: CC */
__u8 flags;
__be16 block_offset;
__be32 loss_rate;
/* NOTE(review): name suggests several values packed into 64 bits; see RFC 9347 for the exact bit layout */
__be64 rtt_adelay_xdelay;
__be32 tval;
__be32 techo;
};

/* index values for the variables in ipv4_devconf */
enum
{
Expand Down
3 changes: 2 additions & 1 deletion include/uapi/linux/ipsec.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ enum {
IPSEC_MODE_ANY = 0, /* We do not support this for SA */
IPSEC_MODE_TRANSPORT = 1,
IPSEC_MODE_TUNNEL = 2,
IPSEC_MODE_BEET = 3
IPSEC_MODE_BEET = 3,
IPSEC_MODE_IPTFS = 4
};

enum {
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/linux/snmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,8 @@ enum
LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */
LINUX_MIB_XFRMOUTSTATEDIRERROR, /* XfrmOutStateDirError */
LINUX_MIB_XFRMINSTATEDIRERROR, /* XfrmInStateDirError */
LINUX_MIB_XFRMINIPTFSERROR, /* XfrmInIptfsError */
LINUX_MIB_XFRMOUTNOQSPACE, /* XfrmOutNoQueueSpace */
__LINUX_MIB_XFRMMAX
};

Expand Down
9 changes: 8 additions & 1 deletion include/uapi/linux/xfrm.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ enum {
#define XFRM_MODE_ROUTEOPTIMIZATION 2
#define XFRM_MODE_IN_TRIGGER 3
#define XFRM_MODE_BEET 4
#define XFRM_MODE_MAX 5
#define XFRM_MODE_IPTFS 5
#define XFRM_MODE_MAX 6

/* Netlink configuration messages. */
enum {
Expand Down Expand Up @@ -323,6 +324,12 @@ enum xfrm_attr_type_t {
XFRMA_SA_DIR, /* __u8 */
XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */
XFRMA_SA_PCPU, /* __u32 */
XFRMA_IPTFS_DROP_TIME, /* __u32 in: usec to wait for next seq */
XFRMA_IPTFS_REORDER_WINDOW, /* __u16 in: reorder window size (pkts) */
XFRMA_IPTFS_DONT_FRAG, /* out: don't use fragmentation */
XFRMA_IPTFS_INIT_DELAY, /* __u32 out: initial packet wait delay (usec) */
XFRMA_IPTFS_MAX_QSIZE, /* __u32 out: max ingress queue size (octets) */
XFRMA_IPTFS_PKT_SIZE, /* __u32 out: size of outer packet, 0 for PMTU */
__XFRMA_MAX

#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */
Expand Down
3 changes: 2 additions & 1 deletion net/ipv4/esp4.c
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,8 @@ int esp_input_done2(struct sk_buff *skb, int err)
}

skb_pull_rcsum(skb, hlen);
if (x->props.mode == XFRM_MODE_TUNNEL)
if (x->props.mode == XFRM_MODE_TUNNEL ||
x->props.mode == XFRM_MODE_IPTFS)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -ihl);
Expand Down
3 changes: 2 additions & 1 deletion net/ipv6/esp6.c
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,8 @@ int esp6_input_done2(struct sk_buff *skb, int err)
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
skb_pull_rcsum(skb, hlen);
if (x->props.mode == XFRM_MODE_TUNNEL)
if (x->props.mode == XFRM_MODE_TUNNEL ||
x->props.mode == XFRM_MODE_IPTFS)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -hdr_len);
Expand Down
3 changes: 2 additions & 1 deletion net/netfilter/nft_xfrm.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
return true;
}

return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL;
return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL ||
mode == XFRM_MODE_IPTFS;
}

static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
Expand Down
16 changes: 16 additions & 0 deletions net/xfrm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,22 @@ config NET_KEY_MIGRATE

If unsure, say N.

config XFRM_IPTFS
tristate "IPsec IP-TFS/AGGFRAG (RFC 9347) encapsulation support"
depends on XFRM
help
Information on the IP-TFS/AGGFRAG encapsulation can be found
in RFC 9347. This feature supports demand driven (i.e.,
non-constant send rate) IP-TFS to take advantage of the
AGGFRAG ESP payload encapsulation. This payload type
supports aggregation and fragmentation of the inner IP
packet stream which in turn yields higher small-packet
bandwidth as well as reducing MTU/PMTU issues. Congestion
control is unimplemented as the send rate is demand driven
rather than constant.

If unsure, say N.

config XFRM_ESPINTCP
bool

Expand Down
1 change: 1 addition & 0 deletions net/xfrm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ obj-$(CONFIG_XFRM_USER) += xfrm_user.o
obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
obj-$(CONFIG_XFRM_IPTFS) += xfrm_iptfs.o
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
Loading

0 comments on commit 59af653

Please sign in to comment.