Skip to content

Commit

Permalink
Revert "xfrm: xfrm_state_mtu should return at least 1280 for ipv6"
Browse files Browse the repository at this point in the history
This reverts commit b515d26.

Commit b515d26 ("xfrm: xfrm_state_mtu
should return at least 1280 for ipv6") in v5.14 breaks the TCP MSS
calculation in ipsec transport mode, resulting complete stalls of TCP
connections. This happens when the (P)MTU is 1280 or slighly larger.

The desired formula for the MSS is:
MSS = (MTU - ESP_overhead) - IP header - TCP header

However, the above commit clamps the (MTU - ESP_overhead) to a
minimum of 1280, turning the formula into
MSS = max(MTU - ESP overhead, 1280) -  IP header - TCP header

With the (P)MTU near 1280, the calculated MSS is too large and the
resulting TCP packets never make it to the destination because they
are over the actual PMTU.

The above commit also causes suboptimal double fragmentation in
xfrm tunnel mode, as described in
https://lore.kernel.org/netdev/20210429202529.codhwpc7w6kbudug@dwarf.suse.cz/

The original problem the above commit was trying to fix is now fixed
by commit 6596a02 ("xfrm: fix MTU
regression").

Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
  • Loading branch information
Jiri Bohac authored and Steffen Klassert committed Jan 27, 2022
1 parent e03c3bb commit a6d95c5
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 15 deletions.
1 change: 0 additions & 1 deletion include/net/xfrm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1568,7 +1568,6 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x);
u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
int xfrm_init_state(struct xfrm_state *x);
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/esp4.c
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;

padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
Expand Down
2 changes: 1 addition & 1 deletion net/ipv6/esp6.c
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;

padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
Expand Down
14 changes: 2 additions & 12 deletions net/xfrm/xfrm_state.c
Original file line number Diff line number Diff line change
Expand Up @@ -2579,7 +2579,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);

u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
const struct xfrm_type *type = READ_ONCE(x->type);
struct crypto_aead *aead;
Expand Down Expand Up @@ -2610,17 +2610,7 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
EXPORT_SYMBOL_GPL(__xfrm_state_mtu);

u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
mtu = __xfrm_state_mtu(x, mtu);

if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU)
return IPV6_MIN_MTU;

return mtu;
}
EXPORT_SYMBOL_GPL(xfrm_state_mtu);

int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
Expand Down

0 comments on commit a6d95c5

Please sign in to comment.