From ba3fa6e8c1eb4e40719451710263c87fb5f22221 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 19 Feb 2025 16:32:55 +0200 Subject: [PATCH 1/2] ip_tunnel: Use ip_tunnel_info() helper instead of 'info + 1' Tunnel options should not be accessed directly, use the ip_tunnel_info() accessor instead. Signed-off-by: Gal Pressman Reviewed-by: Kees Cook Link: https://patch.msgid.link/20250219143256.370277-2-gal@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 2 +- net/sched/act_tunnel_key.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1aa31bdb2b31e..7b54cea5de270 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -650,7 +650,7 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) static inline void ip_tunnel_info_opts_get(void *to, const struct ip_tunnel_info *info) { - memcpy(to, info + 1, info->options_len); + memcpy(to, ip_tunnel_info_opts(info), info->options_len); } static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index af7c998459488..ae5dea7c48a82 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -571,8 +571,8 @@ static void tunnel_key_release(struct tc_action *a) static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { + const u8 *src = ip_tunnel_info_opts(info); int len = info->options_len; - u8 *src = (u8 *)(info + 1); struct nlattr *start; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); @@ -580,7 +580,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, return -EMSGSIZE; while (len > 0) { - struct geneve_opt *opt = (struct geneve_opt *)src; + const struct geneve_opt *opt = (const struct geneve_opt *)src; if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS, opt->opt_class) || @@ -603,7 +603,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { - struct vxlan_metadata *md = (struct vxlan_metadata *)(info + 1); + const struct vxlan_metadata *md = ip_tunnel_info_opts(info); struct nlattr *start; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_VXLAN); @@ -622,7 +622,7 @@ static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb, static int tunnel_key_erspan_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { - struct erspan_metadata *md = (struct erspan_metadata *)(info + 1); + const struct erspan_metadata *md = ip_tunnel_info_opts(info); struct nlattr *start; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN); From bb5e62f2d547c4de6d1b144cbce2373a76c33f18 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 19 Feb 2025 16:32:56 +0200 Subject: [PATCH 2/2] net: Add options as a flexible array to struct ip_tunnel_info Remove the hidden assumption that options are allocated at the end of the struct, and teach the compiler about them using a flexible array. With this, we can revert the unsafe_memcpy() call we have in tun_dst_unclone() [1], and resolve the false field-spanning write warning caused by the memcpy() in ip_tunnel_info_opts_set(). The layout of struct ip_tunnel_info remains the same with this patch. Before this patch, there was an implicit padding at the end of the struct, options would be written at 'info + 1' which is after the padding. This will remain the same as this patch explicitly aligns 'options'. The alignment is needed as the options are later casted to different structs, and might result in unaligned memory access. Pahole output before this patch: struct ip_tunnel_info { struct ip_tunnel_key key; /* 0 64 */ /* XXX last struct has 1 byte of padding */ /* --- cacheline 1 boundary (64 bytes) --- */ struct ip_tunnel_encap encap; /* 64 8 */ struct dst_cache dst_cache; /* 72 16 */ u8 options_len; /* 88 1 */ u8 mode; /* 89 1 */ /* size: 96, cachelines: 2, members: 5 */ /* padding: 6 */ /* paddings: 1, sum paddings: 1 */ /* last cacheline: 32 bytes */ }; Pahole output after this patch: struct ip_tunnel_info { struct ip_tunnel_key key; /* 0 64 */ /* XXX last struct has 1 byte of padding */ /* --- cacheline 1 boundary (64 bytes) --- */ struct ip_tunnel_encap encap; /* 64 8 */ struct dst_cache dst_cache; /* 72 16 */ u8 options_len; /* 88 1 */ u8 mode; /* 89 1 */ /* XXX 6 bytes hole, try to pack */ u8 options[] __attribute__((__aligned__(16))); /* 96 0 */ /* size: 96, cachelines: 2, members: 6 */ /* sum members: 90, holes: 1, sum holes: 6 */ /* paddings: 1, sum paddings: 1 */ /* forced alignments: 1, forced holes: 1, sum forced holes: 6 */ /* last cacheline: 32 bytes */ } __attribute__((__aligned__(16))); [1] Commit 13cfd6a6d7ac ("net: Silence false field-spanning write warning in metadata_dst memcpy") Link: https://lore.kernel.org/all/53D1D353-B8F6-4ADC-8F29-8C48A7C9C6F1@kernel.org/ Suggested-by: Kees Cook Reviewed-by: Cosmin Ratiu Reviewed-by: Tariq Toukan Signed-off-by: Gal Pressman Reviewed-by: Kees Cook Link: https://patch.msgid.link/20250219143256.370277-3-gal@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/dst_metadata.h | 7 ++----- include/net/ip_tunnels.h | 5 +++-- net/core/dst.c | 6 ++++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 84c15402931cd..4160731dcb6e3 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -163,11 +163,8 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) if (!new_md) return ERR_PTR(-ENOMEM); - unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, - sizeof(struct ip_tunnel_info) + md_size, - /* metadata_dst_alloc() reserves room (md_size bytes) for - * options right after the ip_tunnel_info struct. - */); + memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, + sizeof(struct ip_tunnel_info) + md_size); #ifdef CONFIG_DST_CACHE /* Unclone the dst cache if there is one */ if (new_md->u.tun_info.dst_cache.cache) { diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 7b54cea5de270..e041e4865373e 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -95,8 +95,8 @@ struct ip_tunnel_encap { #define ip_tunnel_info_opts(info) \ _Generic(info, \ - const struct ip_tunnel_info * : ((const void *)((info) + 1)),\ - struct ip_tunnel_info * : ((void *)((info) + 1))\ + const struct ip_tunnel_info * : ((const void *)(info)->options),\ + struct ip_tunnel_info * : ((void *)(info)->options)\ ) struct ip_tunnel_info { @@ -107,6 +107,7 @@ struct ip_tunnel_info { #endif u8 options_len; u8 mode; + u8 options[] __aligned_largest __counted_by(options_len); }; /* 6rd prefix/relay information */ diff --git a/net/core/dst.c b/net/core/dst.c index 9552a90d4772d..c99b95cf9cbb1 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -286,7 +286,8 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, { struct metadata_dst *md_dst; - md_dst = kmalloc(sizeof(*md_dst) + optslen, flags); + md_dst = kmalloc(struct_size(md_dst, u.tun_info.options, optslen), + flags); if (!md_dst) return NULL; @@ -314,7 +315,8 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) int cpu; struct metadata_dst __percpu *md_dst; - md_dst = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen, + md_dst = __alloc_percpu_gfp(struct_size(md_dst, u.tun_info.options, + optslen), __alignof__(struct metadata_dst), flags); if (!md_dst) return NULL;