From 65cd8033ff375b68037df61603ee68070dc48578 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:51 +0200 Subject: [PATCH 1/9] ipv4: split inet_ehashfn to hash functions per compilation unit This duplicates a bit of code but let's us easily introduce separate secret keys later. The separate compilation units are ipv4/inet_hashtabbles.o, ipv4/udp.o and rds/connection.o. Cc: Eric Dumazet Cc: "David S. Miller" Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/inet_sock.h | 22 ++++++---------------- net/ipv4/inet_hashtables.c | 21 +++++++++++++++++++++ net/ipv4/udp.c | 16 ++++++++++++---- net/rds/connection.c | 6 +++--- 4 files changed, 42 insertions(+), 23 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 06da91efbc83..7a6c7f80a8fd 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -208,26 +208,16 @@ extern u32 inet_ehash_secret; extern u32 ipv6_hash_secret; void build_ehash_secret(void); -static inline unsigned int inet_ehashfn(struct net *net, - const __be32 laddr, const __u16 lport, - const __be32 faddr, const __be16 fport) +static inline unsigned int __inet_ehashfn(const __be32 laddr, + const __u16 lport, + const __be32 faddr, + const __be16 fport, + u32 initval) { return jhash_3words((__force __u32) laddr, (__force __u32) faddr, ((__u32) lport) << 16 | (__force __u32)fport, - inet_ehash_secret + net_hash_mix(net)); -} - -static inline int inet_sk_ehashfn(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const __be32 laddr = inet->inet_rcv_saddr; - const __u16 lport = inet->inet_num; - const __be32 faddr = inet->inet_daddr; - const __be16 fport = inet->inet_dport; - struct net *net = sock_net(sk); - - return inet_ehashfn(net, laddr, lport, faddr, fport); + initval); } static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index a4b66bbe4f21..18aa668d0cc9 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -24,6 +24,27 @@ #include #include +static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) +{ + return __inet_ehashfn(laddr, lport, faddr, fport, + inet_ehash_secret + net_hash_mix(net)); +} + + +static unsigned int inet_sk_ehashfn(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const __be32 laddr = inet->inet_rcv_saddr; + const __u16 lport = inet->inet_num; + const __be32 faddr = inet->inet_daddr; + const __be16 fport = inet->inet_dport; + struct net *net = sock_net(sk); + + return inet_ehashfn(net, laddr, lport, faddr, fport); +} + /* * Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9f27bb800607..b4437c7db6ce 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -407,6 +407,14 @@ static inline int compute_score2(struct sock *sk, struct net *net, return score; } +static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) +{ + return __inet_ehashfn(laddr, lport, faddr, fport, + inet_ehash_secret + net_hash_mix(net)); +} + /* called with read_rcu_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, @@ -430,8 +438,8 @@ static struct sock *udp4_lib_lookup2(struct net *net, badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -511,8 +519,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { diff --git a/net/rds/connection.c b/net/rds/connection.c index 642ad42c416b..45e23660437a 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -52,9 +52,9 @@ static struct kmem_cache *rds_conn_slab; static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) { /* Pass NULL, don't need struct net for hash */ - unsigned long hash = inet_ehashfn(NULL, - be32_to_cpu(laddr), 0, - be32_to_cpu(faddr), 0); + unsigned long hash = __inet_ehashfn(be32_to_cpu(laddr), 0, + be32_to_cpu(faddr), 0, + inet_ehash_secret); return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; } From b50026b5ac8fe2932e6af0c54b21da0913c4c1c7 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:52 +0200 Subject: [PATCH 2/9] ipv6: split inet6_ehashfn to hash functions per compilation unit This patch splits the inet6_ehashfn into separate ones in ipv6/inet6_hashtables.o and ipv6/udp.o to ease the introduction of seperate secrets keys later. Cc: Eric Dumazet Cc: "David S. Miller" Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 28 +++++++--------------------- include/net/ipv6.h | 4 ++-- net/ipv6/inet6_hashtables.c | 24 ++++++++++++++++++++++++ net/ipv6/udp.c | 20 ++++++++++++++++---- 4 files changed, 49 insertions(+), 27 deletions(-) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index a105d1a2fc00..ae0613544308 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -28,28 +28,14 @@ struct inet_hashinfo; -static inline unsigned int inet6_ehashfn(struct net *net, - const struct in6_addr *laddr, const u16 lport, - const struct in6_addr *faddr, const __be16 fport) +static inline unsigned int __inet6_ehashfn(const u32 lhash, + const u16 lport, + const u32 fhash, + const __be16 fport, + const u32 initval) { - u32 ports = (((u32)lport) << 16) | (__force u32)fport; - - return jhash_3words((__force u32)laddr->s6_addr32[3], - ipv6_addr_jhash(faddr), - ports, - inet_ehash_secret + net_hash_mix(net)); -} - -static inline int inet6_sk_ehashfn(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; - const struct in6_addr *faddr = &sk->sk_v6_daddr; - const __u16 lport = inet->inet_num; - const __be16 fport = inet->inet_dport; - struct net *net = sock_net(sk); - - return inet6_ehashfn(net, laddr, lport, faddr, fport); + const u32 ports = (((u32)lport) << 16) | (__force u32)fport; + return jhash_3words(lhash, fhash, ports, initval); } int __inet6_hash(struct sock *sk, struct inet_timewait_sock *twp); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index fe1c7f6c9217..a35055f4f8da 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -539,14 +539,14 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a) } /* more secured version of ipv6_addr_hash() */ -static inline u32 ipv6_addr_jhash(const struct in6_addr *a) +static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval) { u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1]; return jhash_3words(v, (__force u32)a->s6_addr32[2], (__force u32)a->s6_addr32[3], - ipv6_hash_secret); + initval); } static inline bool ipv6_addr_loopback(const struct in6_addr *a) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 842d833dfc18..fa7dd3856c55 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,6 +23,30 @@ #include #include +static unsigned int inet6_ehashfn(struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) +{ + const u32 lhash = (__force u32)laddr->s6_addr32[3]; + const u32 fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); + return __inet6_ehashfn(lhash, lport, fhash, fport, + inet_ehash_secret + net_hash_mix(net)); +} + +static int inet6_sk_ehashfn(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *faddr = &sk->sk_v6_daddr; + const __u16 lport = inet->inet_num; + const __be16 fport = inet->inet_dport; + struct net *net = sock_net(sk); + + return inet6_ehashfn(net, laddr, lport, faddr, fport); +} + int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b496de19a341..324bd36c23bc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,6 +53,18 @@ #include #include "udp_impl.h" +static unsigned int udp6_ehashfn(struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) +{ + const u32 lhash = (__force u32)laddr->s6_addr32[3]; + const u32 fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); + return __inet6_ehashfn(lhash, lport, fhash, fport, + inet_ehash_secret + net_hash_mix(net)); +} + int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); @@ -214,8 +226,8 @@ static struct sock *udp6_lib_lookup2(struct net *net, badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp6_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } else if (score == SCORE2_MAX) goto exact_match; @@ -295,8 +307,8 @@ struct sock *__udp6_lib_lookup(struct net *net, badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp6_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { From c4b2c0c5f647aa1093e8f9097a30c17ce0f94d4d Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:53 +0200 Subject: [PATCH 3/9] static_key: WARN on usage before jump_label_init was called Usage of the static key primitives to toggle a branch must not be used before jump_label_init() is called from init/main.c. jump_label_init reorganizes and wires up the jump_entries so usage before that could have unforeseen consequences. Following primitives are now checked for correct use: * static_key_slow_inc * static_key_slow_dec * static_key_slow_dec_deferred * jump_label_rate_limit The x86 architecture already checks this by testing if the default_nop was already replaced with an optimal nop or with a branch instruction. It will panic then. Other architectures don't check for this. Because we need to relax this check for the x86 arch to allow code to transition from default_nop to the enabled state and other architectures did not check for this at all this patch introduces checking on the static_key primitives in a non-arch dependent manner. All checked functions are considered slow-path so the additional check does no harm to performance. The warnings are best observed with earlyprintk. Based on a patch from Andi Kleen. Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andi Kleen Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/jump_label.h | 10 ++++++++++ include/linux/jump_label_ratelimit.h | 2 ++ init/main.c | 7 +++++++ kernel/jump_label.c | 5 +++++ 4 files changed, 24 insertions(+) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a5079072da66..e96be7245717 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -48,6 +48,13 @@ #include #include +#include + +extern bool static_key_initialized; + +#define STATIC_KEY_CHECK_USE() WARN(!static_key_initialized, \ + "%s used before call to jump_label_init", \ + __func__) #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) @@ -128,6 +135,7 @@ struct static_key { static __always_inline void jump_label_init(void) { + static_key_initialized = true; } static __always_inline bool static_key_false(struct static_key *key) @@ -146,11 +154,13 @@ static __always_inline bool static_key_true(struct static_key *key) static inline void static_key_slow_inc(struct static_key *key) { + STATIC_KEY_CHECK_USE(); atomic_inc(&key->enabled); } static inline void static_key_slow_dec(struct static_key *key) { + STATIC_KEY_CHECK_USE(); atomic_dec(&key->enabled); } diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index 113788389b3d..089f70f83e97 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h @@ -23,12 +23,14 @@ struct static_key_deferred { }; static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) { + STATIC_KEY_CHECK_USE(); static_key_slow_dec(&key->key); } static inline void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { + STATIC_KEY_CHECK_USE(); } #endif /* HAVE_JUMP_LABEL */ #endif /* _LINUX_JUMP_LABEL_RATELIMIT_H */ diff --git a/init/main.c b/init/main.c index af310afbef28..27bbec1a5b35 100644 --- a/init/main.c +++ b/init/main.c @@ -135,6 +135,13 @@ static char *static_command_line; static char *execute_command; static char *ramdisk_execute_command; +/* + * Used to generate warnings if static_key manipulation functions are used + * before jump_label_init is called. + */ +bool static_key_initialized __read_mostly = false; +EXPORT_SYMBOL_GPL(static_key_initialized); + /* * If set, this is an indication to the drivers that reset the underlying * device before going ahead with the initialization otherwise driver might diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 297a9247a3b3..9019f15deab2 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -58,6 +58,7 @@ static void jump_label_update(struct static_key *key, int enable); void static_key_slow_inc(struct static_key *key) { + STATIC_KEY_CHECK_USE(); if (atomic_inc_not_zero(&key->enabled)) return; @@ -103,12 +104,14 @@ static void jump_label_update_timeout(struct work_struct *work) void static_key_slow_dec(struct static_key *key) { + STATIC_KEY_CHECK_USE(); __static_key_slow_dec(key, 0, NULL); } EXPORT_SYMBOL_GPL(static_key_slow_dec); void static_key_slow_dec_deferred(struct static_key_deferred *key) { + STATIC_KEY_CHECK_USE(); __static_key_slow_dec(&key->key, key->timeout, &key->work); } EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); @@ -116,6 +119,7 @@ EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { + STATIC_KEY_CHECK_USE(); key->timeout = rl; INIT_DELAYED_WORK(&key->work, jump_label_update_timeout); } @@ -212,6 +216,7 @@ void __init jump_label_init(void) key->next = NULL; #endif } + static_key_initialized = true; jump_label_unlock(); } From a8fab0744585c1ab61009bfc1a1958f28e1c864f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:54 +0200 Subject: [PATCH 4/9] x86/jump_label: expect default_nop if static_key gets enabled on boot-up net_get_random_once(intrduced in the next patch) uses static_keys in a way that they get enabled on boot-up instead of replaced with an ideal_nop. So check for default_nop on initial enabling. Other architectures don't check for this. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Jason Baron Cc: Peter Zijlstra Cc: Eric Dumazet Cc: "David S. Miller" Cc: x86@kernel.org Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- arch/x86/kernel/jump_label.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index ee11b7dfbfbb..26d5a55a2736 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -42,15 +42,27 @@ static void __jump_label_transform(struct jump_entry *entry, int init) { union jump_code_union code; + const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; if (type == JUMP_LABEL_ENABLE) { - /* - * We are enabling this jump label. If it is not a nop - * then something must have gone wrong. - */ - if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0)) - bug_at((void *)entry->code, __LINE__); + if (init) { + /* + * Jump label is enabled for the first time. + * So we expect a default_nop... + */ + if (unlikely(memcmp((void *)entry->code, default_nop, 5) + != 0)) + bug_at((void *)entry->code, __LINE__); + } else { + /* + * ...otherwise expect an ideal_nop. Otherwise + * something went horribly wrong. + */ + if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) + != 0)) + bug_at((void *)entry->code, __LINE__); + } code.jump = 0xe9; code.offset = entry->target - @@ -63,7 +75,6 @@ static void __jump_label_transform(struct jump_entry *entry, * are converting the default nop to the ideal nop. */ if (init) { - const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0)) bug_at((void *)entry->code, __LINE__); } else { From a48e42920ff38bc90bbf75143fff4555723d4540 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:55 +0200 Subject: [PATCH 5/9] net: introduce new macro net_get_random_once net_get_random_once is a new macro which handles the initialization of secret keys. It is possible to call it in the fast path. Only the initialization depends on the spinlock and is rather slow. Otherwise it should get used just before the key is used to delay the entropy extration as late as possible to get better randomness. It returns true if the key got initialized. The usage of static_keys for net_get_random_once is a bit uncommon so it needs some further explanation why this actually works: === In the simple non-HAVE_JUMP_LABEL case we actually have === no constrains to use static_key_(true|false) on keys initialized with STATIC_KEY_INIT_(FALSE|TRUE). So this path just expands in favor of the likely case that the initialization is already done. The key is initialized like this: ___done_key = { .enabled = ATOMIC_INIT(0) } The check if (!static_key_true(&___done_key)) \ expands into (pseudo code) if (!likely(___done_key > 0)) , so we take the fast path as soon as ___done_key is increased from the helper function. === If HAVE_JUMP_LABELs are available this depends === on patching of jumps into the prepared NOPs, which is done in jump_label_init at boot-up time (from start_kernel). It is forbidden and dangerous to use net_get_random_once in functions which are called before that! At compilation time NOPs are generated at the call sites of net_get_random_once. E.g. net/ipv6/inet6_hashtable.c:inet6_ehashfn (we need to call net_get_random_once two times in inet6_ehashfn, so two NOPs): 71: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 76: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) Both will be patched to the actual jumps to the end of the function to call __net_get_random_once at boot time as explained above. arch_static_branch is optimized and inlined for false as return value and actually also returns false in case the NOP is placed in the instruction stream. So in the fast case we get a "return false". But because we initialize ___done_key with (enabled != (entries & 1)) this call-site will get patched up at boot thus returning true. The final check looks like this: if (!static_key_true(&___done_key)) \ ___ret = __net_get_random_once(buf, \ expands to if (!!static_key_false(&___done_key)) \ ___ret = __net_get_random_once(buf, \ So we get true at boot time and as soon as static_key_slow_inc is called on the key it will invert the logic and return false for the fast path. static_key_slow_inc will change the branch because it got initialized with .enabled == 0. After static_key_slow_inc is called on the key the branch is replaced with a nop again. === Misc: === The helper defers the increment into a workqueue so we don't have problems calling this code from atomic sections. A seperate boolean (___done) guards the case where we enter net_get_random_once again before the increment happend. Cc: Ingo Molnar Cc: Steven Rostedt Cc: Jason Baron Cc: Peter Zijlstra Cc: Eric Dumazet Cc: "David S. Miller" Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/net.h | 25 +++++++++++++++++++++++ net/core/utils.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/include/linux/net.h b/include/linux/net.h index ca9ec8540905..a489705f6fa3 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -239,6 +239,31 @@ do { \ #define net_random() prandom_u32() #define net_srandom(seed) prandom_seed((__force u32)(seed)) +bool __net_get_random_once(void *buf, int nbytes, bool *done, + struct static_key *done_key); + +#ifdef HAVE_JUMP_LABEL +#define ___NET_RANDOM_STATIC_KEY_INIT ((struct static_key) \ + { .enabled = ATOMIC_INIT(0), .entries = (void *)1 }) +#else /* !HAVE_JUMP_LABEL */ +#define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE +#endif /* HAVE_JUMP_LABEL */ + +/* BE CAREFUL: this function is not interrupt safe */ +#define net_get_random_once(buf, nbytes) \ + ({ \ + bool ___ret = false; \ + static bool ___done = false; \ + static struct static_key ___done_key = \ + ___NET_RANDOM_STATIC_KEY_INIT; \ + if (!static_key_true(&___done_key)) \ + ___ret = __net_get_random_once(buf, \ + nbytes, \ + &___done, \ + &___done_key); \ + ___ret; \ + }) + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, diff --git a/net/core/utils.c b/net/core/utils.c index aa88e23fc87a..bf09371e19b1 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -338,3 +338,51 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, csum_unfold(*sum))); } EXPORT_SYMBOL(inet_proto_csum_replace16); + +struct __net_random_once_work { + struct work_struct work; + struct static_key *key; +}; + +static void __net_random_once_deferred(struct work_struct *w) +{ + struct __net_random_once_work *work = + container_of(w, struct __net_random_once_work, work); + if (!static_key_enabled(work->key)) + static_key_slow_inc(work->key); + kfree(work); +} + +static void __net_random_once_disable_jump(struct static_key *key) +{ + struct __net_random_once_work *w; + + w = kmalloc(sizeof(*w), GFP_ATOMIC); + if (!w) + return; + + INIT_WORK(&w->work, __net_random_once_deferred); + w->key = key; + schedule_work(&w->work); +} + +bool __net_get_random_once(void *buf, int nbytes, bool *done, + struct static_key *done_key) +{ + static DEFINE_SPINLOCK(lock); + + spin_lock_bh(&lock); + if (*done) { + spin_unlock_bh(&lock); + return false; + } + + get_random_bytes(buf, nbytes); + *done = true; + spin_unlock_bh(&lock); + + __net_random_once_disable_jump(done_key); + + return true; +} +EXPORT_SYMBOL(__net_get_random_once); From b23a002fc6f0c19846ee0382f019429af54a27e9 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:56 +0200 Subject: [PATCH 6/9] inet: split syncookie keys for ipv4 and ipv6 and initialize with net_get_random_once This patch splits the secret key for syncookies for ipv4 and ipv6 and initializes them with net_get_random_once. This change was the reason I did this series. I think the initialization of the syncookie_secret is way to early. Cc: Florian Westphal Cc: Eric Dumazet Cc: "David S. Miller" Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - net/ipv4/syncookies.c | 15 +++++---------- net/ipv6/syncookies.c | 12 +++++++++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 372dcccfeed0..f30326f1c92b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -475,7 +475,6 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ -extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3b64c59b4109..b95331e6c077 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -25,15 +25,7 @@ extern int sysctl_tcp_syncookies; -__u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; -EXPORT_SYMBOL(syncookie_secret); - -static __init int init_syncookies(void) -{ - get_random_bytes(syncookie_secret, sizeof(syncookie_secret)); - return 0; -} -__initcall(init_syncookies); +static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) @@ -44,8 +36,11 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, u32 count, int c) { - __u32 *tmp = __get_cpu_var(ipv4_cookie_scratch); + __u32 *tmp; + + net_get_random_once(syncookie_secret, sizeof(syncookie_secret)); + tmp = __get_cpu_var(ipv4_cookie_scratch); memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c])); tmp[0] = (__force u32)saddr; tmp[1] = (__force u32)daddr; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index d04d3f1dd9b7..535a3ad262f1 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -24,6 +24,8 @@ #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) +static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS]; + /* RFC 2460, Section 8.3: * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] * @@ -61,14 +63,18 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, u32 count, int c) { - __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch); + __u32 *tmp; + + net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret)); + + tmp = __get_cpu_var(ipv6_cookie_scratch); /* * we have 320 bits of information to hash, copy in the remaining - * 192 bits required for sha_transform, from the syncookie_secret + * 192 bits required for sha_transform, from the syncookie6_secret * and overwrite the digest with the secret */ - memcpy(tmp + 10, syncookie_secret[c], 44); + memcpy(tmp + 10, syncookie6_secret[c], 44); memcpy(tmp, saddr, 16); memcpy(tmp + 4, daddr, 16); tmp[8] = ((__force u32)sport << 16) + (__force u32)dport; From 1bbdceef1e535add893bf71d7b7ab102e4eb69eb Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:57 +0200 Subject: [PATCH 7/9] inet: convert inet_ehash_secret and ipv6_hash_secret to net_get_random_once Initialize the ehash and ipv6_hash_secrets with net_get_random_once. Each compilation unit gets its own secret now: ipv4/inet_hashtables.o ipv4/udp.o ipv6/inet6_hashtables.o ipv6/udp.o rds/connection.o The functions still get inlined into the hashing functions. In the fast path we have at most two (needed in ipv6) if (unlikely(...)). Cc: Eric Dumazet Cc: "David S. Miller" Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/inet_sock.h | 4 ---- net/ipv4/af_inet.c | 27 --------------------------- net/ipv4/inet_hashtables.c | 4 ++++ net/ipv4/udp.c | 6 +++++- net/ipv6/af_inet6.c | 5 ----- net/ipv6/inet6_hashtables.c | 15 ++++++++++++--- net/ipv6/udp.c | 17 ++++++++++++++--- net/rds/connection.c | 12 +++++++++--- 8 files changed, 44 insertions(+), 46 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 7a6c7f80a8fd..1833c3f389ee 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -204,10 +204,6 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, int inet_sk_rebuild_header(struct sock *sk); -extern u32 inet_ehash_secret; -extern u32 ipv6_hash_secret; -void build_ehash_secret(void); - static inline unsigned int __inet_ehashfn(const __be32 laddr, const __u16 lport, const __be32 faddr, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4049906010f7..9433a6186f54 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -245,29 +245,6 @@ int inet_listen(struct socket *sock, int backlog) } EXPORT_SYMBOL(inet_listen); -u32 inet_ehash_secret __read_mostly; -EXPORT_SYMBOL(inet_ehash_secret); - -u32 ipv6_hash_secret __read_mostly; -EXPORT_SYMBOL(ipv6_hash_secret); - -/* - * inet_ehash_secret must be set exactly once, and to a non nul value - * ipv6_hash_secret must be set exactly once. - */ -void build_ehash_secret(void) -{ - u32 rnd; - - do { - get_random_bytes(&rnd, sizeof(rnd)); - } while (rnd == 0); - - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) - get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); -} -EXPORT_SYMBOL(build_ehash_secret); - /* * Create an inet socket. */ @@ -284,10 +261,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; - if (unlikely(!inet_ehash_secret)) - if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) - build_ehash_secret(); - sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 18aa668d0cc9..8b9cf279450d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -28,6 +28,10 @@ static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { + static u32 inet_ehash_secret __read_mostly; + + net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); + return __inet_ehashfn(laddr, lport, faddr, fport, inet_ehash_secret + net_hash_mix(net)); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4437c7db6ce..89909dd730dd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -411,8 +411,12 @@ static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { + static u32 udp_ehash_secret __read_mostly; + + net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret)); + return __inet_ehashfn(laddr, lport, faddr, fport, - inet_ehash_secret + net_hash_mix(net)); + udp_ehash_secret + net_hash_mix(net)); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a2cb07cd3850..20af1fb81c83 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -110,11 +110,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; - if (sock->type != SOCK_RAW && - sock->type != SOCK_DGRAM && - !inet_ehash_secret) - build_ehash_secret(); - /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index fa7dd3856c55..262e13c02ec2 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -29,10 +29,19 @@ static unsigned int inet6_ehashfn(struct net *net, const struct in6_addr *faddr, const __be16 fport) { - const u32 lhash = (__force u32)laddr->s6_addr32[3]; - const u32 fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); + static u32 inet6_ehash_secret __read_mostly; + static u32 ipv6_hash_secret __read_mostly; + + u32 lhash, fhash; + + net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); + net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); + + lhash = (__force u32)laddr->s6_addr32[3]; + fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); + return __inet6_ehashfn(lhash, lport, fhash, fport, - inet_ehash_secret + net_hash_mix(net)); + inet6_ehash_secret + net_hash_mix(net)); } static int inet6_sk_ehashfn(const struct sock *sk) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 324bd36c23bc..44fc4e3d661f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -59,10 +59,21 @@ static unsigned int udp6_ehashfn(struct net *net, const struct in6_addr *faddr, const __be16 fport) { - const u32 lhash = (__force u32)laddr->s6_addr32[3]; - const u32 fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); + static u32 udp6_ehash_secret __read_mostly; + static u32 udp_ipv6_hash_secret __read_mostly; + + u32 lhash, fhash; + + net_get_random_once(&udp6_ehash_secret, + sizeof(udp6_ehash_secret)); + net_get_random_once(&udp_ipv6_hash_secret, + sizeof(udp_ipv6_hash_secret)); + + lhash = (__force u32)laddr->s6_addr32[3]; + fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); + return __inet6_ehashfn(lhash, lport, fhash, fport, - inet_ehash_secret + net_hash_mix(net)); + udp_ipv6_hash_secret + net_hash_mix(net)); } int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) diff --git a/net/rds/connection.c b/net/rds/connection.c index 45e23660437a..378c3a6acf84 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -51,10 +51,16 @@ static struct kmem_cache *rds_conn_slab; static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) { + static u32 rds_hash_secret __read_mostly; + + unsigned long hash; + + net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); + /* Pass NULL, don't need struct net for hash */ - unsigned long hash = __inet_ehashfn(be32_to_cpu(laddr), 0, - be32_to_cpu(faddr), 0, - inet_ehash_secret); + hash = __inet_ehashfn(be32_to_cpu(laddr), 0, + be32_to_cpu(faddr), 0, + rds_hash_secret); return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; } From 222e83d2e0aecb6a5e8d42b1a8d51332a1eba960 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:58 +0200 Subject: [PATCH 8/9] tcp: switch tcp_fastopen key generation to net_get_random_once Changed key initialization of tcp_fastopen cookies to net_get_random_once. If the user sets a custom key net_get_random_once must be called at least once to ensure we don't overwrite the user provided key when the first cookie is generated later on. Cc: Yuchung Cheng Cc: Eric Dumazet Cc: "David S. Miller" Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/sysctl_net_ipv4.c | 5 +++++ net/ipv4/tcp_fastopen.c | 27 ++++++++++++++++----------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index f30326f1c92b..b12e29a76590 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1322,7 +1322,7 @@ extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; int tcp_fastopen_reset_cipher(void *key, unsigned int len); void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, struct tcp_fastopen_cookie *foc); - +void tcp_fastopen_init_key_once(bool publish); #define TCP_FASTOPEN_KEY_LENGTH 16 /* Fastopen key context */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c08f096d46b5..4b161d5aba0b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -274,6 +274,11 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, ret = -EINVAL; goto bad_key; } + /* Generate a dummy secret but don't publish it. This + * is needed so we don't regenerate a new key on the + * first invocation of tcp_fastopen_cookie_gen + */ + tcp_fastopen_init_key_once(false); tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ab7bd35bb312..766032b4a6c3 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -14,6 +14,20 @@ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); +void tcp_fastopen_init_key_once(bool publish) +{ + static u8 key[TCP_FASTOPEN_KEY_LENGTH]; + + /* tcp_fastopen_reset_cipher publishes the new context + * atomically, so we allow this race happening here. + * + * All call sites of tcp_fastopen_cookie_gen also check + * for a valid cookie, so this is an acceptable risk. + */ + if (net_get_random_once(key, sizeof(key)) && publish) + tcp_fastopen_reset_cipher(key, sizeof(key)); +} + static void tcp_fastopen_ctx_free(struct rcu_head *head) { struct tcp_fastopen_context *ctx = @@ -70,6 +84,8 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, __be32 path[4] = { src, dst, 0, 0 }; struct tcp_fastopen_context *ctx; + tcp_fastopen_init_key_once(true); + rcu_read_lock(); ctx = rcu_dereference(tcp_fastopen_ctx); if (ctx) { @@ -78,14 +94,3 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, } rcu_read_unlock(); } - -static int __init tcp_fastopen_init(void) -{ - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; - - get_random_bytes(key, sizeof(key)); - tcp_fastopen_reset_cipher(key, sizeof(key)); - return 0; -} - -late_initcall(tcp_fastopen_init); From e34c9a69970d8664a36b46e6445a7cc879111cfd Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:59 +0200 Subject: [PATCH 9/9] net: switch net_secret key generation to net_get_random_once Cc: Eric Dumazet Cc: "David S. Miller" Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/core/secure_seq.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 3f1ec1586ae1..b02fd16b8942 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -16,18 +17,7 @@ static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; static void net_secret_init(void) { - u32 tmp; - int i; - - if (likely(net_secret[0])) - return; - - for (i = NET_SECRET_SIZE; i > 0;) { - do { - get_random_bytes(&tmp, sizeof(tmp)); - } while (!tmp); - cmpxchg(&net_secret[--i], 0, tmp); - } + net_get_random_once(net_secret, sizeof(net_secret)); } #ifdef CONFIG_INET