From e56d6cd6057aac1c6ed8e1590acd62b46e06201d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Thu, 1 Nov 2007 00:09:37 -0700 Subject: [PATCH 01/17] [TCP]: Process DSACKs that reside within a SACK block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A DSACK inside another SACK block was missed if the start_seq of the DSACK was larger than the SACK block's, because sorting prioritizes full processing of the SACK block before the DSACK. After SACK block sorting, the situation is like this: SSSSSSSSS D SSSSSS SSSSSSS Because write_queue is walked in-order, when the first SACK block has been processed, TCP is already past the skb for which the DSACK arrived and we haven't taught it to backtrack (nor should we), so TCP just continues processing by going to the next SACK block after the DSACK (if any). Whenever such a DSACK is present, do an embedded check while processing the previous SACK block. If the DSACK is below snd_una, there won't be an overlapping SACK block, and thus no problem in that case. Also, if the start_seq of the DSACK is equal to that of the actual block, the DSACK will be processed first. Tested this by using netem to duplicate 15% of packets, and by printing the SACK blocks when found_dup_sack is true and the selected skb in the dup_sack = 1 branch (if taken): SACK block 0: 4344-5792 (relative to snd_una 2019137317) SACK block 1: 4344-5792 (relative to snd_una 2019137317) equal start seqnos => next_dup = 0, dup_sack = 1 won't occur... SACK block 0: 5792-7240 (relative to snd_una 2019214061) SACK block 1: 2896-7240 (relative to snd_una 2019214061) DSACK skb match 5792-7240 (relative to snd_una) ...and the next_dup = 1 case (after the start_seq sort, which is not shown) went to the dup_sack = 1 branch. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 69d8c38ccd390..4d72781a49bec 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1330,12 +1330,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ cached_fack_count = 0; } - for (i=0; istart_seq); __u32 end_seq = ntohl(sp->end_seq); int fack_count; int dup_sack = (found_dup_sack && (i == first_sack_index)); + int next_dup = (found_dup_sack && (i+1 == first_sack_index)); + + sp++; if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) { if (dup_sack) { @@ -1361,7 +1364,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ flag |= FLAG_DATA_LOST; tcp_for_write_queue_from(skb, sk) { - int in_sack; + int in_sack = 0; u8 sacked; if (skb == tcp_send_head(sk)) @@ -1380,7 +1383,23 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; - in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); + dup_sack = (found_dup_sack && (i == first_sack_index)); + + /* Due to sorting DSACK may reside within this SACK block! 
*/ + if (next_dup) { + u32 dup_start = ntohl(sp->start_seq); + u32 dup_end = ntohl(sp->end_seq); + + if (before(TCP_SKB_CB(skb)->seq, dup_end)) { + in_sack = tcp_match_skb_to_sack(sk, skb, dup_start, dup_end); + if (in_sack > 0) + dup_sack = 1; + } + } + + /* DSACK info lost if out-of-mem, try SACK still */ + if (in_sack <= 0) + in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); if (in_sack < 0) break; From 261ab365fadd53ddc1b292b1663800e11fbf3e71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Thu, 1 Nov 2007 00:10:18 -0700 Subject: [PATCH 02/17] [TCP]: Another TAGBITS -> SACKED_ACKED|LOST conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to commit 3eec0047d9bdd, point of this is to avoid skipping R-bit skbs. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4d72781a49bec..ca9590f4f520a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2078,7 +2078,7 @@ static void tcp_update_scoreboard(struct sock *sk) if (!tcp_skb_timedout(sk, skb)) break; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); tcp_verify_retransmit_hint(tp, skb); From f1a6c4da14c365d3ee0b5de43a93f7470982637c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:29:45 -0700 Subject: [PATCH 03/17] [NET]: Move the sock_copy() from the header The sock_copy() call is not used outside the sock.c file, so just move it into a sock.c Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- include/net/sock.h | 14 -------------- net/core/sock.c | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 43fc3fa50d623..ecad7b4e2a631 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -993,20 +993,6 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } -static inline void sock_copy(struct sock *nsk, const struct sock *osk) -{ -#ifdef CONFIG_SECURITY_NETWORK - void *sptr = nsk->sk_security; -#endif - - memcpy(nsk, osk, osk->sk_prot->obj_size); - get_net(nsk->sk_net); -#ifdef CONFIG_SECURITY_NETWORK - nsk->sk_security = sptr; - security_sk_clone(osk, nsk); -#endif -} - extern int sock_i_uid(struct sock *sk); extern unsigned long sock_i_ino(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index bba9949681ff7..fdacf9c8f1cb6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -857,6 +857,20 @@ static inline void sock_lock_init(struct sock *sk) af_family_keys + sk->sk_family); } +static void sock_copy(struct sock *nsk, const struct sock *osk) +{ +#ifdef CONFIG_SECURITY_NETWORK + void *sptr = nsk->sk_security; +#endif + + memcpy(nsk, osk, osk->sk_prot->obj_size); + get_net(nsk->sk_net); +#ifdef CONFIG_SECURITY_NETWORK + nsk->sk_security = sptr; + security_sk_clone(osk, nsk); +#endif +} + /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace From 1e2e6b89f1d3152da0606d23e65e8760bf62a4c3 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:31:26 -0700 Subject: [PATCH 04/17] [NET]: Move the get_net() from sock_copy() The sock_copy() is supposed to just clone the socket. In a perfect world it has to be just memcpy, but we have to handle the security mark correctly. All the extra setup must be performed in sk_clone() call, so move the get_net() into more proper place. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index fdacf9c8f1cb6..9c2dbfaca60d6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -864,7 +864,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif memcpy(nsk, osk, osk->sk_prot->obj_size); - get_net(nsk->sk_net); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); @@ -958,6 +957,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); /* SANITY */ + get_net(newsk->sk_net); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); From c308c1b20e2eb7b13f200a7c18b3f23561318367 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:33:50 -0700 Subject: [PATCH 05/17] [NET]: Cleanup the allocation/freeing of the sock object The sock object is allocated either from the generic cache with kmalloc(), or from the prot->slab cache. Move this logic into an isolated set of helpers and make sk_alloc()/sk_free() look a bit nicer. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/core/sock.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 9c2dbfaca60d6..6ee2ed104a833 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -870,6 +870,31 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif } +static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority) +{ + struct sock *sk; + struct kmem_cache *slab; + + slab = prot->slab; + if (slab != NULL) + sk = kmem_cache_alloc(slab, priority); + else + sk = kmalloc(prot->obj_size, priority); + + return sk; +} + +static void sk_prot_free(struct proto *prot, struct sock *sk) +{ + struct kmem_cache *slab; + + slab = prot->slab; + if (slab != NULL) + kmem_cache_free(slab, sk); + else + kfree(sk); +} + /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace @@ -881,14 +906,9 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int zero_it) { - struct sock *sk = NULL; - struct kmem_cache *slab = prot->slab; - - if (slab != NULL) - sk = kmem_cache_alloc(slab, priority); - else - sk = kmalloc(prot->obj_size, priority); + struct sock *sk; + sk = sk_prot_alloc(prot, priority); if (sk) { if (zero_it) { memset(sk, 0, prot->obj_size); @@ -911,10 +931,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, return sk; out_free: - if (slab != NULL) - kmem_cache_free(slab, sk); - else - kfree(sk); + sk_prot_free(prot, sk); return NULL; } @@ -940,10 +957,7 @@ void sk_free(struct sock *sk) security_sk_free(sk); put_net(sk->sk_net); - if (sk->sk_prot_creator->slab != NULL) - kmem_cache_free(sk->sk_prot_creator->slab, sk); - else - kfree(sk); + sk_prot_free(sk->sk_prot_creator, sk); module_put(owner); } From 3f0666ee3039443fa7b7cf436dd16ce0dd8e3f95 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:34:42 
-0700 Subject: [PATCH 06/17] [NET]: Auto-zero the allocated sock object We have a __GFP_ZERO flag that allocates a zeroed chunk of memory. Use it in the sk_alloc() and avoid a hand-made memset(). This is a temporary patch that will help us in the nearest future :) Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/sock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 6ee2ed104a833..b66f607fcb961 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -908,10 +908,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, { struct sock *sk; + if (zero_it) + priority |= __GFP_ZERO; + sk = sk_prot_alloc(prot, priority); if (sk) { if (zero_it) { - memset(sk, 0, prot->obj_size); sk->sk_family = family; /* * See comment in struct sock definition to understand From 2e4afe7b35458beedba418a6e2aaf0b0ac82cc18 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:36:26 -0700 Subject: [PATCH 07/17] [NET]: Move some core sock setup into sk_prot_alloc The security_sk_alloc() and the module_get is a part of the object allocations - move it in the proper place. Note, that since we do not reset the newly allocated sock in the sk_alloc() (memset() is removed with the previous patch) we can safely do this. Also fix the error path in sk_prot_alloc() - release the security context if needed. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/core/sock.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index b66f607fcb961..2b744c2bf4662 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -870,7 +870,8 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif } -static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority) +static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, + int family) { struct sock *sk; struct kmem_cache *slab; @@ -881,18 +882,40 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority) else sk = kmalloc(prot->obj_size, priority); + if (sk != NULL) { + if (security_sk_alloc(sk, family, priority)) + goto out_free; + + if (!try_module_get(prot->owner)) + goto out_free_sec; + } + return sk; + +out_free_sec: + security_sk_free(sk); +out_free: + if (slab != NULL) + kmem_cache_free(slab, sk); + else + kfree(sk); + return NULL; } static void sk_prot_free(struct proto *prot, struct sock *sk) { struct kmem_cache *slab; + struct module *owner; + owner = prot->owner; slab = prot->slab; + + security_sk_free(sk); if (slab != NULL) kmem_cache_free(slab, sk); else kfree(sk); + module_put(owner); } /** @@ -911,7 +934,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (zero_it) priority |= __GFP_ZERO; - sk = sk_prot_alloc(prot, priority); + sk = sk_prot_alloc(prot, priority, family); if (sk) { if (zero_it) { sk->sk_family = family; @@ -923,24 +946,14 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_lock_init(sk); sk->sk_net = get_net(net); } - - if (security_sk_alloc(sk, family, priority)) - goto out_free; - - if (!try_module_get(prot->owner)) - goto out_free; } - return sk; -out_free: - sk_prot_free(prot, sk); - return NULL; + return sk; } void sk_free(struct sock *sk) { struct sk_filter *filter; - struct module *owner = sk->sk_prot_creator->owner; if (sk->sk_destruct) 
sk->sk_destruct(sk); @@ -957,10 +970,8 @@ void sk_free(struct sock *sk) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); - security_sk_free(sk); put_net(sk->sk_net); sk_prot_free(sk->sk_prot_creator, sk); - module_put(owner); } struct sock *sk_clone(const struct sock *sk, const gfp_t priority) From 8fd1d178a3f177777707ee782f12d93e9a7eb5e5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:37:32 -0700 Subject: [PATCH 08/17] [NET]: Make the sk_clone() lighter The sk_prot_alloc() already performs all the stuff needed by the sk_clone(). Besides, sk_prot_alloc() takes fewer arguments than sk_alloc() does (three instead of five), so call sk_prot_alloc() directly, saving a bit of stack. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 2b744c2bf4662..4f4708a6ff8fb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -976,8 +976,9 @@ void sk_free(struct sock *sk) struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { - struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); + struct sock *newsk; + newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); if (newsk != NULL) { struct sk_filter *filter; From 154adbc8469ff21fbf5c958446ee92dbaab01be1 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:38:43 -0700 Subject: [PATCH 09/17] [NET]: Remove bogus zero_it argument from sk_alloc At this point nobody calls sk_alloc() with zero_it == 0, so remove the unneeded checks from it. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/core/sock.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 4f4708a6ff8fb..6046fc69428b0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -931,21 +931,16 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, { struct sock *sk; - if (zero_it) - priority |= __GFP_ZERO; - - sk = sk_prot_alloc(prot, priority, family); + sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family); if (sk) { - if (zero_it) { - sk->sk_family = family; - /* - * See comment in struct sock definition to understand - * why we need sk_prot_creator -acme - */ - sk->sk_prot = sk->sk_prot_creator = prot; - sock_lock_init(sk); - sk->sk_net = get_net(net); - } + sk->sk_family = family; + /* + * See comment in struct sock definition to understand + * why we need sk_prot_creator -acme + */ + sk->sk_prot = sk->sk_prot_creator = prot; + sock_lock_init(sk); + sk->sk_net = get_net(net); } return sk; From 6257ff2177ff02d7f260a7a501876aa41cb9a9f6 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:39:31 -0700 Subject: [PATCH 10/17] [NET]: Forget the zero_it argument of sk_alloc() Finally, the zero_it argument can be completely removed from the callers and from the function prototype. Besides, fix the checkpatch.pl warnings about using assignments inside if conditions. This patch is rather big, and it is logically a part of the previous one. I split it out hoping to make the patches more readable. Hope this particular split helped. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- drivers/net/pppoe.c | 2 +- drivers/net/pppol2tp.c | 2 +- include/net/sock.h | 2 +- net/appletalk/ddp.c | 2 +- net/atm/common.c | 2 +- net/ax25/af_ax25.c | 6 ++++-- net/bluetooth/bnep/sock.c | 2 +- net/bluetooth/cmtp/sock.c | 2 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/hidp/sock.c | 2 +- net/bluetooth/l2cap.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- net/core/sock.c | 2 +- net/decnet/af_decnet.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipx/af_ipx.c | 2 +- net/irda/af_irda.c | 2 +- net/iucv/af_iucv.c | 2 +- net/key/af_key.c | 2 +- net/llc/llc_conn.c | 2 +- net/netlink/af_netlink.c | 2 +- net/netrom/af_netrom.c | 6 ++++-- net/packet/af_packet.c | 2 +- net/rose/af_rose.c | 6 ++++-- net/rxrpc/af_rxrpc.c | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 3 ++- net/tipc/socket.c | 2 +- net/unix/af_unix.c | 2 +- net/x25/af_x25.c | 2 +- 33 files changed, 43 insertions(+), 36 deletions(-) diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 8936ed3469cf0..a005d8f4c38e5 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -491,7 +491,7 @@ static int pppoe_create(struct net *net, struct socket *sock) int error = -ENOMEM; struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto); if (!sk) goto out; diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 921d4ef6d14b6..f8904fd92369b 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -1416,7 +1416,7 @@ static int pppol2tp_create(struct net *net, struct socket *sock) int error = -ENOMEM; struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto, 1); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto); if (!sk) goto out; diff --git a/include/net/sock.h b/include/net/sock.h index ecad7b4e2a631..20de3fa7ae400 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -779,7 
+779,7 @@ extern void FASTCALL(release_sock(struct sock *sk)); extern struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot, int zero_it); + struct proto *prot); extern void sk_free(struct sock *sk); extern struct sock *sk_clone(const struct sock *sk, const gfp_t priority); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7c0b5151d5265..e0d37d6dc1f81 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1044,7 +1044,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol) if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto); if (!sk) goto out; rc = 0; diff --git a/net/atm/common.c b/net/atm/common.c index e166d9e0ffd94..eba09a04f6bf4 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -133,7 +133,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto); if (!sk) return -ENOMEM; sock_init_data(sock, sk); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 993e5c75e9090..8378afd54b301 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -836,7 +836,8 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol) return -ESOCKTNOSUPPORT; } - if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) + sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto); + if (sk == NULL) return -ENOMEM; ax25 = sk->sk_protinfo = ax25_create_cb(); @@ -861,7 +862,8 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25, *oax25; - if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot); 
+ if (sk == NULL) return NULL; if ((ax25 = ax25_create_cb()) == NULL) { diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index f718965f296c7..9ebd3c64474d9 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -213,7 +213,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index cf700c20d11eb..783edab12ce82 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -204,7 +204,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 8825102c517c4..14991323c273a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -645,7 +645,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol) sock->ops = &hci_sock_ops; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 1de2b6fbcac0d..3292b956a7c49 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -255,7 +255,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/l2cap.c 
b/net/bluetooth/l2cap.c index 6fbbae78b3045..477e052b17b5d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -607,7 +607,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p { struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); if (!sk) return NULL; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 266b6972667d7..c46d51035e77f 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -287,7 +287,7 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int struct rfcomm_dlc *d; struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto); if (!sk) return NULL; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 82d0dfdfa7e26..93ad1aae3f38d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -421,7 +421,7 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro { struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto); if (!sk) return NULL; diff --git a/net/core/sock.c b/net/core/sock.c index 6046fc69428b0..12ad2067a9884 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -927,7 +927,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) * @zero_it: if we should zero the newly allocated sock */ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot, int zero_it) + struct proto *prot) { struct sock *sk; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index aabe98d9402f4..57d5749518380 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -474,7 +474,7 @@ static struct proto dn_proto = { static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) { struct dn_scp *scp; - struct sock *sk = 
sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1); + struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto); if (!sk) goto out; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 9cae16b4e0b7c..f70df073c5880 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -624,7 +624,7 @@ static int econet_create(struct net *net, struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1); + sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto); if (sk == NULL) goto out; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 621b128897d7a..d2f22e74b2671 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -323,7 +323,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); if (sk == NULL) goto out; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 1b1caf3aa1c18..ecbd38894fdd1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -162,7 +162,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); if (sk == NULL) goto out; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 29b063d431205..a195a66e0cc79 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1381,7 +1381,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol) goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto); if (!sk) goto out; #ifdef IPX_REFCNT_DEBUG diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 0328ae2654f43..48ce59a6e0265 100644 --- a/net/irda/af_irda.c +++ 
b/net/irda/af_irda.c @@ -1078,7 +1078,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) } /* Allocate networking socket */ - sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1); + sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto); if (sk == NULL) return -ENOMEM; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 43e01c8d382b5..aef6645803559 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -216,7 +216,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, 1); + sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); if (!sk) return NULL; diff --git a/net/key/af_key.c b/net/key/af_key.c index 266f112c38c2f..10c89d47f685e 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -152,7 +152,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol) return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto); if (sk == NULL) goto out; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 8ebc2769dfdab..5c0b484237c81 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -869,7 +869,7 @@ static void llc_sk_init(struct sock* sk) */ struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) { - struct sock *sk = sk_alloc(net, family, priority, prot, 1); + struct sock *sk = sk_alloc(net, family, priority, prot); if (!sk) goto out; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4f994c0fb3f80..2601712555767 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -396,7 +396,7 @@ static int __netlink_create(struct net *net, struct socket *sock, sock->ops = &netlink_ops; - sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); if (!sk) return -ENOMEM; diff --git 
a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3a4d479ea64e4..972250c974f1c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -423,7 +423,8 @@ static int nr_create(struct net *net, struct socket *sock, int protocol) if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) + sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto); + if (sk == NULL) return -ENOMEM; nr = nr_sk(sk); @@ -465,7 +466,8 @@ static struct sock *nr_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot); + if (sk == NULL) return NULL; nr = nr_sk(sk); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d0936506b731a..4cb2dfba09931 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -995,7 +995,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto); if (sk == NULL) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 509defe53ee53..ed2d65cd80106 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -513,7 +513,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol) if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto); + if (sk == NULL) return -ENOMEM; rose = rose_sk(sk); @@ -551,7 +552,8 @@ static struct sock *rose_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + sk = sk_alloc(osk->sk_net, 
PF_ROSE, GFP_ATOMIC, &rose_proto); + if (sk == NULL) return NULL; rose = rose_sk(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c680017f5c8e9..d6389450c4bff 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -627,7 +627,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol) sock->ops = &rxrpc_rpc_ops; sock->state = SS_UNCONNECTED; - sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto); if (!sk) return -ENOMEM; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index eb4deaf589148..7f31ff638bc63 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -631,7 +631,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1); + newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f5cd96f5fe742..40c1a47d1b8dc 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -552,7 +552,8 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; - struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1); + struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, + sk->sk_prot); if (!newsk) goto out; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e36b4b5a52223..6b792265dc06e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -201,7 +201,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol) return -EPROTOTYPE; } - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); if (!sk) { tipc_deleteport(ref); return -ENOMEM; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9163ec526c2a0..515e7a692f9bf 100644 --- 
a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -602,7 +602,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) goto out; - sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); if (!sk) goto out; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index fc416f9606a9e..92cfe8e3e0b8d 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -472,7 +472,7 @@ static struct proto x25_proto = { static struct sock *x25_alloc_socket(struct net *net) { struct x25_sock *x25; - struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto); if (!sk) goto out; From 1dba323b3f92cf4a475236763b0373cb7d49395d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:42:43 -0700 Subject: [PATCH 11/17] [NETNS]: Make the init/exit hooks checks outside the loop When the new pernet something (subsys, device or operations) is being registered, the init callback is to be called for each namespace that currently exists in the system. During the unregister, the same is to be done with the exit callback. However, not every pernet something has both calls, but the check for the appropriate pointer to be not NULL is performed inside the for_each_net() loop. This is (at least) strange, so tune this. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/core/net_namespace.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 662e6ea1cecfe..4e52921ade098 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -187,29 +187,28 @@ static int register_pernet_operations(struct list_head *list, struct net *net, *undo_net; int error; - error = 0; list_add_tail(&ops->list, list); - for_each_net(net) { - if (ops->init) { + if (ops->init) { + for_each_net(net) { error = ops->init(net); if (error) goto out_undo; } } -out: - return error; + return 0; out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - for_each_net(undo_net) { - if (undo_net == net) - goto undone; - if (ops->exit) + if (ops->exit) { + for_each_net(undo_net) { + if (undo_net == net) + goto undone; ops->exit(undo_net); + } } undone: - goto out; + return error; } static void unregister_pernet_operations(struct pernet_operations *ops) @@ -217,8 +216,8 @@ static void unregister_pernet_operations(struct pernet_operations *ops) struct net *net; list_del(&ops->list); - for_each_net(net) - if (ops->exit) + if (ops->exit) + for_each_net(net) ops->exit(net); } From d46557955f2a35e58772518775464cdf354b3245 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:43:49 -0700 Subject: [PATCH 12/17] [NET]: Relax the reference counting of init_net_ns When the CONFIG_NET_NS is n there's no need to refcount the initial net namespace. So relax this code by making stupid stubs for the "n" case. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- include/net/net_namespace.h | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5279466606d2b..1fd449a6530b2 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -51,13 +51,12 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns) } #endif +#ifdef CONFIG_NET_NS extern void __put_net(struct net *net); static inline struct net *get_net(struct net *net) { -#ifdef CONFIG_NET atomic_inc(&net->count); -#endif return net; } @@ -75,26 +74,44 @@ static inline struct net *maybe_get_net(struct net *net) static inline void put_net(struct net *net) { -#ifdef CONFIG_NET if (atomic_dec_and_test(&net->count)) __put_net(net); -#endif } static inline struct net *hold_net(struct net *net) { -#ifdef CONFIG_NET atomic_inc(&net->use_count); -#endif return net; } static inline void release_net(struct net *net) { -#ifdef CONFIG_NET atomic_dec(&net->use_count); -#endif } +#else +static inline struct net *get_net(struct net *net) +{ + return net; +} + +static inline void put_net(struct net *net) +{ +} + +static inline struct net *hold_net(struct net *net) +{ + return net; +} + +static inline void release_net(struct net *net) +{ +} + +static inline struct net *maybe_get_net(struct net *net) +{ + return net; +} +#endif #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) From 6a1a3b9f686bb04820a232cc1657ef2c45670709 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:44:50 -0700 Subject: [PATCH 13/17] [NET]: Hide the dead code in the net_namespace.c The namespace creation/destruction code is never called if the CONFIG_NET_NS is n, so it's OK to move it under appropriate ifdef. The copy_net_ns() in the "n" case checks for flags and returns -EINVAL when new net ns is requested. 
In a perfect world this stub must be in net_namespace.h, but this function needs to know the CLONE_NEWNET value and thus requires sched.h. On the other hand this header is to be injected into almost every .c file in the networking code, and making all this code depend on the sched.h is a suicidal attempt. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/net_namespace.c | 131 ++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 63 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 4e52921ade098..d5bf8b28bbf45 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -22,65 +22,6 @@ static struct kmem_cache *net_cachep; struct net init_net; EXPORT_SYMBOL_GPL(init_net); -static struct net *net_alloc(void) -{ - return kmem_cache_zalloc(net_cachep, GFP_KERNEL); -} - -static void net_free(struct net *net) -{ - if (!net) - return; - - if (unlikely(atomic_read(&net->use_count) != 0)) { - printk(KERN_EMERG "network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); - return; - } - - kmem_cache_free(net_cachep, net); -} - -static void cleanup_net(struct work_struct *work) -{ - struct pernet_operations *ops; - struct net *net; - - net = container_of(work, struct net, work); - - mutex_lock(&net_mutex); - - /* Don't let anyone else find us. */ - rtnl_lock(); - list_del(&net->list); - rtnl_unlock(); - - /* Run all of the network namespace exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } - - mutex_unlock(&net_mutex); - - /* Ensure there are no outstanding rcu callbacks using this - * network namespace. 
- */ - rcu_barrier(); - - /* Finally it is safe to free my network namespace structure */ - net_free(net); -} - - -void __put_net(struct net *net) -{ - /* Cleanup the network namespace in process context */ - INIT_WORK(&net->work, cleanup_net); - schedule_work(&net->work); -} -EXPORT_SYMBOL_GPL(__put_net); - /* * setup_net runs the initializers for the network namespace object. */ @@ -117,6 +58,12 @@ static int setup_net(struct net *net) goto out; } +#ifdef CONFIG_NET_NS +static struct net *net_alloc(void) +{ + return kmem_cache_zalloc(net_cachep, GFP_KERNEL); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *new_net = NULL; @@ -127,10 +74,6 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) if (!(flags & CLONE_NEWNET)) return old_net; -#ifndef CONFIG_NET_NS - return ERR_PTR(-EINVAL); -#endif - err = -ENOMEM; new_net = net_alloc(); if (!new_net) @@ -157,6 +100,68 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) return new_net; } +static void net_free(struct net *net) +{ + if (!net) + return; + + if (unlikely(atomic_read(&net->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } + + kmem_cache_free(net_cachep, net); +} + +static void cleanup_net(struct work_struct *work) +{ + struct pernet_operations *ops; + struct net *net; + + net = container_of(work, struct net, work); + + mutex_lock(&net_mutex); + + /* Don't let anyone else find us. */ + rtnl_lock(); + list_del(&net->list); + rtnl_unlock(); + + /* Run all of the network namespace exit methods */ + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->exit) + ops->exit(net); + } + + mutex_unlock(&net_mutex); + + /* Ensure there are no outstanding rcu callbacks using this + * network namespace. 
+ */ + rcu_barrier(); + + /* Finally it is safe to free my network namespace structure */ + net_free(net); +} + +void __put_net(struct net *net) +{ + /* Cleanup the network namespace in process context */ + INIT_WORK(&net->work, cleanup_net); + schedule_work(&net->work); +} +EXPORT_SYMBOL_GPL(__put_net); + +#else +struct net *copy_net_ns(unsigned long flags, struct net *old_net) +{ + if (flags & CLONE_NEWNET) + return ERR_PTR(-EINVAL); + return old_net; +} +#endif + static int __init net_ns_init(void) { int err; From 1a2ee93d281d00cc6e2db1c306032a105d2c9474 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:45:59 -0700 Subject: [PATCH 14/17] [NET]: Mark the setup_net as __net_init The setup_net is called for the init net namespace only (in the CONFIG_NET_NS=n case, of course) from the __init function, so mark it as __net_init to disappear with the caller after the boot. Yet again, in the perfect world this has to be under #ifdef CONFIG_NET_NS, but it isn't guaranteed that every subsystem is registered *after* the init_net_ns is set up. After we are sure that we don't start registering them before the init net setup, we'll be able to move this code under the ifdef. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index d5bf8b28bbf45..a044e2d9a8f0e 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -25,7 +25,7 @@ EXPORT_SYMBOL_GPL(init_net); /* * setup_net runs the initializers for the network namespace object. 
*/ -static int setup_net(struct net *net) +static __net_init int setup_net(struct net *net) { /* Must be called with net_mutex held */ struct pernet_operations *ops; From d57a9212e00779181d8d820887dcab3e9d529194 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 1 Nov 2007 00:46:50 -0700 Subject: [PATCH 15/17] [NET]: Hide the net_ns kmem cache This cache is only required to create new namespaces, but we won't have them in CONFIG_NET_NS=n case. Hide it under the appropriate ifdef. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/net_namespace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a044e2d9a8f0e..e9f0964ce70bf 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -17,8 +17,6 @@ static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); -static struct kmem_cache *net_cachep; - struct net init_net; EXPORT_SYMBOL_GPL(init_net); @@ -59,6 +57,8 @@ static __net_init int setup_net(struct net *net) } #ifdef CONFIG_NET_NS +static struct kmem_cache *net_cachep; + static struct net *net_alloc(void) { return kmem_cache_zalloc(net_cachep, GFP_KERNEL); @@ -167,9 +167,11 @@ static int __init net_ns_init(void) int err; printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); +#ifdef CONFIG_NET_NS net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, SLAB_PANIC, NULL); +#endif mutex_lock(&net_mutex); err = setup_net(&init_net); From 3b582cc14c50f71eabf1c3cada05acb8dc9f457c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 1 Nov 2007 02:21:47 -0700 Subject: [PATCH 16/17] [NET]: docbook fixes for netif_ functions Documentation updates for network interfaces. 1. Add doc for netif_napi_add 2. Remove doc for unused returns from netif_rx 3. Add doc for netif_receive_skb [ Incorporated minor mods from Randy Dunlap -DaveM ] Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 10 ++++++++++ net/core/dev.c | 18 +++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9b0c8f12373e1..1e6af4f174b6c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -739,6 +739,16 @@ static inline void *netdev_priv(const struct net_device *dev) */ #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev)) +/** + * netif_napi_add - initialize a napi context + * @dev: network device + * @napi: napi context + * @poll: polling function + * @weight: default weight + * + * netif_napi_add() must be used to initialize a napi context prior to calling + * *any* of the other napi related functions. + */ static inline void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), diff --git a/net/core/dev.c b/net/core/dev.c index 91ece48e127e5..be6cedab5aa83 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1751,9 +1751,6 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; * * return values: * NET_RX_SUCCESS (no congestion) - * NET_RX_CN_LOW (low congestion) - * NET_RX_CN_MOD (moderate congestion) - * NET_RX_CN_HIGH (high congestion) * NET_RX_DROP (packet was dropped) * */ @@ -2001,6 +1998,21 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, } #endif +/** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process + * + * netif_receive_skb() is the main receive data processing function. + * It always succeeds. The buffer may be dropped during processing + * for congestion control or by the protocol layers. + * + * This function may only be called from softirq context and interrupts + * should be enabled. 
+ * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; From 49259d34c52df6be482fefca946debe28ba9a2f6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 1 Nov 2007 02:26:38 -0700 Subject: [PATCH 17/17] [IRDA] IRNET: Fix build when TCGETS2 is defined. Signed-off-by: David S. Miller --- net/irda/irnet/irnet_ppp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 2f9f8dce5a694..e0eab5927c4f4 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -731,15 +731,25 @@ dev_irnet_ioctl(struct inode * inode, /* Get termios */ case TCGETS: DEBUG(FS_INFO, "Get termios.\n"); +#ifndef TCGETS2 if(kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) break; +#else + if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios)) + break; +#endif err = 0; break; /* Set termios */ case TCSETSF: DEBUG(FS_INFO, "Set termios.\n"); +#ifndef TCGETS2 if(user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) break; +#else + if(user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) + break; +#endif err = 0; break;