From b43309578477ff3271945d4efb920f558a309b4a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 May 2005 14:23:13 -0700
Subject: [PATCH 01/28] [NETFILTER]: Missing owner-field initialization in
 iptable_raw

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/iptable_raw.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 01b4a3c814d39..47449ba83eb94 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -103,13 +103,15 @@ static struct nf_hook_ops ipt_ops[] = {
 	  .hook = ipt_hook, 
 	  .pf = PF_INET, 
 	  .hooknum = NF_IP_PRE_ROUTING, 
-	  .priority = NF_IP_PRI_RAW
+	  .priority = NF_IP_PRI_RAW,
+	  .owner = THIS_MODULE,
 	},
 	{
 	  .hook = ipt_hook, 
 	  .pf = PF_INET, 
 	  .hooknum = NF_IP_LOCAL_OUT, 
-	  .priority = NF_IP_PRI_RAW
+	  .priority = NF_IP_PRI_RAW,
+	  .owner = THIS_MODULE,
 	},
 };
 

From 31da185d8162ae0f30a13ed945f1f4d28d158133 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 May 2005 14:23:50 -0700
Subject: [PATCH 02/28] [NETFILTER]: Don't checksum CHECKSUM_UNNECESSARY skbs
 in TCP connection tracking

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 2b87c1974be60..721ddbf522b42 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -819,6 +819,7 @@ static int tcp_error(struct sk_buff *skb,
 	 */
 	/* FIXME: Source route IP option packets --RR */
 	if (hooknum == NF_IP_PRE_ROUTING
+	    && skb->ip_summed != CHECKSUM_UNNECESSARY
 	    && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
 			         skb->ip_summed == CHECKSUM_HW ? skb->csum
 			      	 : skb_checksum(skb, iph->ihl*4, tcplen, 0))) {

From 679a87382433cf12a28f07a7d5c240f30f0daa08 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 May 2005 14:24:36 -0700
Subject: [PATCH 03/28] [IPV6]: Fix raw socket checksums with IPsec

I made a mistake in my last patch to the raw socket checksum code.
I used the value of inet->cork.length as the length of the payload.
While this works with normal packets, it breaks down when IPsec is
present since the cork length includes the extension header length.

So here is a patch to fix the length calculations.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 1352c1d9bf4d3..617645bc5ed6a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -455,11 +455,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 				     struct raw6_sock *rp)
 {
-	struct inet_sock *inet = inet_sk(sk);
 	struct sk_buff *skb;
 	int err = 0;
 	int offset;
 	int len;
+	int total_len;
 	u32 tmp_csum;
 	u16 csum;
 
@@ -470,7 +470,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 		goto out;
 
 	offset = rp->offset;
-	if (offset >= inet->cork.length - 1) {
+	total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data);
+	if (offset >= total_len - 1) {
 		err = -EINVAL;
 		ip6_flush_pending_frames(sk);
 		goto out;
@@ -514,7 +515,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 
 	tmp_csum = csum_ipv6_magic(&fl->fl6_src,
 				   &fl->fl6_dst,
-				   inet->cork.length, fl->proto, tmp_csum);
+				   total_len, fl->proto, tmp_csum);
 
 	if (tmp_csum == 0)
 		tmp_csum = -1;

From e4553eddae592b948c9695c9a0002169b0cab6fc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 May 2005 14:25:13 -0700
Subject: [PATCH 04/28] [IPV6]: Include ipv6.h for ipv6_addr_set

This patch includes net/ipv6.h from addrconf.h since it needs
ipv6_addr_set.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 7af9a13cb9beb..f1e5af4be98e4 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -46,6 +46,7 @@ struct prefix_info {
 #include <linux/in6.h>
 #include <linux/netdevice.h>
 #include <net/if_inet6.h>
+#include <net/ipv6.h>
 
 #define IN6_ADDR_HSIZE		16
 

From 526bdb80a23b2e10ed4ccc3fcf309c9118d892d6 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 May 2005 14:26:01 -0700
Subject: [PATCH 05/28] [XFRM]: Prevent off-by-one access to xfrm_dispatch

Makes the type > XFRM_MSG_MAX check behave correctly to
protect access to xfrm_dispatch.
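
To make the off-by-one concrete, here is a stand-alone sketch of the sentinel-enum
pattern this patch introduces (the MSG_* names are illustrative, not the real XFRM
types): keeping the sentinel private and defining the public MAX as the last valid
value means a check of the form "type > MAX" can no longer let the one-past-last
value through to index the dispatch array.

#include <stdio.h>

enum {
	MSG_FIRST,
	MSG_SECOND,
	MSG_THIRD,
	__MSG_MAX		/* sentinel, one past the last real type */
};
#define MSG_MAX (__MSG_MAX - 1)	/* last valid type */

static const char *dispatch[__MSG_MAX] = {
	[MSG_FIRST]  = "first",
	[MSG_SECOND] = "second",
	[MSG_THIRD]  = "third",
};

int main(void)
{
	for (int type = 0; type <= MSG_MAX + 1; type++) {
		if (type > MSG_MAX) {		/* rejects the sentinel value */
			printf("type %d rejected\n", type);
			continue;
		}
		printf("type %d -> %s\n", type, dispatch[type]);
	}
	return 0;
}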

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index f0df02ae68a41..4d19b9e65317c 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -140,8 +140,9 @@ enum {
 	XFRM_MSG_FLUSHPOLICY,
 #define XFRM_MSG_FLUSHPOLICY XFRM_MSG_FLUSHPOLICY
 
-	XFRM_MSG_MAX
+	__XFRM_MSG_MAX
 };
+#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
 
 struct xfrm_user_tmpl {
 	struct xfrm_id		id;

From 492b558b3191319cbc859a9e025bc354d336c261 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 May 2005 14:26:40 -0700
Subject: [PATCH 06/28] [XFRM]: Cleanup xfrm_msg_min and xfrm_dispatch

Converts xfrm_msg_min and xfrm_dispatch to use C99 designated
initializers to make grepping a little bit easier. Also replaces
two hardcoded message types with meaningful names.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h |  2 ++
 net/xfrm/xfrm_user.c | 73 ++++++++++++++++++++++----------------------
 2 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 4d19b9e65317c..fd2ef742a9fd1 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -144,6 +144,8 @@ enum {
 };
 #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
 
+#define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)
+
 struct xfrm_user_tmpl {
 	struct xfrm_id		id;
 	__u16			family;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 63661b0fd736a..52b5843937c58 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -855,47 +855,44 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **x
 	return 0;
 }
 
-static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = {
-	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* NEW SA */
-	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* DEL SA */
-	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* GET SA */
-	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */
-	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* DEL POLICY */
-	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* GET POLICY */
-	NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)),	/* ALLOC SPI */
-	NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)),	/* ACQUIRE */
-	NLMSG_LENGTH(sizeof(struct xfrm_user_expire)),	/* EXPIRE */
-	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* UPD POLICY */
-	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* UPD SA */
-	NLMSG_LENGTH(sizeof(struct xfrm_user_polexpire)), /* POLEXPIRE */
-	NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)),	/* FLUSH SA */
-	NLMSG_LENGTH(0),				/* FLUSH POLICY */
+#define XMSGSIZE(type) NLMSG_LENGTH(sizeof(struct type))
+
+static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
+	[XFRM_MSG_NEWSA       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info),
+	[XFRM_MSG_DELSA       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
+	[XFRM_MSG_GETSA       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
+	[XFRM_MSG_NEWPOLICY   - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info),
+	[XFRM_MSG_DELPOLICY   - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
+	[XFRM_MSG_GETPOLICY   - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
+	[XFRM_MSG_ALLOCSPI    - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userspi_info),
+	[XFRM_MSG_ACQUIRE     - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_acquire),
+	[XFRM_MSG_EXPIRE      - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_expire),
+	[XFRM_MSG_UPDPOLICY   - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info),
+	[XFRM_MSG_UPDSA       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info),
+	[XFRM_MSG_POLEXPIRE   - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire),
+	[XFRM_MSG_FLUSHSA     - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush),
+	[XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0),
 };
 
+#undef XMSGSIZE
+
 static struct xfrm_link {
 	int (*doit)(struct sk_buff *, struct nlmsghdr *, void **);
 	int (*dump)(struct sk_buff *, struct netlink_callback *);
-} xfrm_dispatch[] = {
-	{	.doit	=	xfrm_add_sa, 		},
-	{	.doit	=	xfrm_del_sa, 		},
-	{
-		.doit	=	xfrm_get_sa,
-		.dump	=	xfrm_dump_sa,
-	},
-	{	.doit	=	xfrm_add_policy 	},
-	{	.doit	=	xfrm_get_policy 	},
-	{
-		.doit	=	xfrm_get_policy,
-		.dump	=	xfrm_dump_policy,
-	},
-	{	.doit	=	xfrm_alloc_userspi	},
-	{},
-	{},
-	{	.doit	=	xfrm_add_policy 	},
-	{	.doit	=	xfrm_add_sa, 		},
-	{},
-	{	.doit	=	xfrm_flush_sa		},
-	{	.doit	=	xfrm_flush_policy	},
+} xfrm_dispatch[XFRM_NR_MSGTYPES] = {
+	[XFRM_MSG_NEWSA       - XFRM_MSG_BASE] = { .doit = xfrm_add_sa        },
+	[XFRM_MSG_DELSA       - XFRM_MSG_BASE] = { .doit = xfrm_del_sa        },
+	[XFRM_MSG_GETSA       - XFRM_MSG_BASE] = { .doit = xfrm_get_sa,
+						   .dump = xfrm_dump_sa       },
+	[XFRM_MSG_NEWPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_add_policy    },
+	[XFRM_MSG_DELPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_get_policy    },
+	[XFRM_MSG_GETPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_get_policy,
+						   .dump = xfrm_dump_policy   },
+	[XFRM_MSG_ALLOCSPI    - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi },
+	[XFRM_MSG_UPDPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_add_policy    },
+	[XFRM_MSG_UPDSA       - XFRM_MSG_BASE] = { .doit = xfrm_add_sa        },
+	[XFRM_MSG_FLUSHSA     - XFRM_MSG_BASE] = { .doit = xfrm_flush_sa      },
+	[XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy  },
 };
 
 static int xfrm_done(struct netlink_callback *cb)
@@ -931,7 +928,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
 		return -1;
 	}
 
-	if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) {
+	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
+	     type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
+	    (nlh->nlmsg_flags & NLM_F_DUMP)) {
 		u32 rlen;
 
 		if (link->dump == NULL)

From d775fc09f16f4b88cd0373006b112c4772589778 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 May 2005 14:27:35 -0700
Subject: [PATCH 07/28] [RTNETLINK] Fix RTM_MAX to represent the maximum valid
 message type

RTM_MAX is currently set to the maximum reserved message type plus one,
which causes two bugs when new types are assigned: a) if the new family
registers only the NEW command in its reserved block, the array size for
per-family entries is calculated one entry short, and b) if the new family
registers all commands, RTM_MAX points to the first entry of the following
block and the rtnetlink receive path accepts a message type for a
nonexistent family.

This patch changes RTM_MAX to point to the maximum valid message type
by aligning it to the start of the next block and subtracting one.
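
As a quick sanity check of the new definition, the rounding can be exercised in
isolation (BLOCK_MAX below is an illustrative stand-in for the RTM_MAX macro,
compiled as ordinary user-space code):

#include <stdio.h>

/* same arithmetic as the new RTM_MAX: round the enum sentinel up to the
 * start of the next 4-message block (NEW/DEL/GET/SET), then step back one
 * so the result names the last slot of the block actually in use */
#define BLOCK_MAX(sentinel) ((((sentinel) + 3) & ~3) - 1)

int main(void)
{
	/* a family that registered only its NEW command at type 64
	 * leaves the sentinel at 65 ... */
	printf("%d\n", BLOCK_MAX(65));	/* 67: covers the whole block 64..67 */
	/* ... and registering the full block 64..67 gives the same answer */
	printf("%d\n", BLOCK_MAX(68));	/* 67 */
	return 0;
}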

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 32e52769a00b6..d607219af6ac9 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -89,8 +89,8 @@ enum {
 	RTM_GETANYCAST	= 62,
 #define RTM_GETANYCAST	RTM_GETANYCAST
 
-	RTM_MAX,
-#define RTM_MAX		RTM_MAX
+	__RTM_MAX,
+#define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
 
 /* 

From f90a0a74b864fdc46737614f03b8868f4f31e3bf Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 May 2005 14:29:00 -0700
Subject: [PATCH 08/28] [RTNETLINK] Fix & cleanup rtm_min/rtm_max

Converts the rtm_min and rta_max arrays to use C99 designated
initializers for easier insertion of new message families.
RTM_GETMULTICAST and RTM_GETANYCAST did not have a minimal
message size specified, which means the netlink message was
parsed for routing attributes starting from the header.
Adds the proper minimal message sizes for these messages
(netlink header + common rtnetlink header) to fix this issue.
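
For reference, the grouping performed by the new RTM_FAM() macro can be checked
with a stand-alone program; the numeric values below follow the existing
rtnetlink numbering (RTM_BASE is 16), everything else is a user-space sketch
rather than kernel code.

#include <stdio.h>

#define RTM_BASE	16
#define RTM_NEWLINK	16
#define RTM_GETLINK	18
#define RTM_NEWADDR	20

/* each family owns four consecutive message types (NEW/DEL/GET/SET),
 * so dividing the offset from RTM_BASE by four maps all four commands
 * of a family onto the same rtm_min/rta_max slot */
#define RTM_FAM(cmd)	(((cmd) - RTM_BASE) >> 2)

int main(void)
{
	printf("%d %d %d\n",
	       RTM_FAM(RTM_NEWLINK),	/* 0 */
	       RTM_FAM(RTM_GETLINK),	/* 0: same family as NEWLINK */
	       RTM_FAM(RTM_NEWADDR));	/* 1: next family */
	return 0;
}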

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  2 ++
 net/core/rtnetlink.c      | 39 +++++++++++++++++++++------------------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index d607219af6ac9..1ecaea74d55a6 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -93,6 +93,8 @@ enum {
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
 
+#define RTM_FAM(cmd)	(((cmd) - RTM_BASE) >> 2)
+
 /* 
    Generic structure for encapsulation of optional route information.
    It is reminiscent of sockaddr, but with sa_family replaced
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d8c198e42f905..58a981806268a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -88,28 +88,31 @@ struct rtnetlink_link * rtnetlink_links[NPROTO];
 
 static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
 {
-	NLMSG_LENGTH(sizeof(struct ifinfomsg)),
-	NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
-	NLMSG_LENGTH(sizeof(struct rtmsg)),
-	NLMSG_LENGTH(sizeof(struct ndmsg)),
-	NLMSG_LENGTH(sizeof(struct rtmsg)),
-	NLMSG_LENGTH(sizeof(struct tcmsg)),
-	NLMSG_LENGTH(sizeof(struct tcmsg)),
-	NLMSG_LENGTH(sizeof(struct tcmsg)),
-	NLMSG_LENGTH(sizeof(struct tcamsg))
+	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+	[RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+	[RTM_FAM(RTM_NEWROUTE)]     = NLMSG_LENGTH(sizeof(struct rtmsg)),
+	[RTM_FAM(RTM_NEWNEIGH)]     = NLMSG_LENGTH(sizeof(struct ndmsg)),
+	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct rtmsg)),
+	[RTM_FAM(RTM_NEWQDISC)]     = NLMSG_LENGTH(sizeof(struct tcmsg)),
+	[RTM_FAM(RTM_NEWTCLASS)]    = NLMSG_LENGTH(sizeof(struct tcmsg)),
+	[RTM_FAM(RTM_NEWTFILTER)]   = NLMSG_LENGTH(sizeof(struct tcmsg)),
+	[RTM_FAM(RTM_NEWACTION)]    = NLMSG_LENGTH(sizeof(struct tcamsg)),
+	[RTM_FAM(RTM_NEWPREFIX)]    = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 };
 
 static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
 {
-	IFLA_MAX,
-	IFA_MAX,
-	RTA_MAX,
-	NDA_MAX,
-	RTA_MAX,
-	TCA_MAX,
-	TCA_MAX,
-	TCA_MAX,
-	TCAA_MAX
+	[RTM_FAM(RTM_NEWLINK)]      = IFLA_MAX,
+	[RTM_FAM(RTM_NEWADDR)]      = IFA_MAX,
+	[RTM_FAM(RTM_NEWROUTE)]     = RTA_MAX,
+	[RTM_FAM(RTM_NEWNEIGH)]     = NDA_MAX,
+	[RTM_FAM(RTM_NEWRULE)]      = RTA_MAX,
+	[RTM_FAM(RTM_NEWQDISC)]     = TCA_MAX,
+	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
+	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
+	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)

From db46edc6d3b66bf708a8f23a9aa89f63a49ebe33 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 May 2005 14:29:39 -0700
Subject: [PATCH 09/28] [RTNETLINK] Cleanup rtnetlink_link tables

Converts the remaining rtnetlink_link tables to use C99 designated
initializers to make grepping a little bit easier.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  2 ++
 net/core/rtnetlink.c      |  6 +++---
 net/decnet/dn_dev.c       | 25 +++++++++++++------------
 net/ipv4/devinet.c        | 21 +++++++++++----------
 net/ipv6/addrconf.c       |  2 +-
 5 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1ecaea74d55a6..91ac97c207771 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -93,6 +93,8 @@ enum {
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
 
+#define RTM_NR_MSGTYPES	(RTM_MAX + 1 - RTM_BASE)
+#define RTM_NR_FAMILIES	(RTM_NR_MSGTYPES >> 2)
 #define RTM_FAM(cmd)	(((cmd) - RTM_BASE) >> 2)
 
 /* 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 58a981806268a..5fb70cfa10850 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -86,7 +86,7 @@ struct sock *rtnl;
 
 struct rtnetlink_link * rtnetlink_links[NPROTO];
 
-static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
+static const int rtm_min[RTM_NR_FAMILIES] =
 {
 	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
 	[RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
@@ -102,7 +102,7 @@ static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
 	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 };
 
-static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
+static const int rta_max[RTM_NR_FAMILIES] =
 {
 	[RTM_FAM(RTM_NEWLINK)]      = IFLA_MAX,
 	[RTM_FAM(RTM_NEWADDR)]      = IFA_MAX,
@@ -641,7 +641,7 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 	} while (rtnl && rtnl->sk_receive_queue.qlen);
 }
 
-static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
+static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
 	[RTM_GETLINK  - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
 	[RTM_SETLINK  - RTM_BASE] = { .doit   = do_setlink	      },
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c2a0346f423b4..e6e23eb144280 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1411,21 +1411,22 @@ static struct file_operations dn_dev_seq_fops = {
 
 #endif /* CONFIG_PROC_FS */
 
-static struct rtnetlink_link dnet_rtnetlink_table[RTM_MAX-RTM_BASE+1] = 
+static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
-	 [4] = { .doit   = dn_dev_rtm_newaddr,	},
-	 [5] = { .doit   = dn_dev_rtm_deladdr,	},
-	 [6] = { .dumpit = dn_dev_dump_ifaddr,	},
-
+	[RTM_NEWADDR  - RTM_BASE] = { .doit	= dn_dev_rtm_newaddr,	},
+	[RTM_DELADDR  - RTM_BASE] = { .doit	= dn_dev_rtm_deladdr,	},
+	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= dn_dev_dump_ifaddr,	},
 #ifdef CONFIG_DECNET_ROUTER
-	 [8] = { .doit   = dn_fib_rtm_newroute,	},
-	 [9] = { .doit   = dn_fib_rtm_delroute,	},
-	[10] = { .doit   = dn_cache_getroute, .dumpit = dn_fib_dump, },
-	[16] = { .doit   = dn_fib_rtm_newrule, },
-	[17] = { .doit   = dn_fib_rtm_delrule, },
-	[18] = { .dumpit = dn_fib_dump_rules,  },
+	[RTM_NEWROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_newroute,	},
+	[RTM_DELROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_delroute,	},
+	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
+				      .dumpit	= dn_fib_dump,		},
+	[RTM_NEWRULE  - RTM_BASE] = { .doit	= dn_fib_rtm_newrule,	},
+	[RTM_DELRULE  - RTM_BASE] = { .doit	= dn_fib_rtm_delrule,	},
+	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= dn_fib_dump_rules,	},
 #else
-	[10] = { .doit   = dn_cache_getroute, .dumpit = dn_cache_dump, },
+	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
+				      .dumpit	= dn_cache_dump,	},
 #endif
 
 };
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index eea7ef0107767..abbc6d5c183e3 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1107,17 +1107,18 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
 	}
 }
 
-static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = {
-	 [4] = { .doit	 = inet_rtm_newaddr,  },
-	 [5] = { .doit	 = inet_rtm_deladdr,  },
-	 [6] = { .dumpit = inet_dump_ifaddr,  },
-	 [8] = { .doit	 = inet_rtm_newroute, },
-	 [9] = { .doit	 = inet_rtm_delroute, },
-	[10] = { .doit	 = inet_rtm_getroute, .dumpit = inet_dump_fib, },
+static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
+	[RTM_NEWADDR  - RTM_BASE] = { .doit	= inet_rtm_newaddr,	},
+	[RTM_DELADDR  - RTM_BASE] = { .doit	= inet_rtm_deladdr,	},
+	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= inet_dump_ifaddr,	},
+	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet_rtm_newroute,	},
+	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet_rtm_delroute,	},
+	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet_rtm_getroute,
+				      .dumpit	= inet_dump_fib,	},
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-	[16] = { .doit	 = inet_rtm_newrule, },
-	[17] = { .doit	 = inet_rtm_delrule, },
-	[18] = { .dumpit = inet_dump_rules,  },
+	[RTM_NEWRULE  - RTM_BASE] = { .doit	= inet_rtm_newrule,	},
+	[RTM_DELRULE  - RTM_BASE] = { .doit	= inet_rtm_delrule,	},
+	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= inet_dump_rules,	},
 #endif
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7196ac2f2d168..7744a2592693f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3076,7 +3076,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC);
 }
 
-static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = {
+static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
 	[RTM_GETLINK - RTM_BASE] = { .dumpit	= inet6_dump_ifinfo, },
 	[RTM_NEWADDR - RTM_BASE] = { .doit	= inet6_rtm_newaddr, },
 	[RTM_DELADDR - RTM_BASE] = { .doit	= inet6_rtm_deladdr, },

From 6a5d362120a61a719095443194cc2d5e9a7027dd Mon Sep 17 00:00:00 2001
From: Jesper Juhl <juhl-lkml@dif.dk>
Date: Tue, 3 May 2005 14:33:27 -0700
Subject: [PATCH 10/28] [WAN]: kfree of NULL pointer is valid

kfree(NULL) is perfectly valid; checking pointers for NULL before calling
kfree() on them is redundant. The patch below cleans away a few such
redundant checks (and while I was around some of those bits I couldn't
stop myself from making a few tiny whitespace changes as well).
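
The pattern being removed, reduced to a user-space sketch (the struct and field
names are made up; free() is used as a stand-in because it shares kfree()'s
documented behaviour of doing nothing when handed NULL):

#include <stdlib.h>

struct chan {
	char *local_addr;
};

static void chan_destroy(struct chan *c)
{
	/* before: if (c->local_addr) kfree(c->local_addr); */
	free(c->local_addr);	/* no-op when local_addr is NULL */
	free(c);
}

int main(void)
{
	struct chan *c = calloc(1, sizeof(*c));

	if (!c)
		return 1;
	chan_destroy(c);	/* local_addr was never allocated: still safe */
	return 0;
}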

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cycx_x25.c   |  8 ++------
 drivers/net/wan/pc300_tty.c  | 27 +++++++++++----------------
 drivers/net/wan/sdla_chdlc.c | 13 ++++---------
 drivers/net/wan/x25_asy.c    | 20 ++++++--------------
 4 files changed, 23 insertions(+), 45 deletions(-)

diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c
index 5b48cd8568f51..02d57c0b4243a 100644
--- a/drivers/net/wan/cycx_x25.c
+++ b/drivers/net/wan/cycx_x25.c
@@ -436,9 +436,7 @@ static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev,
 	}
 
 	if (err) {
-		if (chan->local_addr)
-			kfree(chan->local_addr);
-
+		kfree(chan->local_addr);
 		kfree(chan);
 		return err;
 	}
@@ -458,9 +456,7 @@ static int cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev)
 		struct cycx_x25_channel *chan = dev->priv;
 
 		if (chan->svc) {
-			if (chan->local_addr)
-				kfree(chan->local_addr);
-
+			kfree(chan->local_addr);
 			if (chan->state == WAN_CONNECTED)
 				del_timer(&chan->timer);
 		}
diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c
index 29f84ad087303..8454bf6caaa70 100644
--- a/drivers/net/wan/pc300_tty.c
+++ b/drivers/net/wan/pc300_tty.c
@@ -400,10 +400,8 @@ static void cpc_tty_close(struct tty_struct *tty, struct file *flip)
 		cpc_tty->buf_rx.last = NULL;
 	}
 	
-	if (cpc_tty->buf_tx) {
-		kfree(cpc_tty->buf_tx);
-		cpc_tty->buf_tx = NULL;
-	}
+	kfree(cpc_tty->buf_tx);
+	cpc_tty->buf_tx = NULL;
 
 	CPC_TTY_DBG("%s: TTY closed\n",cpc_tty->name);
 	
@@ -666,7 +664,7 @@ static void cpc_tty_rx_work(void * data)
 	unsigned long port;
 	int i, j;
 	st_cpc_tty_area *cpc_tty; 
-	volatile st_cpc_rx_buf * buf;
+	volatile st_cpc_rx_buf *buf;
 	char flags=0,flg_rx=1; 
 	struct tty_ldisc *ld;
 
@@ -680,9 +678,9 @@ static void cpc_tty_rx_work(void * data)
 			cpc_tty = &cpc_tty_area[port];
 		
 			if ((buf=cpc_tty->buf_rx.first) != 0) {
-				if(cpc_tty->tty) {
+				if (cpc_tty->tty) {
 					ld = tty_ldisc_ref(cpc_tty->tty);
-					if(ld) {
+					if (ld) {
 						if (ld->receive_buf) {
 							CPC_TTY_DBG("%s: call line disc. receive_buf\n",cpc_tty->name);
 							ld->receive_buf(cpc_tty->tty, (char *)(buf->data), &flags, buf->size);
@@ -691,7 +689,7 @@ static void cpc_tty_rx_work(void * data)
 					}
 				}	
 				cpc_tty->buf_rx.first = cpc_tty->buf_rx.first->next;
-				kfree((unsigned char *)buf);
+				kfree(buf);
 				buf = cpc_tty->buf_rx.first;
 				flg_rx = 1;
 			}
@@ -733,7 +731,7 @@ static void cpc_tty_rx_disc_frame(pc300ch_t *pc300chan)
 
 void cpc_tty_receive(pc300dev_t *pc300dev)
 {
-	st_cpc_tty_area    *cpc_tty; 
+	st_cpc_tty_area *cpc_tty; 
 	pc300ch_t *pc300chan = (pc300ch_t *)pc300dev->chan; 
 	pc300_t *card = (pc300_t *)pc300chan->card; 
 	int ch = pc300chan->channel; 
@@ -742,7 +740,7 @@ void cpc_tty_receive(pc300dev_t *pc300dev)
 	int rx_len, rx_aux; 
 	volatile unsigned char status; 
 	unsigned short first_bd = pc300chan->rx_first_bd;
-	st_cpc_rx_buf	*new=NULL;
+	st_cpc_rx_buf *new = NULL;
 	unsigned char dsr_rx;
 
 	if (pc300dev->cpc_tty == NULL) { 
@@ -762,7 +760,7 @@ void cpc_tty_receive(pc300dev_t *pc300dev)
 			if (status & DST_EOM) {
 				break;
 			}
-			ptdescr=(pcsca_bd_t __iomem *)(card->hw.rambase+cpc_readl(&ptdescr->next));
+			ptdescr = (pcsca_bd_t __iomem *)(card->hw.rambase+cpc_readl(&ptdescr->next));
 		}
 			
 		if (!rx_len) { 
@@ -771,10 +769,7 @@ void cpc_tty_receive(pc300dev_t *pc300dev)
 				cpc_writel(card->hw.scabase + DRX_REG(EDAL, ch), 
 						RX_BD_ADDR(ch, pc300chan->rx_last_bd)); 
 			}
-			if (new) {
-				kfree(new);
-				new = NULL;
-			}
+			kfree(new);
 			return; 
 		}
 		
@@ -787,7 +782,7 @@ void cpc_tty_receive(pc300dev_t *pc300dev)
 			continue;
 		} 
 		
-		new = (st_cpc_rx_buf *) kmalloc(rx_len + sizeof(st_cpc_rx_buf), GFP_ATOMIC);
+		new = (st_cpc_rx_buf *)kmalloc(rx_len + sizeof(st_cpc_rx_buf), GFP_ATOMIC);
 		if (new == 0) {
 			cpc_tty_rx_disc_frame(pc300chan);
 			continue;
diff --git a/drivers/net/wan/sdla_chdlc.c b/drivers/net/wan/sdla_chdlc.c
index afbe0024e3e15..496d29237e927 100644
--- a/drivers/net/wan/sdla_chdlc.c
+++ b/drivers/net/wan/sdla_chdlc.c
@@ -3664,15 +3664,10 @@ static void wanpipe_tty_close(struct tty_struct *tty, struct file * filp)
 		chdlc_disable_comm_shutdown(card);
 		unlock_adapter_irq(&card->wandev.lock,&smp_flags);
 
-		if (card->tty_buf){
-			kfree(card->tty_buf);
-			card->tty_buf=NULL;			
-		}
-
-		if (card->tty_rx){
-			kfree(card->tty_rx);
-			card->tty_rx=NULL;
-		}
+		kfree(card->tty_buf);
+		card->tty_buf = NULL;			
+		kfree(card->tty_rx);
+		card->tty_rx = NULL;
 	}
 	return;
 }
diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index 8c5cfcb55826b..1c540d825551f 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -107,13 +107,9 @@ static struct x25_asy *x25_asy_alloc(void)
 static void x25_asy_free(struct x25_asy *sl)
 {
 	/* Free all X.25 frame buffers. */
-	if (sl->rbuff)  {
-		kfree(sl->rbuff);
-	}
+	kfree(sl->rbuff);
 	sl->rbuff = NULL;
-	if (sl->xbuff)  {
-		kfree(sl->xbuff);
-	}
+	kfree(sl->xbuff);
 	sl->xbuff = NULL;
 
 	if (!test_and_clear_bit(SLF_INUSE, &sl->flags)) {
@@ -134,10 +130,8 @@ static int x25_asy_change_mtu(struct net_device *dev, int newmtu)
 	{
 		printk("%s: unable to grow X.25 buffers, MTU change cancelled.\n",
 		       dev->name);
-		if (xbuff != NULL)  
-			kfree(xbuff);
-		if (rbuff != NULL)  
-			kfree(rbuff);
+		kfree(xbuff);
+		kfree(rbuff);
 		return -ENOMEM;
 	}
 
@@ -169,10 +163,8 @@ static int x25_asy_change_mtu(struct net_device *dev, int newmtu)
 
 	spin_unlock_bh(&sl->lock);
 
-	if (xbuff != NULL) 
-		kfree(xbuff);
-	if (rbuff != NULL)
-		kfree(rbuff);
+	kfree(xbuff);
+	kfree(rbuff);
 	return 0;
 }
 

From 20cc6befa23bb993cf4a4c58becb1dd99e7fc927 Mon Sep 17 00:00:00 2001
From: Lucas Correia Villa Real <lucasvr@gobolinux.org>
Date: Tue, 3 May 2005 14:34:20 -0700
Subject: [PATCH 11/28] [PKT_SCHED]: fix typo on Kconfig

This is a trivial fix for a typo on Kconfig, where the Generic Random Early
Detection algorithm is abbreviated as RED instead of GRED.

Signed-off-by: Lucas Correia Villa Real <lucasvr@gobolinux.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9c118baed9dc5..b0941186f8677 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -185,7 +185,7 @@ config NET_SCH_GRED
 	depends on NET_SCHED
 	help
 	  Say Y here if you want to use the Generic Random Early Detection
-	  (RED) packet scheduling algorithm for some of your network devices
+	  (GRED) packet scheduling algorithm for some of your network devices
 	  (see the top of <file:net/sched/sch_red.c> for details and
 	  references about the algorithm).
 

From 0b2531bdc54e19717de5cb161d57e5ee0a7725ff Mon Sep 17 00:00:00 2001
From: Folkert van Heusden <folkert@vanheusden.com>
Date: Tue, 3 May 2005 14:36:08 -0700
Subject: [PATCH 12/28] [TCP]: Optimize check in port-allocation code.

Signed-off-by: Folkert van Heusden <folkert@vanheusden.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3ac6659869c41..dad98e4a50431 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -222,10 +222,13 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
 		int rover;
 
 		spin_lock(&tcp_portalloc_lock);
-		rover = tcp_port_rover;
+		if (tcp_port_rover < low)
+			rover = low;
+		else
+			rover = tcp_port_rover;
 		do {
 			rover++;
-			if (rover < low || rover > high)
+			if (rover > high)
 				rover = low;
 			head = &tcp_bhash[tcp_bhashfn(rover)];
 			spin_lock(&head->lock);

From c3924c70dd3bddc28b99ccd1688bd281bad1a9be Mon Sep 17 00:00:00 2001
From: Folkert van Heusden <folkert@vanheusden.com>
Date: Tue, 3 May 2005 14:36:45 -0700
Subject: [PATCH 13/28] [TCP]: Optimize check in port-allocation code, v6
 version.

Signed-off-by: Folkert van Heusden <folkert@vanheusden.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4760c85e19db8..0f69e800a0ad6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -139,9 +139,12 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 		int rover;
 
 		spin_lock(&tcp_portalloc_lock);
-		rover = tcp_port_rover;
+		if (tcp_port_rover < low)
+			rover = low;
+		else
+			rover = tcp_port_rover;
 		do {	rover++;
-			if ((rover < low) || (rover > high))
+			if (rover > high)
 				rover = low;
 			head = &tcp_bhash[tcp_bhashfn(rover)];
 			spin_lock(&head->lock);

From 96edf83c4e284c08584f97623f7c7f029759459e Mon Sep 17 00:00:00 2001
From: Jesper Juhl <juhl-lkml@dif.dk>
Date: Tue, 3 May 2005 14:38:09 -0700
Subject: [PATCH 14/28] [PPP]: remove redundant NULL pointer checks before
 kfree & vfree

kfree() and vfree() can both deal with NULL pointers. This patch removes
redundant NULL pointer checks from the ppp code in drivers/net/

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp_deflate.c |  6 ++----
 drivers/net/ppp_generic.c | 12 ++++--------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ppp_deflate.c b/drivers/net/ppp_deflate.c
index 507d6328d4eb0..3872088fdd10c 100644
--- a/drivers/net/ppp_deflate.c
+++ b/drivers/net/ppp_deflate.c
@@ -87,8 +87,7 @@ static void z_comp_free(void *arg)
 
 	if (state) {
 		zlib_deflateEnd(&state->strm);
-		if (state->strm.workspace)
-			vfree(state->strm.workspace);
+		vfree(state->strm.workspace);
 		kfree(state);
 	}
 }
@@ -308,8 +307,7 @@ static void z_decomp_free(void *arg)
 
 	if (state) {
 		zlib_inflateEnd(&state->strm);
-		if (state->strm.workspace)
-			kfree(state->strm.workspace);
+		kfree(state->strm.workspace);
 		kfree(state);
 	}
 }
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index c456dc81b8730..3b377f6cd4a0b 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -2467,14 +2467,10 @@ static void ppp_destroy_interface(struct ppp *ppp)
 	skb_queue_purge(&ppp->mrq);
 #endif /* CONFIG_PPP_MULTILINK */
 #ifdef CONFIG_PPP_FILTER
-	if (ppp->pass_filter) {
-		kfree(ppp->pass_filter);
-		ppp->pass_filter = NULL;
-	}
-	if (ppp->active_filter) {
-		kfree(ppp->active_filter);
-		ppp->active_filter = NULL;
-	}
+	kfree(ppp->pass_filter);
+	ppp->pass_filter = NULL;
+	kfree(ppp->active_filter);
+	ppp->active_filter = NULL;
 #endif /* CONFIG_PPP_FILTER */
 
 	kfree(ppp);

From 033d899904792d3501b7dd469495ca9138424ec3 Mon Sep 17 00:00:00 2001
From: Asim Shankar <asimshankar@gmail.com>
Date: Tue, 3 May 2005 14:39:33 -0700
Subject: [PATCH 15/28] [PKT_SCHED]: HTB: Drop packet when direct queue is full

htb_enqueue(): Free skb and return NET_XMIT_DROP if a packet is
destined for the direct_queue but the direct_queue is full. (Before
this: erroneously returned NET_XMIT_SUCCESS even though the packet was
not enqueued)

Signed-off-by: Asim Shankar <asimshankar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index a85935e7d53d2..558cc087e6023 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -717,6 +717,10 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (q->direct_queue.qlen < q->direct_qlen) {
 	    __skb_queue_tail(&q->direct_queue, skb);
 	    q->direct_pkts++;
+	} else {
+	    kfree_skb(skb);
+	    sch->qstats.drops++;
+	    return NET_XMIT_DROP;
 	}
 #ifdef CONFIG_NET_CLS_ACT
     } else if (!cl) {

From 9dfa277f88388a94993b121db46b80df66f48d9e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 May 2005 14:41:18 -0700
Subject: [PATCH 16/28] [PKT_SCHED]: Fix range in PSCHED_TDIFF_SAFE to 0..bound

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 7352e455053cc..fcb05a387dbee 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -157,7 +157,8 @@ psched_tod_diff(int delta_sec, int bound)
 	   case 1: \
 		   __delta += 1000000; \
 	   case 0: \
-		   __delta = abs(__delta); \
+ 		   if (__delta > bound || __delta < 0) \
+ 			__delta = bound; \
 	   } \
 	   __delta; \
 })

From 96c36023434b7b6824b1da72a6b7b1ca61d7310c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 May 2005 14:43:27 -0700
Subject: [PATCH 17/28] [NETLINK]: cb_lock does not need ref count on sk

Here is a little optimisation for the cb_lock used by netlink_dump.
While fixing that race earlier, I noticed that the reference count
held by cb_lock is completely useless.  The reason is that in order
to obtain the protection of the reference count, you have to take
the cb_lock.  But the only way to take the cb_lock is through
dereferencing the socket.

That is, you must already possess a reference count on the socket
before you can take advantage of the reference count held by cb_lock.
As a corollary, we can remove the reference count held by cb_lock.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 29a5fd231eac9..4ee392066148e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -373,7 +373,6 @@ static int netlink_release(struct socket *sock)
 		nlk->cb->done(nlk->cb);
 		netlink_destroy_callback(nlk->cb);
 		nlk->cb = NULL;
-		__sock_put(sk);
 	}
 	spin_unlock(&nlk->cb_lock);
 
@@ -1099,7 +1098,6 @@ static int netlink_dump(struct sock *sk)
 	spin_unlock(&nlk->cb_lock);
 
 	netlink_destroy_callback(cb);
-	__sock_put(sk);
 	return 0;
 }
 
@@ -1138,7 +1136,6 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 		return -EBUSY;
 	}
 	nlk->cb = cb;
-	sock_hold(sk);
 	spin_unlock(&nlk->cb_lock);
 
 	netlink_dump(sk);

From 2a0a6ebee1d68552152ae8d4aeda91d806995dec Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 May 2005 14:55:09 -0700
Subject: [PATCH 18/28] [NETLINK]: Synchronous message processing.

Let's recap the problem.  The current asynchronous netlink kernel
message processing is vulnerable to these attacks:

1) Hit and run: Attacker sends one or more messages and then exits
before they're processed.  This may confuse/disable the next netlink
user that gets the netlink address of the attacker since it may
receive the responses to the attacker's messages.

Proposed solutions:

a) Synchronous processing.
b) Stream mode socket.
c) Restrict/prohibit binding.

2) Starvation: Because various netlink rcv functions were written
to not return until all messages have been processed on a socket,
it is possible for these functions to execute for an arbitrarily
long period of time.  If this is successfully exploited it could
also be used to hold rtnl forever.

Proposed solutions:

a) Synchronous processing.
b) Stream mode socket.

Firstly let's cross off solution c).  It only solves the first
problem and it has user-visible impacts.  In particular, it'll
break user space applications that expect to bind or communicate
with specific netlink addresses (pid's).

So we're left with a choice of synchronous processing versus
SOCK_STREAM for netlink.

For the moment I'm sticking with the synchronous approach as
suggested by Alexey since it's simpler and I'd rather spend
my time working on other things.

However, it does have a number of deficiencies compared to the
stream mode solution:

1) User-space to user-space netlink communication is still vulnerable.

2) Inefficient use of resources.  This is especially true for rtnetlink
since the lock is shared with other users such as networking drivers.
The latter could hold the rtnl while communicating with hardware which
causes the rtnetlink user to wait when it could be doing other things.

3) It is still possible to DoS all netlink users by flooding the kernel
netlink receive queue.  The attacker simply fills the receive socket
with a single netlink message that fills up the entire queue.  The
attacker then continues to call sendmsg with the same message in a loop.

Point 3) can be countered by retransmissions in user-space code, but it
is pretty messy.

In light of these problems (in particular, point 3), we should implement
stream mode netlink at some point.  In the meantime, here is a patch
that implements synchronous processing.
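
The common shape of the converted receive handlers, reduced to a stand-alone
sketch (toy queue type, made-up names): the queue length is sampled once on
entry and only that many messages are handled per call, so a sender refilling
the queue concurrently cannot pin the handler forever.

#include <stdio.h>

struct queue {			/* toy stand-in for sk->sk_receive_queue */
	unsigned int len;
};

static unsigned int queue_len(const struct queue *q)
{
	return q->len;
}

static void dequeue_and_process(struct queue *q)
{
	q->len--;		/* take one message off and handle it */
}

static void rcv(struct queue *q)
{
	unsigned int qlen = queue_len(q);	/* snapshot taken once */

	for (; qlen; qlen--)
		dequeue_and_process(q);
	/* anything queued after the snapshot waits for the next call */
}

int main(void)
{
	struct queue q = { .len = 3 };

	rcv(&q);
	printf("left in queue: %u\n", q.len);	/* 0 */
	return 0;
}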

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/audit.c                  | 19 ++++++++-----------
 net/core/rtnetlink.c            | 23 ++++++++++++++---------
 net/decnet/netfilter/dn_rtmsg.c |  3 ++-
 net/ipv4/netfilter/ip_queue.c   | 20 +++++++++-----------
 net/ipv4/tcp_diag.c             |  3 ++-
 net/ipv6/netfilter/ip6_queue.c  | 20 +++++++++-----------
 net/xfrm/xfrm_user.c            | 15 +++++++++++----
 7 files changed, 55 insertions(+), 48 deletions(-)

diff --git a/kernel/audit.c b/kernel/audit.c
index 0f84dd7af2c8d..ac26d4d960d33 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -427,7 +427,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 /* Get message from skb (based on rtnetlink_rcv_skb).  Each message is
  * processed by audit_receive_msg.  Malformed skbs with wrong length are
  * discarded silently.  */
-static int audit_receive_skb(struct sk_buff *skb)
+static void audit_receive_skb(struct sk_buff *skb)
 {
 	int		err;
 	struct nlmsghdr	*nlh;
@@ -436,7 +436,7 @@ static int audit_receive_skb(struct sk_buff *skb)
 	while (skb->len >= NLMSG_SPACE(0)) {
 		nlh = (struct nlmsghdr *)skb->data;
 		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
-			return 0;
+			return;
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (rlen > skb->len)
 			rlen = skb->len;
@@ -446,23 +446,20 @@ static int audit_receive_skb(struct sk_buff *skb)
 			netlink_ack(skb, nlh, 0);
 		skb_pull(skb, rlen);
 	}
-	return 0;
 }
 
 /* Receive messages from netlink socket. */
 static void audit_receive(struct sock *sk, int length)
 {
 	struct sk_buff  *skb;
+	unsigned int qlen;
 
-	if (down_trylock(&audit_netlink_sem))
-		return;
+	down(&audit_netlink_sem);
 
-				/* FIXME: this must not cause starvation */
-	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
-		if (audit_receive_skb(skb) && skb->len)
-			skb_queue_head(&sk->sk_receive_queue, skb);
-		else
-			kfree_skb(skb);
+	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		audit_receive_skb(skb);
+		kfree_skb(skb);
 	}
 	up(&audit_netlink_sem);
 }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5fb70cfa10850..6e1ab1e34b2ec 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -609,26 +609,31 @@ static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
 
 /*
  *  rtnetlink input queue processing routine:
- *	- try to acquire shared lock. If it is failed, defer processing.
+ *	- process as much as there was in the queue upon entry.
  *	- feed skbs to rtnetlink_rcv_skb, until it refuse a message,
- *	  that will occur, when a dump started and/or acquisition of
- *	  exclusive lock failed.
+ *	  that will occur, when a dump started.
  */
 
 static void rtnetlink_rcv(struct sock *sk, int len)
 {
+	unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
+
 	do {
 		struct sk_buff *skb;
 
-		if (rtnl_shlock_nowait())
-			return;
+		rtnl_lock();
+
+		if (qlen > skb_queue_len(&sk->sk_receive_queue))
+			qlen = skb_queue_len(&sk->sk_receive_queue);
 
-		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		while (qlen--) {
+			skb = skb_dequeue(&sk->sk_receive_queue);
 			if (rtnetlink_rcv_skb(skb)) {
-				if (skb->len)
+				if (skb->len) {
 					skb_queue_head(&sk->sk_receive_queue,
 						       skb);
-				else
+					qlen++;
+				} else
 					kfree_skb(skb);
 				break;
 			}
@@ -638,7 +643,7 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 		up(&rtnl_sem);
 
 		netdev_run_todo();
-	} while (rtnl && rtnl->sk_receive_queue.qlen);
+	} while (qlen);
 }
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index f86a6259fd121..101ddef9ba9aa 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -119,8 +119,9 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 static void dnrmg_receive_user_sk(struct sock *sk, int len)
 {
 	struct sk_buff *skb;
+	unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
 
-	while((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+	while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
 		dnrmg_receive_user_skb(skb);
 		kfree_skb(skb);
 	}
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 9e40dffc204f3..e5746b6744134 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -546,20 +546,18 @@ ipq_rcv_skb(struct sk_buff *skb)
 static void
 ipq_rcv_sk(struct sock *sk, int len)
 {
-	do {
-		struct sk_buff *skb;
+	struct sk_buff *skb;
+	unsigned int qlen;
 
-		if (down_trylock(&ipqnl_sem))
-			return;
+	down(&ipqnl_sem);
 			
-		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-			ipq_rcv_skb(skb);
-			kfree_skb(skb);
-		}
+	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		ipq_rcv_skb(skb);
+		kfree_skb(skb);
+	}
 		
-		up(&ipqnl_sem);
-
-	} while (ipqnl && ipqnl->sk_receive_queue.qlen);
+	up(&ipqnl_sem);
 }
 
 static int
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 313c1408da33a..8faa8948f75c2 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -777,8 +777,9 @@ static inline void tcpdiag_rcv_skb(struct sk_buff *skb)
 static void tcpdiag_rcv(struct sock *sk, int len)
 {
 	struct sk_buff *skb;
+	unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
 
-	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+	while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
 		tcpdiag_rcv_skb(skb);
 		kfree_skb(skb);
 	}
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index c54830b895939..750943e2d34ee 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -549,20 +549,18 @@ ipq_rcv_skb(struct sk_buff *skb)
 static void
 ipq_rcv_sk(struct sock *sk, int len)
 {
-	do {
-		struct sk_buff *skb;
+	struct sk_buff *skb;
+	unsigned int qlen;
 
-		if (down_trylock(&ipqnl_sem))
-			return;
+	down(&ipqnl_sem);
 			
-		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-			ipq_rcv_skb(skb);
-			kfree_skb(skb);
-		}
+	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		ipq_rcv_skb(skb);
+		kfree_skb(skb);
+	}
 		
-		up(&ipqnl_sem);
-
-	} while (ipqnl && ipqnl->sk_receive_queue.qlen);
+	up(&ipqnl_sem);
 }
 
 static int
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 52b5843937c58..dab112f1dd8a8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1008,17 +1008,24 @@ static int xfrm_user_rcv_skb(struct sk_buff *skb)
 
 static void xfrm_netlink_rcv(struct sock *sk, int len)
 {
+	unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
+
 	do {
 		struct sk_buff *skb;
 
 		down(&xfrm_cfg_sem);
 
-		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		if (qlen > skb_queue_len(&sk->sk_receive_queue))
+			qlen = skb_queue_len(&sk->sk_receive_queue);
+
+		while (qlen--) {
+			skb = skb_dequeue(&sk->sk_receive_queue);
 			if (xfrm_user_rcv_skb(skb)) {
-				if (skb->len)
+				if (skb->len) {
 					skb_queue_head(&sk->sk_receive_queue,
 						       skb);
-				else
+					qlen++;
+				} else
 					kfree_skb(skb);
 				break;
 			}
@@ -1027,7 +1034,7 @@ static void xfrm_netlink_rcv(struct sock *sk, int len)
 
 		up(&xfrm_cfg_sem);
 
-	} while (xfrm_nl && xfrm_nl->sk_receive_queue.qlen);
+	} while (qlen);
 }
 
 static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard)

From 09e14305982efc2f3b509d3c50ef5dcbff64a998 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 3 May 2005 15:30:05 -0700
Subject: [PATCH 19/28] [NETLINK]: Fix infinite loops in synchronous netlink
 changes.

The qlen should continue to decrement, even if we
pop partially processed SKBs back onto the receive queue.
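
A stand-alone illustration (not kernel code) of why the loop form matters: qlen
is unsigned, and in the rtnetlink and xfrm handlers it is consulted again by the
enclosing do/while, so the old "while (qlen--)" form leaves it wrapped to a huge
value once the loop falls through, whereas "for (; qlen; qlen--)" stops with it
at exactly zero.

#include <stdio.h>

int main(void)
{
	unsigned int qlen = 3;

	while (qlen--)		/* decrements once more on the final, failing test */
		;
	printf("after while: %u\n", qlen);	/* wrapped to UINT_MAX */

	qlen = 3;
	for (; qlen; qlen--)	/* stops with the counter exactly at zero */
		;
	printf("after for:   %u\n", qlen);	/* 0 */

	return 0;
}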

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c            | 7 +++----
 net/decnet/netfilter/dn_rtmsg.c | 2 +-
 net/xfrm/xfrm_user.c            | 7 +++----
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6e1ab1e34b2ec..75b6d33b52924 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -626,14 +626,13 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 		if (qlen > skb_queue_len(&sk->sk_receive_queue))
 			qlen = skb_queue_len(&sk->sk_receive_queue);
 
-		while (qlen--) {
+		for (; qlen; qlen--) {
 			skb = skb_dequeue(&sk->sk_receive_queue);
 			if (rtnetlink_rcv_skb(skb)) {
-				if (skb->len) {
+				if (skb->len)
 					skb_queue_head(&sk->sk_receive_queue,
 						       skb);
-					qlen++;
-				} else
+				else
 					kfree_skb(skb);
 				break;
 			}
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 101ddef9ba9aa..284a9998e53d7 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -121,7 +121,7 @@ static void dnrmg_receive_user_sk(struct sock *sk, int len)
 	struct sk_buff *skb;
 	unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
 
-	while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
+	for (; qlen && (skb = skb_dequeue(&sk->sk_receive_queue)); qlen--) {
 		dnrmg_receive_user_skb(skb);
 		kfree_skb(skb);
 	}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index dab112f1dd8a8..e8740a4a1d784 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1018,14 +1018,13 @@ static void xfrm_netlink_rcv(struct sock *sk, int len)
 		if (qlen > skb_queue_len(&sk->sk_receive_queue))
 			qlen = skb_queue_len(&sk->sk_receive_queue);
 
-		while (qlen--) {
+		for (; qlen; qlen--) {
 			skb = skb_dequeue(&sk->sk_receive_queue);
 			if (xfrm_user_rcv_skb(skb)) {
-				if (skb->len) {
+				if (skb->len)
 					skb_queue_head(&sk->sk_receive_queue,
 						       skb);
-					qlen++;
-				} else
+				else
 					kfree_skb(skb);
 				break;
 			}

From 0f4821e7b93fe72e89b8ff393bd8e705bd178aa5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 3 May 2005 16:15:59 -0700
Subject: [PATCH 20/28] [XFRM/RTNETLINK]: Decrement qlen properly in
 {xfrm_,rt}netlink_rcv().

If we free up a partially processed packet because its
skb->len dropped to zero, we need to decrement qlen ourselves,
because we break out of the loop before it can do
the decrement for us.

Spotted by Herbert Xu.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 +++-
 net/xfrm/xfrm_user.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 75b6d33b52924..00caf4b318b20 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -632,8 +632,10 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 				if (skb->len)
 					skb_queue_head(&sk->sk_receive_queue,
 						       skb);
-				else
+				else {
 					kfree_skb(skb);
+					qlen--;
+				}
 				break;
 			}
 			kfree_skb(skb);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e8740a4a1d784..5ddda2c98af9c 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1024,8 +1024,10 @@ static void xfrm_netlink_rcv(struct sock *sk, int len)
 				if (skb->len)
 					skb_queue_head(&sk->sk_receive_queue,
 						       skb);
-				else
+				else {
 					kfree_skb(skb);
+					qlen--;
+				}
 				break;
 			}
 			kfree_skb(skb);

From cacaddf57ed4d5ca994e9a7e2bd5558061f5d89d Mon Sep 17 00:00:00 2001
From: "Tommy S. Christensen" <tommy.christensen@tpack.net>
Date: Tue, 3 May 2005 16:18:52 -0700
Subject: [PATCH 21/28] [NET]: Disable queueing when carrier is lost.

Some network drivers call netif_stop_queue() when detecting loss of
carrier. This leads to packets being queued up at the qdisc level for
an unbounded period of time. In order to prevent this effect, the core
networking stack will now cease to queue packets for any device that
is operationally down (i.e. the queue is flushed and disabled).

Signed-off-by: Tommy S. Christensen <tommy.christensen@tpack.net>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/link_watch.c   | 7 +++++++
 net/sched/sch_generic.c | 4 ++++
 2 files changed, 11 insertions(+)

diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 4859b7446c6fc..d43d1201275c1 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -16,6 +16,7 @@
 #include <linux/netdevice.h>
 #include <linux/if.h>
 #include <net/sock.h>
+#include <net/pkt_sched.h>
 #include <linux/rtnetlink.h>
 #include <linux/jiffies.h>
 #include <linux/spinlock.h>
@@ -74,6 +75,12 @@ void linkwatch_run_queue(void)
 		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
 
 		if (dev->flags & IFF_UP) {
+			if (netif_carrier_ok(dev)) {
+				WARN_ON(dev->qdisc_sleeping == &noop_qdisc);
+				dev_activate(dev);
+			} else
+				dev_deactivate(dev);
+
 			netdev_state_change(dev);
 		}
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8c01e023f02e7..9a2f8e41a26ee 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -539,6 +539,10 @@ void dev_activate(struct net_device *dev)
 		write_unlock_bh(&qdisc_tree_lock);
 	}
 
+	if (!netif_carrier_ok(dev))
+		/* Delay activation until next carrier-on event */
+		return;
+
 	spin_lock_bh(&dev->queue_lock);
 	rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping);
 	if (dev->qdisc != &noqueue_qdisc) {

From e4f8ab00cf3599ecb8110c0a838cd15d013b79e5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 May 2005 16:20:39 -0700
Subject: [PATCH 22/28] [NETFILTER]: Fix nf_debug_ip_local_deliver()

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netfilter.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/net/core/netfilter.c b/net/core/netfilter.c
index e51cfa46950cf..92c51824797dd 100644
--- a/net/core/netfilter.c
+++ b/net/core/netfilter.c
@@ -217,21 +217,10 @@ void nf_debug_ip_local_deliver(struct sk_buff *skb)
 	 * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING.  */
 	if (!skb->dev) {
 		printk("ip_local_deliver: skb->dev is NULL.\n");
-	}
-	else if (strcmp(skb->dev->name, "lo") == 0) {
-		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
-				      | (1 << NF_IP_POST_ROUTING)
-				      | (1 << NF_IP_PRE_ROUTING)
-				      | (1 << NF_IP_LOCAL_IN))) {
-			printk("ip_local_deliver: bad loopback skb: ");
-			debug_print_hooks_ip(skb->nf_debug);
-			nf_dump_skb(PF_INET, skb);
-		}
-	}
-	else {
+	} else {
 		if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
 				      | (1<<NF_IP_LOCAL_IN))) {
-			printk("ip_local_deliver: bad non-lo skb: ");
+			printk("ip_local_deliver: bad skb: ");
 			debug_print_hooks_ip(skb->nf_debug);
 			nf_dump_skb(PF_INET, skb);
 		}

From bd96535b81ad09d7593cc75093534acb984d3dc9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 May 2005 16:21:37 -0700
Subject: [PATCH 23/28] [NETFILTER]: Drop conntrack reference in
 ip_dev_loopback_xmit()

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netfilter.c | 2 --
 net/ipv4/ip_output.c | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/core/netfilter.c b/net/core/netfilter.c
index 92c51824797dd..22a8f127c4aad 100644
--- a/net/core/netfilter.c
+++ b/net/core/netfilter.c
@@ -236,8 +236,6 @@ void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
 		debug_print_hooks_ip(newskb->nf_debug);
 		nf_dump_skb(PF_INET, newskb);
 	}
-	/* Clear to avoid confusing input check */
-	newskb->nf_debug = 0;
 }
 
 void nf_debug_ip_finish_output2(struct sk_buff *skb)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 38f69532a029e..24fe3e00b42b0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -111,6 +111,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 #ifdef CONFIG_NETFILTER_DEBUG
 	nf_debug_ip_loopback_xmit(newskb);
 #endif
+	nf_reset(newskb);
 	netif_rx(newskb);
 	return 0;
 }

From 8cbe1d46d69f9e2c49f284fe0e9aee3387bd2c71 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 3 May 2005 16:24:03 -0700
Subject: [PATCH 24/28] [PKT_SCHED]: netem: trap infinite loop hang on qlen
 underflow

Due to bugs in netem (fixed by later patches), it is possible for the qdisc
qlen to go negative. If this happens, the CPU ends up spinning forever
in qdisc_run(). So add a BUG_ON() to trap it.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9a2f8e41a26ee..87e48a4e10513 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -179,6 +179,7 @@ int qdisc_restart(struct net_device *dev)
 		netif_schedule(dev);
 		return 1;
 	}
+	BUG_ON((int) q->q.qlen < 0);
 	return q->q.qlen;
 }
 

From 771018e76aaa6474be20a53c20458bcae8b00485 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 3 May 2005 16:24:32 -0700
Subject: [PATCH 25/28] [PKT_SCHED]: netem: make qdisc friendly to outer
 disciplines

Netem currently dumps packets into the queue when the timer expires. This
patch makes it work by self-clocking (more like TBF).  It also fixes a bug
when a 0 delay is requested (only doing loss or duplication).

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 113 +++++++++++++++++++++++++-----------------
 1 file changed, 67 insertions(+), 46 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 31c29deb139d3..864b8d353ffae 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -138,38 +138,78 @@ static long tabledist(unsigned long mu, long sigma,
 }
 
 /* Put skb in the private delayed queue. */
-static int delay_skb(struct Qdisc *sch, struct sk_buff *skb)
+static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
 	psched_tdiff_t td;
 	psched_time_t now;
 	
 	PSCHED_GET_TIME(now);
 	td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
-	PSCHED_TADD2(now, td, cb->time_to_send);
 	
 	/* Always queue at tail to keep packets in order */
 	if (likely(q->delayed.qlen < q->limit)) {
+		struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+	
+		PSCHED_TADD2(now, td, cb->time_to_send);
+
+		pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb, 
+			 now, cb->time_to_send);
+	
 		__skb_queue_tail(&q->delayed, skb);
-		if (!timer_pending(&q->timer)) {
-			q->timer.expires = jiffies + PSCHED_US2JIFFIE(td);
-			add_timer(&q->timer);
-		}
 		return NET_XMIT_SUCCESS;
 	}
 
+	pr_debug("netem_delay: queue over limit %d\n", q->limit);
+	sch->qstats.overlimits++;
 	kfree_skb(skb);
 	return NET_XMIT_DROP;
 }
 
+/*
+ *  Move a packet that is ready to send from the delay holding
+ *  list to the underlying qdisc.
+ */
+static int netem_run(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	psched_time_t now;
+
+	PSCHED_GET_TIME(now);
+
+	skb = skb_peek(&q->delayed);
+	if (skb) {
+		const struct netem_skb_cb *cb
+			= (const struct netem_skb_cb *)skb->cb;
+		long delay 
+			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
+		pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
+
+		/* if more time remaining? */
+		if (delay > 0) {
+			mod_timer(&q->timer, jiffies + delay);
+			return 1;
+		}
+
+		__skb_unlink(skb, &q->delayed);
+		
+		if (q->qdisc->enqueue(skb, q->qdisc)) {
+			sch->q.qlen--;
+			sch->qstats.drops++;
+		} 
+	}
+
+	return 0;
+}
+
 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb2;
 	int ret;
 
-	pr_debug("netem_enqueue skb=%p @%lu\n", skb, jiffies);
+	pr_debug("netem_enqueue skb=%p\n", skb);
 
 	/* Random packet drop 0 => none, ~0 => all */
 	if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
@@ -184,7 +224,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	    && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
 		pr_debug("netem_enqueue: dup %p\n", skb2);
 
-		if (delay_skb(sch, skb2)) {
+		if (netem_delay(sch, skb2)) {
 			sch->q.qlen++;
 			sch->bstats.bytes += skb2->len;
 			sch->bstats.packets++;
@@ -202,7 +242,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		ret = q->qdisc->enqueue(skb, q->qdisc);
 	} else {
 		q->counter = 0;
-		ret = delay_skb(sch, skb);
+		ret = netem_delay(sch, skb);
+		netem_run(sch);
 	}
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -241,56 +282,35 @@ static unsigned int netem_drop(struct Qdisc* sch)
 	return len;
 }
 
-/* Dequeue packet.
- *  Move all packets that are ready to send from the delay holding
- *  list to the underlying qdisc, then just call dequeue
- */
 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
+	int pending;
+
+	pending = netem_run(sch);
 
 	skb = q->qdisc->dequeue(q->qdisc);
-	if (skb) 
+	if (skb) {
+		pr_debug("netem_dequeue: return skb=%p\n", skb);
 		sch->q.qlen--;
+		sch->flags &= ~TCQ_F_THROTTLED;
+	}
+	else if (pending) {
+		pr_debug("netem_dequeue: throttling\n");
+		sch->flags |= TCQ_F_THROTTLED;
+	} 
+
 	return skb;
 }
 
 static void netem_watchdog(unsigned long arg)
 {
 	struct Qdisc *sch = (struct Qdisc *)arg;
-	struct netem_sched_data *q = qdisc_priv(sch);
-	struct net_device *dev = sch->dev;
-	struct sk_buff *skb;
-	psched_time_t now;
 
-	pr_debug("netem_watchdog: fired @%lu\n", jiffies);
-
-	spin_lock_bh(&dev->queue_lock);
-	PSCHED_GET_TIME(now);
-
-	while ((skb = skb_peek(&q->delayed)) != NULL) {
-		const struct netem_skb_cb *cb
-			= (const struct netem_skb_cb *)skb->cb;
-		long delay 
-			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
-		pr_debug("netem_watchdog: skb %p@%lu %ld\n",
-			 skb, jiffies, delay);
-
-		/* if more time remaining? */
-		if (delay > 0) {
-			mod_timer(&q->timer, jiffies + delay);
-			break;
-		}
-		__skb_unlink(skb, &q->delayed);
-
-		if (q->qdisc->enqueue(skb, q->qdisc)) {
-			sch->q.qlen--;
-			sch->qstats.drops++;
-		}
-	}
-	qdisc_run(dev);
-	spin_unlock_bh(&dev->queue_lock);
+	pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
+	sch->flags &= ~TCQ_F_THROTTLED;
+	netif_schedule(sch->dev);
 }
 
 static void netem_reset(struct Qdisc *sch)
@@ -301,6 +321,7 @@ static void netem_reset(struct Qdisc *sch)
 	skb_queue_purge(&q->delayed);
 
 	sch->q.qlen = 0;
+	sch->flags &= ~TCQ_F_THROTTLED;
 	del_timer_sync(&q->timer);
 }
 

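The shape of the change is easier to see outside the kernel: dequeue first
gives the delay-holding list a chance to release a due packet (netem_run()),
then either hands back whatever the inner qdisc has or, if the head of the
holding list is not due yet, arms the timer and marks the qdisc throttled so
the watchdog merely reschedules the device instead of shovelling packets
itself. The following is a simplified userspace sketch of that pattern; the
toy_* names and the flat lists standing in for the inner qdisc and the timer
are illustrative only, not the kernel API.

/*
 * Simplified userspace model (not the kernel code) of the self-clocking
 * pattern: dequeue runs the holding list, and if the head packet is not
 * yet due it arms a timer and reports "throttled" instead of letting the
 * timer shovel packets.
 */
#include <stdio.h>

struct pkt {
	long time_to_send;	/* absolute due time, like netem_skb_cb */
	struct pkt *next;
};

struct toy_netem {
	struct pkt *delayed;	/* delay-holding list, ordered by due time */
	struct pkt *ready;	/* stands in for the inner qdisc */
	long timer_expires;	/* stands in for mod_timer() */
	int throttled;		/* stands in for TCQ_F_THROTTLED */
};

/* Like netem_run(): move a due packet to the inner queue, or arm the
 * timer and report that more time remains (returns 1 = still pending). */
static int toy_netem_run(struct toy_netem *q, long now)
{
	struct pkt *skb = q->delayed;

	if (!skb)
		return 0;
	if (skb->time_to_send > now) {
		q->timer_expires = skb->time_to_send;
		return 1;
	}
	q->delayed = skb->next;
	skb->next = q->ready;
	q->ready = skb;
	return 0;
}

/* Like the new netem_dequeue(): run the holding list first, then either
 * hand back a ready packet or mark the qdisc throttled. */
static struct pkt *toy_netem_dequeue(struct toy_netem *q, long now)
{
	int pending = toy_netem_run(q, now);
	struct pkt *skb = q->ready;

	if (skb) {
		q->ready = skb->next;
		q->throttled = 0;
	} else if (pending) {
		q->throttled = 1;	/* the watchdog clears this later */
	}
	return skb;
}

int main(void)
{
	struct pkt p = { .time_to_send = 100, .next = NULL };
	struct toy_netem q = { .delayed = &p };

	if (!toy_netem_dequeue(&q, 50))
		printf("throttled until %ld\n", q.timer_expires);
	if (toy_netem_dequeue(&q, 150))
		printf("packet delivered\n");
	return 0;
}
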
From d5d75cd6b10ddad2f375b61092754474ad78aec7 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 3 May 2005 16:24:57 -0700
Subject: [PATCH 26/28] [PKT_SCHED]: netem: adjust parent qlen when
 duplicating

Fix a qlen underrun when doing duplication with netem. If netem is used
as a leaf discipline, then the parent's qlen needs to be adjusted when
packets are duplicated. (A toy model of the accounting follows the diff.)

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c   |  1 +
 net/sched/sch_netem.c | 20 +++++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4323a74eea30b..07977f8f2679b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1289,6 +1289,7 @@ static int __init pktsched_init(void)
 
 subsys_initcall(pktsched_init);
 
+EXPORT_SYMBOL(qdisc_lookup);
 EXPORT_SYMBOL(qdisc_get_rtab);
 EXPORT_SYMBOL(qdisc_put_rtab);
 EXPORT_SYMBOL(register_qdisc);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 864b8d353ffae..e0c9fbe73b15c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -206,7 +206,6 @@ static int netem_run(struct Qdisc *sch)
 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	struct sk_buff *skb2;
 	int ret;
 
 	pr_debug("netem_enqueue skb=%p\n", skb);
@@ -220,11 +219,21 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	/* Random duplication */
-	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)
-	    && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
-		pr_debug("netem_enqueue: dup %p\n", skb2);
+	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) {
+		struct sk_buff *skb2;
+
+		skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) {
+			struct Qdisc *qp;
+
+			/* Since one packet can generate two packets in the
+			 * queue, the parent's qlen accounting gets confused,
+			 * so fix it.
+			 */
+			qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent));
+			if (qp)
+				qp->q.qlen++;
 
-		if (netem_delay(sch, skb2)) {
 			sch->q.qlen++;
 			sch->bstats.bytes += skb2->len;
 			sch->bstats.packets++;
@@ -253,6 +262,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	} else
 		sch->qstats.drops++;
 
+	pr_debug("netem: enqueue ret %d\n", ret);
 	return ret;
 }
 

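The accounting problem is easy to reproduce in isolation: one enqueue into
netem can leave two packets behind (the original plus the clone), so a
parent qdisc that only counted its own single enqueue goes negative after
both packets have been dequeued. The toy model below shows the counter going
to -1 without the adjustment and back to 0 with it; the toy_* structures are
illustrative, not the kernel's qdisc types.

/*
 * Toy userspace model (not kernel code) of the accounting problem: when
 * netem duplicates, one enqueue into the child yields two dequeues, so a
 * parent that only counted its own enqueue ends up with a negative qlen
 * unless the child bumps the parent's counter, as the patch does via
 * qdisc_lookup().
 */
#include <stdio.h>

struct toy_qdisc {
	int qlen;
	struct toy_qdisc *parent;	/* what qdisc_lookup() finds */
};

/* Child enqueue that duplicates every packet. */
static void child_enqueue_dup(struct toy_qdisc *child, int fix_parent)
{
	child->qlen += 2;			/* original + duplicate */
	if (fix_parent && child->parent)
		child->parent->qlen++;		/* the patch's adjustment */
}

static void dequeue_one(struct toy_qdisc *child)
{
	child->qlen--;
	if (child->parent)
		child->parent->qlen--;
}

static void run(int fix_parent)
{
	struct toy_qdisc parent = { 0, NULL };
	struct toy_qdisc child = { 0, &parent };

	parent.qlen++;				/* parent counts one enqueue */
	child_enqueue_dup(&child, fix_parent);
	dequeue_one(&child);			/* original leaves */
	dequeue_one(&child);			/* duplicate leaves */
	printf("fix=%d: parent qlen = %d\n", fix_parent, parent.qlen);
}

int main(void)
{
	run(0);		/* -1: the underrun from the changelog */
	run(1);		/*  0: balanced after the parent adjustment */
	return 0;
}
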
From aabc9761b69f1bfa30a78f7005be95cc9cc06175 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 May 2005 16:27:10 -0700
Subject: [PATCH 27/28] [IPSEC]: Store idev entries

I found a bug that stopped IPsec/IPv6 from working.  About
a month ago IPv6 started using rt6i_idev->dev on the cached socket dst
entries.  If the cached socket dst entry is an IPsec one, then rt6i_idev
will be NULL.

Since we want to look at the rt6i_idev of the original route in this
case, the easiest fix is to store rt6i_idev in the IPsec dst entry just
as we do for a number of other IPv6 route attributes.  Unfortunately
this means that we need some new code to handle the references to
rt6i_idev.  That's why this patch is bigger than it would otherwise be.

I've also done the same thing for IPv4, since it is conceivable that
once these idev attributes start getting used for accounting, we will
need to dereference them for IPv4 IPsec entries too.  (A small sketch of
the reference-handling pattern follows the diff.)

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      | 10 ++++++++++
 net/ipv4/xfrm4_policy.c | 42 ++++++++++++++++++++++++++++++++++++++++
 net/ipv6/xfrm6_policy.c | 43 +++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_policy.c  | 25 ++----------------------
 4 files changed, 97 insertions(+), 23 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 73e9a8ca3d3b7..e142a256d5dc3 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1,6 +1,7 @@
 #ifndef _NET_XFRM_H
 #define _NET_XFRM_H
 
+#include <linux/compiler.h>
 #include <linux/xfrm.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
@@ -516,6 +517,15 @@ struct xfrm_dst
 	u32 child_mtu_cached;
 };
 
+static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
+{
+	dst_release(xdst->route);
+	if (likely(xdst->u.dst.xfrm))
+		xfrm_state_put(xdst->u.dst.xfrm);
+}
+
+extern void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
+
 /* Decapsulation state, used by the input to store data during
  * decapsulation procedure, to be used later (during the policy
  * check
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 7fe2afd2e6695..b2b60f3e9cdd6 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -8,7 +8,10 @@
  * 	
  */
 
+#include <asm/bug.h>
+#include <linux/compiler.h>
 #include <linux/config.h>
+#include <linux/inetdevice.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
@@ -152,6 +155,8 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		x->u.rt.rt_dst = rt0->rt_dst;
 		x->u.rt.rt_gateway = rt->rt_gateway;
 		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
+		x->u.rt.idev = rt0->idev;
+		in_dev_hold(rt0->idev);
 		header_len -= x->u.dst.xfrm->props.header_len;
 		trailer_len -= x->u.dst.xfrm->props.trailer_len;
 	}
@@ -243,11 +248,48 @@ static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
 	path->ops->update_pmtu(path, mtu);
 }
 
+static void xfrm4_dst_destroy(struct dst_entry *dst)
+{
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+	if (likely(xdst->u.rt.idev))
+		in_dev_put(xdst->u.rt.idev);
+	xfrm_dst_destroy(xdst);
+}
+
+static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			     int unregister)
+{
+	struct xfrm_dst *xdst;
+
+	if (!unregister)
+		return;
+
+	xdst = (struct xfrm_dst *)dst;
+	if (xdst->u.rt.idev->dev == dev) {
+		struct in_device *loopback_idev = in_dev_get(&loopback_dev);
+		BUG_ON(!loopback_idev);
+
+		do {
+			in_dev_put(xdst->u.rt.idev);
+			xdst->u.rt.idev = loopback_idev;
+			in_dev_hold(loopback_idev);
+			xdst = (struct xfrm_dst *)xdst->u.dst.child;
+		} while (xdst->u.dst.xfrm);
+
+		__in_dev_put(loopback_idev);
+	}
+
+	xfrm_dst_ifdown(dst, dev);
+}
+
 static struct dst_ops xfrm4_dst_ops = {
 	.family =		AF_INET,
 	.protocol =		__constant_htons(ETH_P_IP),
 	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
+	.destroy =		xfrm4_dst_destroy,
+	.ifdown =		xfrm4_dst_ifdown,
 	.gc_thresh =		1024,
 	.entry_size =		sizeof(struct xfrm_dst),
 };
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8a4f37de4d2da..4429b1a1fe5fe 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -11,7 +11,11 @@
  * 
  */
 
+#include <asm/bug.h>
+#include <linux/compiler.h>
 #include <linux/config.h>
+#include <linux/netdevice.h>
+#include <net/addrconf.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
@@ -166,6 +170,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); 
 		x->u.rt6.rt6i_dst      = rt0->rt6i_dst;
 		x->u.rt6.rt6i_src      = rt0->rt6i_src;	
+		x->u.rt6.rt6i_idev     = rt0->rt6i_idev;
+		in6_dev_hold(rt0->rt6i_idev);
 		header_len -= x->u.dst.xfrm->props.header_len;
 		trailer_len -= x->u.dst.xfrm->props.trailer_len;
 	}
@@ -251,11 +257,48 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
 	path->ops->update_pmtu(path, mtu);
 }
 
+static void xfrm6_dst_destroy(struct dst_entry *dst)
+{
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+	if (likely(xdst->u.rt6.rt6i_idev))
+		in6_dev_put(xdst->u.rt6.rt6i_idev);
+	xfrm_dst_destroy(xdst);
+}
+
+static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			     int unregister)
+{
+	struct xfrm_dst *xdst;
+
+	if (!unregister)
+		return;
+
+	xdst = (struct xfrm_dst *)dst;
+	if (xdst->u.rt6.rt6i_idev->dev == dev) {
+		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
+		BUG_ON(!loopback_idev);
+
+		do {
+			in6_dev_put(xdst->u.rt6.rt6i_idev);
+			xdst->u.rt6.rt6i_idev = loopback_idev;
+			in6_dev_hold(loopback_idev);
+			xdst = (struct xfrm_dst *)xdst->u.dst.child;
+		} while (xdst->u.dst.xfrm);
+
+		__in6_dev_put(loopback_idev);
+	}
+
+	xfrm_dst_ifdown(dst, dev);
+}
+
 static struct dst_ops xfrm6_dst_ops = {
 	.family =		AF_INET6,
 	.protocol =		__constant_htons(ETH_P_IPV6),
 	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
+	.destroy =		xfrm6_dst_destroy,
+	.ifdown =		xfrm6_dst_ifdown,
 	.gc_thresh =		1024,
 	.entry_size =		sizeof(struct xfrm_dst),
 };
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 80828078733dd..55ed979db1445 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1028,30 +1028,15 @@ static int stale_bundle(struct dst_entry *dst)
 	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
 }
 
-static void xfrm_dst_destroy(struct dst_entry *dst)
+void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
 {
-	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-
-	dst_release(xdst->route);
-
-	if (!dst->xfrm)
-		return;
-	xfrm_state_put(dst->xfrm);
-	dst->xfrm = NULL;
-}
-
-static void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-			    int unregister)
-{
-	if (!unregister)
-		return;
-
 	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
 		dst->dev = &loopback_dev;
 		dev_hold(&loopback_dev);
 		dev_put(dev);
 	}
 }
+EXPORT_SYMBOL(xfrm_dst_ifdown);
 
 static void xfrm_link_failure(struct sk_buff *skb)
 {
@@ -1262,10 +1247,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 			dst_ops->kmem_cachep = xfrm_dst_cache;
 		if (likely(dst_ops->check == NULL))
 			dst_ops->check = xfrm_dst_check;
-		if (likely(dst_ops->destroy == NULL))
-			dst_ops->destroy = xfrm_dst_destroy;
-		if (likely(dst_ops->ifdown == NULL))
-			dst_ops->ifdown = xfrm_dst_ifdown;
 		if (likely(dst_ops->negative_advice == NULL))
 			dst_ops->negative_advice = xfrm_negative_advice;
 		if (likely(dst_ops->link_failure == NULL))
@@ -1297,8 +1278,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
 			xfrm_policy_afinfo[afinfo->family] = NULL;
 			dst_ops->kmem_cachep = NULL;
 			dst_ops->check = NULL;
-			dst_ops->destroy = NULL;
-			dst_ops->ifdown = NULL;
 			dst_ops->negative_advice = NULL;
 			dst_ops->link_failure = NULL;
 			dst_ops->get_mss = NULL;

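The reference handling added on both address families is the same
three-step pattern: take a hold on the inner route's idev when the bundle
is created, swap the pointer over to the loopback idev (hold the new, put
the old) when the underlying device unregisters, and drop whatever
reference is left when the bundle entry is destroyed. Below is a small
userspace sketch of that pattern under those assumptions; the toy_* names
model only the refcounting and are not the kernel interfaces.

/*
 * Userspace sketch (not kernel code) of the reference pattern: each
 * bundle entry holds a counted reference on an idev; destroy drops it,
 * and when the underlying device goes away the entry is repointed at
 * the loopback idev so the pointer never dangles.
 */
#include <assert.h>
#include <stdio.h>

struct toy_idev {
	const char *name;
	int refcnt;
};

static struct toy_idev loopback_idev = { "lo", 1 };

static void idev_hold(struct toy_idev *idev)
{
	idev->refcnt++;
}

static void idev_put(struct toy_idev *idev)
{
	idev->refcnt--;
	assert(idev->refcnt >= 0);
}

struct toy_xdst {
	struct toy_idev *idev;		/* like rt->idev / rt6i_idev */
};

/* Like the bundle-create hunks: copy the pointer and take a reference. */
static void bundle_create(struct toy_xdst *x, struct toy_idev *rt_idev)
{
	x->idev = rt_idev;
	idev_hold(rt_idev);
}

/* Like xfrm{4,6}_dst_ifdown(): if the entry points at the dying device,
 * swap in loopback so later users still see a valid idev. */
static void bundle_ifdown(struct toy_xdst *x, struct toy_idev *dying)
{
	if (x->idev == dying) {
		idev_hold(&loopback_idev);
		idev_put(x->idev);
		x->idev = &loopback_idev;
	}
}

/* Like xfrm{4,6}_dst_destroy(): drop whatever reference is left. */
static void bundle_destroy(struct toy_xdst *x)
{
	idev_put(x->idev);
}

int main(void)
{
	struct toy_idev eth0 = { "eth0", 1 };
	struct toy_xdst x;

	bundle_create(&x, &eth0);
	bundle_ifdown(&x, &eth0);	/* eth0 is unregistering */
	bundle_destroy(&x);
	printf("%s refcnt=%d, lo refcnt=%d\n",
	       eth0.name, eth0.refcnt, loopback_idev.refcnt);
	return 0;
}
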
From 14d50e78f947d340066ee0465dd892ad1d9162c0 Mon Sep 17 00:00:00 2001
From: J Hadi Salim <hadi@cyberus.ca>
Date: Tue, 3 May 2005 16:29:13 -0700
Subject: [PATCH 28/28] [PKT_SCHED]: Action repeat

Long-standing bug: the policy to repeat an action never worked. (A minimal
model of the ordering fix follows the diff.)

Signed-off-by: J Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 5e6cc371b39ea..cafcb084098d1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -171,10 +171,10 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
 				skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
 				skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
 			}
-			if (ret != TC_ACT_PIPE)
-				goto exec_done;
 			if (ret == TC_ACT_REPEAT)
 				goto repeat;	/* we need a ttl - JHS */
+			if (ret != TC_ACT_PIPE)
+				goto exec_done;
 		}
 		act = a->next;
 	}
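
The fix is purely an ordering one: TC_ACT_REPEAT is not TC_ACT_PIPE, so with
the old order the "anything but PIPE ends the walk" test fired first and the
repeat branch below it was unreachable. A minimal standalone model of the two
orderings is sketched below; the action values and the toy harness are
illustrative only, not the kernel's tc_action machinery.

/*
 * Minimal model (not the kernel loop) of the ordering bug: TC_ACT_REPEAT
 * is not TC_ACT_PIPE, so testing "ret != TC_ACT_PIPE -> done" first makes
 * the repeat branch unreachable.  Checking REPEAT first, as the patch
 * does, lets the action actually run again.  The enum values here are
 * arbitrary, not the kernel's.
 */
#include <stdio.h>

enum { TC_ACT_OK, TC_ACT_PIPE, TC_ACT_REPEAT };

/* Pretend action: asks to repeat twice, then accepts the packet. */
static int toy_action(int *ttl)
{
	return (*ttl)-- > 0 ? TC_ACT_REPEAT : TC_ACT_OK;
}

static int exec_actions(int repeat_checked_first)
{
	int ttl = 2, runs = 0, ret;

repeat:
	ret = toy_action(&ttl);
	runs++;
	if (repeat_checked_first) {
		if (ret == TC_ACT_REPEAT)
			goto repeat;		/* fixed ordering */
		if (ret != TC_ACT_PIPE)
			goto done;
	} else {
		if (ret != TC_ACT_PIPE)
			goto done;		/* old ordering: swallows REPEAT */
		if (ret == TC_ACT_REPEAT)
			goto repeat;		/* never reached */
	}
done:
	return runs;
}

int main(void)
{
	printf("old order: action ran %d time(s)\n", exec_actions(0));
	printf("new order: action ran %d time(s)\n", exec_actions(1));
	return 0;
}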