From d47effe1be0c4fc983306a9c704632e3a087eed8 Mon Sep 17 00:00:00 2001
From: Krishna Kumar <krkumar2@in.ibm.com>
Date: Tue, 1 Mar 2011 17:06:37 +0530
Subject: [PATCH 01/33] vhost: Cleanup vhost.c and net.c

Minor cleanup of vhost.c and net.c to match coding style.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c   | 19 ++++++++++------
 drivers/vhost/vhost.c | 53 ++++++++++++++++++++++++++++++-------------
 2 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f616cefc95ba7..59dad9fe52ddc 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -60,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to,
 {
 	int seg = 0;
 	size_t size;
+
 	while (len && seg < iov_count) {
 		size = min(from->iov_len, len);
 		to->iov_base = from->iov_base;
@@ -79,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
 {
 	int seg = 0;
 	size_t size;
+
 	while (len && seg < iovcount) {
 		size = min(from->iov_len, len);
 		to->iov_base = from->iov_base;
@@ -296,17 +298,16 @@ static void handle_rx_big(struct vhost_net *net)
 		.msg_iov = vq->iov,
 		.msg_flags = MSG_DONTWAIT,
 	};
-
 	struct virtio_net_hdr hdr = {
 		.flags = 0,
 		.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
-
 	size_t len, total_len = 0;
 	int err;
 	size_t hdr_size;
 	/* TODO: check that we are running from vhost_worker? */
 	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
+
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
 
@@ -405,18 +406,17 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		.msg_iov = vq->iov,
 		.msg_flags = MSG_DONTWAIT,
 	};
-
 	struct virtio_net_hdr_mrg_rxbuf hdr = {
 		.hdr.flags = 0,
 		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
-
 	size_t total_len = 0;
 	int err, headcount;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
 	/* TODO: check that we are running from vhost_worker? */
 	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
+
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
 
@@ -654,6 +654,7 @@ static struct socket *get_raw_socket(int fd)
 	} uaddr;
 	int uaddr_len = sizeof uaddr, r;
 	struct socket *sock = sockfd_lookup(fd, &r);
+
 	if (!sock)
 		return ERR_PTR(-ENOTSOCK);
 
@@ -682,6 +683,7 @@ static struct socket *get_tap_socket(int fd)
 {
 	struct file *file = fget(fd);
 	struct socket *sock;
+
 	if (!file)
 		return ERR_PTR(-EBADF);
 	sock = tun_get_socket(file);
@@ -696,6 +698,7 @@ static struct socket *get_tap_socket(int fd)
 static struct socket *get_socket(int fd)
 {
 	struct socket *sock;
+
 	/* special case to disable backend */
 	if (fd == -1)
 		return NULL;
@@ -741,9 +744,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	oldsock = rcu_dereference_protected(vq->private_data,
 					    lockdep_is_held(&vq->mutex));
 	if (sock != oldsock) {
-                vhost_net_disable_vq(n, vq);
-                rcu_assign_pointer(vq->private_data, sock);
-                vhost_net_enable_vq(n, vq);
+		vhost_net_disable_vq(n, vq);
+		rcu_assign_pointer(vq->private_data, sock);
+		vhost_net_enable_vq(n, vq);
 	}
 
 	mutex_unlock(&vq->mutex);
@@ -768,6 +771,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
 	struct socket *tx_sock = NULL;
 	struct socket *rx_sock = NULL;
 	long err;
+
 	mutex_lock(&n->dev.mutex);
 	err = vhost_dev_check_owner(&n->dev);
 	if (err)
@@ -829,6 +833,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 	struct vhost_vring_file backend;
 	u64 features;
 	int r;
+
 	switch (ioctl) {
 	case VHOST_NET_SET_BACKEND:
 		if (copy_from_user(&backend, argp, sizeof backend))
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ade0568c07a4e..b0cc7f8ca4dea 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -41,8 +41,8 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
 	struct vhost_poll *poll;
-	poll = container_of(pt, struct vhost_poll, table);
 
+	poll = container_of(pt, struct vhost_poll, table);
 	poll->wqh = wqh;
 	add_wait_queue(wqh, &poll->wait);
 }
@@ -85,6 +85,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
 void vhost_poll_start(struct vhost_poll *poll, struct file *file)
 {
 	unsigned long mask;
+
 	mask = file->f_op->poll(file, &poll->table);
 	if (mask)
 		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
@@ -101,6 +102,7 @@ static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
 				unsigned seq)
 {
 	int left;
+
 	spin_lock_irq(&dev->work_lock);
 	left = seq - work->done_seq;
 	spin_unlock_irq(&dev->work_lock);
@@ -222,6 +224,7 @@ static int vhost_worker(void *data)
 static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 {
 	int i;
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
 					       UIO_MAXIOV, GFP_KERNEL);
@@ -235,6 +238,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 			goto err_nomem;
 	}
 	return 0;
+
 err_nomem:
 	for (; i >= 0; --i) {
 		kfree(dev->vqs[i].indirect);
@@ -247,6 +251,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 static void vhost_dev_free_iovecs(struct vhost_dev *dev)
 {
 	int i;
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		kfree(dev->vqs[i].indirect);
 		dev->vqs[i].indirect = NULL;
@@ -296,26 +301,28 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 }
 
 struct vhost_attach_cgroups_struct {
-        struct vhost_work work;
-        struct task_struct *owner;
-        int ret;
+	struct vhost_work work;
+	struct task_struct *owner;
+	int ret;
 };
 
 static void vhost_attach_cgroups_work(struct vhost_work *work)
 {
-        struct vhost_attach_cgroups_struct *s;
-        s = container_of(work, struct vhost_attach_cgroups_struct, work);
-        s->ret = cgroup_attach_task_all(s->owner, current);
+	struct vhost_attach_cgroups_struct *s;
+
+	s = container_of(work, struct vhost_attach_cgroups_struct, work);
+	s->ret = cgroup_attach_task_all(s->owner, current);
 }
 
 static int vhost_attach_cgroups(struct vhost_dev *dev)
 {
-        struct vhost_attach_cgroups_struct attach;
-        attach.owner = current;
-        vhost_work_init(&attach.work, vhost_attach_cgroups_work);
-        vhost_work_queue(dev, &attach.work);
-        vhost_work_flush(dev, &attach.work);
-        return attach.ret;
+	struct vhost_attach_cgroups_struct attach;
+
+	attach.owner = current;
+	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
+	vhost_work_queue(dev, &attach.work);
+	vhost_work_flush(dev, &attach.work);
+	return attach.ret;
 }
 
 /* Caller should have device mutex */
@@ -323,11 +330,13 @@ static long vhost_dev_set_owner(struct vhost_dev *dev)
 {
 	struct task_struct *worker;
 	int err;
+
 	/* Is there an owner already? */
 	if (dev->mm) {
 		err = -EBUSY;
 		goto err_mm;
 	}
+
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
 	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
@@ -380,6 +389,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
 			vhost_poll_stop(&dev->vqs[i].poll);
@@ -421,6 +431,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
 {
 	u64 a = addr / VHOST_PAGE_SIZE / 8;
+
 	/* Make sure 64 bit math will not overflow. */
 	if (a > ULONG_MAX - (unsigned long)log_base ||
 	    a + (unsigned long)log_base > ULONG_MAX)
@@ -461,6 +472,7 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
 			    int log_all)
 {
 	int i;
+
 	for (i = 0; i < d->nvqs; ++i) {
 		int ok;
 		mutex_lock(&d->vqs[i].mutex);
@@ -527,6 +539,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 {
 	struct vhost_memory mem, *newmem, *oldmem;
 	unsigned long size = offsetof(struct vhost_memory, regions);
+
 	if (copy_from_user(&mem, m, size))
 		return -EFAULT;
 	if (mem.padding)
@@ -544,7 +557,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 		return -EFAULT;
 	}
 
-	if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) {
+	if (!memory_access_ok(d, newmem,
+			      vhost_has_feature(d, VHOST_F_LOG_ALL))) {
 		kfree(newmem);
 		return -EFAULT;
 	}
@@ -560,6 +574,7 @@ static int init_used(struct vhost_virtqueue *vq,
 		     struct vring_used __user *used)
 {
 	int r = put_user(vq->used_flags, &used->flags);
+
 	if (r)
 		return r;
 	return get_user(vq->last_used_idx, &used->idx);
@@ -849,6 +864,7 @@ static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
 {
 	struct vhost_memory_region *reg;
 	int i;
+
 	/* linear search is not brilliant, but we really have on the order of 6
 	 * regions in practice */
 	for (i = 0; i < mem->nregions; ++i) {
@@ -871,6 +887,7 @@ static int set_bit_to_user(int nr, void __user *addr)
 	void *base;
 	int bit = nr + (log % PAGE_SIZE) * 8;
 	int r;
+
 	r = get_user_pages_fast(log, 1, 1, &page);
 	if (r < 0)
 		return r;
@@ -888,6 +905,7 @@ static int log_write(void __user *log_base,
 {
 	u64 write_page = write_address / VHOST_PAGE_SIZE;
 	int r;
+
 	if (!write_length)
 		return 0;
 	write_length += write_address % VHOST_PAGE_SIZE;
@@ -1037,8 +1055,8 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			       i, count);
 			return -EINVAL;
 		}
-		if (unlikely(memcpy_fromiovec((unsigned char *)&desc, vq->indirect,
-					      sizeof desc))) {
+		if (unlikely(memcpy_fromiovec((unsigned char *)&desc,
+					      vq->indirect, sizeof desc))) {
 			vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
 			       i, (size_t)indirect->addr + i * sizeof desc);
 			return -EINVAL;
@@ -1317,6 +1335,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
 	__u16 flags;
+
 	/* Flush out used index updates. This is paired
 	 * with the barrier that the Guest executes when enabling
 	 * interrupts. */
@@ -1361,6 +1380,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 {
 	u16 avail_idx;
 	int r;
+
 	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
 		return false;
 	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
@@ -1387,6 +1407,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 void vhost_disable_notify(struct vhost_virtqueue *vq)
 {
 	int r;
+
 	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
 		return;
 	vq->used_flags |= VRING_USED_F_NO_NOTIFY;

From fcc042a2806064ffcaed7a0c5cb710eca0e99108 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 6 Mar 2011 13:33:49 +0200
Subject: [PATCH 02/33] vhost: copy_from_user -> __copy_from_user

copy_from_user is pretty high on perf top profile,
replacing it with __copy_from_user helps.
It's also safe because we do access_ok checks during setup.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/vhost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index b0cc7f8ca4dea..2ab291241635d 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1171,7 +1171,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			       i, vq->num, head);
 			return -EINVAL;
 		}
-		ret = copy_from_user(&desc, vq->desc + i, sizeof desc);
+		ret = __copy_from_user(&desc, vq->desc + i, sizeof desc);
 		if (unlikely(ret)) {
 			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
 			       i, vq->desc + i);

From cfbdab951369f15de890597530076bf0119361be Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 17 Jan 2011 16:10:59 +0800
Subject: [PATCH 03/33] vhost-net: check the support of mergeable buffer
 outside the receive loop

No need to check the support of mergeable buffer inside the recevie
loop as the whole handle_rx()_xx is in the read critical region.  So
this patch move it ahead of the receiving loop.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 59dad9fe52ddc..9f57cd45fe8ff 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -411,7 +411,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
 	size_t total_len = 0;
-	int err, headcount;
+	int err, headcount, mergeable;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
 	/* TODO: check that we are running from vhost_worker? */
@@ -427,6 +427,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
+	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
 
 	while ((sock_len = peek_head_len(sock->sk))) {
 		sock_len += sock_hlen;
@@ -476,7 +477,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 			break;
 		}
 		/* TODO: Should check and handle checksum. */
-		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
+		if (likely(mergeable) &&
 		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
 				      offsetof(typeof(hdr), num_buffers),
 				      sizeof hdr.num_buffers)) {

From 94249369e9930276e30087da205349a55478cbb5 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 17 Jan 2011 16:11:08 +0800
Subject: [PATCH 04/33] vhost-net: Unify the code of mergeable and big buffer
 handling

Codes duplication were found between the handling of mergeable and big
buffers, so this patch tries to unify them. This could be easily done
by adding a quota to the get_rx_bufs() which is used to limit the
number of buffers it returns (for mergeable buffer, the quota is
simply UIO_MAXIOV, for big buffers, the quota is just 1), and then the
previous handle_rx_mergeable() could be resued also for big buffers.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 128 +++-----------------------------------------
 1 file changed, 7 insertions(+), 121 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9f57cd45fe8ff..0329c411bbf10 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -229,6 +229,7 @@ static int peek_head_len(struct sock *sk)
  * @iovcount	- returned count of io vectors we fill
  * @log		- vhost log
  * @log_num	- log offset
+ * @quota       - headcount quota, 1 for big buffer
  *	returns number of buffer heads allocated, negative on error
  */
 static int get_rx_bufs(struct vhost_virtqueue *vq,
@@ -236,7 +237,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 		       int datalen,
 		       unsigned *iovcount,
 		       struct vhost_log *log,
-		       unsigned *log_num)
+		       unsigned *log_num,
+		       unsigned int quota)
 {
 	unsigned int out, in;
 	int seg = 0;
@@ -244,7 +246,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 	unsigned d;
 	int r, nlogs = 0;
 
-	while (datalen > 0) {
+	while (datalen > 0 && headcount < quota) {
 		if (unlikely(seg >= UIO_MAXIOV)) {
 			r = -ENOBUFS;
 			goto err;
@@ -284,116 +286,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
-static void handle_rx_big(struct vhost_net *net)
-{
-	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
-	unsigned out, in, log, s;
-	int head;
-	struct vhost_log *vq_log;
-	struct msghdr msg = {
-		.msg_name = NULL,
-		.msg_namelen = 0,
-		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
-		.msg_controllen = 0,
-		.msg_iov = vq->iov,
-		.msg_flags = MSG_DONTWAIT,
-	};
-	struct virtio_net_hdr hdr = {
-		.flags = 0,
-		.gso_type = VIRTIO_NET_HDR_GSO_NONE
-	};
-	size_t len, total_len = 0;
-	int err;
-	size_t hdr_size;
-	/* TODO: check that we are running from vhost_worker? */
-	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
-
-	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
-		return;
-
-	mutex_lock(&vq->mutex);
-	vhost_disable_notify(vq);
-	hdr_size = vq->vhost_hlen;
-
-	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
-		vq->log : NULL;
-
-	for (;;) {
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
-					 ARRAY_SIZE(vq->iov),
-					 &out, &in,
-					 vq_log, &log);
-		/* On error, stop handling until the next kick. */
-		if (unlikely(head < 0))
-			break;
-		/* OK, now we need to know about added descriptors. */
-		if (head == vq->num) {
-			if (unlikely(vhost_enable_notify(vq))) {
-				/* They have slipped one in as we were
-				 * doing that: check again. */
-				vhost_disable_notify(vq);
-				continue;
-			}
-			/* Nothing new?  Wait for eventfd to tell us
-			 * they refilled. */
-			break;
-		}
-		/* We don't need to be notified again. */
-		if (out) {
-			vq_err(vq, "Unexpected descriptor format for RX: "
-			       "out %d, int %d\n",
-			       out, in);
-			break;
-		}
-		/* Skip header. TODO: support TSO/mergeable rx buffers. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
-		msg.msg_iovlen = in;
-		len = iov_length(vq->iov, in);
-		/* Sanity check */
-		if (!len) {
-			vq_err(vq, "Unexpected header len for RX: "
-			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
-			break;
-		}
-		err = sock->ops->recvmsg(NULL, sock, &msg,
-					 len, MSG_DONTWAIT | MSG_TRUNC);
-		/* TODO: Check specific error and bomb out unless EAGAIN? */
-		if (err < 0) {
-			vhost_discard_vq_desc(vq, 1);
-			break;
-		}
-		/* TODO: Should check and handle checksum. */
-		if (err > len) {
-			pr_debug("Discarded truncated rx packet: "
-				 " len %d > %zd\n", err, len);
-			vhost_discard_vq_desc(vq, 1);
-			continue;
-		}
-		len = err;
-		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
-		if (err) {
-			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
-			       vq->iov->iov_base, err);
-			break;
-		}
-		len += hdr_size;
-		vhost_add_used_and_signal(&net->dev, vq, head, len);
-		if (unlikely(vq_log))
-			vhost_log_write(vq, vq_log, log, len);
-		total_len += len;
-		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-			vhost_poll_queue(&vq->poll);
-			break;
-		}
-	}
-
-	mutex_unlock(&vq->mutex);
-}
-
-/* Expects to be always run from workqueue - which acts as
- * read-size critical section for our kind of RCU. */
-static void handle_rx_mergeable(struct vhost_net *net)
+static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
 	unsigned uninitialized_var(in), log;
@@ -433,7 +326,8 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
 		headcount = get_rx_bufs(vq, vq->heads, vhost_len,
-					&in, vq_log, &log);
+					&in, vq_log, &log,
+					likely(mergeable) ? UIO_MAXIOV : 1);
 		/* On error, stop handling until the next kick. */
 		if (unlikely(headcount < 0))
 			break;
@@ -499,14 +393,6 @@ static void handle_rx_mergeable(struct vhost_net *net)
 	mutex_unlock(&vq->mutex);
 }
 
-static void handle_rx(struct vhost_net *net)
-{
-	if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
-		handle_rx_mergeable(net);
-	else
-		handle_rx_big(net);
-}
-
 static void handle_tx_kick(struct vhost_work *work)
 {
 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,

From 783e3988544b94ff3918666b9f36866ac547fba1 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 17 Jan 2011 16:11:17 +0800
Subject: [PATCH 05/33] vhost: lock receive queue, not the socket

vhost takes a sock lock to try and prevent
the skb from being pulled from the receive queue
after skb_peek.  However this is not the right lock to use for that,
sk_receive_queue.lock is. Fix that up.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 0329c411bbf10..57203014c4577 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -213,12 +213,13 @@ static int peek_head_len(struct sock *sk)
 {
 	struct sk_buff *head;
 	int len = 0;
+	unsigned long flags;
 
-	lock_sock(sk);
+	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
 	head = skb_peek(&sk->sk_receive_queue);
-	if (head)
+	if (likely(head))
 		len = head->len;
-	release_sock(sk);
+	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
 	return len;
 }
 

From de4d768a428d9de943dd6dc82bcd61742955cb6e Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 13 Mar 2011 23:00:52 +0200
Subject: [PATCH 06/33] vhost-net: remove unlocked use of receive_queue

Use of skb_queue_empty(&sock->sk->sk_receive_queue)
without taking the sk_receive_queue.lock is unsafe
or useless. Take it out.

Reported-by:  Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 57203014c4577..2f7c76a85e532 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -311,7 +311,7 @@ static void handle_rx(struct vhost_net *net)
 	/* TODO: check that we are running from vhost_worker? */
 	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
 
-	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
+	if (!sock)
 		return;
 
 	mutex_lock(&vq->mutex);

From 4363c2fddb1399b728ef21ee8101c148a311ea45 Mon Sep 17 00:00:00 2001
From: Alex Dubov <oakad@yahoo.com>
Date: Wed, 16 Mar 2011 17:57:13 +0000
Subject: [PATCH 07/33] gianfar: Fall back to software tcp/udp checksum on
 older controllers

As specified by errata eTSEC49 of MPC8548 and errata eTSEC12 of MPC83xx,
older revisions of gianfar controllers will be unable to calculate a TCP/UDP
packet checksum for some alignments of the appropriate FCB. This patch checks
for FCB alignment on such controllers and falls back to software checksumming
if the alignment is known to be bad.

Signed-off-by: Alex Dubov <oakad@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c | 16 ++++++++++++++--
 drivers/net/gianfar.h |  1 +
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index ccb231c4d9334..2a0ad9a501bbe 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -949,6 +949,11 @@ static void gfar_detect_errata(struct gfar_private *priv)
 			(pvr == 0x80861010 && (mod & 0xfff9) == 0x80c0))
 		priv->errata |= GFAR_ERRATA_A002;
 
+	/* MPC8313 Rev < 2.0, MPC8548 rev 2.0 */
+	if ((pvr == 0x80850010 && mod == 0x80b0 && rev < 0x0020) ||
+			(pvr == 0x80210020 && mod == 0x8030 && rev == 0x0020))
+		priv->errata |= GFAR_ERRATA_12;
+
 	if (priv->errata)
 		dev_info(dev, "enabled errata workarounds, flags: 0x%x\n",
 			 priv->errata);
@@ -2154,8 +2159,15 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Set up checksumming */
 	if (CHECKSUM_PARTIAL == skb->ip_summed) {
 		fcb = gfar_add_fcb(skb);
-		lstatus |= BD_LFLAG(TXBD_TOE);
-		gfar_tx_checksum(skb, fcb);
+		/* as specified by errata */
+		if (unlikely(gfar_has_errata(priv, GFAR_ERRATA_12)
+			     && ((unsigned long)fcb % 0x20) > 0x18)) {
+			__skb_pull(skb, GMAC_FCB_LEN);
+			skb_checksum_help(skb);
+		} else {
+			lstatus |= BD_LFLAG(TXBD_TOE);
+			gfar_tx_checksum(skb, fcb);
+		}
 	}
 
 	if (vlan_tx_tag_present(skb)) {
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index 54de4135e932b..ec5d595ce2e26 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -1039,6 +1039,7 @@ enum gfar_errata {
 	GFAR_ERRATA_74		= 0x01,
 	GFAR_ERRATA_76		= 0x02,
 	GFAR_ERRATA_A002	= 0x04,
+	GFAR_ERRATA_12		= 0x08, /* a.k.a errata eTSEC49 */
 };
 
 /* Struct stolen almost completely (and shamelessly) from the FCC enet source

From 67c5c6cb8129c595f21e88254a3fc6b3b841ae8e Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segoon@openwall.com>
Date: Thu, 17 Mar 2011 01:40:10 +0000
Subject: [PATCH 08/33] econet: 4 byte infoleak to the network

struct aunhdr has 4 padding bytes between 'pad' and 'handle' fields on
x86_64.  These bytes are not initialized in the variable 'ah' before
sending 'ah' to the network.  This leads to 4 bytes kernel stack
infoleak.

This bug was introduced before the git epoch.

Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Acked-by: Phil Blundell <philb@gnu.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 0c2826337919a..116d3fd3d6691 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -435,10 +435,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		udpdest.sin_addr.s_addr = htonl(network | addr.station);
 	}
 
+	memset(&ah, 0, sizeof(ah));
 	ah.port = port;
 	ah.cb = cb & 0x7f;
 	ah.code = 2;		/* magic */
-	ah.pad = 0;
 
 	/* tack our header on the front of the iovec */
 	size = sizeof(struct aunhdr);

From 5e5069b41d5b82bcadc1dbf73f48476b428c102f Mon Sep 17 00:00:00 2001
From: Roger Luethi <rl@hellgate.ch>
Date: Thu, 17 Mar 2011 06:37:21 +0000
Subject: [PATCH 09/33] ethtool: __ethtool_set_sg: check for function pointer
 before using it

__ethtool_set_sg does not check if dev->ethtool_ops->set_sg is defined
which can result in a NULL pointer dereference when ethtool is used to
change SG settings for drivers without SG support.

Signed-off-by: Roger Luethi <rl@hellgate.ch>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index c1a71bb738da4..a1086fb0c0c7d 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1457,6 +1457,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
 {
 	int err;
 
+	if (!dev->ethtool_ops->set_sg)
+		return -EOPNOTSUPP;
+
 	if (data && !(dev->features & NETIF_F_ALL_CSUM))
 		return -EINVAL;
 

From 3a7da39d165e0c363c294feec119db1427032afd Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 17 Mar 2011 07:34:32 +0000
Subject: [PATCH 10/33] ethtool: Compat handling for struct ethtool_rxnfc

This structure was accidentally defined such that its layout can
differ between 32-bit and 64-bit processes.  Add compat structure
definitions and an ioctl wrapper function.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: stable@kernel.org [2.6.30+]
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  34 ++++++++++++
 net/socket.c            | 114 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 141 insertions(+), 7 deletions(-)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index aac3e2eeb4fd6..b297f288f6eb8 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -13,6 +13,9 @@
 #ifndef _LINUX_ETHTOOL_H
 #define _LINUX_ETHTOOL_H
 
+#ifdef __KERNEL__
+#include <linux/compat.h>
+#endif
 #include <linux/types.h>
 #include <linux/if_ether.h>
 
@@ -450,6 +453,37 @@ struct ethtool_rxnfc {
 	__u32				rule_locs[0];
 };
 
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+
+struct compat_ethtool_rx_flow_spec {
+	u32		flow_type;
+	union {
+		struct ethtool_tcpip4_spec		tcp_ip4_spec;
+		struct ethtool_tcpip4_spec		udp_ip4_spec;
+		struct ethtool_tcpip4_spec		sctp_ip4_spec;
+		struct ethtool_ah_espip4_spec		ah_ip4_spec;
+		struct ethtool_ah_espip4_spec		esp_ip4_spec;
+		struct ethtool_usrip4_spec		usr_ip4_spec;
+		struct ethhdr				ether_spec;
+		u8					hdata[72];
+	} h_u, m_u;
+	compat_u64	ring_cookie;
+	u32		location;
+};
+
+struct compat_ethtool_rxnfc {
+	u32				cmd;
+	u32				flow_type;
+	compat_u64			data;
+	struct compat_ethtool_rx_flow_spec fs;
+	u32				rule_cnt;
+	u32				rule_locs[0];
+};
+
+#endif /* CONFIG_COMPAT */
+#endif /* __KERNEL__ */
+
 /**
  * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection
  * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR
diff --git a/net/socket.c b/net/socket.c
index 937d0fcf74bc6..5212447c86e72 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2588,23 +2588,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 
 static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 {
+	struct compat_ethtool_rxnfc __user *compat_rxnfc;
+	bool convert_in = false, convert_out = false;
+	size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
+	struct ethtool_rxnfc __user *rxnfc;
 	struct ifreq __user *ifr;
+	u32 rule_cnt = 0, actual_rule_cnt;
+	u32 ethcmd;
 	u32 data;
-	void __user *datap;
+	int ret;
+
+	if (get_user(data, &ifr32->ifr_ifru.ifru_data))
+		return -EFAULT;
 
-	ifr = compat_alloc_user_space(sizeof(*ifr));
+	compat_rxnfc = compat_ptr(data);
 
-	if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
+	if (get_user(ethcmd, &compat_rxnfc->cmd))
 		return -EFAULT;
 
-	if (get_user(data, &ifr32->ifr_ifru.ifru_data))
+	/* Most ethtool structures are defined without padding.
+	 * Unfortunately struct ethtool_rxnfc is an exception.
+	 */
+	switch (ethcmd) {
+	default:
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		/* Buffer size is variable */
+		if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
+			return -EFAULT;
+		if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
+			return -ENOMEM;
+		buf_size += rule_cnt * sizeof(u32);
+		/* fall through */
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+		convert_out = true;
+		/* fall through */
+	case ETHTOOL_SRXCLSRLDEL:
+	case ETHTOOL_SRXCLSRLINS:
+		buf_size += sizeof(struct ethtool_rxnfc);
+		convert_in = true;
+		break;
+	}
+
+	ifr = compat_alloc_user_space(buf_size);
+	rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
+
+	if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
 		return -EFAULT;
 
-	datap = compat_ptr(data);
-	if (put_user(datap, &ifr->ifr_ifru.ifru_data))
+	if (put_user(convert_in ? rxnfc : compat_ptr(data),
+		     &ifr->ifr_ifru.ifru_data))
 		return -EFAULT;
 
-	return dev_ioctl(net, SIOCETHTOOL, ifr);
+	if (convert_in) {
+		/* We expect there to be holes between fs.m_u and
+		 * fs.ring_cookie and at the end of fs, but nowhere else.
+		 */
+		BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) +
+			     sizeof(compat_rxnfc->fs.m_u) !=
+			     offsetof(struct ethtool_rxnfc, fs.m_u) +
+			     sizeof(rxnfc->fs.m_u));
+		BUILD_BUG_ON(
+			offsetof(struct compat_ethtool_rxnfc, fs.location) -
+			offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
+			offsetof(struct ethtool_rxnfc, fs.location) -
+			offsetof(struct ethtool_rxnfc, fs.ring_cookie));
+
+		if (copy_in_user(rxnfc, compat_rxnfc,
+				 (void *)(&rxnfc->fs.m_u + 1) -
+				 (void *)rxnfc) ||
+		    copy_in_user(&rxnfc->fs.ring_cookie,
+				 &compat_rxnfc->fs.ring_cookie,
+				 (void *)(&rxnfc->fs.location + 1) -
+				 (void *)&rxnfc->fs.ring_cookie) ||
+		    copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
+				 sizeof(rxnfc->rule_cnt)))
+			return -EFAULT;
+	}
+
+	ret = dev_ioctl(net, SIOCETHTOOL, ifr);
+	if (ret)
+		return ret;
+
+	if (convert_out) {
+		if (copy_in_user(compat_rxnfc, rxnfc,
+				 (const void *)(&rxnfc->fs.m_u + 1) -
+				 (const void *)rxnfc) ||
+		    copy_in_user(&compat_rxnfc->fs.ring_cookie,
+				 &rxnfc->fs.ring_cookie,
+				 (const void *)(&rxnfc->fs.location + 1) -
+				 (const void *)&rxnfc->fs.ring_cookie) ||
+		    copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
+				 sizeof(rxnfc->rule_cnt)))
+			return -EFAULT;
+
+		if (ethcmd == ETHTOOL_GRXCLSRLALL) {
+			/* As an optimisation, we only copy the actual
+			 * number of rules that the underlying
+			 * function returned.  Since Mallory might
+			 * change the rule count in user memory, we
+			 * check that it is less than the rule count
+			 * originally given (as the user buffer size),
+			 * which has been range-checked.
+			 */
+			if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
+				return -EFAULT;
+			if (actual_rule_cnt < rule_cnt)
+				rule_cnt = actual_rule_cnt;
+			if (copy_in_user(&compat_rxnfc->rule_locs[0],
+					 &rxnfc->rule_locs[0],
+					 rule_cnt * sizeof(u32)))
+				return -EFAULT;
+		}
+	}
+
+	return 0;
 }
 
 static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)

From d870bfb9d366c5d466c0f5419a4ec95a3f71ea8a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 18 Mar 2011 00:27:27 +0000
Subject: [PATCH 11/33] vlan: should take into account needed_headroom

Commit c95b819ad7 (gre: Use needed_headroom)
made gre use needed_headroom instead of hard_header_len

This uncover a bug in vlan code.

We should make sure vlan devices take into account their
real_dev->needed_headroom or we risk a crash in ipgre_header(), because
we dont have enough room to push IP header in skb.

Reported-by: Diddi Oscarsson <diddi@diddi.se>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ae610f046de59..e34ea9e5e28bc 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -720,6 +720,7 @@ static int vlan_dev_init(struct net_device *dev)
 	dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid;
 #endif
 
+	dev->needed_headroom = real_dev->needed_headroom;
 	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
 		dev->header_ops      = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;

From 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 18 Mar 2011 05:27:28 +0000
Subject: [PATCH 12/33] bridge: Reset IPCB when entering IP stack on NF_FORWARD

Whenever we enter the IP stack proper from bridge netfilter we
need to ensure that the skb is in a form the IP stack expects
it to be in.

The entry point on NF_FORWARD did not meet the requirements of
the IP stack, therefore leading to potential crashes/panics.

This patch fixes the problem.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f97af5590ba12..008ff6c4eecf3 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -739,6 +739,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 		nf_bridge->mask |= BRNF_PKT_TYPE;
 	}
 
+	if (br_parse_ip_options(skb))
+		return NF_DROP;
+
 	/* The physdev module checks on this */
 	nf_bridge->mask |= BRNF_BRIDGED;
 	nf_bridge->physoutdev = skb->dev;

From b51bdad63046d1d5a4807630cc8c02845cf67893 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 18 Mar 2011 08:50:37 +0000
Subject: [PATCH 13/33] headers: use __aligned_xx types for userspace

Now that we finally have __aligned_xx exported to userspace, convert
the headers that get exported over to the proper type.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ppp.h                    | 16 ++++++++--------
 include/linux/netfilter/nfnetlink_log.h   |  4 ++--
 include/linux/netfilter/nfnetlink_queue.h |  4 ++--
 include/linux/netfilter/xt_connbytes.h    |  4 ++--
 include/linux/netfilter/xt_quota.h        |  2 +-
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/include/linux/if_ppp.h b/include/linux/if_ppp.h
index fcef103aa3f60..c9ad383225763 100644
--- a/include/linux/if_ppp.h
+++ b/include/linux/if_ppp.h
@@ -114,14 +114,14 @@ struct pppol2tp_ioc_stats {
 	__u16		tunnel_id;	/* redundant */
 	__u16		session_id;	/* if zero, get tunnel stats */
 	__u32		using_ipsec:1;	/* valid only for session_id == 0 */
-	aligned_u64	tx_packets;
-	aligned_u64	tx_bytes;
-	aligned_u64	tx_errors;
-	aligned_u64	rx_packets;
-	aligned_u64	rx_bytes;
-	aligned_u64	rx_seq_discards;
-	aligned_u64	rx_oos_packets;
-	aligned_u64	rx_errors;
+	__aligned_u64	tx_packets;
+	__aligned_u64	tx_bytes;
+	__aligned_u64	tx_errors;
+	__aligned_u64	rx_packets;
+	__aligned_u64	rx_bytes;
+	__aligned_u64	rx_seq_discards;
+	__aligned_u64	rx_oos_packets;
+	__aligned_u64	rx_errors;
 };
 
 #define ifr__name       b.ifr_ifrn.ifrn_name
diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h
index ea9b8d3805272..90c2c9575bac3 100644
--- a/include/linux/netfilter/nfnetlink_log.h
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -28,8 +28,8 @@ struct nfulnl_msg_packet_hw {
 };
 
 struct nfulnl_msg_packet_timestamp {
-	aligned_be64	sec;
-	aligned_be64	usec;
+	__aligned_be64	sec;
+	__aligned_be64	usec;
 };
 
 enum nfulnl_attr_type {
diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h
index 2455fe5f4e016..af94e0014ebdf 100644
--- a/include/linux/netfilter/nfnetlink_queue.h
+++ b/include/linux/netfilter/nfnetlink_queue.h
@@ -25,8 +25,8 @@ struct nfqnl_msg_packet_hw {
 };
 
 struct nfqnl_msg_packet_timestamp {
-	aligned_be64	sec;
-	aligned_be64	usec;
+	__aligned_be64	sec;
+	__aligned_be64	usec;
 };
 
 enum nfqnl_attr_type {
diff --git a/include/linux/netfilter/xt_connbytes.h b/include/linux/netfilter/xt_connbytes.h
index 92fcbb0d193ea..f1d6c15bd9e37 100644
--- a/include/linux/netfilter/xt_connbytes.h
+++ b/include/linux/netfilter/xt_connbytes.h
@@ -17,8 +17,8 @@ enum xt_connbytes_direction {
 
 struct xt_connbytes_info {
 	struct {
-		aligned_u64 from;	/* count to be matched */
-		aligned_u64 to;		/* count to be matched */
+		__aligned_u64 from;	/* count to be matched */
+		__aligned_u64 to;	/* count to be matched */
 	} count;
 	__u8 what;		/* ipt_connbytes_what */
 	__u8 direction;	/* ipt_connbytes_direction */
diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h
index ca6e03e47a17c..9314723f39ca1 100644
--- a/include/linux/netfilter/xt_quota.h
+++ b/include/linux/netfilter/xt_quota.h
@@ -13,7 +13,7 @@ struct xt_quota_priv;
 struct xt_quota_info {
 	__u32 flags;
 	__u32 pad;
-	aligned_u64 quota;
+	__aligned_u64 quota;
 
 	/* Used internally by the kernel */
 	struct xt_quota_priv	*master;

From 93d03203d5a165d7a757546245dd1543dfe0ff80 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Fri, 18 Mar 2011 21:53:03 -0700
Subject: [PATCH 14/33] ftmac100: use resource_size()

The calculation is off-by-one.  It should be "end - start + 1".  This
patch fixes it to use resource_size() instead.  Oddly, the code already
uses resource size correctly a couple lines earlier when it calls
request_mem_region() for this memory.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ftmac100.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ftmac100.c b/drivers/net/ftmac100.c
index 1d6f4b8d393ab..a31661948c420 100644
--- a/drivers/net/ftmac100.c
+++ b/drivers/net/ftmac100.c
@@ -1102,7 +1102,7 @@ static int ftmac100_probe(struct platform_device *pdev)
 		goto err_req_mem;
 	}
 
-	priv->base = ioremap(res->start, res->end - res->start);
+	priv->base = ioremap(res->start, resource_size(res));
 	if (!priv->base) {
 		dev_err(&pdev->dev, "Failed to ioremap ethernet registers\n");
 		err = -EIO;

From dadaa10b077133e5c03333131b82ecb13679af2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20de=20Peslo=C3=BCan?= <nicolas.2p.debian@free.fr>
Date: Sat, 19 Mar 2011 13:36:18 -0700
Subject: [PATCH 15/33] bonding: fix a typo in a comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Nicolas de Pesloüan <nicolas.2p.debian@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1a6e9eb7af431..338bea147c645 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2130,7 +2130,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 }
 
 /*
-* First release a slave and than destroy the bond if no more slaves are left.
+* First release a slave and then destroy the bond if no more slaves are left.
 * Must be under rtnl_lock when this function is called.
 */
 static int  bond_release_and_destroy(struct net_device *bond_dev,

From b26fa4e0275426450238a14158bc1db24bb696e6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 19 Mar 2011 05:39:11 +0000
Subject: [PATCH 16/33] r8169: fix a bug in rtl8169_init_phy()

commit 54405cde7624 (r8169: support control of advertising.)
introduced a bug in rtl8169_init_phy()

Reported-by: Piotr Hosowicz <piotr@hosowicz.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Oliver Neukum <oliver@neukum.org>
Cc: Francois Romieu <romieu@fr.zoreil.com>
Tested-by: Anca Emanuel <anca.emanuel@gmail.com>
Tested-by: Piotr Hosowicz <piotr@hosowicz.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/r8169.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 5e403511289de..493b0de3848bf 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2685,9 +2685,9 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 	rtl8169_set_speed(dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL,
 		ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
 		ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
-		tp->mii.supports_gmii ?
+		(tp->mii.supports_gmii ?
 			ADVERTISED_1000baseT_Half |
-			ADVERTISED_1000baseT_Full : 0);
+			ADVERTISED_1000baseT_Full : 0));
 
 	if (RTL_R8(PHYstatus) & TBI_Enable)
 		netif_info(tp, link, dev, "TBI auto-negotiating\n");

From a769f4968396093d5cc1b1a86204cef579784b24 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 19 Mar 2011 23:06:33 -0700
Subject: [PATCH 17/33] niu: Rename NIU parent platform device name to fix
 conflict.

When the OF device driver bits were converted over to the platform
device infrastructure in commit 74888760d40b3ac9054f9c5fa07b566c0676ba2d
("dt/net: Eliminate users of of_platform_{,un}register_driver") we
inadvertantly created probing problems in the OF case.

The NIU driver creates a dummy platform device to represent the
board that contains one or more child NIU devices.  Unfortunately
we use the same name, "niu", as the OF device driver itself uses.

The result is that we try to probe the dummy "niu" parent device we
create, and since it has a NULL ofdevice pointer etc. everything
explodes:

[783019.128243] niu: niu.c:v1.1 (Apr 22, 2010)
[783019.128810] Unable to handle kernel NULL pointer dereference
[783019.128949] tsk->{mm,active_mm}->context = 000000000000039e
[783019.129078] tsk->{mm,active_mm}->pgd = fffff803afc5a000
[783019.129206]               \|/ ____ \|/
[783019.129213]               "@'/ .. \`@"
[783019.129220]               /_| \__/ |_\
[783019.129226]                  \__U_/
[783019.129378] modprobe(2004): Oops [#1]
[783019.129423] TSTATE: 0000000011001602 TPC: 0000000010052ff8 TNPC: 000000000061bbb4 Y: 00000000    Not tainted
[783019.129542] TPC: <niu_of_probe+0x3c/0x2dc [niu]>
[783019.129624] g0: 8080000000000000 g1: 0000000000000000 g2: 0000000010056000 g3: 0000000000000002
[783019.129733] g4: fffff803fc1da0c0 g5: fffff800441e2000 g6: fffff803fba84000 g7: 0000000000000000
[783019.129842] o0: fffff803fe7df010 o1: 0000000010055700 o2: 0000000000000000 o3: fffff803fbacaca0
[783019.129951] o4: 0000000000000080 o5: 0000000000777908 sp: fffff803fba866e1 ret_pc: 0000000010052ff4
[783019.130083] RPC: <niu_of_probe+0x38/0x2dc [niu]>
[783019.130165] l0: fffff803fe7df010 l1: fffff803fbacafc0 l2: fffff803fbacaca0 l3: ffffffffffffffed
[783019.130273] l4: 0000000000000000 l5: 000000007fffffff l6: fffff803fba86f40 l7: 0000000000000001
[783019.130382] i0: fffff803fe7df000 i1: fffff803fc20aba0 i2: 0000000000000000 i3: 0000000000000001
[783019.130490] i4: 0000000000000000 i5: 0000000000000000 i6: fffff803fba867a1 i7: 000000000062038c
[783019.130614] I7: <platform_drv_probe+0xc/0x20>

Fix by simply renaming the parent device to "niu-board".

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/niu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 40fa59e2fd5c6..32678b6c6b399 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -9501,7 +9501,7 @@ static struct niu_parent * __devinit niu_new_parent(struct niu *np,
 	struct niu_parent *p;
 	int i;
 
-	plat_dev = platform_device_register_simple("niu", niu_parent_index,
+	plat_dev = platform_device_register_simple("niu-board", niu_parent_index,
 						   NULL, 0);
 	if (IS_ERR(plat_dev))
 		return NULL;

From 5e0c1eb7e6b61998c7ecd39b7f69a15773d894d4 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sun, 20 Mar 2011 15:33:26 +0100
Subject: [PATCH 18/33] netfilter: ipset: fix address ranges at hash:*port*
 types

The hash:*port* types with IPv4 silently ignored when address ranges
with non TCP/UDP were added/deleted from the set and used the first
address from the range only.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 .../linux/netfilter/ipset/ip_set_getport.h    | 10 ++++++
 net/netfilter/ipset/ip_set_hash_ipport.c      | 34 ++++++-------------
 net/netfilter/ipset/ip_set_hash_ipportip.c    | 34 ++++++-------------
 net/netfilter/ipset/ip_set_hash_ipportnet.c   | 34 ++++++-------------
 net/netfilter/ipset/ip_set_hash_netport.c     | 30 +++++-----------
 5 files changed, 48 insertions(+), 94 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h
index 3882a81a3b3c8..5aebd170f899f 100644
--- a/include/linux/netfilter/ipset/ip_set_getport.h
+++ b/include/linux/netfilter/ipset/ip_set_getport.h
@@ -18,4 +18,14 @@ static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
 extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src,
 				__be16 *port);
 
+static inline bool ip_set_proto_with_ports(u8 proto)
+{
+	switch (proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		return true;
+	}
+	return false;
+}
+
 #endif /*_IP_SET_GETPORT_H*/
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index adbe787ea5dcc..b9214145d357f 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -150,6 +150,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipport4_elem data = { };
 	u32 ip, ip_to, p, port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -172,21 +173,15 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMP:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMP))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -195,7 +190,6 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
 	      tb[IPSET_ATTR_PORT_TO])) {
 		ret = adtfn(set, &data, timeout);
@@ -219,13 +213,12 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else
 		ip_to = ip;
 
-	port = ntohs(data.port);
-	if (tb[IPSET_ATTR_PORT_TO]) {
+	port_to = port = ntohs(data.port);
+	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port > port_to)
 			swap(port, port_to);
-	} else
-		port_to = port;
+	}
 
 	for (; !before(ip_to, ip); ip++)
 		for (p = port; p <= port_to; p++) {
@@ -361,6 +354,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipport6_elem data = { };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -385,21 +379,15 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMPV6:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -407,9 +395,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 22e23abb86c6d..4642872df6e13 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -154,6 +154,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipportip4_elem data = { };
 	u32 ip, ip_to, p, port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
@@ -180,21 +181,15 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMP:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMP))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -203,7 +198,6 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
 	      tb[IPSET_ATTR_PORT_TO])) {
 		ret = adtfn(set, &data, timeout);
@@ -227,13 +221,12 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else
 		ip_to = ip;
 
-	port = ntohs(data.port);
-	if (tb[IPSET_ATTR_PORT_TO]) {
+	port_to = port = ntohs(data.port);
+	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port > port_to)
 			swap(port, port_to);
-	} else
-		port_to = port;
+	}
 
 	for (; !before(ip_to, ip); ip++)
 		for (p = port; p <= port_to; p++) {
@@ -375,6 +368,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipportip6_elem data = { };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
@@ -403,21 +397,15 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMPV6:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -425,9 +413,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 6033e8b54bbdc..2cb84a54b7adb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -174,6 +174,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipportnet4_elem data = { .cidr = HOST_MASK };
 	u32 ip, ip_to, p, port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
@@ -208,21 +209,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMP:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMP))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -231,7 +226,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
 	      tb[IPSET_ATTR_PORT_TO])) {
 		ret = adtfn(set, &data, timeout);
@@ -255,13 +249,12 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else
 		ip_to = ip;
 
-	port = ntohs(data.port);
-	if (tb[IPSET_ATTR_PORT_TO]) {
+	port_to = port = ntohs(data.port);
+	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port > port_to)
 			swap(port, port_to);
-	} else
-		port_to = port;
+	}
 
 	for (; !before(ip_to, ip); ip++)
 		for (p = port; p <= port_to; p++) {
@@ -429,6 +422,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipportnet6_elem data = { .cidr = HOST_MASK };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
@@ -465,21 +459,15 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMPV6:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -487,9 +475,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 34a165626ee93..8598676f2a053 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -170,6 +170,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netport4_elem data = { .cidr = HOST_MASK };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -198,21 +199,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMP:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMP))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -220,9 +215,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
@@ -390,6 +383,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netport6_elem data = { .cidr = HOST_MASK };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -418,21 +412,15 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMPV6:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -440,9 +428,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}

From 5c1aba467828bf0574ec5754c84884d573f590af Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sun, 20 Mar 2011 15:35:01 +0100
Subject: [PATCH 19/33] netfilter: ipset: fix checking the type revision at
 create command

The revision of the set type was not checked at the create command: if the
userspace sent a valid set type but with not supported revision number,
it'd create a loop.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_core.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 618a615acc9d9..d6b48230a5401 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -94,16 +94,28 @@ static int
 find_set_type_get(const char *name, u8 family, u8 revision,
 		  struct ip_set_type **found)
 {
+	struct ip_set_type *type;
+	int err;
+
 	rcu_read_lock();
 	*found = find_set_type(name, family, revision);
 	if (*found) {
-		int err = !try_module_get((*found)->me);
-		rcu_read_unlock();
-		return err ? -EFAULT : 0;
+		err = !try_module_get((*found)->me) ? -EFAULT : 0;
+		goto unlock;
 	}
+	/* Make sure the type is loaded but we don't support the revision */
+	list_for_each_entry_rcu(type, &ip_set_type_list, list)
+		if (STREQ(type->name, name)) {
+			err = -IPSET_ERR_FIND_TYPE;
+			goto unlock;
+		}
 	rcu_read_unlock();
 
 	return try_to_load_type(name);
+
+unlock:
+	rcu_read_unlock();
+	return err;
 }
 
 /* Find a given set type by name and family.
@@ -116,7 +128,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
 	struct ip_set_type *type;
 	bool found = false;
 
-	*min = *max = 0;
+	*min = 255; *max = 0;
 	rcu_read_lock();
 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
 		if (STREQ(type->name, name) &&
@@ -124,7 +136,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
 			found = true;
 			if (type->revision < *min)
 				*min = type->revision;
-			else if (type->revision > *max)
+			if (type->revision > *max)
 				*max = type->revision;
 		}
 	rcu_read_unlock();

From db856674ac69e31946e56085239757cca3f7655f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 20 Mar 2011 15:40:06 +0100
Subject: [PATCH 20/33] netfilter: xtables: fix reentrancy

commit f3c5c1bfd4308 (make ip_tables reentrant) introduced a race in
handling the stackptr restore, at the end of ipt_do_table()

We should do it before the call to xt_info_rdunlock_bh(), or we allow
cpu preemption and another cpu overwrites stackptr of original one.

A second fix is to change the underflow test to check the origptr value
instead of 0 to detect underflow, or else we allow a jump from different
hooks.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_tables.c  | 4 ++--
 net/ipv6/netfilter/ip6_tables.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b09ed0d080f99..ffcea0d1678ee 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -387,7 +387,7 @@ ipt_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				if (*stackptr == 0) {
+				if (*stackptr <= origptr) {
 					e = get_entry(table_base,
 					    private->underflow[hook]);
 					pr_debug("Underflow (this is normal) "
@@ -427,10 +427,10 @@ ipt_do_table(struct sk_buff *skb,
 			/* Verdict */
 			break;
 	} while (!acpar.hotdrop);
-	xt_info_rdunlock_bh();
 	pr_debug("Exiting %s; resetting sp from %u to %u\n",
 		 __func__, *stackptr, origptr);
 	*stackptr = origptr;
+	xt_info_rdunlock_bh();
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c9598a9067d74..0b2af9b85cecd 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -410,7 +410,7 @@ ip6t_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				if (*stackptr == 0)
+				if (*stackptr <= origptr)
 					e = get_entry(table_base,
 					    private->underflow[hook]);
 				else
@@ -441,8 +441,8 @@ ip6t_do_table(struct sk_buff *skb,
 			break;
 	} while (!acpar.hotdrop);
 
-	xt_info_rdunlock_bh();
 	*stackptr = origptr;
+	xt_info_rdunlock_bh();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;

From 961ed183a9fd080cf306c659b8736007e44065a5 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segoon@openwall.com>
Date: Sun, 20 Mar 2011 15:42:52 +0100
Subject: [PATCH 21/33] netfilter: ipt_CLUSTERIP: fix buffer overflow

'buffer' string is copied from userspace.  It is not checked whether it is
zero terminated.  This may lead to overflow inside of simple_strtoul().
Changli Gao suggested to copy not more than user supplied 'size' bytes.

It was introduced before the git epoch.  Files "ipt_CLUSTERIP/*" are
root writable only by default, however, on some setups permissions might be
relaxed to e.g. network admin user.

Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 403ca57f60118..d609ac3cb9a45 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -664,8 +664,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
 	char buffer[PROC_WRITELEN+1];
 	unsigned long nodenum;
 
-	if (copy_from_user(buffer, input, PROC_WRITELEN))
+	if (size > PROC_WRITELEN)
+		return -EIO;
+	if (copy_from_user(buffer, input, size))
 		return -EFAULT;
+	buffer[size] = 0;
 
 	if (*buffer == '+') {
 		nodenum = simple_strtoul(buffer+1, NULL, 10);

From a454f0ccefbfdbfc0e1aa8a5f8010af5e48b8845 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Mon, 21 Mar 2011 18:08:28 -0700
Subject: [PATCH 22/33] xfrm: Fix initialize repl field of struct xfrm_state

Commit 'xfrm: Move IPsec replay detection functions to a separate file'
  (9fdc4883d92d20842c5acea77a4a21bb1574b495)
introduce repl field to struct xfrm_state, and only initialize it
under SA's netlink create path, the other path, such as pf_key,
ipcomp/ipcomp6 etc, the repl field remaining uninitialize. So if
the SA is created by pf_key, any input packet with SA's encryption
algorithm will cause panic.

    int xfrm_input()
    {
        ...
        x->repl->advance(x, seq);
        ...
    }

This patch fixed it by introduce new function __xfrm_init_state().

Pid: 0, comm: swapper Not tainted 2.6.38-next+ #14 Bochs Bochs
EIP: 0060:[<c078e5d5>] EFLAGS: 00010206 CPU: 0
EIP is at xfrm_input+0x31c/0x4cc
EAX: dd839c00 EBX: 00000084 ECX: 00000000 EDX: 01000000
ESI: dd839c00 EDI: de3a0780 EBP: dec1de88 ESP: dec1de64
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Process swapper (pid: 0, ti=dec1c000 task=c09c0f20 task.ti=c0992000)
Stack:
 00000000 00000000 00000002 c0ba27c0 00100000 01000000 de3a0798 c0ba27c0
 00000033 dec1de98 c0786848 00000000 de3a0780 dec1dea4 c0786868 00000000
 dec1debc c074ee56 e1da6b8c de3a0780 c074ed44 de3a07a8 dec1decc c074ef32
Call Trace:
 [<c0786848>] xfrm4_rcv_encap+0x22/0x27
 [<c0786868>] xfrm4_rcv+0x1b/0x1d
 [<c074ee56>] ip_local_deliver_finish+0x112/0x1b1
 [<c074ed44>] ? ip_local_deliver_finish+0x0/0x1b1
 [<c074ef32>] NF_HOOK.clone.1+0x3d/0x44
 [<c074ef77>] ip_local_deliver+0x3e/0x44
 [<c074ed44>] ? ip_local_deliver_finish+0x0/0x1b1
 [<c074ec03>] ip_rcv_finish+0x30a/0x332
 [<c074e8f9>] ? ip_rcv_finish+0x0/0x332
 [<c074ef32>] NF_HOOK.clone.1+0x3d/0x44
 [<c074f188>] ip_rcv+0x20b/0x247
 [<c074e8f9>] ? ip_rcv_finish+0x0/0x332
 [<c072797d>] __netif_receive_skb+0x373/0x399
 [<c0727bc1>] netif_receive_skb+0x4b/0x51
 [<e0817e2a>] cp_rx_poll+0x210/0x2c4 [8139cp]
 [<c072818f>] net_rx_action+0x9a/0x17d
 [<c0445b5c>] __do_softirq+0xa1/0x149
 [<c0445abb>] ? __do_softirq+0x0/0x149

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    |  1 +
 net/xfrm/xfrm_state.c | 15 ++++++++++++++-
 net/xfrm/xfrm_user.c  |  2 +-
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 42a8c32a10e24..cffa5dc664492 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1430,6 +1430,7 @@ extern void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
 extern u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
 extern int xfrm_init_replay(struct xfrm_state *x);
 extern int xfrm_state_mtu(struct xfrm_state *x, int mtu);
+extern int __xfrm_init_state(struct xfrm_state *x, bool init_replay);
 extern int xfrm_init_state(struct xfrm_state *x);
 extern int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
 extern int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi,
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d575f05348688..f83a3d1da81b9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1907,7 +1907,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 	return res;
 }
 
-int xfrm_init_state(struct xfrm_state *x)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 {
 	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_mode *inner_mode;
@@ -1980,12 +1980,25 @@ int xfrm_init_state(struct xfrm_state *x)
 	if (x->outer_mode == NULL)
 		goto error;
 
+	if (init_replay) {
+		err = xfrm_init_replay(x);
+		if (err)
+			goto error;
+	}
+
 	x->km.state = XFRM_STATE_VALID;
 
 error:
 	return err;
 }
 
+EXPORT_SYMBOL(__xfrm_init_state);
+
+int xfrm_init_state(struct xfrm_state *x)
+{
+	return __xfrm_init_state(x, true);
+}
+
 EXPORT_SYMBOL(xfrm_init_state);
 
 int __net_init xfrm_state_init(struct net *net)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 706385ae3e4bb..fc152d28753c4 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -511,7 +511,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_mark_get(attrs, &x->mark);
 
-	err = xfrm_init_state(x);
+	err = __xfrm_init_state(x, false);
 	if (err)
 		goto error;
 

From 8aa525a9340da4227797a06221ca08399006635f Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Mon, 21 Mar 2011 18:10:25 -0700
Subject: [PATCH 23/33] l2tp: fix possible oops on l2tp_eth module unload

A struct used in the l2tp_eth driver for registering network namespace
ops was incorrectly marked as __net_initdata, leading to oops when
module unloaded.

BUG: unable to handle kernel paging request at ffffffffa00ec098
IP: [<ffffffff8123dbd8>] ops_exit_list+0x7/0x4b
PGD 142d067 PUD 1431063 PMD 195da8067 PTE 0
Oops: 0000 [#1] SMP
last sysfs file: /sys/module/l2tp_eth/refcnt
Call Trace:
 [<ffffffff8123dc94>] ? unregister_pernet_operations+0x32/0x93
 [<ffffffff8123dd20>] ? unregister_pernet_device+0x2b/0x38
 [<ffffffff81068b6e>] ? sys_delete_module+0x1b8/0x222
 [<ffffffff810c7300>] ? do_munmap+0x254/0x318
 [<ffffffff812c64e5>] ? page_fault+0x25/0x30
 [<ffffffff812c6952>] ? system_call_fastpath+0x16/0x1b

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8d9ce0accc982..a8193f52c13c2 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -283,7 +283,7 @@ static __net_init int l2tp_eth_init_net(struct net *net)
 	return 0;
 }
 
-static __net_initdata struct pernet_operations l2tp_eth_net_ops = {
+static struct pernet_operations l2tp_eth_net_ops = {
 	.init = l2tp_eth_init_net,
 	.id   = &l2tp_eth_net_id,
 	.size = sizeof(struct l2tp_eth_net),

From 20246a800389fe5442675c59863fec5a4f520c7c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 21 Mar 2011 18:12:54 -0700
Subject: [PATCH 24/33] snmp: SNMP_UPD_PO_STATS_BH() always called from softirq

We dont need to test if we run from softirq context, we definitely are.

This saves few instructions in ip_rcv() & ip_rcv_finish()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/snmp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/snmp.h b/include/net/snmp.h
index 762e2abce8898..27461d6dd46f2 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -150,7 +150,7 @@ struct linux_xfrm_mib {
 #define SNMP_UPD_PO_STATS_BH(mib, basefield, addend)	\
 	do { \
 		__typeof__(*mib[0]) *ptr = \
-			__this_cpu_ptr((mib)[!in_softirq()]); \
+			__this_cpu_ptr((mib)[0]); \
 		ptr->mibs[basefield##PKTS]++; \
 		ptr->mibs[basefield##OCTETS] += addend;\
 	} while (0)
@@ -202,7 +202,7 @@ struct linux_xfrm_mib {
 #define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend)			\
 	do {								\
 		__typeof__(*mib[0]) *ptr;				\
-		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		ptr = __this_cpu_ptr((mib)[0]);				\
 		u64_stats_update_begin(&ptr->syncp);			\
 		ptr->mibs[basefield##PKTS]++;				\
 		ptr->mibs[basefield##OCTETS] += addend;			\

From 674f2115995b7b588cbf3540c9f9b2448a8c7ea8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 21 Mar 2011 18:16:39 -0700
Subject: [PATCH 25/33] ipx: fix ipx_release()

Commit b0d0d915d1d1a0 (remove the BKL) added a regression, because
sock_put() can free memory while we are going to use it later.

Fix is to delay sock_put() _after_ release_sock().

Reported-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipx/af_ipx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 2731b51923d1f..9680226640ef7 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -148,7 +148,6 @@ static void ipx_destroy_socket(struct sock *sk)
 	ipx_remove_socket(sk);
 	skb_queue_purge(&sk->sk_receive_queue);
 	sk_refcnt_debug_dec(sk);
-	sock_put(sk);
 }
 
 /*
@@ -1404,6 +1403,7 @@ static int ipx_release(struct socket *sock)
 	sk_refcnt_debug_release(sk);
 	ipx_destroy_socket(sk);
 	release_sock(sk);
+	sock_put(sk);
 out:
 	return 0;
 }

From b20e7bbfc7a15a4182730f0936433145992b4b06 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 21 Mar 2011 18:18:00 -0700
Subject: [PATCH 26/33] net/appletalk: fix atalk_release use after free

The BKL removal in appletalk introduced a use-after-free problem,
where atalk_destroy_socket frees a sock, but we still release
the socket lock on it.

An easy fix is to take an extra reference on the sock and sock_put
it when returning from atalk_release.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/ddp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 3d4f4b0434066..206e771e82d17 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1051,6 +1051,7 @@ static int atalk_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 
+	sock_hold(sk);
 	lock_sock(sk);
 	if (sk) {
 		sock_orphan(sk);
@@ -1058,6 +1059,8 @@ static int atalk_release(struct socket *sock)
 		atalk_destroy_socket(sk);
 	}
 	release_sock(sk);
+	sock_put(sk);
+
 	return 0;
 }
 

From 4f2d56c45fec7c15169599cab05e9f6df18769d0 Mon Sep 17 00:00:00 2001
From: Jan Altenberg <jan@linutronix.de>
Date: Mon, 21 Mar 2011 18:19:26 -0700
Subject: [PATCH 27/33] can: c_can: Do basic c_can configuration _before_
 enabling the interrupts

I ran into some trouble while testing the SocketCAN driver for the BOSCH
C_CAN controller. The interface is not correctly initialized, if I put
some CAN traffic on the line, _while_ the interface is being started
(which means: the interface doesn't come up correcty, if there's some RX
traffic while doing 'ifconfig can0 up').

The current implementation enables the controller interrupts _before_
doing the basic c_can configuration. I think, this should be done the
other way round.

The patch below fixes things for me.

Signed-off-by: Jan Altenberg <jan@linutronix.de>
Acked-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Acked-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/c_can/c_can.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 14050786218ae..110eda01843c6 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -633,9 +633,6 @@ static void c_can_start(struct net_device *dev)
 {
 	struct c_can_priv *priv = netdev_priv(dev);
 
-	/* enable status change, error and module interrupts */
-	c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
-
 	/* basic c_can configuration */
 	c_can_chip_config(dev);
 
@@ -643,6 +640,9 @@ static void c_can_start(struct net_device *dev)
 
 	/* reset tx helper pointers */
 	priv->tx_next = priv->tx_echo = 0;
+
+	/* enable status change, error and module interrupts */
+	c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
 }
 
 static void c_can_stop(struct net_device *dev)

From ac0a121d7906b049dfee3649f886c969fbb3c1b7 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 21 Mar 2011 18:20:26 -0700
Subject: [PATCH 28/33] net: fix incorrect spelling in drop monitor protocol

It was pointed out to me recently that my spelling could be better :)

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 36e603c78ce9f..706502ff64aa8 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -350,7 +350,7 @@ static int __init init_net_drop_monitor(void)
 	struct per_cpu_dm_data *data;
 	int cpu, rc;
 
-	printk(KERN_INFO "Initalizing network drop monitor service\n");
+	printk(KERN_INFO "Initializing network drop monitor service\n");
 
 	if (sizeof(void *) > 8) {
 		printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");

From d5cd92448fded12c91f7574e49747c5f7d975a8d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Mon, 21 Mar 2011 18:22:22 -0700
Subject: [PATCH 29/33] macvlan: Fix use after free of struct macvlan_port.

When the macvlan driver was extended to call unregisgter_netdevice_queue
in 23289a37e2b127dfc4de1313fba15bb4c9f0cd5b, a use after free of struct
macvlan_port was introduced.  The code in dellink relied on unregister_netdevice
actually unregistering the net device so it would be safe to free macvlan_port.

Since unregister_netdevice_queue can just queue up the unregister instead of
performing the unregiser immediately we free the macvlan_port too soon and
then the code in macvlan_stop removes the macaddress for the set of macaddress
to listen for and uses memory that has already been freed.

To fix this add a reference count to track when it is safe to free the macvlan_port
and move the call of macvlan_port_destroy into macvlan_uninit which is guaranteed
to be called after the final macvlan_port_close.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 5b37d3c191e49..78e34e9e4f008 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -39,8 +39,11 @@ struct macvlan_port {
 	struct list_head	vlans;
 	struct rcu_head		rcu;
 	bool 			passthru;
+	int			count;
 };
 
+static void macvlan_port_destroy(struct net_device *dev);
+
 #define macvlan_port_get_rcu(dev) \
 	((struct macvlan_port *) rcu_dereference(dev->rx_handler_data))
 #define macvlan_port_get(dev) ((struct macvlan_port *) dev->rx_handler_data)
@@ -457,8 +460,13 @@ static int macvlan_init(struct net_device *dev)
 static void macvlan_uninit(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_port *port = vlan->port;
 
 	free_percpu(vlan->pcpu_stats);
+
+	port->count -= 1;
+	if (!port->count)
+		macvlan_port_destroy(port->dev);
 }
 
 static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
@@ -691,12 +699,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 		vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]);
 
 	if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
-		if (!list_empty(&port->vlans))
+		if (port->count)
 			return -EINVAL;
 		port->passthru = true;
 		memcpy(dev->dev_addr, lowerdev->dev_addr, ETH_ALEN);
 	}
 
+	port->count += 1;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto destroy_port;
@@ -707,7 +716,8 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	return 0;
 
 destroy_port:
-	if (list_empty(&port->vlans))
+	port->count -= 1;
+	if (!port->count)
 		macvlan_port_destroy(lowerdev);
 
 	return err;
@@ -725,13 +735,9 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev,
 void macvlan_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
-	struct macvlan_port *port = vlan->port;
 
 	list_del(&vlan->list);
 	unregister_netdevice_queue(dev, head);
-
-	if (list_empty(&port->vlans))
-		macvlan_port_destroy(port->dev);
 }
 EXPORT_SYMBOL_GPL(macvlan_dellink);
 

From 9d2a8fa96a44ba242de3a6f56acaef7a40a97b97 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Mon, 21 Mar 2011 18:23:34 -0700
Subject: [PATCH 30/33] net ipv6: Fix duplicate /proc/sys/net/ipv6/neigh
 directory entries.

When I was fixing issues with unregisgtering tables under /proc/sys/net/ipv6/neigh
by adding a mount point it appears I missed a critical ordering issue, in the
ipv6 initialization.  I had not realized that ipv6_sysctl_register is called
at the very end of the ipv6 initialization and in particular after we call
neigh_sysctl_register from ndisc_init.

"neigh" needs to be initialized in ipv6_static_sysctl_register which is
the first ipv6 table to initialized, and definitely before ndisc_init.
This removes the weirdness of duplicate tables while still providing a
"neigh" mount point which prevents races in sysctl unregistering.

This was initially reported at https://bugzilla.kernel.org/show_bug.cgi?id=31232
Reported-by: sunkan@zappa.cx
Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7cb65ef79f9cd..6dcf5e7d661bd 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,6 +17,16 @@
 
 static struct ctl_table empty[1];
 
+static ctl_table ipv6_static_skeleton[] = {
+	{
+		.procname	= "neigh",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= empty,
+	},
+	{ }
+};
+
 static ctl_table ipv6_table_template[] = {
 	{
 		.procname	= "route",
@@ -37,12 +47,6 @@ static ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-	{
-		.procname	= "neigh",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= empty,
-	},
 	{ }
 };
 
@@ -160,7 +164,7 @@ static struct ctl_table_header *ip6_base;
 
 int ipv6_static_sysctl_register(void)
 {
-	ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty);
+	ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton);
 	if (ip6_base == NULL)
 		return -ENOMEM;
 	return 0;

From 675071a2ef3f4a6d25ee002a7437d50431168344 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Mon, 21 Mar 2011 18:24:53 -0700
Subject: [PATCH 31/33] veth: Fix the byte counters

Commit 44540960 "veth: move loopback logic to common location" introduced
a bug in the packet counters.  I don't understand why that happened as it
is not explained in the comments and the mut check in dev_forward_skb
retains the assumption that skb->len is the total length of the packet.

I just measured this emperically by setting up a veth pair between two
noop network namespaces setting and attempting a telnet connection between
the two.  I saw three packets in each direction and the byte counters were
exactly 14*3 = 42 bytes high in each direction.  I got the actual
packet lengths with tcpdump.

So remove the extra ETH_HLEN from the veth byte count totals.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 105d7f0630ccb..2de9b90c5f8f0 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -171,7 +171,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (skb->ip_summed == CHECKSUM_NONE)
 		skb->ip_summed = rcv_priv->ip_summed;
 
-	length = skb->len + ETH_HLEN;
+	length = skb->len;
 	if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
 		goto rx_drop;
 

From f40f94fc6c3b5a5542d5ed976e9ff69a3b463802 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 21 Mar 2011 10:15:40 +0000
Subject: [PATCH 32/33] ipvs: fix a typo in __ip_vs_control_init()

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Simon Horman <horms@verge.net.au>
Cc: Julian Anastasov <ja@ssi.bg>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index b799cea31f954..33733c8872e70 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3605,7 +3605,7 @@ int __net_init __ip_vs_control_init(struct net *net)
 
 	/* procfs stats */
 	ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-	if (ipvs->tot_stats.cpustats) {
+	if (!ipvs->tot_stats.cpustats) {
 		pr_err("%s(): alloc_percpu.\n", __func__);
 		return -ENOMEM;
 	}

From 736561a01f11114146b1b7f82d486fa9c95828ef Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Mon, 21 Mar 2011 15:18:01 +0000
Subject: [PATCH 33/33] IPVS: Use global mutex in ip_vs_app.c

As part of the work to make IPVS network namespace aware
__ip_vs_app_mutex was replaced by a per-namespace lock,
ipvs->app_mutex. ipvs->app_key is also supplied for debugging purposes.

Unfortunately this implementation results in ipvs->app_key residing
in non-static storage which at the very least causes a lockdep warning.

This patch takes the rather heavy-handed approach of reinstating
__ip_vs_app_mutex which will cover access to the ipvs->list_head
of all network namespaces.

[   12.610000] IPVS: Creating netns size=2456 id=0
[   12.630000] IPVS: Registered protocols (TCP, UDP, SCTP, AH, ESP)
[   12.640000] BUG: key ffff880003bbf1a0 not in .data!
[   12.640000] ------------[ cut here ]------------
[   12.640000] WARNING: at kernel/lockdep.c:2701 lockdep_init_map+0x37b/0x570()
[   12.640000] Hardware name: Bochs
[   12.640000] Pid: 1, comm: swapper Tainted: G        W 2.6.38-kexec-06330-g69b7efe-dirty #122
[   12.650000] Call Trace:
[   12.650000]  [<ffffffff8102e685>] warn_slowpath_common+0x75/0xb0
[   12.650000]  [<ffffffff8102e6d5>] warn_slowpath_null+0x15/0x20
[   12.650000]  [<ffffffff8105967b>] lockdep_init_map+0x37b/0x570
[   12.650000]  [<ffffffff8105829d>] ? trace_hardirqs_on+0xd/0x10
[   12.650000]  [<ffffffff81055ad8>] debug_mutex_init+0x38/0x50
[   12.650000]  [<ffffffff8104bc4c>] __mutex_init+0x5c/0x70
[   12.650000]  [<ffffffff81685ee7>] __ip_vs_app_init+0x64/0x86
[   12.660000]  [<ffffffff81685a3b>] ? ip_vs_init+0x0/0xff
[   12.660000]  [<ffffffff811b1c33>] T.620+0x43/0x170
[   12.660000]  [<ffffffff811b1e9a>] ? register_pernet_subsys+0x1a/0x40
[   12.660000]  [<ffffffff81685a3b>] ? ip_vs_init+0x0/0xff
[   12.660000]  [<ffffffff81685a3b>] ? ip_vs_init+0x0/0xff
[   12.660000]  [<ffffffff811b1db7>] register_pernet_operations+0x57/0xb0
[   12.660000]  [<ffffffff81685a3b>] ? ip_vs_init+0x0/0xff
[   12.670000]  [<ffffffff811b1ea9>] register_pernet_subsys+0x29/0x40
[   12.670000]  [<ffffffff81685f19>] ip_vs_app_init+0x10/0x12
[   12.670000]  [<ffffffff81685a87>] ip_vs_init+0x4c/0xff
[   12.670000]  [<ffffffff8166562c>] do_one_initcall+0x7a/0x12e
[   12.670000]  [<ffffffff8166583e>] kernel_init+0x13e/0x1c2
[   12.670000]  [<ffffffff8128c134>] kernel_thread_helper+0x4/0x10
[   12.670000]  [<ffffffff8128ad40>] ? restore_args+0x0/0x30
[   12.680000]  [<ffffffff81665700>] ? kernel_init+0x0/0x1c2
[   12.680000]  [<ffffffff8128c130>] ? kernel_thread_helper+0x0/0x1global0

Signed-off-by: Simon Horman <horms@verge.net.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Julian Anastasov <ja@ssi.bg>
Cc: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_vs.h            |  2 --
 net/netfilter/ipvs/ip_vs_app.c | 23 ++++++++++-------------
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 272f59336b737..30b49ed72f0d0 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -801,8 +801,6 @@ struct netns_ipvs {
 	struct list_head	rs_table[IP_VS_RTAB_SIZE];
 	/* ip_vs_app */
 	struct list_head	app_list;
-	struct mutex		app_mutex;
-	struct lock_class_key	app_key;	/* mutex debuging */
 
 	/* ip_vs_proto */
 	#define IP_VS_PROTO_TAB_SIZE	32	/* must be power of 2 */
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 5c48ffb60c283..2dc6de13ac189 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,6 +43,8 @@ EXPORT_SYMBOL(register_ip_vs_app);
 EXPORT_SYMBOL(unregister_ip_vs_app);
 EXPORT_SYMBOL(register_ip_vs_app_inc);
 
+static DEFINE_MUTEX(__ip_vs_app_mutex);
+
 /*
  *	Get an ip_vs_app object
  */
@@ -167,14 +169,13 @@ int
 register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
 		       __u16 port)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	int result;
 
-	mutex_lock(&ipvs->app_mutex);
+	mutex_lock(&__ip_vs_app_mutex);
 
 	result = ip_vs_app_inc_new(net, app, proto, port);
 
-	mutex_unlock(&ipvs->app_mutex);
+	mutex_unlock(&__ip_vs_app_mutex);
 
 	return result;
 }
@@ -189,11 +190,11 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 	/* increase the module use count */
 	ip_vs_use_count_inc();
 
-	mutex_lock(&ipvs->app_mutex);
+	mutex_lock(&__ip_vs_app_mutex);
 
 	list_add(&app->a_list, &ipvs->app_list);
 
-	mutex_unlock(&ipvs->app_mutex);
+	mutex_unlock(&__ip_vs_app_mutex);
 
 	return 0;
 }
@@ -205,10 +206,9 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
  */
 void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_app *inc, *nxt;
 
-	mutex_lock(&ipvs->app_mutex);
+	mutex_lock(&__ip_vs_app_mutex);
 
 	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
 		ip_vs_app_inc_release(net, inc);
@@ -216,7 +216,7 @@ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 
 	list_del(&app->a_list);
 
-	mutex_unlock(&ipvs->app_mutex);
+	mutex_unlock(&__ip_vs_app_mutex);
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
@@ -501,7 +501,7 @@ static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 	struct net *net = seq_file_net(seq);
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	mutex_lock(&ipvs->app_mutex);
+	mutex_lock(&__ip_vs_app_mutex);
 
 	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 }
@@ -535,9 +535,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
 {
-	struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
-
-	mutex_unlock(&ipvs->app_mutex);
+	mutex_unlock(&__ip_vs_app_mutex);
 }
 
 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -583,7 +581,6 @@ static int __net_init __ip_vs_app_init(struct net *net)
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	INIT_LIST_HEAD(&ipvs->app_list);
-	__mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
 	proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
 	return 0;
 }