From 27ebe308af7c14aed0711e25817c6a1235bd16ff Mon Sep 17 00:00:00 2001
From: Anthony Liguori <aliguori@us.ibm.com>
Date: Sun, 2 Mar 2008 16:37:48 -0600
Subject: [PATCH 1/5] virtio: Use spin_lock_irqsave/restore for virtio-pci

virtio-pci acquires its spin lock in an interrupt context so it's necessary
to use spin_lock_irqsave/restore variants.  This patch fixes guest SMP when
using virtio devices in KVM.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 26f787ddd5ff5..59a8f73dec730 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -177,6 +177,7 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 	struct virtio_pci_device *vp_dev = opaque;
 	struct virtio_pci_vq_info *info;
 	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
 	u8 isr;
 
 	/* reading the ISR has the effect of also clearing it so it's very
@@ -197,12 +198,12 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 			drv->config_changed(&vp_dev->vdev);
 	}
 
-	spin_lock(&vp_dev->lock);
+	spin_lock_irqsave(&vp_dev->lock, flags);
 	list_for_each_entry(info, &vp_dev->virtqueues, node) {
 		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
 			ret = IRQ_HANDLED;
 	}
-	spin_unlock(&vp_dev->lock);
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
 	return ret;
 }
@@ -214,6 +215,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
 	struct virtqueue *vq;
+	unsigned long flags;
 	u16 num;
 	int err;
 
@@ -255,9 +257,9 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	vq->priv = info;
 	info->vq = vq;
 
-	spin_lock(&vp_dev->lock);
+	spin_lock_irqsave(&vp_dev->lock, flags);
 	list_add(&info->node, &vp_dev->virtqueues);
-	spin_unlock(&vp_dev->lock);
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
 	return vq;
 
@@ -274,10 +276,11 @@ static void vp_del_vq(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 	struct virtio_pci_vq_info *info = vq->priv;
+	unsigned long flags;
 
-	spin_lock(&vp_dev->lock);
+	spin_lock_irqsave(&vp_dev->lock, flags);
 	list_del(&info->node);
-	spin_unlock(&vp_dev->lock);
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
 	vring_del_virtqueue(vq);
 

From c483934670d31e064e18967bb679c1079b54ea72 Mon Sep 17 00:00:00 2001
From: Jeremy Katz <katzj@redhat.com>
Date: Sun, 2 Mar 2008 17:00:15 -0500
Subject: [PATCH 2/5] virtio: Fix sysfs bits to have proper block symlink

Fix up so that the virtio_blk devices in sysfs link correctly to their
block device.  This then allows them to be detected by hal, etc

Signed-off-by: Jeremy Katz <katzj@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/block/virtio_blk.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 3b1a68d6eddb3..0cfbe8c594a55 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -238,6 +238,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	vblk->disk->first_minor = index_to_minor(index);
 	vblk->disk->private_data = vblk;
 	vblk->disk->fops = &virtblk_fops;
+	vblk->disk->driverfs_dev = &vdev->dev;
 	index++;
 
 	/* If barriers are supported, tell block layer that queue is ordered */

From bdc1681cdf1ab6a65fa935a2b3f8fc63b20c54ea Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 17 Mar 2008 22:58:15 -0500
Subject: [PATCH 3/5] virtio: handle > 2 billion page balloon targets

If the host asks for a huge target towards_target() can overflow, and
we up oops as we try to release more pages than we have.  The simple
fix is to use a 64-bit value.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_balloon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index c8a4332d1132e..0b3efc31ee6d3 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -152,7 +152,7 @@ static void virtballoon_changed(struct virtio_device *vdev)
 	wake_up(&vb->config_change);
 }
 
-static inline int towards_target(struct virtio_balloon *vb)
+static inline s64 towards_target(struct virtio_balloon *vb)
 {
 	u32 v;
 	__virtio_config_val(vb->vdev,
@@ -176,7 +176,7 @@ static int balloon(void *_vballoon)
 
 	set_freezable();
 	while (!kthread_should_stop()) {
-		int diff;
+		s64 diff;
 
 		try_to_freeze();
 		wait_event_interruptible(vb->config_change,

From da74e89d40995600b3b07ac500084920247687ce Mon Sep 17 00:00:00 2001
From: Amit Shah <amitshah@gmx.net>
Date: Fri, 29 Feb 2008 16:24:50 +0530
Subject: [PATCH 4/5] virtio: Enable netpoll interface for netconsole logging

Add a new poll_controller handler that the netpoll interface needs.

This enables netconsole logging from a kvm guest over the virtio
net interface.

Signed-off-by: Amit Shah <amitshah@gmx.net>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 19fd4cb0ddf85..e575df83e5c21 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -294,6 +294,15 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void virtnet_netpoll(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+
+	napi_schedule(&vi->napi);
+}
+#endif
+
 static int virtnet_open(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
@@ -336,6 +345,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	dev->stop = virtnet_close;
 	dev->hard_start_xmit = start_xmit;
 	dev->features = NETIF_F_HIGHDMA;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	dev->poll_controller = virtnet_netpoll;
+#endif
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	/* Do we support "hardware" checksums? */

From 4265f161b6bb7b31163671329b1142b9023bf4e3 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 14 Mar 2008 14:17:05 +0100
Subject: [PATCH 5/5] virtio: fix race in enable_cb

There is a race in virtio_net, dealing with disabling/enabling the callback.
I saw the following oops:

kernel BUG at /space/kvm/drivers/virtio/virtio_ring.c:218!
illegal operation: 0001 [#1] SMP
Modules linked in: sunrpc dm_mod
CPU: 2 Not tainted 2.6.25-rc1zlive-host-10623-gd358142-dirty #99
Process swapper (pid: 0, task: 000000000f85a610, ksp: 000000000f873c60)
Krnl PSW : 0404300180000000 00000000002b81a6 (vring_disable_cb+0x16/0x20)
           R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:3 PM:0 EA:3
Krnl GPRS: 0000000000000001 0000000000000001 0000000010005800 0000000000000001
           000000000f3a0900 000000000f85a610 0000000000000000 0000000000000000
           0000000000000000 000000000f870000 0000000000000000 0000000000001237
           000000000f3a0920 000000000010ff74 00000000002846f6 000000000fa0bcd8
Krnl Code: 00000000002b819a: a7110001           tmll    %r1,1
           00000000002b819e: a7840004           brc     8,2b81a6
           00000000002b81a2: a7f40001           brc     15,2b81a4
          >00000000002b81a6: a51b0001           oill    %r1,1
           00000000002b81aa: 40102000           sth     %r1,0(%r2)
           00000000002b81ae: 07fe               bcr     15,%r14
           00000000002b81b0: eb7ff0380024       stmg    %r7,%r15,56(%r15)
           00000000002b81b6: a7f13e00           tmll    %r15,15872
Call Trace:
([<000000000fa0bcd0>] 0xfa0bcd0)
 [<00000000002b8350>] vring_interrupt+0x5c/0x6c
 [<000000000010ab08>] do_extint+0xb8/0xf0
 [<0000000000110716>] ext_no_vtime+0x16/0x1a
 [<0000000000107e72>] cpu_idle+0x1c2/0x1e0

The problem can be triggered with a high amount of host->guest traffic.
I think its the following race:

poll says netif_rx_complete
poll calls enable_cb
enable_cb opens the interrupt mask
a new packet comes, an interrupt is triggered----\
enable_cb sees that there is more work           |
enable_cb disables the interrupt                 |
       .                                         V
       .                            interrupt is delivered
       .                            skb_recv_done does atomic napi test, ok
 some waiting                       disable_cb is called->check fails->bang!
       .
poll would do napi check
poll would do disable_cb

The fix is to let enable_cb not disable the interrupt again, but expect the
caller to do the cleanup if it returns false. In that case, the interrupt is
only disabled, if the napi test_set_bit was successful.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (cleaned up doco)
---
 drivers/net/virtio_net.c     | 10 +++++++---
 drivers/virtio/virtio_ring.c |  1 -
 include/linux/virtio.h       |  5 +++--
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e575df83e5c21..b58472cf76f86 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -203,8 +203,11 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 	if (received < budget) {
 		netif_rx_complete(vi->dev, napi);
 		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
-		    && netif_rx_reschedule(vi->dev, napi))
+		    && napi_schedule_prep(napi)) {
+			vi->rvq->vq_ops->disable_cb(vi->rvq);
+			__netif_rx_schedule(vi->dev, napi);
 			goto again;
+		}
 	}
 
 	return received;
@@ -278,10 +281,11 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev)
 		pr_debug("%s: virtio not prepared to send\n", dev->name);
 		netif_stop_queue(dev);
 
-		/* Activate callback for using skbs: if this fails it
+		/* Activate callback for using skbs: if this returns false it
 		 * means some were used in the meantime. */
 		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
-			printk("Unlikely: restart svq failed\n");
+			printk("Unlikely: restart svq race\n");
+			vi->svq->vq_ops->disable_cb(vi->svq);
 			netif_start_queue(dev);
 			goto again;
 		}
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 3a28c1382131d..aa714028641e6 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -232,7 +232,6 @@ static bool vring_enable_cb(struct virtqueue *_vq)
 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 	mb();
 	if (unlikely(more_used(vq))) {
-		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 		END_USE(vq);
 		return false;
 	}
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 260d1fcf29a42..12c18ac1b973c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -43,8 +43,9 @@ struct virtqueue
  *	vq: the struct virtqueue we're talking about.
  * @enable_cb: restart callbacks after disable_cb.
  *	vq: the struct virtqueue we're talking about.
- *	This returns "false" (and doesn't re-enable) if there are pending
- *	buffers in the queue, to avoid a race.
+ *	This re-enables callbacks; it returns "false" if there are pending
+ *	buffers in the queue, to detect a possible race between the driver
+ *	checking for more work, and enabling callbacks.
  *
  * Locking rules are straightforward: the driver is responsible for
  * locking.  No two operations may be invoked simultaneously.