Skip to content

Commit

Permalink
lguest: Tell Guest net not to notify us on every packet xmit
Browse files Browse the repository at this point in the history
virtio_ring has the ability to suppress notifications.  This prevents
a guest exit for every packet, but we need to set a timer on packet
receipt to re-check if there were any remaining packets.

Here are the times for 1G TCP Guest->Host with different timeout
settings (it matters because the TCP window doesn't grow big enough to
fill the entire buffer):

Timeout value	Seconds		Xmit/Recv/Timeout
None (before)	25.3784		xmit 7750233 recv 1
2500 usec	62.5119		xmit 207020 recv 2 timeout 207020
1000 usec	34.5379		xmit 207003 recv 2 timeout 207003
750 usec	29.2305		xmit 207002 recv 1 timeout 207002
500 usec	19.1887		xmit 561141 recv 1 timeout 559657
250 usec	20.0465		xmit 214128 recv 2 timeout 214110
100 usec	19.2583		xmit 561621 recv 1 timeout 560153

(Note that these values are sensitive to the GSO patches which come
 later, and probably other traffic-related variables, so take with a
 large grain of salt).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
  • Loading branch information
Rusty Russell committed Jul 28, 2008
1 parent 5dae785 commit a161883
Showing 1 changed file with 93 additions and 13 deletions.
106 changes: 93 additions & 13 deletions Documentation/lguest/lguest.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <sched.h>
#include <limits.h>
#include <stddef.h>
#include <signal.h>
#include "linux/lguest_launcher.h"
#include "linux/virtio_config.h"
#include "linux/virtio_net.h"
Expand Down Expand Up @@ -81,6 +82,8 @@ static int waker_fd;
static void *guest_base;
/* The maximum guest physical address allowed, and maximum possible. */
static unsigned long guest_limit, guest_max;
/* The pipe for the signal handler to write to. */
static int timeoutpipe[2];

/* a per-cpu variable indicating whose vcpu is currently running */
static unsigned int __thread cpu_id;
Expand Down Expand Up @@ -156,11 +159,14 @@ struct virtqueue
/* Last available index we saw. */
u16 last_avail_idx;

/* The routine to call when the Guest pings us. */
void (*handle_output)(int fd, struct virtqueue *me);
/* The routine to call when the Guest pings us, or timeout. */
void (*handle_output)(int fd, struct virtqueue *me, bool timeout);

/* Outstanding buffers */
unsigned int inflight;

/* Is this blocked awaiting a timer? */
bool blocked;
};

/* Remember the arguments to the program so we can "reboot" */
Expand Down Expand Up @@ -874,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev)

/* Handling output for console is simple: we just get all the output buffers
* and write them to stdout. */
static void handle_console_output(int fd, struct virtqueue *vq)
static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
{
unsigned int head, out, in;
int len;
Expand All @@ -889,16 +895,31 @@ static void handle_console_output(int fd, struct virtqueue *vq)
}
}

/* Stop the Guest notifying us about this virtqueue and arm a one-shot
 * real-time timer.  The queue is marked blocked so that handle_timeout()
 * knows to re-enable notifications and re-check it once the timer fires. */
static void block_vq(struct virtqueue *vq)
{
	/* Zero interval means the timer fires once, 500 microseconds from now. */
	struct itimerval oneshot = {
		.it_interval = { .tv_sec = 0, .tv_usec = 0 },
		.it_value = { .tv_sec = 0, .tv_usec = 500 },
	};

	/* Suppress kicks from the Guest until the timeout has been handled. */
	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
	vq->blocked = true;

	setitimer(ITIMER_REAL, &oneshot, NULL);
}

/*
* The Network
*
* Handling output for network is also simple: we get all the output buffers
* and write them (ignoring the first element) to this device's file descriptor
* (/dev/net/tun).
*/
static void handle_net_output(int fd, struct virtqueue *vq)
static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
{
unsigned int head, out, in;
unsigned int head, out, in, num = 0;
int len;
struct iovec iov[vq->vring.num];

Expand All @@ -912,7 +933,12 @@ static void handle_net_output(int fd, struct virtqueue *vq)
(void)convert(&iov[0], struct virtio_net_hdr);
len = writev(vq->dev->fd, iov+1, out-1);
add_used_and_trigger(fd, vq, head, len);
num++;
}

/* Block further kicks and set up a timer if we saw anything. */
if (!timeout && num)
block_vq(vq);
}

/* This is where we handle a packet coming in from the tun device to our
Expand Down Expand Up @@ -967,18 +993,18 @@ static bool handle_tun_input(int fd, struct device *dev)
/*L:215 This is the callback attached to the network and console input
* virtqueues: it ensures we try again, in case we stopped console or net
* delivery because Guest didn't have any buffers. */
static void enable_fd(int fd, struct virtqueue *vq)
static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
{
add_device_fd(vq->dev->fd);
/* Tell waker to listen to it again */
write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
}

static void net_enable_fd(int fd, struct virtqueue *vq)
static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
{
/* We don't need to know again when Guest refills receive buffer. */
vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
enable_fd(fd, vq);
enable_fd(fd, vq, timeout);
}

/* When the Guest tells us they updated the status field, we handle it. */
Expand Down Expand Up @@ -1047,7 +1073,7 @@ static void handle_output(int fd, unsigned long addr)
if (strcmp(vq->dev->name, "console") != 0)
verbose("Output to %s\n", vq->dev->name);
if (vq->handle_output)
vq->handle_output(fd, vq);
vq->handle_output(fd, vq, false);
return;
}
}
Expand All @@ -1061,6 +1087,29 @@ static void handle_output(int fd, unsigned long addr)
strnlen(from_guest_phys(addr), guest_limit - addr));
}

/* Run when the timeout pipe becomes readable: drain the pipe, then for every
 * virtqueue that was blocked awaiting the timer, re-enable Guest
 * notifications and give its output handler a chance to flush anything that
 * arrived in the meantime (called with timeout == true). */
static void handle_timeout(int fd)
{
	char drain[32];
	struct device *dev;

	/* Swallow whatever the signal handler wrote; the contents are unused. */
	read(timeoutpipe[0], drain, sizeof(drain));

	/* Walk every virtqueue of every device looking for blocked ones. */
	for (dev = devices.dev; dev; dev = dev->next) {
		struct virtqueue *vq;

		for (vq = dev->vq; vq; vq = vq->next) {
			if (vq->blocked) {
				/* Let the Guest kick us again before flushing. */
				vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
				vq->blocked = false;
				if (vq->handle_output)
					vq->handle_output(fd, vq, true);
			}
		}
	}
}

/* This is called when the Waker wakes us up: check for incoming file
* descriptors. */
static void handle_input(int fd)
Expand All @@ -1071,9 +1120,14 @@ static void handle_input(int fd)
for (;;) {
struct device *i;
fd_set fds = devices.infds;
int num;

num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
/* Could get interrupted */
if (num < 0)
continue;
/* If nothing is ready, we're done. */
if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0)
if (num == 0)
break;

/* Otherwise, call the device(s) which have readable file
Expand All @@ -1097,6 +1151,10 @@ static void handle_input(int fd)
write(waker_fd, &dev_fd, sizeof(dev_fd));
}
}

/* Is this the timeout fd? */
if (FD_ISSET(timeoutpipe[0], &fds))
handle_timeout(fd);
}
}

Expand Down Expand Up @@ -1145,7 +1203,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
/* Each device descriptor is followed by the description of its virtqueues. We
* specify how many descriptors the virtqueue is to have. */
static void add_virtqueue(struct device *dev, unsigned int num_descs,
void (*handle_output)(int fd, struct virtqueue *me))
void (*handle_output)(int, struct virtqueue *, bool))
{
unsigned int pages;
struct virtqueue **i, *vq = malloc(sizeof(*vq));
Expand All @@ -1161,6 +1219,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
vq->last_avail_idx = 0;
vq->dev = dev;
vq->inflight = 0;
vq->blocked = false;

/* Initialize the configuration. */
vq->config.num = num_descs;
Expand Down Expand Up @@ -1293,6 +1352,24 @@ static void setup_console(void)
}
/*:*/

/* SIGALRM handler: poke the timeout pipe so the main loop's select() sees it
 * as readable and runs the timeout handling outside signal context.
 *
 * The write end of the pipe is non-blocking, so write() can fail (e.g. when
 * the pipe is full) and overwrite errno.  Because this handler can interrupt
 * code that is about to inspect errno after a failed system call, errno is
 * saved on entry and restored on exit.  A failed write is deliberately
 * ignored: a full pipe already guarantees the reader will be woken. */
static void timeout_alarm(int sig)
{
	int saved_errno = errno;

	(void)sig;
	write(timeoutpipe[1], "", 1);
	errno = saved_errno;
}

/* Create the timeout pipe and install the SIGALRM handler that writes to it.
 *
 * The write end is made non-blocking so the signal handler can never stall
 * on a full pipe; the read end is registered as an input fd so the main
 * select() loop notices when a timeout has fired.  Any failure is fatal. */
static void setup_timeout(void)
{
	int flags;

	if (pipe(timeoutpipe) != 0)
		err(1, "Creating timeout pipe");

	/* Check F_GETFL separately: a -1 result must not be OR-ed and fed
	 * back to F_SETFL as if it were a valid flag set. */
	flags = fcntl(timeoutpipe[1], F_GETFL);
	if (flags < 0
	    || fcntl(timeoutpipe[1], F_SETFL, flags | O_NONBLOCK) != 0)
		err(1, "Making timeout pipe nonblocking");

	add_device_fd(timeoutpipe[0]);

	/* Without our handler SIGALRM's default action would kill us, so
	 * fail loudly here rather than on the first timer expiry. */
	if (signal(SIGALRM, timeout_alarm) == SIG_ERR)
		err(1, "Installing timeout signal handler");
}

/*M:010 Inter-guest networking is an interesting area. Simplest is to have a
* --sharenet=<name> option which opens or creates a named pipe. This can be
* used to send packets to another guest in a 1:1 manner.
Expand Down Expand Up @@ -1653,7 +1730,7 @@ static bool handle_io_finish(int fd, struct device *dev)
}

/* When the Guest submits some I/O, we just need to wake the I/O thread. */
static void handle_virtblk_output(int fd, struct virtqueue *vq)
static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
{
struct vblk_info *vblk = vq->dev->priv;
char c = 0;
Expand Down Expand Up @@ -1824,7 +1901,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
/* ERESTART means that we need to reboot the guest */
} else if (errno == ERESTART) {
restart_guest();
/* EAGAIN means the Waker wanted us to look at some input.
/* EAGAIN means a signal (timeout).
* Anything else means a bug or incompatible change. */
} else if (errno != EAGAIN)
err(1, "Running guest failed");
Expand Down Expand Up @@ -1948,6 +2025,9 @@ int main(int argc, char *argv[])
/* We always have a console device */
setup_console();

/* We can timeout waiting for Guest network transmit. */
setup_timeout();

/* Now we load the kernel */
start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));

Expand Down

0 comments on commit a161883

Please sign in to comment.