xen-netback: improve guest-receive-side flow control
The way that flow control works without this patch is that, in start_xmit(),
the code uses xenvif_count_skb_slots() to predict how many slots
xenvif_gop_skb() will consume, adds this to a 'req_cons_peek' counter, and
then uses that counter to decide whether the shared ring has that much space
available, by checking whether 'req_prod' has passed that value. If the ring
doesn't have space, the tx queue is stopped.
xenvif_gop_skb() will then consume slots, updating 'req_cons', and issue
responses, updating 'rsp_prod' as it goes. The frontend consumes those
responses and posts new requests by updating req_prod. So req_prod chases
req_cons, which chases rsp_prod, and can never exceed that value. Thus, if
xenvif_count_skb_slots() ever returns more slots than xenvif_gop_skb()
actually uses, req_cons_peek will reach a value that req_prod cannot possibly
achieve (since it is limited by the 'real' req_cons) and, if this happens
enough times, req_cons_peek ends up more than a ring size ahead of req_cons.
The tx queue then remains stopped forever, waiting for an unachievable amount
of space to become available in the ring.
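
To make the failure mode concrete, here is a stand-alone user-space sketch
(not kernel code) of the pre-patch accounting. The ring size, the assumption
that the frontend always keeps the ring fully replenished, and the per-skb
numbers (a prediction of 4 slots against a real consumption of 2) are all
illustrative:

#include <stdio.h>

#define RING_SIZE 256

int main(void)
{
	unsigned int req_cons = 0;      /* slots really used (xenvif_gop_skb)       */
	unsigned int req_cons_peek = 0; /* slots predicted (xenvif_count_skb_slots) */
	unsigned int req_prod;          /* requests posted by the frontend          */
	const unsigned int predicted = 4, used = 2;
	unsigned int skbs;

	for (skbs = 0; ; skbs++) {
		/* The frontend can never get more than a ring's worth of
		 * requests ahead of what the backend has really consumed.
		 */
		req_prod = req_cons + RING_SIZE;

		/* Pre-patch start_xmit() test: stop the queue if req_prod
		 * has not passed the predicted consumption point.
		 */
		if (req_prod - req_cons_peek < predicted)
			break;

		req_cons_peek += predicted;
		req_cons += used;
	}

	printf("queue stopped after %u skbs; peek is %u slots ahead of req_cons\n",
	       skbs, req_cons_peek - req_cons);
	return 0;
}

Because the queue is only woken once req_prod passes the peek value, and
req_prod is bounded by the real req_cons, the queue never restarts once the
drift exceeds roughly a ring size.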

Having two routines trying to calculate the same value is always going to be
fragile, so this patch does away with that. All we essentially need to do is
make sure that we have 'enough stuff' on our internal queue without letting
it build up uncontrollably. So start_xmit() makes a cheap optimistic check of
how much space is needed for an skb and only turns the queue off if that is
unachievable. net_rx_action() is the place that could use an accurate
prediction but, since that has proven tricky to calculate, a cheap worst-case
(but not too bad) estimate is all we really need, since the only thing we
*must* prevent is xenvif_gop_skb() consuming more slots than are available.
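
That worst-case estimate lives in netback.c, which is not part of the excerpt
below. As an illustration only (the helper name here is made up and the exact
merged code may differ), it amounts to counting the pages spanned by the
linear area and by each frag, plus one extra slot for GSO metadata:

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/skbuff.h>

/* Illustrative upper bound on the RX ring slots an skb can consume. */
static unsigned int rx_max_slots_needed(const struct sk_buff *skb)
{
	unsigned int max_slots_needed;
	int i;

	/* One slot per page spanned by the linear data... */
	max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
					skb_headlen(skb), PAGE_SIZE);

	/* ...one per page spanned by each frag... */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		unsigned int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE);
	}

	/* ...plus an extra slot if GSO metadata is needed. */
	if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
		max_slots_needed++;

	return max_slots_needed;
}

This over-counts a little (it assumes no two frags ever share a slot), but
unlike the old prediction it is only ever compared against the space
genuinely available in the ring, so an over-estimate can no longer wedge the
queue.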

Without this patch I can trivially stall netback permanently just by doing a
large guest-to-guest file copy between two Windows Server 2008R2 VMs on a
single host.

Patch tested with frontends in:
- Windows Server 2008R2
- CentOS 6.0
- Debian Squeeze
- Debian Wheezy
- SLES11

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Annie Li <annie.li@oracle.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Paul Durrant authored and David S. Miller committed Dec 10, 2013
1 parent 512137e commit ca2f09f
Showing 3 changed files with 106 additions and 185 deletions.
27 changes: 11 additions & 16 deletions drivers/net/xen-netback/common.h
@@ -136,12 +136,10 @@ struct xenvif {
char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
struct xen_netif_rx_back_ring rx;
struct sk_buff_head rx_queue;

/* Allow xenvif_start_xmit() to peek ahead in the rx request
* ring. This is a prediction of what rx_req_cons will be
* once all queued skbs are put on the ring.
/* Set when the RX interrupt is triggered by the frontend.
* The worker thread may need to wake the queue.
*/
RING_IDX rx_req_cons_peek;
bool rx_event;

/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
* head/fragment page uses 2 copy operations because it
@@ -198,8 +196,6 @@ void xenvif_xenbus_fini(void);

int xenvif_schedulable(struct xenvif *vif);

int xenvif_rx_ring_full(struct xenvif *vif);

int xenvif_must_stop_queue(struct xenvif *vif);

/* (Un)Map communication rings. */
@@ -211,21 +207,20 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
/* Check for SKBs from frontend and schedule backend processing */
void xenvif_check_rx_xenvif(struct xenvif *vif);

/* Queue an SKB for transmission to the frontend */
void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
/* Notify xenvif that ring now has space to send an skb to the frontend */
void xenvif_notify_tx_completion(struct xenvif *vif);

/* Prevent the device from generating any further traffic. */
void xenvif_carrier_off(struct xenvif *vif);

/* Returns number of ring slots required to send an skb to the frontend */
unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);

int xenvif_tx_action(struct xenvif *vif, int budget);
void xenvif_rx_action(struct xenvif *vif);

int xenvif_kthread(void *data);
void xenvif_kick_thread(struct xenvif *vif);

/* Determine whether the needed number of slots (req) are available,
* and set req_event if not.
*/
bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);

void xenvif_stop_queue(struct xenvif *vif);

extern bool separate_tx_rx_irq;

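The body of xenvif_rx_ring_slots_available() is added in netback.c, which is
not shown here. Going by the declaration and comment above, the expected
logic is: succeed if the frontend has already posted enough requests;
otherwise set req_event so that the arrival of further requests raises an
interrupt, and re-check to close the race. A sketch (an illustration written
against the driver's types in common.h, not necessarily the exact merged
code):

#include "common.h"

bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
{
	RING_IDX prod, cons;

	do {
		prod = vif->rx.sring->req_prod;
		cons = vif->rx.req_cons;

		/* Enough outstanding requests to cover 'needed' slots? */
		if (prod - cons >= needed)
			return true;

		/* Ask to be notified once more requests arrive... */
		vif->rx.sring->req_event = prod + 1;

		/* ...and make that write visible before re-reading
		 * req_prod, in case requests raced in meanwhile.
		 */
		mb();
	} while (vif->rx.sring->req_prod != prod);

	return false;
}
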
47 changes: 24 additions & 23 deletions drivers/net/xen-netback/interface.c
@@ -46,11 +46,6 @@ int xenvif_schedulable(struct xenvif *vif)
return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
}

static int xenvif_rx_schedulable(struct xenvif *vif)
{
return xenvif_schedulable(vif) && !xenvif_rx_ring_full(vif);
}

static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
{
struct xenvif *vif = dev_id;
@@ -104,8 +99,8 @@ static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif *vif = dev_id;

if (xenvif_rx_schedulable(vif))
netif_wake_queue(vif->dev);
vif->rx_event = true;
xenvif_kick_thread(vif);

return IRQ_HANDLED;
}
@@ -121,24 +116,35 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
int min_slots_needed;

BUG_ON(skb->dev != dev);

/* Drop the packet if vif is not ready */
if (vif->task == NULL)
if (vif->task == NULL || !xenvif_schedulable(vif))
goto drop;

/* Drop the packet if the target domain has no receive buffers. */
if (!xenvif_rx_schedulable(vif))
goto drop;
/* At best we'll need one slot for the header and one for each
* frag.
*/
min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;

/* Reserve ring slots for the worst-case number of fragments. */
vif->rx_req_cons_peek += xenvif_count_skb_slots(vif, skb);
/* If the skb is GSO then we'll also need an extra slot for the
* metadata.
*/
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
min_slots_needed++;

if (vif->can_queue && xenvif_must_stop_queue(vif))
netif_stop_queue(dev);
/* If the skb can't possibly fit in the remaining slots
* then turn off the queue to give the ring a chance to
* drain.
*/
if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
xenvif_stop_queue(vif);

xenvif_queue_tx_skb(vif, skb);
skb_queue_tail(&vif->rx_queue, skb);
xenvif_kick_thread(vif);

return NETDEV_TX_OK;

@@ -148,12 +154,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}

void xenvif_notify_tx_completion(struct xenvif *vif)
{
if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
netif_wake_queue(vif->dev);
}

static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
@@ -378,6 +378,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
if (err < 0)
goto err;

init_waitqueue_head(&vif->wq);

if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
err = bind_interdomain_evtchn_to_irqhandler(
@@ -410,7 +412,6 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
disable_irq(vif->rx_irq);
}

init_waitqueue_head(&vif->wq);
task = kthread_create(xenvif_kthread,
(void *)vif, "%s", vif->dev->name);
if (IS_ERR(task)) {