Skip to content

Commit

Permalink
net/hyperv: Add flow control based on hi/low watermark
Browse files Browse the repository at this point in the history
In the existing code, we only stop queue when the ringbuffer is full,
so the current packet has to be dropped or retried from upper layer.

This patch stops the tx queue when available ringbuffer is below
the low watermark. So the ringbuffer still has small amount of space
available for the current packet. This will reduce the overhead of
retries on sending.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Haiyang Zhang authored and David S. Miller committed Apr 3, 2012
1 parent ede7193 commit 33be96e
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 36 deletions.
31 changes: 0 additions & 31 deletions drivers/hv/ring_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,37 +30,6 @@
#include "hyperv_vmbus.h"


/* #defines */


/* Amount of space to write to */
#define BYTES_AVAIL_TO_WRITE(r, w, z) \
((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w))


/*
*
* hv_get_ringbuffer_availbytes()
*
* Get number of bytes available to read and to write to
* for the specified ring buffer
*/
static inline void
hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
u32 *read, u32 *write)
{
u32 read_loc, write_loc;

smp_read_barrier_depends();

/* Capture the read/write indices before they changed */
read_loc = rbi->ring_buffer->read_index;
write_loc = rbi->ring_buffer->write_index;

*write = BYTES_AVAIL_TO_WRITE(read_loc, write_loc, rbi->ring_datasize);
*read = rbi->ring_datasize - *write;
}

/*
* hv_get_next_write_location()
*
Expand Down
41 changes: 37 additions & 4 deletions drivers/net/hyperv/netvsc.c
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,24 @@ int netvsc_device_remove(struct hv_device *device)
return 0;
}


#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

/*
* Get the percentage of available bytes to write in the ring.
* The return value is in range from 0 to 100.
*/
static inline u32 hv_ringbuf_avail_percent(
struct hv_ring_buffer_info *ring_info)
{
u32 avail_read, avail_write;

hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);

return avail_write * 100 / ring_info->ring_datasize;
}

static void netvsc_send_completion(struct hv_device *device,
struct vmpacket_descriptor *packet)
{
Expand Down Expand Up @@ -455,6 +473,8 @@ static void netvsc_send_completion(struct hv_device *device,
complete(&net_device->channel_init_wait);
} else if (nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
int num_outstanding_sends;

/* Get the send context */
nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
packet->trans_id;
Expand All @@ -463,10 +483,14 @@ static void netvsc_send_completion(struct hv_device *device,
nvsc_packet->completion.send.send_completion(
nvsc_packet->completion.send.send_completion_ctx);

atomic_dec(&net_device->num_outstanding_sends);
num_outstanding_sends =
atomic_dec_return(&net_device->num_outstanding_sends);

if (netif_queue_stopped(ndev) && !net_device->start_remove)
netif_wake_queue(ndev);
if (netif_queue_stopped(ndev) && !net_device->start_remove &&
(hv_ringbuf_avail_percent(&device->channel->outbound)
> RING_AVAIL_PERCENT_HIWATER ||
num_outstanding_sends < 1))
netif_wake_queue(ndev);
} else {
netdev_err(ndev, "Unknown send completion packet type- "
"%d received!!\n", nvsp_packet->hdr.msg_type);
Expand Down Expand Up @@ -519,10 +543,19 @@ int netvsc_send(struct hv_device *device,

if (ret == 0) {
atomic_inc(&net_device->num_outstanding_sends);
if (hv_ringbuf_avail_percent(&device->channel->outbound) <
RING_AVAIL_PERCENT_LOWATER) {
netif_stop_queue(ndev);
if (atomic_read(&net_device->
num_outstanding_sends) < 1)
netif_wake_queue(ndev);
}
} else if (ret == -EAGAIN) {
netif_stop_queue(ndev);
if (atomic_read(&net_device->num_outstanding_sends) < 1)
if (atomic_read(&net_device->num_outstanding_sends) < 1) {
netif_wake_queue(ndev);
ret = -ENOSPC;
}
} else {
netdev_err(ndev, "Unable to send packet %p ret %d\n",
packet, ret);
Expand Down
6 changes: 5 additions & 1 deletion drivers/net/hyperv/netvsc_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,13 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
net->stats.tx_packets++;
} else {
kfree(packet);
if (ret != -EAGAIN) {
dev_kfree_skb_any(skb);
net->stats.tx_dropped++;
}
}

return ret ? NETDEV_TX_BUSY : NETDEV_TX_OK;
return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
}

/*
Expand Down
27 changes: 27 additions & 0 deletions include/linux/hyperv.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,33 @@ struct hv_ring_buffer_debug_info {
u32 bytes_avail_towrite;
};


/*
*
* hv_get_ringbuffer_availbytes()
*
* Get number of bytes available to read and to write to
* for the specified ring buffer
*/
static inline void
hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
u32 *read, u32 *write)
{
u32 read_loc, write_loc, dsize;

smp_read_barrier_depends();

/* Capture the read/write indices before they changed */
read_loc = rbi->ring_buffer->read_index;
write_loc = rbi->ring_buffer->write_index;
dsize = rbi->ring_datasize;

*write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
read_loc - write_loc;
*read = dsize - *write;
}


/*
* We use the same version numbering for all Hyper-V modules.
*
Expand Down

0 comments on commit 33be96e

Please sign in to comment.