Skip to content

Commit

Permalink
enic: Add support for adaptive interrupt coalescing
Browse files Browse the repository at this point in the history
This patch adds support for adaptive interrupt coalescing.

For small pkts with low pkt rate, we can decrease the coalescing interrupt
dynamically which decreases the latency. This however increases the cpu
utilization. Based on testing with different coal intr and pkt rate we came up
with a table(mod_table) with rx_rate and coalescing interrupt value where we
get low latency without significant increase in cpu. mod_table table stores
the coalescing timer percentage value for different throughputs.

Function enic_calc_int_moderation() calculates the desired coalescing intr timer
value. This function is called in driver rx napi_poll. The actual value is set
by enic_set_int_moderation() which is called when napi_poll is complete. i.e
when we unmask the rx intr.

Adaptive coal intr is support only when driver is using msix intr. Because
intr is not shared.

Struct mod_range is used to store only the default adaptive coalescing intr
value.

Adaptive coal intr calue is calculated by

timer = range_start + ((rx_coal->range_end - range_start) *
		       mod_table[index].range_percent / 100);

rx_coal->range_end is the rx-usecs-high value set using ethtool.
range_start is rx-usecs-low, set using ethtool, if rx_small_pkt_bytes_cnt is
greater than 2 * rx_large_pkt_bytes_cnt. i.e small pkts are dominant. Else its
rx-usecs-low + 3.

Cc: Christian Benvenuti <benve@cisco.com>
Cc: Neel Patel <neepatel@cisco.com>
Signed-off-by: Sujith Sankar <ssujith@cisco.com>
Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Sujith Sankar authored and David S. Miller committed May 21, 2014
1 parent f6e92d1 commit 7c2ce6e
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 6 deletions.
30 changes: 30 additions & 0 deletions drivers/net/ethernet/cisco/enic/enic.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,42 @@
#define ENIC_CQ_MAX (ENIC_WQ_MAX + ENIC_RQ_MAX)
#define ENIC_INTR_MAX (ENIC_CQ_MAX + 2)

#define ENIC_AIC_LARGE_PKT_DIFF 3

struct enic_msix_entry {
int requested;
char devname[IFNAMSIZ];
irqreturn_t (*isr)(int, void *);
void *devid;
};

/* Store only the lower range. Higher range is given by fw. */
struct enic_intr_mod_range {
u32 small_pkt_range_start;
u32 large_pkt_range_start;
};

struct enic_intr_mod_table {
u32 rx_rate;
u32 range_percent;
};

#define ENIC_MAX_LINK_SPEEDS 3
#define ENIC_LINK_SPEED_10G 10000
#define ENIC_LINK_SPEED_4G 4000
#define ENIC_LINK_40G_INDEX 2
#define ENIC_LINK_10G_INDEX 1
#define ENIC_LINK_4G_INDEX 0
#define ENIC_RX_COALESCE_RANGE_END 125
#define ENIC_AIC_TS_BREAK 100

struct enic_rx_coal {
u32 small_pkt_range_start;
u32 large_pkt_range_start;
u32 range_end;
u32 use_adaptive_rx_coalesce;
};

/* priv_flags */
#define ENIC_SRIOV_ENABLED (1 << 0)

Expand Down Expand Up @@ -92,6 +121,7 @@ struct enic {
unsigned int mc_count;
unsigned int uc_count;
u32 port_mtu;
struct enic_rx_coal rx_coalesce_setting;
u32 rx_coalesce_usecs;
u32 tx_coalesce_usecs;
#ifdef CONFIG_PCI_IOV
Expand Down
61 changes: 55 additions & 6 deletions drivers/net/ethernet/cisco/enic/enic_ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ static const struct enic_stat enic_rx_stats[] = {
static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);

void enic_intr_coal_set_rx(struct enic *enic, u32 timer)
{
int i;
int intr;

for (i = 0; i < enic->rq_count; i++) {
intr = enic_msix_rq_intr(enic, i);
vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
}
}

static int enic_get_settings(struct net_device *netdev,
struct ethtool_cmd *ecmd)
{
Expand Down Expand Up @@ -178,9 +189,14 @@ static int enic_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ecmd)
{
struct enic *enic = netdev_priv(netdev);
struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;

ecmd->tx_coalesce_usecs = enic->tx_coalesce_usecs;
ecmd->rx_coalesce_usecs = enic->rx_coalesce_usecs;
if (rxcoal->use_adaptive_rx_coalesce)
ecmd->use_adaptive_rx_coalesce = 1;
ecmd->rx_coalesce_usecs_low = rxcoal->small_pkt_range_start;
ecmd->rx_coalesce_usecs_high = rxcoal->range_end;

return 0;
}
Expand All @@ -191,17 +207,31 @@ static int enic_set_coalesce(struct net_device *netdev,
struct enic *enic = netdev_priv(netdev);
u32 tx_coalesce_usecs;
u32 rx_coalesce_usecs;
u32 rx_coalesce_usecs_low;
u32 rx_coalesce_usecs_high;
u32 coalesce_usecs_max;
unsigned int i, intr;
struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;

coalesce_usecs_max = vnic_dev_get_intr_coal_timer_max(enic->vdev);
tx_coalesce_usecs = min_t(u32, ecmd->tx_coalesce_usecs,
vnic_dev_get_intr_coal_timer_max(enic->vdev));
coalesce_usecs_max);
rx_coalesce_usecs = min_t(u32, ecmd->rx_coalesce_usecs,
vnic_dev_get_intr_coal_timer_max(enic->vdev));
coalesce_usecs_max);

rx_coalesce_usecs_low = min_t(u32, ecmd->rx_coalesce_usecs_low,
coalesce_usecs_max);
rx_coalesce_usecs_high = min_t(u32, ecmd->rx_coalesce_usecs_high,
coalesce_usecs_max);

switch (vnic_dev_get_intr_mode(enic->vdev)) {
case VNIC_DEV_INTR_MODE_INTX:
if (tx_coalesce_usecs != rx_coalesce_usecs)
return -EINVAL;
if (ecmd->use_adaptive_rx_coalesce ||
ecmd->rx_coalesce_usecs_low ||
ecmd->rx_coalesce_usecs_high)
return -EOPNOTSUPP;

intr = enic_legacy_io_intr();
vnic_intr_coalescing_timer_set(&enic->intr[intr],
Expand All @@ -210,6 +240,10 @@ static int enic_set_coalesce(struct net_device *netdev,
case VNIC_DEV_INTR_MODE_MSI:
if (tx_coalesce_usecs != rx_coalesce_usecs)
return -EINVAL;
if (ecmd->use_adaptive_rx_coalesce ||
ecmd->rx_coalesce_usecs_low ||
ecmd->rx_coalesce_usecs_high)
return -EOPNOTSUPP;

vnic_intr_coalescing_timer_set(&enic->intr[0],
tx_coalesce_usecs);
Expand All @@ -221,12 +255,27 @@ static int enic_set_coalesce(struct net_device *netdev,
tx_coalesce_usecs);
}

for (i = 0; i < enic->rq_count; i++) {
intr = enic_msix_rq_intr(enic, i);
vnic_intr_coalescing_timer_set(&enic->intr[intr],
rx_coalesce_usecs);
if (rxcoal->use_adaptive_rx_coalesce) {
if (!ecmd->use_adaptive_rx_coalesce) {
rxcoal->use_adaptive_rx_coalesce = 0;
enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
}
} else {
if (ecmd->use_adaptive_rx_coalesce)
rxcoal->use_adaptive_rx_coalesce = 1;
else
enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
}

if (ecmd->rx_coalesce_usecs_high) {
if (rx_coalesce_usecs_high <
(rx_coalesce_usecs_low + ENIC_AIC_LARGE_PKT_DIFF))
return -EINVAL;
rxcoal->range_end = rx_coalesce_usecs_high;
rxcoal->small_pkt_range_start = rx_coalesce_usecs_low;
rxcoal->large_pkt_range_start = rx_coalesce_usecs_low +
ENIC_AIC_LARGE_PKT_DIFF;
}
break;
default:
break;
Expand Down
150 changes: 150 additions & 0 deletions drivers/net/ethernet/cisco/enic/enic_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
#include <net/ip6_checksum.h>
#include <linux/ktime.h>

#include "cq_enet_desc.h"
#include "vnic_dev.h"
Expand Down Expand Up @@ -72,6 +73,35 @@ MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, enic_id_table);

#define ENIC_LARGE_PKT_THRESHOLD 1000
#define ENIC_MAX_COALESCE_TIMERS 10
/* Interrupt moderation table, which will be used to decide the
* coalescing timer values
* {rx_rate in Mbps, mapping percentage of the range}
*/
struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
{4000, 0},
{4400, 10},
{5060, 20},
{5230, 30},
{5540, 40},
{5820, 50},
{6120, 60},
{6435, 70},
{6745, 80},
{7000, 90},
{0xFFFFFFFF, 100}
};

/* This table helps the driver to pick different ranges for rx coalescing
* timer depending on the link speed.
*/
struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
{0, 0}, /* 0 - 4 Gbps */
{0, 3}, /* 4 - 10 Gbps */
{3, 6}, /* 10 - 40 Gbps */
};

int enic_is_dynamic(struct enic *enic)
{
return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
Expand Down Expand Up @@ -979,13 +1009,23 @@ static int enic_rq_alloc_buf(struct vnic_rq *rq)
return 0;
}

static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size,
u32 pkt_len)
{
if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len)
pkt_size->large_pkt_bytes_cnt += pkt_len;
else
pkt_size->small_pkt_bytes_cnt += pkt_len;
}

static void enic_rq_indicate_buf(struct vnic_rq *rq,
struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
int skipped, void *opaque)
{
struct enic *enic = vnic_dev_priv(rq->vdev);
struct net_device *netdev = enic->netdev;
struct sk_buff *skb;
struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];

u8 type, color, eop, sop, ingress_port, vlan_stripped;
u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
Expand Down Expand Up @@ -1056,6 +1096,9 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
napi_gro_receive(&enic->napi[q_number], skb);
else
netif_receive_skb(skb);
if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
enic_intr_update_pkt_size(&cq->pkt_size_counter,
bytes_written);
} else {

/* Buffer overflow
Expand Down Expand Up @@ -1134,6 +1177,64 @@ static int enic_poll(struct napi_struct *napi, int budget)
return rq_work_done;
}

static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
{
unsigned int intr = enic_msix_rq_intr(enic, rq->index);
struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
u32 timer = cq->tobe_rx_coal_timeval;

if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
}
}

static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
{
struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
int index;
u32 timer;
u32 range_start;
u32 traffic;
u64 delta;
ktime_t now = ktime_get();

delta = ktime_us_delta(now, cq->prev_ts);
if (delta < ENIC_AIC_TS_BREAK)
return;
cq->prev_ts = now;

traffic = pkt_size_counter->large_pkt_bytes_cnt +
pkt_size_counter->small_pkt_bytes_cnt;
/* The table takes Mbps
* traffic *= 8 => bits
* traffic *= (10^6 / delta) => bps
* traffic /= 10^6 => Mbps
*
* Combining, traffic *= (8 / delta)
*/

traffic <<= 3;
traffic /= delta;

for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
if (traffic < mod_table[index].rx_rate)
break;
range_start = (pkt_size_counter->small_pkt_bytes_cnt >
pkt_size_counter->large_pkt_bytes_cnt << 1) ?
rx_coal->small_pkt_range_start :
rx_coal->large_pkt_range_start;
timer = range_start + ((rx_coal->range_end - range_start) *
mod_table[index].range_percent / 100);
/* Damping */
cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;

pkt_size_counter->large_pkt_bytes_cnt = 0;
pkt_size_counter->small_pkt_bytes_cnt = 0;
}

static int enic_poll_msix(struct napi_struct *napi, int budget)
{
struct net_device *netdev = napi->dev;
Expand Down Expand Up @@ -1171,6 +1272,13 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)

if (err)
work_done = work_to_do;
if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
/* Call the function which refreshes
* the intr coalescing timer value based on
* the traffic. This is supported only in
* the case of MSI-x mode
*/
enic_calc_int_moderation(enic, &enic->rq[rq]);

if (work_done < work_to_do) {

Expand All @@ -1179,6 +1287,8 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
*/

napi_complete(napi);
if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
enic_set_int_moderation(enic, &enic->rq[rq]);
vnic_intr_unmask(&enic->intr[intr]);
}

Expand Down Expand Up @@ -1314,6 +1424,42 @@ static void enic_synchronize_irqs(struct enic *enic)
}
}

static void enic_set_rx_coal_setting(struct enic *enic)
{
unsigned int speed;
int index = -1;
struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;

/* If intr mode is not MSIX, do not do adaptive coalescing */
if (VNIC_DEV_INTR_MODE_MSIX != vnic_dev_get_intr_mode(enic->vdev)) {
netdev_info(enic->netdev, "INTR mode is not MSIX, Not initializing adaptive coalescing");
return;
}

/* 1. Read the link speed from fw
* 2. Pick the default range for the speed
* 3. Update it in enic->rx_coalesce_setting
*/
speed = vnic_dev_port_speed(enic->vdev);
if (ENIC_LINK_SPEED_10G < speed)
index = ENIC_LINK_40G_INDEX;
else if (ENIC_LINK_SPEED_4G < speed)
index = ENIC_LINK_10G_INDEX;
else
index = ENIC_LINK_4G_INDEX;

rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;

/* Start with the value provided by UCSM */
for (index = 0; index < enic->rq_count; index++)
enic->cq[index].cur_rx_coal_timeval =
enic->config.intr_timer_usec;

rx_coal->use_adaptive_rx_coalesce = 1;
}

static int enic_dev_notify_set(struct enic *enic)
{
int err;
Expand Down Expand Up @@ -2231,6 +2377,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
enic->notify_timer.function = enic_notify_timer;
enic->notify_timer.data = (unsigned long)enic;

enic_set_rx_coal_setting(enic);
INIT_WORK(&enic->reset, enic_reset);
INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work);

Expand All @@ -2250,6 +2397,9 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}

enic->tx_coalesce_usecs = enic->config.intr_timer_usec;
/* rx coalesce time already got initialized. This gets used
* if adaptive coal is turned off
*/
enic->rx_coalesce_usecs = enic->tx_coalesce_usecs;

if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
Expand Down
Loading

0 comments on commit 7c2ce6e

Please sign in to comment.