Skip to content

Commit

Permalink
Merge branch 'ethtool-perqueue-params'
Browse files Browse the repository at this point in the history
Kan Liang says:

====================
ethtool per queue parameters support

Modern network interface controllers usually support multiple receive
and transmit queues. Each queue may have its own parameters. For
example, Intel XL710/X710 hardware supports per queue interrupt
moderation. However, current ethtool does not support per queue
parameters option. User has to set parameters for the whole NIC.
This series extends ethtool to support per queue parameters option.

Since the support of per queue parameters vary with different cards,
it is impossible to address all cards in one patch. This series only
supports per queue coalesce options on i40e driver. The framework used
in the patch can be easily extended to other cards and parameters.

The lib bitmap needs to be extended to facilitate exchanging queue bitmaps
between user space and kernel space. Two patches from David's latest V8
patch series are also cited in this series. You may refer to
https://lkml.org/lkml/2016/2/9/919 for more details.

Changes since V6:
 - Rebase on commit 76d13b5. Did minor change in patch 6.

Changes since V5:
 - Add test_bitmap.c and bitmap.sh in the series. They are forgot
   to be added previously.
 - Update the first two patches to David's latest V8 version. The changes
   include
      - bitmap u32 API returns number of bits copied, unit tests updated
      - module_exit in test_bitmap
 - Also change the mode of bitmap.sh to 755 according to Ben's suggestion

Changes since V4:
 - Modify set/get_per_queue_coalesce function description
 - Change the queue number to be u32
 - Correct an error of calculating coalesce backup buffer address
 - Rename queue_num to n_queues
 - Don't log error message in __i40e_get_coalesce

Changes since V3:
 - Based on David's lib bitmap.
 - ETHTOOL_PERQUEUE should be handled before the containing switch
 - Make the rollback code unconditional
 - some minor changes according to Ben's feedback

Changes since V2:
 - Add queue-specific settings for interrupt moderation in i40e

Changes since V1:
 - Checking the sub-command number to determine whether the command
   requires CAP_NET_ADMIN
 - Refine the struct ethtool_per_queue_op and improve the comments
 - Use bitmap functions to parse queue mask
 - Improve comments
 - Use bitmap functions to parse queue mask
 - Improve comments
 - Add rollback support
 - Correct the way to find the vector for specific queue.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Feb 20, 2016
2 parents 76d13b5 + f3757a4 commit 2f86017
Show file tree
Hide file tree
Showing 16 changed files with 760 additions and 73 deletions.
7 changes: 0 additions & 7 deletions drivers/net/ethernet/intel/i40e/i40e.h
Original file line number Diff line number Diff line change
Expand Up @@ -521,13 +521,6 @@ struct i40e_vsi {
struct i40e_ring **tx_rings;

u16 work_limit;
/* high bit set means dynamic, use accessor routines to read/write.
* hardware only supports 2us resolution for the ITR registers.
* these values always store the USER setting, and must be converted
* before programming to a register.
*/
u16 rx_itr_setting;
u16 tx_itr_setting;
u16 int_rate_limit; /* value in usecs */

u16 rss_table_size; /* HW RSS table size */
Expand Down
15 changes: 10 additions & 5 deletions drivers/net/ethernet/intel/i40e/i40e_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,10 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
" rx_rings[%i]: vsi = %p, q_vector = %p\n",
i, rx_ring->vsi,
rx_ring->q_vector);
dev_info(&pf->pdev->dev,
" rx_rings[%i]: rx_itr_setting = %d (%s)\n",
i, rx_ring->rx_itr_setting,
ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
}
for (i = 0; i < vsi->num_queue_pairs; i++) {
struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]);
Expand Down Expand Up @@ -352,14 +356,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
dev_info(&pf->pdev->dev,
" tx_rings[%i]: DCB tc = %d\n",
i, tx_ring->dcb_tc);
dev_info(&pf->pdev->dev,
" tx_rings[%i]: tx_itr_setting = %d (%s)\n",
i, tx_ring->tx_itr_setting,
ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed");
}
rcu_read_unlock();
dev_info(&pf->pdev->dev,
" work_limit = %d, rx_itr_setting = %d (%s), tx_itr_setting = %d (%s)\n",
vsi->work_limit, vsi->rx_itr_setting,
ITR_IS_DYNAMIC(vsi->rx_itr_setting) ? "dynamic" : "fixed",
vsi->tx_itr_setting,
ITR_IS_DYNAMIC(vsi->tx_itr_setting) ? "dynamic" : "fixed");
" work_limit = %d\n",
vsi->work_limit);
dev_info(&pf->pdev->dev,
" max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n",
vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype);
Expand Down
151 changes: 104 additions & 47 deletions drivers/net/ethernet/intel/i40e/i40e_ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -1879,23 +1879,34 @@ static int i40e_set_phys_id(struct net_device *netdev,
* 125us (8000 interrupts per second) == ITR(62)
*/

static int i40e_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec)
static int __i40e_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec,
int queue)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;

ec->tx_max_coalesced_frames_irq = vsi->work_limit;
ec->rx_max_coalesced_frames_irq = vsi->work_limit;

if (ITR_IS_DYNAMIC(vsi->rx_itr_setting))
/* rx and tx usecs has per queue value. If user doesn't specify the queue,
* return queue 0's value to represent.
*/
if (queue < 0) {
queue = 0;
} else if (queue >= vsi->num_queue_pairs) {
return -EINVAL;
}

if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting))
ec->use_adaptive_rx_coalesce = 1;

if (ITR_IS_DYNAMIC(vsi->tx_itr_setting))
if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting))
ec->use_adaptive_tx_coalesce = 1;

ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC;
ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC;
ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC;
ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC;

/* we use the _usecs_high to store/set the interrupt rate limit
* that the hardware supports, that almost but not quite
* fits the original intent of the ethtool variable,
Expand All @@ -1908,15 +1919,63 @@ static int i40e_get_coalesce(struct net_device *netdev,
return 0;
}

static int i40e_set_coalesce(struct net_device *netdev,
static int i40e_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
return __i40e_get_coalesce(netdev, ec, -1);
}

static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
struct ethtool_coalesce *ec)
{
return __i40e_get_coalesce(netdev, ec, queue);
}

static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
struct ethtool_coalesce *ec,
int queue)
{
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
struct i40e_q_vector *q_vector;
u16 vector, intrl;

intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit);

vsi->rx_rings[queue]->rx_itr_setting = ec->rx_coalesce_usecs;
vsi->tx_rings[queue]->tx_itr_setting = ec->tx_coalesce_usecs;

if (ec->use_adaptive_rx_coalesce)
vsi->rx_rings[queue]->rx_itr_setting |= I40E_ITR_DYNAMIC;
else
vsi->rx_rings[queue]->rx_itr_setting &= ~I40E_ITR_DYNAMIC;

if (ec->use_adaptive_tx_coalesce)
vsi->tx_rings[queue]->tx_itr_setting |= I40E_ITR_DYNAMIC;
else
vsi->tx_rings[queue]->tx_itr_setting &= ~I40E_ITR_DYNAMIC;

q_vector = vsi->rx_rings[queue]->q_vector;
q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[queue]->rx_itr_setting);
vector = vsi->base_vector + q_vector->v_idx;
wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);

q_vector = vsi->tx_rings[queue]->q_vector;
q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[queue]->tx_itr_setting);
vector = vsi->base_vector + q_vector->v_idx;
wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);

wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
i40e_flush(hw);
}

static int __i40e_set_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec,
int queue)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
u16 vector;
int i;

if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
Expand All @@ -1933,57 +1992,53 @@ static int i40e_set_coalesce(struct net_device *netdev,
return -EINVAL;
}

vector = vsi->base_vector;
if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
(ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) {
vsi->rx_itr_setting = ec->rx_coalesce_usecs;
} else if (ec->rx_coalesce_usecs == 0) {
vsi->rx_itr_setting = ec->rx_coalesce_usecs;
if (ec->rx_coalesce_usecs == 0) {
if (ec->use_adaptive_rx_coalesce)
netif_info(pf, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
} else {
netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
return -EINVAL;
} else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
(ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
return -EINVAL;
}

vsi->int_rate_limit = ec->rx_coalesce_usecs_high;

if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
(ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) {
vsi->tx_itr_setting = ec->tx_coalesce_usecs;
} else if (ec->tx_coalesce_usecs == 0) {
vsi->tx_itr_setting = ec->tx_coalesce_usecs;
if (ec->tx_coalesce_usecs == 0) {
if (ec->use_adaptive_tx_coalesce)
netif_info(pf, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
} else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
(ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
return -EINVAL;
}

/* rx and tx usecs has per queue value. If user doesn't specify the queue,
* apply to all queues.
*/
if (queue < 0) {
for (i = 0; i < vsi->num_queue_pairs; i++)
i40e_set_itr_per_queue(vsi, ec, i);
} else if (queue < vsi->num_queue_pairs) {
i40e_set_itr_per_queue(vsi, ec, queue);
} else {
netif_info(pf, drv, netdev,
"Invalid value, tx-usecs range is 0-8160\n");
netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
vsi->num_queue_pairs - 1);
return -EINVAL;
}

if (ec->use_adaptive_rx_coalesce)
vsi->rx_itr_setting |= I40E_ITR_DYNAMIC;
else
vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC;

if (ec->use_adaptive_tx_coalesce)
vsi->tx_itr_setting |= I40E_ITR_DYNAMIC;
else
vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;

for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
u16 intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit);
return 0;
}

q_vector = vsi->q_vectors[i];
q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr);
q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
wr32(hw, I40E_PFINT_ITRN(1, vector - 1), q_vector->tx.itr);
wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
i40e_flush(hw);
}
static int i40e_set_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec)
{
return __i40e_set_coalesce(netdev, ec, -1);
}

return 0;
static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
struct ethtool_coalesce *ec)
{
return __i40e_set_coalesce(netdev, ec, queue);
}

/**
Expand Down Expand Up @@ -2871,6 +2926,8 @@ static const struct ethtool_ops i40e_ethtool_ops = {
.get_ts_info = i40e_get_ts_info,
.get_priv_flags = i40e_get_priv_flags,
.set_priv_flags = i40e_set_priv_flags,
.get_per_queue_coalesce = i40e_get_per_queue_coalesce,
.set_per_queue_coalesce = i40e_set_per_queue_coalesce,
};

void i40e_set_ethtool_ops(struct net_device *netdev)
Expand Down
12 changes: 6 additions & 6 deletions drivers/net/ethernet/intel/i40e/i40e_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3124,11 +3124,11 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
struct i40e_q_vector *q_vector = vsi->q_vectors[i];

q_vector->itr_countdown = ITR_COUNTDOWN_START;
q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting);
q_vector->rx.latency_range = I40E_LOW_LATENCY;
wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
q_vector->rx.itr);
q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting);
q_vector->tx.latency_range = I40E_LOW_LATENCY;
wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
q_vector->tx.itr);
Expand Down Expand Up @@ -3220,10 +3220,10 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)

/* set the ITR configuration */
q_vector->itr_countdown = ITR_COUNTDOWN_START;
q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting);
q_vector->rx.latency_range = I40E_LOW_LATENCY;
wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting);
q_vector->tx.latency_range = I40E_LOW_LATENCY;
wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);

Expand Down Expand Up @@ -7322,8 +7322,6 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
set_bit(__I40E_DOWN, &vsi->state);
vsi->flags = 0;
vsi->idx = vsi_idx;
vsi->rx_itr_setting = pf->rx_itr_default;
vsi->tx_itr_setting = pf->tx_itr_default;
vsi->int_rate_limit = 0;
vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ?
pf->rss_table_size : 64;
Expand Down Expand Up @@ -7490,6 +7488,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
tx_ring->dcb_tc = 0;
if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE)
tx_ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
tx_ring->tx_itr_setting = pf->tx_itr_default;
vsi->tx_rings[i] = tx_ring;

rx_ring = &tx_ring[1];
Expand All @@ -7506,6 +7505,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
set_ring_16byte_desc_enabled(rx_ring);
else
clear_ring_16byte_desc_enabled(rx_ring);
rx_ring->rx_itr_setting = pf->rx_itr_default;
vsi->rx_rings[i] = rx_ring;
}

Expand Down
9 changes: 5 additions & 4 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1882,6 +1882,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
bool rx = false, tx = false;
u32 rxval, txval;
int vector;
int idx = q_vector->v_idx;

vector = (q_vector->v_idx + vsi->base_vector);

Expand All @@ -1891,17 +1892,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);

if (q_vector->itr_countdown > 0 ||
(!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
!ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
(!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) &&
!ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) {
goto enable_int;
}

if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) {
rx = i40e_set_new_dynamic_itr(&q_vector->rx);
rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
}

if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) {
tx = i40e_set_new_dynamic_itr(&q_vector->tx);
txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
}
Expand Down
8 changes: 8 additions & 0 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,14 @@ struct i40e_ring {
u8 dcb_tc; /* Traffic class of ring */
u8 __iomem *tail;

/* high bit set means dynamic, use accessor routines to read/write.
* hardware only supports 2us resolution for the ITR registers.
* these values always store the USER setting, and must be converted
* before programming to a register.
*/
u16 rx_itr_setting;
u16 tx_itr_setting;

u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
u16 rx_hdr_len;
Expand Down
10 changes: 10 additions & 0 deletions include/linux/bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@
* bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region
* bitmap_release_region(bitmap, pos, order) Free specified bit region
* bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region
* bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words)
* bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words)
*/

/*
Expand Down Expand Up @@ -163,6 +165,14 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
extern unsigned int bitmap_from_u32array(unsigned long *bitmap,
unsigned int nbits,
const u32 *buf,
unsigned int nwords);
extern unsigned int bitmap_to_u32array(u32 *buf,
unsigned int nwords,
const unsigned long *bitmap,
unsigned int nbits);
#ifdef __BIG_ENDIAN
extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits);
#else
Expand Down
Loading

0 comments on commit 2f86017

Please sign in to comment.