Skip to content

Commit

Permalink
gve: DQO: Add core netdev features
Browse files Browse the repository at this point in the history
Add napi netdev device registration, interrupt handling and initial tx
and rx polling stubs. The stubs will be filled in follow-on patches.

Also:
- LRO feature advertisement and handling
- Update ethtool logic

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Bailey Forrest authored and David S. Miller committed Jun 24, 2021
1 parent 1f6228e commit 5e8c5ad
Show file tree
Hide file tree
Showing 8 changed files with 260 additions and 25 deletions.
2 changes: 1 addition & 1 deletion drivers/net/ethernet/google/gve/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Makefile for the Google virtual Ethernet (gve) driver

obj-$(CONFIG_GVE) += gve.o
gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o gve_utils.o
gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o
2 changes: 2 additions & 0 deletions drivers/net/ethernet/google/gve/gve.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
/* PTYPEs are always 10 bits. */
#define GVE_NUM_PTYPES 1024

#define GVE_RX_BUFFER_SIZE_DQO 2048

/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
struct gve_rx_desc *desc_ring; /* the descriptor ring */
Expand Down
2 changes: 2 additions & 0 deletions drivers/net/ethernet/google/gve/gve_adminq.c
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,8 @@ int gve_adminq_describe_device(struct gve_priv *priv)
if (gve_is_gqi(priv)) {
err = gve_set_desc_cnt(priv, descriptor);
} else {
/* DQO supports LRO. */
priv->dev->hw_features |= NETIF_F_LRO;
err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda);
}
if (err)
Expand Down
32 changes: 32 additions & 0 deletions drivers/net/ethernet/google/gve/gve_dqo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
* Google virtual Ethernet (gve) driver
*
* Copyright (C) 2015-2021 Google, Inc.
*/

#ifndef _GVE_DQO_H_
#define _GVE_DQO_H_

#include "gve_adminq.h"

/* Flag bits for values written to a notification block's IRQ doorbell via
 * gve_write_irq_doorbell_dqo().
 */
/* Written to re-enable the block's interrupt once NAPI polling completes. */
#define GVE_ITR_ENABLE_BIT_DQO BIT(0)
/* Written to clear the PCI MSI-X Pending Bit Array (PBA) bit. */
#define GVE_ITR_CLEAR_PBA_BIT_DQO BIT(1)
/* OR-ed into every doorbell write made by the NAPI poll routine.
 * NOTE(review): presumably means "leave the interrupt throttling interval
 * unchanged" - confirm against the device specification.
 */
#define GVE_ITR_NO_UPDATE_DQO (3 << 3)

/* NOTE(review): presumably per-direction interrupt rate limits in
 * microseconds, judging by the names - confirm where they are consumed.
 */
#define GVE_TX_IRQ_RATELIMIT_US_DQO 50
#define GVE_RX_IRQ_RATELIMIT_US_DQO 20

/* DQO transmit and poll routines called from gve_main.c. */
netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev);
bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);

/* Write @val to the IRQ doorbell belonging to @block.
 *
 * The doorbell slot is block->irq_db_index, which is stored big-endian and
 * converted to CPU byte order before indexing priv->db_bar2.
 */
static inline void
gve_write_irq_doorbell_dqo(const struct gve_priv *priv,
			   const struct gve_notify_block *block, u32 val)
{
	const u32 db_slot = be32_to_cpu(block->irq_db_index);

	iowrite32(val, &priv->db_bar2[db_slot]);
}

#endif /* _GVE_DQO_H_ */
12 changes: 10 additions & 2 deletions drivers/net/ethernet/google/gve/gve_ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,16 @@ gve_get_ethtool_stats(struct net_device *netdev,
for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
struct gve_tx_ring *tx = &priv->tx[ring];

data[i++] = tx->req;
data[i++] = tx->done;
if (gve_is_gqi(priv)) {
data[i++] = tx->req;
data[i++] = tx->done;
} else {
/* DQO doesn't currently support
* posted/completed descriptor counts.
*/
data[i++] = 0;
data[i++] = 0;
}
do {
start =
u64_stats_fetch_begin(&priv->tx[ring].statss);
Expand Down
188 changes: 166 additions & 22 deletions drivers/net/ethernet/google/gve/gve_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <linux/workqueue.h>
#include <net/sch_generic.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"

Expand All @@ -26,6 +27,16 @@
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

/* Transmit entry point installed as .ndo_start_xmit.
 *
 * Hands @skb to the transmit routine matching the device's queue format:
 * gve_tx() for GQI devices, gve_tx_dqo() otherwise. The callee's return
 * value is passed through unchanged.
 */
static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_tx_dqo(skb, dev);

	return gve_tx(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
struct gve_priv *priv = netdev_priv(dev);
Expand Down Expand Up @@ -155,6 +166,15 @@ static irqreturn_t gve_intr(int irq, void *arg)
return IRQ_HANDLED;
}

/* Interrupt handler for DQO notification blocks.
 *
 * @arg is the struct gve_notify_block that was passed to request_irq().
 * The handler does no queue processing itself; it only schedules the
 * block's NAPI context and reports the interrupt as handled.
 */
static irqreturn_t gve_intr_dqo(int irq, void *arg)
{
	struct gve_notify_block *ntfy_block = arg;

	/* Interrupts are automatically masked */
	napi_schedule_irqoff(&ntfy_block->napi);

	return IRQ_HANDLED;
}

static int gve_napi_poll(struct napi_struct *napi, int budget)
{
struct gve_notify_block *block;
Expand Down Expand Up @@ -191,6 +211,54 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
return 0;
}

/* NAPI poll callback for notification blocks on DQO-format devices.
 *
 * Clears the MSI-X pending bit for the block, services its TX side (if
 * present) and then its RX side (if present), and re-enables the interrupt
 * once napi_complete_done() reports that polling has finished.
 *
 * @napi:   NAPI context embedded in a struct gve_notify_block.
 * @budget: maximum amount of RX work allowed for this call. May be 0, in
 *          which case only TX completion work is performed.
 *
 * Returns @budget when the block needs to be polled again, 0 when called
 * with a zero budget, and otherwise the RX work count reported by
 * gve_rx_poll_dqo().
 */
static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	/* Clear PCI MSI-X Pending Bit Array (PBA)
	 *
	 * This bit is set if an interrupt event occurs while the vector is
	 * masked. If this bit is set and we reenable the interrupt, it will
	 * fire again. Since we're just about to poll the queue state, we don't
	 * need it to fire again.
	 *
	 * Under high softirq load, it's possible that the interrupt condition
	 * is triggered twice before we got the chance to process it.
	 */
	gve_write_irq_doorbell_dqo(priv, block,
				   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO);

	/* A true return from the TX poll requests another poll of this block. */
	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	/* A zero budget permits TX completion work only. RX must not be
	 * processed, and the `work_done == budget` test below would be true
	 * with no work done. Return before touching RX, completing NAPI or
	 * re-enabling the interrupt.
	 */
	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		/* Using the whole budget means more RX work may be pending. */
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
		 */
		gve_write_irq_doorbell_dqo(priv, block,
					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
	}

	return work_done;
}

static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
int num_vecs_requested = priv->num_ntfy_blks + 1;
Expand Down Expand Up @@ -264,7 +332,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
name, i);
block->priv = priv;
err = request_irq(priv->msix_vectors[msix_idx].vector,
gve_intr, 0, block->name, block);
gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
0, block->name, block);
if (err) {
dev_err(&priv->pdev->dev,
"Failed to receive msix vector %d\n", i);
Expand Down Expand Up @@ -417,11 +486,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
gve_clear_device_resources_ok(priv);
}

static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
int (*gve_poll)(struct napi_struct *, int))
{
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
netif_napi_add(priv->dev, &block->napi, gve_poll,
NAPI_POLL_WEIGHT);
}

Expand Down Expand Up @@ -512,11 +582,33 @@ static int gve_create_rings(struct gve_priv *priv)
return 0;
}

/* Register @napi_poll as the NAPI handler for every TX and RX queue and
 * prepare each ring's stats synchronisation state.
 *
 * For each ring this records the ring's notification block index in
 * ntfy_id, initialises its u64_stats sync object, and adds a NAPI context
 * on that notification block. TX rings are handled first, then RX rings.
 */
static void add_napi_init_sync_stats(struct gve_priv *priv,
				     int (*napi_poll)(struct napi_struct *napi,
						      int budget))
{
	int q;

	/* TX rings: init sync stats, then add napi */
	for (q = 0; q < priv->tx_cfg.num_queues; q++) {
		int blk = gve_tx_idx_to_ntfy(priv, q);

		u64_stats_init(&priv->tx[q].statss);
		priv->tx[q].ntfy_id = blk;
		gve_add_napi(priv, blk, napi_poll);
	}

	/* RX rings: init sync stats, then add napi */
	for (q = 0; q < priv->rx_cfg.num_queues; q++) {
		int blk = gve_rx_idx_to_ntfy(priv, q);

		u64_stats_init(&priv->rx[q].statss);
		priv->rx[q].ntfy_id = blk;
		gve_add_napi(priv, blk, napi_poll);
	}
}

static int gve_alloc_rings(struct gve_priv *priv)
{
int ntfy_idx;
int err;
int i;

/* Setup tx rings */
priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
Expand All @@ -536,18 +628,11 @@ static int gve_alloc_rings(struct gve_priv *priv)
err = gve_rx_alloc_rings(priv);
if (err)
goto free_rx;
/* Add tx napi & init sync stats*/
for (i = 0; i < priv->tx_cfg.num_queues; i++) {
u64_stats_init(&priv->tx[i].statss);
ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
gve_add_napi(priv, ntfy_idx);
}
/* Add rx napi & init sync stats*/
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
u64_stats_init(&priv->rx[i].statss);
ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
gve_add_napi(priv, ntfy_idx);
}

if (gve_is_gqi(priv))
add_napi_init_sync_stats(priv, gve_napi_poll);
else
add_napi_init_sync_stats(priv, gve_napi_poll_dqo);

return 0;

Expand Down Expand Up @@ -798,9 +883,17 @@ static int gve_open(struct net_device *dev)
err = gve_register_qpls(priv);
if (err)
goto reset;

if (!gve_is_gqi(priv)) {
/* Hard code this for now. This may be tuned in the future for
* performance.
*/
priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
}
err = gve_create_rings(priv);
if (err)
goto reset;

gve_set_device_rings_ok(priv);

if (gve_get_report_stats(priv))
Expand Down Expand Up @@ -970,12 +1063,49 @@ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
priv->tx_timeo_cnt++;
}

/* .ndo_set_features handler.
 *
 * Only a change of NETIF_F_LRO is acted upon. The bit is toggled in
 * netdev->features and, if the carrier is up, the device is closed and
 * reopened so the new setting takes effect.
 *
 * Returns 0 on success. If gve_close() or gve_open() fails, the original
 * feature set is restored, an error is logged, and that error code is
 * returned.
 */
static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	/* Nothing to do unless the LRO bit differs from the current state. */
	if (!((netdev->features ^ features) & NETIF_F_LRO))
		return 0;

	netdev->features ^= NETIF_F_LRO;

	/* With the carrier down there is nothing to restart. */
	if (!netif_carrier_ok(netdev))
		return 0;

	/* To make this process as simple as possible we teardown the device,
	 * set the new configuration, and then bring the device up again.
	 *
	 * Close already attempts a reset on failure, so a close error is
	 * simply reported; open is only tried after a successful close.
	 */
	err = gve_close(netdev);
	if (!err)
		err = gve_open(netdev);
	if (!err)
		return 0;

	/* Reverts the change on error. */
	netdev->features = orig_features;
	netif_err(priv, drv, netdev,
		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
	return err;
}

static const struct net_device_ops gve_netdev_ops = {
.ndo_start_xmit = gve_tx,
.ndo_start_xmit = gve_start_xmit,
.ndo_open = gve_open,
.ndo_stop = gve_close,
.ndo_get_stats64 = gve_get_stats,
.ndo_tx_timeout = gve_tx_timeout,
.ndo_set_features = gve_set_features,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
Expand Down Expand Up @@ -1019,6 +1149,15 @@ void gve_handle_report_stats(struct gve_priv *priv)
/* tx stats */
if (priv->tx) {
for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
u32 last_completion = 0;
u32 tx_frames = 0;

/* DQO doesn't currently support these metrics. */
if (gve_is_gqi(priv)) {
last_completion = priv->tx[idx].done;
tx_frames = priv->tx[idx].req;
}

do {
start = u64_stats_fetch_begin(&priv->tx[idx].statss);
tx_bytes = priv->tx[idx].bytes_done;
Expand All @@ -1035,7 +1174,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
};
stats[stats_idx++] = (struct stats) {
.stat_name = cpu_to_be32(TX_FRAMES_SENT),
.value = cpu_to_be64(priv->tx[idx].req),
.value = cpu_to_be64(tx_frames),
.queue_id = cpu_to_be32(idx),
};
stats[stats_idx++] = (struct stats) {
Expand All @@ -1045,7 +1184,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
};
stats[stats_idx++] = (struct stats) {
.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
.value = cpu_to_be64(priv->tx[idx].done),
.value = cpu_to_be64(last_completion),
.queue_id = cpu_to_be32(idx),
};
}
Expand Down Expand Up @@ -1121,7 +1260,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
"Could not get device information: err=%d\n", err);
goto err;
}
if (priv->dev->max_mtu > PAGE_SIZE) {
if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) {
priv->dev->max_mtu = PAGE_SIZE;
err = gve_adminq_set_mtu(priv, priv->dev->mtu);
if (err) {
Expand Down Expand Up @@ -1332,7 +1471,12 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, dev);
dev->ethtool_ops = &gve_ethtool_ops;
dev->netdev_ops = &gve_netdev_ops;
/* advertise features */

/* Set default and supported features.
*
* Features might be set in other locations as well (such as
* `gve_adminq_describe_device`).
*/
dev->hw_features = NETIF_F_HIGHDMA;
dev->hw_features |= NETIF_F_SG;
dev->hw_features |= NETIF_F_HW_CSUM;
Expand Down
Loading

0 comments on commit 5e8c5ad

Please sign in to comment.