Skip to content

Commit

Permalink
Merge branch 'net-improve-core-queue-api-handling-while-device-is-down'
Browse files Browse the repository at this point in the history
Jakub Kicinski says:

====================
net: improve core queue API handling while device is down

The core netdev_rx_queue_restart() doesn't currently take into account
that the device may be down. The current and proposed queue API
implementations deal with this by rejecting queue API calls while
the device is down. We can do better, in theory we can still allow
devmem binding when the device is down - we shouldn't stop and start
the queues just try to allocate the memory. The reason we allocate
the memory is that memory provider binding checks if any compatible
page pool has been created (page_pool_check_memory_provider()).

Alternatively we could reject installing MP while the device is down
but the MP assignment survives ifdown (so presumably MP doesn't cease
to exist while down), and in general we allow configuration while down.

Previously I thought we need this as a fix, but gve rejects page pool
calls while down, and so did Saeed in the patches he posted. So this
series just makes the core act more sensibly but practically should
be a noop for now.

v1: https://lore.kernel.org/20250205190131.564456-1-kuba@kernel.org
====================

Link: https://patch.msgid.link/20250206225638.1387810-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Feb 8, 2025
2 parents 6a0ca73 + 285b3f7 commit acdefab
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 29 deletions.
10 changes: 4 additions & 6 deletions drivers/net/netdevsim/netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -645,8 +645,11 @@ nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
if (ns->rq_reset_mode > 3)
return -EINVAL;

if (ns->rq_reset_mode == 1)
if (ns->rq_reset_mode == 1) {
if (!netif_running(ns->netdev))
return -ENETDOWN;
return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
}

qmem->rq = nsim_queue_alloc();
if (!qmem->rq)
Expand Down Expand Up @@ -754,11 +757,6 @@ nsim_qreset_write(struct file *file, const char __user *data,
return -EINVAL;

rtnl_lock();
if (!netif_running(ns->netdev)) {
ret = -ENETDOWN;
goto exit_unlock;
}

if (queue >= ns->netdev->real_num_rx_queues) {
ret = -EINVAL;
goto exit_unlock;
Expand Down
4 changes: 4 additions & 0 deletions include/net/netdev_queues.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ struct netdev_stat_ops {
*
* @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
* queue's memory is written at the specified address.
*
* Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
* the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
* be called for an interface which is open.
*/
struct netdev_queue_mgmt_ops {
size_t ndo_queue_mem_size;
Expand Down
12 changes: 12 additions & 0 deletions net/core/dev.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,18 @@ void xdp_do_check_flushed(struct napi_struct *napi);
static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif

/* Best effort check that NAPI is not idle (can't be scheduled to run) */
static inline void napi_assert_will_not_race(const struct napi_struct *napi)
{
/* uninitialized instance, can't race */
if (!napi->poll_list.next)
return;

/* SCHED bit is set on disabled instances */
WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));
WARN_ON(READ_ONCE(napi->list_owner) != -1);
}

void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);

#define XMIT_RECURSION_LIMIT 8
Expand Down
37 changes: 20 additions & 17 deletions net/core/netdev_rx_queue.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,44 +10,47 @@
int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
{
struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx);
const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
void *new_mem, *old_mem;
int err;

if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop ||
!dev->queue_mgmt_ops->ndo_queue_mem_free ||
!dev->queue_mgmt_ops->ndo_queue_mem_alloc ||
!dev->queue_mgmt_ops->ndo_queue_start)
if (!qops || !qops->ndo_queue_stop || !qops->ndo_queue_mem_free ||
!qops->ndo_queue_mem_alloc || !qops->ndo_queue_start)
return -EOPNOTSUPP;

ASSERT_RTNL();

new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
if (!new_mem)
return -ENOMEM;

old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
if (!old_mem) {
err = -ENOMEM;
goto err_free_new_mem;
}

err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
if (err)
goto err_free_old_mem;

err = page_pool_check_memory_provider(dev, rxq);
if (err)
goto err_free_new_queue_mem;

err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx);
if (err)
goto err_free_new_queue_mem;
if (netif_running(dev)) {
err = qops->ndo_queue_stop(dev, old_mem, rxq_idx);
if (err)
goto err_free_new_queue_mem;

err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx);
if (err)
goto err_start_queue;
err = qops->ndo_queue_start(dev, new_mem, rxq_idx);
if (err)
goto err_start_queue;
} else {
swap(new_mem, old_mem);
}

dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
qops->ndo_queue_mem_free(dev, old_mem);

kvfree(old_mem);
kvfree(new_mem);
Expand All @@ -62,15 +65,15 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
* WARN if we fail to recover the old rx queue, and at least free
* old_mem so we don't also leak that.
*/
if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) {
if (qops->ndo_queue_start(dev, old_mem, rxq_idx)) {
WARN(1,
"Failed to restart old queue in error path. RX queue %d may be unhealthy.",
rxq_idx);
dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
qops->ndo_queue_mem_free(dev, old_mem);
}

err_free_new_queue_mem:
dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem);
qops->ndo_queue_mem_free(dev, new_mem);

err_free_old_mem:
kvfree(old_mem);
Expand Down
7 changes: 2 additions & 5 deletions net/core/page_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include <trace/events/page_pool.h>

#include "dev.h"
#include "mp_dmabuf_devmem.h"
#include "netmem_priv.h"
#include "page_pool_priv.h"
Expand Down Expand Up @@ -1147,11 +1148,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool)
if (!pool->p.napi)
return;

/* To avoid races with recycling and additional barriers make sure
* pool and NAPI are unlinked when NAPI is disabled.
*/
WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state));
WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1);
napi_assert_will_not_race(pool->p.napi);

mutex_lock(&page_pools_lock);
WRITE_ONCE(pool->p.napi, NULL);
Expand Down
18 changes: 17 additions & 1 deletion tools/testing/selftests/net/nl_netdev.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,21 @@ def napi_list_check(nf) -> None:
comment=f"queue count after reset queue {q} mode {i}")


def nsim_rxq_reset_down(nf) -> None:
"""
Test that the queue API supports resetting a queue
while the interface is down. We should convert this
test to testing real HW once more devices support
queue API.
"""
with NetdevSimDev(queue_count=4) as nsimdev:
nsim = nsimdev.nsims[0]

ip(f"link set dev {nsim.ifname} down")
for i in [0, 2, 3]:
nsim.dfs_write("queue_reset", f"1 {i}")


def page_pool_check(nf) -> None:
with NetdevSimDev() as nsimdev:
nsim = nsimdev.nsims[0]
Expand Down Expand Up @@ -106,7 +121,8 @@ def get_pp():

def main() -> None:
nf = NetdevFamily()
ksft_run([empty_check, lo_check, page_pool_check, napi_list_check],
ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()

Expand Down

0 comments on commit acdefab

Please sign in to comment.