Skip to content

Commit

Permalink
net: RPS: Enable hardware acceleration of RFS
Browse files Browse the repository at this point in the history
Allow drivers for multiqueue hardware with flow filter tables to
accelerate RFS.  The driver must:

1. Set net_device::rx_cpu_rmap to a cpu_rmap of the RX completion
IRQs (in queue order).  This will provide a mapping from CPUs to the
queues for which completions are handled nearest to them.

2. Implement net_device_ops::ndo_rx_flow_steer.  This operation adds
or replaces a filter steering the given flow to the given RX queue, if
possible.

3. Periodically remove filters for which rps_may_expire_flow() returns
true.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Ben Hutchings authored and David S. Miller committed Jan 24, 2011
1 parent c39649c commit c445477
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 9 deletions.
33 changes: 30 additions & 3 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -554,14 +554,16 @@ struct rps_map {
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))

/*
* The rps_dev_flow structure contains the mapping of a flow to a CPU and the
* tail pointer for that CPU's input queue at the time of last enqueue.
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
* tail pointer for that CPU's input queue at the time of last enqueue, and
* a hardware filter index.
*/
struct rps_dev_flow {
u16 cpu;
u16 fill;
u16 filter;
unsigned int last_qtail;
};
#define RPS_NO_FILTER 0xffff

/*
* The rps_dev_flow_table structure contains a table of flow mappings.
Expand Down Expand Up @@ -611,6 +613,11 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,

extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;

#ifdef CONFIG_RFS_ACCEL
extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
u32 flow_id, u16 filter_id);
#endif

/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
struct rps_map __rcu *rps_map;
Expand Down Expand Up @@ -769,6 +776,13 @@ struct netdev_tc_txq {
* is always called from the stack with the rtnl lock held and netif tx
* queues stopped. This allows the netdevice to perform queue management
* safely.
*
* RFS acceleration.
* int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb,
* u16 rxq_index, u32 flow_id);
* Set hardware filter for RFS. rxq_index is the target queue index;
* flow_id is a flow ID to be passed to rps_may_expire_flow() later.
* Return the filter ID on success, or a negative error code.
*/
#define HAVE_NET_DEVICE_OPS
struct net_device_ops {
Expand Down Expand Up @@ -842,6 +856,12 @@ struct net_device_ops {
int (*ndo_fcoe_get_wwn)(struct net_device *dev,
u64 *wwn, int type);
#endif
#ifdef CONFIG_RFS_ACCEL
int (*ndo_rx_flow_steer)(struct net_device *dev,
const struct sk_buff *skb,
u16 rxq_index,
u32 flow_id);
#endif
};

/*
Expand Down Expand Up @@ -1056,6 +1076,13 @@ struct net_device {

/* Number of RX queues currently active in device */
unsigned int real_num_rx_queues;

#ifdef CONFIG_RFS_ACCEL
/* CPU reverse-mapping for RX completion interrupts, indexed
* by RX queue number. Assigned by driver. This must only be
* set if the ndo_rx_flow_steer operation is defined. */
struct cpu_rmap *rx_cpu_rmap;
#endif
#endif

rx_handler_func_t __rcu *rx_handler;
Expand Down
6 changes: 6 additions & 0 deletions net/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,12 @@ config RPS
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y

config RFS_ACCEL
boolean
depends on RPS && GENERIC_HARDIRQS
select CPU_RMAP
default y

config XPS
boolean
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
Expand Down
97 changes: 91 additions & 6 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>

#include "net-sysfs.h"

Expand Down Expand Up @@ -2588,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash);
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);

static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow *rflow, u16 next_cpu)
{
u16 tcpu;

tcpu = rflow->cpu = next_cpu;
if (tcpu != RPS_NO_CPU) {
#ifdef CONFIG_RFS_ACCEL
struct netdev_rx_queue *rxqueue;
struct rps_dev_flow_table *flow_table;
struct rps_dev_flow *old_rflow;
u32 flow_id;
u16 rxq_index;
int rc;

/* Should we steer this flow to a different hardware queue? */
if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap)
goto out;
rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
if (rxq_index == skb_get_rx_queue(skb))
goto out;

rxqueue = dev->_rx + rxq_index;
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (!flow_table)
goto out;
flow_id = skb->rxhash & flow_table->mask;
rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
rxq_index, flow_id);
if (rc < 0)
goto out;
old_rflow = rflow;
rflow = &flow_table->flows[flow_id];
rflow->cpu = next_cpu;
rflow->filter = rc;
if (old_rflow->filter == rflow->filter)
old_rflow->filter = RPS_NO_FILTER;
out:
#endif
rflow->last_qtail =
per_cpu(softnet_data, tcpu).input_queue_head;
}

return rflow;
}

/*
* get_rps_cpu is called from netif_receive_skb and returns the target
* CPU from the RPS map of the receiving queue for a given skb.
Expand Down Expand Up @@ -2658,12 +2706,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (unlikely(tcpu != next_cpu) &&
(tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
rflow->last_qtail)) >= 0)) {
tcpu = rflow->cpu = next_cpu;
if (tcpu != RPS_NO_CPU)
rflow->last_qtail = per_cpu(softnet_data,
tcpu).input_queue_head;
}
rflow->last_qtail)) >= 0))
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);

if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
*rflowp = rflow;
cpu = tcpu;
Expand All @@ -2684,6 +2729,46 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
return cpu;
}

#ifdef CONFIG_RFS_ACCEL

/**
* rps_may_expire_flow - check whether an RFS hardware filter may be removed
* @dev: Device on which the filter was set
* @rxq_index: RX queue index
* @flow_id: Flow ID passed to ndo_rx_flow_steer()
* @filter_id: Filter ID returned by ndo_rx_flow_steer()
*
* Drivers that implement ndo_rx_flow_steer() should periodically call
* this function for each installed filter and remove the filters for
* which it returns %true.
*/
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
u32 flow_id, u16 filter_id)
{
struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
struct rps_dev_flow_table *flow_table;
struct rps_dev_flow *rflow;
bool expire = true;
int cpu;

rcu_read_lock();
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (flow_table && flow_id <= flow_table->mask) {
rflow = &flow_table->flows[flow_id];
cpu = ACCESS_ONCE(rflow->cpu);
if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
((int)(per_cpu(softnet_data, cpu).input_queue_head -
rflow->last_qtail) <
(int)(10 * flow_table->mask)))
expire = false;
}
rcu_read_unlock();
return expire;
}
EXPORT_SYMBOL(rps_may_expire_flow);

#endif /* CONFIG_RFS_ACCEL */

/* Called from hardirq (IPI) context */
static void rps_trigger_softirq(void *data)
{
Expand Down

0 comments on commit c445477

Please sign in to comment.