
Commit

---
r: 194284
b: refs/heads/master
c: fec5e65
h: refs/heads/master
v: v3
Tom Herbert authored and David S. Miller committed Apr 16, 2010
1 parent b3ce6dc commit ea62091
Showing 9 changed files with 390 additions and 30 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: b5d43998234331b9c01bd2165fdbb25115f4387f
refs/heads/master: fec5e652e58fa6017b2c9e06466cb2a6538de5b4
69 changes: 68 additions & 1 deletion trunk/include/linux/netdevice.h
@@ -530,14 +530,73 @@ struct rps_map {
};
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))

/*
 * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
 * tail pointer for that CPU's input queue at the time of last enqueue.
 */
struct rps_dev_flow {
        u16 cpu;
        u16 fill;
        unsigned int last_qtail;
};

/*
 * The rps_dev_flow_table structure contains a table of flow mappings.
 */
struct rps_dev_flow_table {
        unsigned int mask;
        struct rcu_head rcu;
        struct work_struct free_work;
        struct rps_dev_flow flows[0];
};
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
    (_num * sizeof(struct rps_dev_flow)))

/*
 * The rps_sock_flow_table contains mappings of flows to the last CPU
 * on which they were processed by the application (set in recvmsg).
 */
struct rps_sock_flow_table {
        unsigned int mask;
        u16 ents[0];
};
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \
    (_num * sizeof(u16)))
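(Aside, not part of the diff: both tables end in a flexible array member sized by these macros, and mask is the entry count minus one, the count being expected to be a power of two so that hash & mask picks a slot. A minimal userspace sketch of that layout and sizing; all mock_* names and the numbers are illustrative only.)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/* Userspace mock of the sock flow table layout: a mask plus u16 slots. */
struct mock_sock_flow_table {
        unsigned int mask;
        uint16_t ents[];
};

#define MOCK_TABLE_SIZE(n) (sizeof(struct mock_sock_flow_table) + (n) * sizeof(uint16_t))

int main(void)
{
        unsigned int entries = 4096;      /* expected to be a power of two */
        uint32_t hash = 0x9e3779b9;       /* an example flow hash */
        struct mock_sock_flow_table *t = malloc(MOCK_TABLE_SIZE(entries));

        t->mask = entries - 1;            /* 0xfff */
        memset(t->ents, 0xff, entries * sizeof(uint16_t)); /* every slot starts as "no CPU" */
        printf("allocation = %zu bytes, slot for hash = %u\n",
               MOCK_TABLE_SIZE(entries), hash & t->mask);
        free(t);
        return 0;
}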

#define RPS_NO_CPU 0xffff

static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
                                        u32 hash)
{
        if (table && hash) {
                unsigned int cpu, index = hash & table->mask;

                /* We only give a hint, preemption can change cpu under us */
                cpu = raw_smp_processor_id();

                if (table->ents[index] != cpu)
                        table->ents[index] = cpu;
        }
}

static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
                                       u32 hash)
{
        if (table && hash)
                table->ents[hash & table->mask] = RPS_NO_CPU;
}
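(Aside, not part of the diff: the two helpers above are a deliberately lossy, lock-free hint. Recording overwrites the slot chosen by hash & mask with the current CPU; resetting marks the slot RPS_NO_CPU. A self-contained userspace sketch of the same semantics, with made-up mock_* names.)

#include <stdint.h>
#include <stdio.h>

#define MOCK_NO_CPU 0xffff

struct mock_flow_table {
        unsigned int mask;
        uint16_t ents[8];                 /* 8 slots, so mask = 7 */
};

/* Record "this flow was last consumed on this CPU".  Lossy on purpose:
 * a later writer for a colliding hash simply overwrites the slot. */
static void mock_record(struct mock_flow_table *t, uint32_t hash, uint16_t cpu)
{
        if (t && hash)
                t->ents[hash & t->mask] = cpu;
}

static void mock_reset(struct mock_flow_table *t, uint32_t hash)
{
        if (t && hash)
                t->ents[hash & t->mask] = MOCK_NO_CPU;
}

int main(void)
{
        struct mock_flow_table t = { .mask = 7 };
        uint32_t hash = 0x12345678;

        mock_record(&t, hash, 2);         /* recvmsg ran on CPU 2 */
        printf("slot %u -> cpu %u\n", hash & t.mask, (unsigned)t.ents[hash & t.mask]);
        mock_reset(&t, hash);             /* socket torn down */
        printf("slot %u -> 0x%x\n", hash & t.mask, (unsigned)t.ents[hash & t.mask]);
        return 0;
}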

extern struct rps_sock_flow_table *rps_sock_flow_table;

/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
        struct rps_map *rps_map;
        struct rps_dev_flow_table *rps_flow_table;
        struct kobject kobj;
        struct netdev_rx_queue *first;
        atomic_t count;
} ____cacheline_aligned_in_smp;
#endif
#endif /* CONFIG_RPS */

/*
* This structure defines the management hooks for network devices.
@@ -1333,11 +1392,19 @@ struct softnet_data {
        /* Elements below can be accessed between CPUs for RPS */
#ifdef CONFIG_RPS
        struct call_single_data csd ____cacheline_aligned_in_smp;
        unsigned int input_queue_head;
#endif
        struct sk_buff_head input_pkt_queue;
        struct napi_struct backlog;
};

static inline void incr_input_queue_head(struct softnet_data *queue)
{
#ifdef CONFIG_RPS
        queue->input_queue_head++;
#endif
}
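(Aside, not part of the diff: input_queue_head counts packets that have left this CPU's input_pkt_queue, and the enqueue path in dev.c below records head + queue length as the flow's last_qtail. The steering code then asks "has the old CPU drained past that tail?" with a signed difference, which stays correct across counter wraparound. A small userspace illustration with hypothetical values.)

#include <stdio.h>

/* The head counter counts packets dequeued from the backlog; the tail
 * recorded at enqueue time is head + queue length.  The signed cast makes
 * "has the queue drained past this tail?" robust to 32-bit wraparound. */
static int drained_past(unsigned int head, unsigned int last_qtail)
{
        return (int)(head - last_qtail) >= 0;
}

int main(void)
{
        unsigned int last_qtail = 0xfffffff0u;  /* tail recorded at enqueue, near the wrap point */

        printf("%d\n", drained_past(0xffffffe0u, last_qtail)); /* 0: not yet drained */
        printf("%d\n", drained_past(0x00000010u, last_qtail)); /* 1: drained, even after wrapping */
        return 0;
}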

DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);

#define HAVE_NETIF_QUEUE
38 changes: 38 additions & 0 deletions trunk/include/net/inet_sock.h
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/netdevice.h>

#include <net/flow.h>
#include <net/sock.h>
@@ -101,6 +102,7 @@ struct rtable;
 * @uc_ttl - Unicast TTL
 * @inet_sport - Source port
 * @inet_id - ID counter for DF pkts
 * @rxhash - flow hash received from netif layer
 * @tos - TOS
 * @mc_ttl - Multicasting TTL
 * @is_icsk - is this an inet_connection_sock?
@@ -124,6 +126,9 @@ struct inet_sock {
        __u16 cmsg_flags;
        __be16 inet_sport;
        __u16 inet_id;
#ifdef CONFIG_RPS
        __u32 rxhash;
#endif

        struct ip_options *opt;
        __u8 tos;
@@ -219,4 +224,37 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
        return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
}

static inline void inet_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
        struct rps_sock_flow_table *sock_flow_table;

        rcu_read_lock();
        sock_flow_table = rcu_dereference(rps_sock_flow_table);
        rps_record_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
        rcu_read_unlock();
#endif
}

static inline void inet_rps_reset_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
        struct rps_sock_flow_table *sock_flow_table;

        rcu_read_lock();
        sock_flow_table = rcu_dereference(rps_sock_flow_table);
        rps_reset_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
        rcu_read_unlock();
#endif
}

static inline void inet_rps_save_rxhash(const struct sock *sk, u32 rxhash)
{
#ifdef CONFIG_RPS
        if (unlikely(inet_sk(sk)->rxhash != rxhash)) {
                inet_rps_reset_flow(sk);
                inet_sk(sk)->rxhash = rxhash;
        }
#endif
}
#endif /* _INET_SOCK_H */
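(Aside, not part of the diff: the protocol files changed by this commit but not expanded on this page are what actually call these helpers; roughly, the receive path saves the packet's rxhash on the socket and the recvmsg-style paths record the consuming CPU. The sketch below restates that call pattern in plain userspace C; every mock_* name is illustrative, not the kernel's.)

#include <stdint.h>
#include <stdio.h>

#define MOCK_NO_CPU 0xffff

/* Userspace mock of the call pattern: the receive path saves the packet's
 * flow hash on the socket, and recvmsg records the consuming CPU in the
 * global sock flow table. */
struct mock_sock { uint32_t rxhash; };

static uint16_t sock_flow_ents[16];
static const unsigned int sock_flow_mask = 15;

static unsigned int mock_current_cpu(void) { return 3; }   /* pretend CPU id */

static void mock_record_flow(struct mock_sock *sk)
{
        if (sk->rxhash)
                sock_flow_ents[sk->rxhash & sock_flow_mask] = mock_current_cpu();
}

static void mock_reset_flow(struct mock_sock *sk)
{
        if (sk->rxhash)
                sock_flow_ents[sk->rxhash & sock_flow_mask] = MOCK_NO_CPU;
}

static void mock_save_rxhash(struct mock_sock *sk, uint32_t rxhash)
{
        if (sk->rxhash != rxhash) {       /* first packet, or the flow's hash changed */
                mock_reset_flow(sk);
                sk->rxhash = rxhash;
        }
}

int main(void)
{
        struct mock_sock sk = { 0 };

        mock_save_rxhash(&sk, 0xabcdef12);   /* packet delivered to the socket */
        mock_record_flow(&sk);               /* application calls recvmsg */
        printf("slot %u -> cpu %u\n", sk.rxhash & sock_flow_mask,
               (unsigned)sock_flow_ents[sk.rxhash & sock_flow_mask]);
        return 0;
}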
111 changes: 91 additions & 20 deletions trunk/net/core/dev.c
@@ -2203,19 +2203,28 @@ int weight_p __read_mostly = 64; /* old backlog weight */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };

#ifdef CONFIG_RPS

/* One global table that all flow-based protocols share. */
struct rps_sock_flow_table *rps_sock_flow_table;
EXPORT_SYMBOL(rps_sock_flow_table);

/*
 * get_rps_cpu is called from netif_receive_skb and returns the target
 * CPU from the RPS map of the receiving queue for a given skb.
 * rcu_read_lock must be held on entry.
 */
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                       struct rps_dev_flow **rflowp)
{
        struct ipv6hdr *ip6;
        struct iphdr *ip;
        struct netdev_rx_queue *rxqueue;
        struct rps_map *map;
        struct rps_dev_flow_table *flow_table;
        struct rps_sock_flow_table *sock_flow_table;
        int cpu = -1;
        u8 ip_proto;
        u16 tcpu;
        u32 addr1, addr2, ports, ihl;

        if (skb_rx_queue_recorded(skb)) {
@@ -2232,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
        } else
                rxqueue = dev->_rx;

        if (!rxqueue->rps_map)
        if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
                goto done;

        if (skb->rxhash)
@@ -2284,9 +2293,48 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
        skb->rxhash = 1;

got_hash:
        flow_table = rcu_dereference(rxqueue->rps_flow_table);
        sock_flow_table = rcu_dereference(rps_sock_flow_table);
        if (flow_table && sock_flow_table) {
                u16 next_cpu;
                struct rps_dev_flow *rflow;

                rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
                tcpu = rflow->cpu;

                next_cpu = sock_flow_table->ents[skb->rxhash &
                    sock_flow_table->mask];

                /*
                 * If the desired CPU (where last recvmsg was done) is
                 * different from current CPU (one in the rx-queue flow
                 * table entry), switch if one of the following holds:
                 *   - Current CPU is unset (equal to RPS_NO_CPU).
                 *   - Current CPU is offline.
                 *   - The current CPU's queue tail has advanced beyond the
                 *     last packet that was enqueued using this table entry.
                 *     This guarantees that all previous packets for the flow
                 *     have been dequeued, thus preserving in order delivery.
                 */
                if (unlikely(tcpu != next_cpu) &&
                    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
                     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
                      rflow->last_qtail)) >= 0)) {
                        tcpu = rflow->cpu = next_cpu;
                        if (tcpu != RPS_NO_CPU)
                                rflow->last_qtail = per_cpu(softnet_data,
                                    tcpu).input_queue_head;
                }
                if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
                        *rflowp = rflow;
                        cpu = tcpu;
                        goto done;
                }
        }

        map = rcu_dereference(rxqueue->rps_map);
        if (map) {
                u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
                tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];

                if (cpu_online(tcpu)) {
                        cpu = tcpu;
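(Aside, not part of the diff: the block above is the heart of RFS. A flow only migrates to the CPU where the application last ran when the previously used CPU is unset, offline, or has already drained every packet this flow enqueued there, which is what preserves in-order delivery. A compressed userspace restatement of that decision with mock per-CPU counters; all names and numbers are hypothetical.)

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define MOCK_NO_CPU 0xffff

struct mock_dev_flow { uint16_t cpu; unsigned int last_qtail; };

/* Mock per-CPU "packets drained so far" counters and an online map. */
static unsigned int input_queue_head[4] = { 100, 200, 300, 400 };
static bool cpu_is_online[4] = { true, true, true, true };

/* desired_cpu: where the socket table says the application last ran.
 * rflow: the rx-queue flow entry (last steered CPU + backlog tail then).
 * Mirrors the conditions listed in the kernel comment above. */
static int mock_steer(struct mock_dev_flow *rflow, uint16_t desired_cpu)
{
        uint16_t tcpu = rflow->cpu;

        if (tcpu != desired_cpu &&
            (tcpu == MOCK_NO_CPU || !cpu_is_online[tcpu] ||
             (int)(input_queue_head[tcpu] - rflow->last_qtail) >= 0)) {
                /* Safe to migrate: nothing of this flow is still queued there. */
                tcpu = rflow->cpu = desired_cpu;
                if (tcpu != MOCK_NO_CPU)
                        rflow->last_qtail = input_queue_head[tcpu];
        }
        return (tcpu != MOCK_NO_CPU && cpu_is_online[tcpu]) ? tcpu : -1;
}

int main(void)
{
        struct mock_dev_flow rflow = { .cpu = 1, .last_qtail = 250 };

        /* CPU 1 has only drained 200 < 250: stay on CPU 1 to keep ordering. */
        printf("%d\n", mock_steer(&rflow, 2));   /* -> 1 */

        input_queue_head[1] = 260;               /* CPU 1 drains past the tail */
        printf("%d\n", mock_steer(&rflow, 2));   /* -> 2: the flow migrates */
        return 0;
}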
@@ -2320,13 +2368,14 @@ static void trigger_softirq(void *data)
        __napi_schedule(&queue->backlog);
        __get_cpu_var(netdev_rx_stat).received_rps++;
}
#endif /* CONFIG_SMP */
#endif /* CONFIG_RPS */

/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
 */
static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
                              unsigned int *qtail)
{
        struct softnet_data *queue;
        unsigned long flags;
@@ -2341,6 +2390,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
        if (queue->input_pkt_queue.qlen) {
enqueue:
                __skb_queue_tail(&queue->input_pkt_queue, skb);
#ifdef CONFIG_RPS
                *qtail = queue->input_queue_head +
                                queue->input_pkt_queue.qlen;
#endif
                rps_unlock(queue);
                local_irq_restore(flags);
                return NET_RX_SUCCESS;
Expand All @@ -2355,11 +2408,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)

                        cpu_set(cpu, rcpus->mask[rcpus->select]);
                        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
                } else
                        __napi_schedule(&queue->backlog);
#else
                __napi_schedule(&queue->backlog);
                        goto enqueue;
                }
#endif
                __napi_schedule(&queue->backlog);
        }
        goto enqueue;
}
@@ -2401,18 +2453,25 @@ int netif_rx(struct sk_buff *skb)

#ifdef CONFIG_RPS
        {
                struct rps_dev_flow voidflow, *rflow = &voidflow;
                int cpu;

                rcu_read_lock();
                cpu = get_rps_cpu(skb->dev, skb);

                cpu = get_rps_cpu(skb->dev, skb, &rflow);
                if (cpu < 0)
                        cpu = smp_processor_id();
                ret = enqueue_to_backlog(skb, cpu);

                ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

                rcu_read_unlock();
        }
#else
        ret = enqueue_to_backlog(skb, get_cpu());
        put_cpu();
        {
                unsigned int qtail;
                ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
                put_cpu();
        }
#endif
        return ret;
}
@@ -2830,14 +2889,22 @@ static int __netif_receive_skb(struct sk_buff *skb)
int netif_receive_skb(struct sk_buff *skb)
{
#ifdef CONFIG_RPS
        int cpu;
        struct rps_dev_flow voidflow, *rflow = &voidflow;
        int cpu, ret;

        rcu_read_lock();

        cpu = get_rps_cpu(skb->dev, skb);
        cpu = get_rps_cpu(skb->dev, skb, &rflow);

        if (cpu < 0)
                return __netif_receive_skb(skb);
        else
                return enqueue_to_backlog(skb, cpu);
        if (cpu >= 0) {
                ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
                rcu_read_unlock();
        } else {
                rcu_read_unlock();
                ret = __netif_receive_skb(skb);
        }

        return ret;
#else
        return __netif_receive_skb(skb);
#endif
@@ -2856,6 +2923,7 @@ static void flush_backlog(void *arg)
                if (skb->dev == dev) {
                        __skb_unlink(skb, &queue->input_pkt_queue);
                        kfree_skb(skb);
                        incr_input_queue_head(queue);
                }
        rps_unlock(queue);
}
@@ -3179,6 +3247,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
                        local_irq_enable();
                        break;
                }
                incr_input_queue_head(queue);
                rps_unlock(queue);
                local_irq_enable();

@@ -5542,8 +5611,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
        local_irq_enable();

        /* Process offline CPU's input_pkt_queue */
        while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
        while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
                netif_rx(skb);
                incr_input_queue_head(oldsd);
        }

        return NOTIFY_OK;
}
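(Aside, not part of the diff: putting the dev.c pieces together, the receive path asks RPS/RFS for a target CPU and either enqueues the skb on that CPU's backlog, recording the queue tail for the flow, or processes it locally. A toy userspace sketch of that dispatch shape; the mock_* helpers merely stand in for get_rps_cpu(), enqueue_to_backlog() and __netif_receive_skb().)

#include <stdio.h>

/* Hypothetical stand-in for get_rps_cpu(): -1 means "no steering possible". */
static int mock_get_rps_cpu(unsigned int rxhash)
{
        return rxhash ? (int)(rxhash & 3) : -1;   /* pretend 4 CPUs */
}

static void mock_receive(unsigned int rxhash)
{
        int cpu = mock_get_rps_cpu(rxhash);       /* under rcu_read_lock() in the kernel */

        if (cpu >= 0)
                printf("enqueue to backlog of CPU %d (qtail recorded for the flow)\n", cpu);
        else
                printf("no map or flow table: process on the local CPU\n");
}

int main(void)
{
        mock_receive(0x5a5a5a5au);   /* a steered flow */
        mock_receive(0);             /* no hash: fall back to the old path */
        return 0;
}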

