Skip to content

Commit

Permalink
bonding: modify the old and add new xmit hash policies
Browse files Browse the repository at this point in the history
This patch adds two new hash policy modes which use skb_flow_dissect:
3 - Encapsulated layer 2+3
4 - Encapsulated layer 3+4
There should be a good improvement for tunnel users in those modes.
It also changes the old hash functions to:
hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
hash ^= (hash >> 16);
hash ^= (hash >> 8);

Where hash will be initialized either to L2 hash, that is
SRCMAC[5] XOR DSTMAC[5], or to flow->ports which should be extracted
from the upper layer. Flow's dst and src are also extracted based on the
xmit policy either directly from the buffer or by using skb_flow_dissect,
but in both cases if the protocol is IPv6 then dst and src are obtained by
ipv6_addr_hash() on the real addresses. In case of a non-dissectable
packet, the algorithms fall back to L2 hashing.
The bond_set_mode_ops() function is now obsolete and thus deleted
because it was used only to set the proper hash policy. Also we trim a
pointer from struct bonding because we no longer need to keep the hash
function, now there's only a single hash function - bond_xmit_hash that
works based on bond->params.xmit_policy.

The hash function and skb_flow_dissect were suggested by Eric Dumazet.
The layer names were suggested by Andy Gospodarek, because I suck at
semantics.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Nikolay Aleksandrov authored and David S. Miller committed Oct 3, 2013
1 parent 357afe9 commit 32819dc
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 134 deletions.
2 changes: 1 addition & 1 deletion drivers/net/bonding/bond_3ad.c
Original file line number Diff line number Diff line change
Expand Up @@ -2403,7 +2403,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
goto out;
}

slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg);
slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
first_ok_slave = NULL;

bond_for_each_slave(bond, slave, iter) {
Expand Down
197 changes: 68 additions & 129 deletions drivers/net/bonding/bond_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
#include <net/netns/generic.h>
#include <net/pkt_sched.h>
#include <linux/rculist.h>
#include <net/flow_keys.h>
#include "bonding.h"
#include "bond_3ad.h"
#include "bond_alb.h"
Expand Down Expand Up @@ -159,7 +160,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on
module_param(xmit_hash_policy, charp, 0);
MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; "
"0 for layer 2 (default), 1 for layer 3+4, "
"2 for layer 2+3");
"2 for layer 2+3, 3 for encap layer 2+3, "
"4 for encap layer 3+4");
module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
Expand Down Expand Up @@ -217,6 +219,8 @@ const struct bond_parm_tbl xmit_hashtype_tbl[] = {
{ "layer2", BOND_XMIT_POLICY_LAYER2},
{ "layer3+4", BOND_XMIT_POLICY_LAYER34},
{ "layer2+3", BOND_XMIT_POLICY_LAYER23},
{ "encap2+3", BOND_XMIT_POLICY_ENCAP23},
{ "encap3+4", BOND_XMIT_POLICY_ENCAP34},
{ NULL, -1},
};

Expand Down Expand Up @@ -3035,99 +3039,85 @@ static struct notifier_block bond_netdev_notifier = {

/*---------------------------- Hashing Policies -----------------------------*/

/*
* Hash for the output device based upon layer 2 data
*/
static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
/* L2 hash helper */
static inline u32 bond_eth_hash(struct sk_buff *skb)
{
struct ethhdr *data = (struct ethhdr *)skb->data;

if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
return (data->h_dest[5] ^ data->h_source[5]) % count;
return data->h_dest[5] ^ data->h_source[5];

return 0;
}

/*
* Hash for the output device based upon layer 2 and layer 3 data. If
* the packet is not IP, fall back on bond_xmit_hash_policy_l2()
*/
static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
/* Extract the appropriate headers based on bond's xmit policy */
static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
struct flow_keys *fk)
{
const struct ethhdr *data;
const struct ipv6hdr *iph6;
const struct iphdr *iph;
const struct ipv6hdr *ipv6h;
u32 v6hash;
const __be32 *s, *d;
int noff, proto = -1;

if (skb->protocol == htons(ETH_P_IP) &&
pskb_network_may_pull(skb, sizeof(*iph))) {
if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
return skb_flow_dissect(skb, fk);

fk->ports = 0;
noff = skb_network_offset(skb);
if (skb->protocol == htons(ETH_P_IP)) {
if (!pskb_may_pull(skb, noff + sizeof(*iph)))
return false;
iph = ip_hdr(skb);
data = (struct ethhdr *)skb->data;
return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
(data->h_dest[5] ^ data->h_source[5])) % count;
} else if (skb->protocol == htons(ETH_P_IPV6) &&
pskb_network_may_pull(skb, sizeof(*ipv6h))) {
ipv6h = ipv6_hdr(skb);
data = (struct ethhdr *)skb->data;
s = &ipv6h->saddr.s6_addr32[0];
d = &ipv6h->daddr.s6_addr32[0];
v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
}

return bond_xmit_hash_policy_l2(skb, count);
fk->src = iph->saddr;
fk->dst = iph->daddr;
noff += iph->ihl << 2;
if (!ip_is_fragment(iph))
proto = iph->protocol;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
if (!pskb_may_pull(skb, noff + sizeof(*iph6)))
return false;
iph6 = ipv6_hdr(skb);
fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
noff += sizeof(*iph6);
proto = iph6->nexthdr;
} else {
return false;
}
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
fk->ports = skb_flow_get_ports(skb, noff, proto);

return true;
}

/*
* Hash for the output device based upon layer 3 and layer 4 data. If
* the packet is a frag or not TCP or UDP, just use layer 3 data. If it is
* altogether not IP, fall back on bond_xmit_hash_policy_l2()
/**
* bond_xmit_hash - generate a hash value based on the xmit policy
* @bond: bonding device
* @skb: buffer to use for headers
* @count: modulo value
*
* This function will extract the necessary headers from the skb buffer and use
* them to generate a hash based on the xmit_policy set in the bonding device
* which will be reduced modulo count before returning.
*/
static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count)
{
u32 layer4_xor = 0;
const struct iphdr *iph;
const struct ipv6hdr *ipv6h;
const __be32 *s, *d;
const __be16 *l4 = NULL;
__be16 _l4[2];
int noff = skb_network_offset(skb);
int poff;

if (skb->protocol == htons(ETH_P_IP) &&
pskb_may_pull(skb, noff + sizeof(*iph))) {
iph = ip_hdr(skb);
poff = proto_ports_offset(iph->protocol);
struct flow_keys flow;
u32 hash;

if (!ip_is_fragment(iph) && poff >= 0) {
l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff,
sizeof(_l4), &_l4);
if (l4)
layer4_xor = ntohs(l4[0] ^ l4[1]);
}
return (layer4_xor ^
((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
} else if (skb->protocol == htons(ETH_P_IPV6) &&
pskb_may_pull(skb, noff + sizeof(*ipv6h))) {
ipv6h = ipv6_hdr(skb);
poff = proto_ports_offset(ipv6h->nexthdr);
if (poff >= 0) {
l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff,
sizeof(_l4), &_l4);
if (l4)
layer4_xor = ntohs(l4[0] ^ l4[1]);
}
s = &ipv6h->saddr.s6_addr32[0];
d = &ipv6h->daddr.s6_addr32[0];
layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
(layer4_xor >> 8);
return layer4_xor % count;
}
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
!bond_flow_dissect(bond, skb, &flow))
return bond_eth_hash(skb) % count;

if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
hash = bond_eth_hash(skb);
else
hash = (__force u32)flow.ports;
hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
hash ^= (hash >> 16);
hash ^= (hash >> 8);

return bond_xmit_hash_policy_l2(skb, count);
return hash % count;
}

/*-------------------------- Device entry points ----------------------------*/
Expand Down Expand Up @@ -3721,17 +3711,15 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
return NETDEV_TX_OK;
}

/*
* In bond_xmit_xor() , we determine the output device by using a pre-
/* In bond_xmit_xor() , we determine the output device by using a pre-
* determined xmit_hash_policy(), If the selected device is not enabled,
* find the next active slave.
*/
static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);

bond_xmit_slave_id(bond, skb,
bond->xmit_hash_policy(skb, bond->slave_cnt));
bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt));

return NETDEV_TX_OK;
}
Expand Down Expand Up @@ -3768,22 +3756,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)

/*------------------------- Device initialization ---------------------------*/

static void bond_set_xmit_hash_policy(struct bonding *bond)
{
switch (bond->params.xmit_policy) {
case BOND_XMIT_POLICY_LAYER23:
bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
break;
case BOND_XMIT_POLICY_LAYER34:
bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
break;
case BOND_XMIT_POLICY_LAYER2:
default:
bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
break;
}
}

/*
* Lookup the slave that corresponds to a qid
*/
Expand Down Expand Up @@ -3894,38 +3866,6 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}

/*
* set bond mode specific net device operations
*/
void bond_set_mode_ops(struct bonding *bond, int mode)
{
struct net_device *bond_dev = bond->dev;

switch (mode) {
case BOND_MODE_ROUNDROBIN:
break;
case BOND_MODE_ACTIVEBACKUP:
break;
case BOND_MODE_XOR:
bond_set_xmit_hash_policy(bond);
break;
case BOND_MODE_BROADCAST:
break;
case BOND_MODE_8023AD:
bond_set_xmit_hash_policy(bond);
break;
case BOND_MODE_ALB:
/* FALLTHRU */
case BOND_MODE_TLB:
break;
default:
/* Should never happen, mode already checked */
pr_err("%s: Error: Unknown bonding mode %d\n",
bond_dev->name, mode);
break;
}
}

static int bond_ethtool_get_settings(struct net_device *bond_dev,
struct ethtool_cmd *ecmd)
{
Expand Down Expand Up @@ -4027,7 +3967,6 @@ static void bond_setup(struct net_device *bond_dev)
ether_setup(bond_dev);
bond_dev->netdev_ops = &bond_netdev_ops;
bond_dev->ethtool_ops = &bond_ethtool_ops;
bond_set_mode_ops(bond, bond->params.mode);

bond_dev->destructor = bond_destructor;

Expand Down
2 changes: 0 additions & 2 deletions drivers/net/bonding/bond_sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,6 @@ static ssize_t bonding_store_mode(struct device *d,
/* don't cache arp_validate between modes */
bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
bond->params.mode = new_value;
bond_set_mode_ops(bond, bond->params.mode);
pr_info("%s: setting mode to %s (%d).\n",
bond->dev->name, bond_mode_tbl[new_value].modename,
new_value);
Expand Down Expand Up @@ -358,7 +357,6 @@ static ssize_t bonding_store_xmit_hash(struct device *d,
ret = -EINVAL;
} else {
bond->params.xmit_policy = new_value;
bond_set_mode_ops(bond, bond->params.mode);
pr_info("%s: setting xmit hash policy to %s (%d).\n",
bond->dev->name,
xmit_hashtype_tbl[new_value].modename, new_value);
Expand Down
3 changes: 1 addition & 2 deletions drivers/net/bonding/bonding.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ struct bonding {
char proc_file_name[IFNAMSIZ];
#endif /* CONFIG_PROC_FS */
struct list_head bond_list;
int (*xmit_hash_policy)(struct sk_buff *, int);
u16 rr_tx_counter;
struct ad_bond_info ad_info;
struct alb_bond_info alb_info;
Expand Down Expand Up @@ -409,7 +408,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
void bond_mii_monitor(struct work_struct *);
void bond_loadbalance_arp_mon(struct work_struct *);
void bond_activebackup_arp_mon(struct work_struct *);
void bond_set_mode_ops(struct bonding *bond, int mode);
int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count);
int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl);
void bond_select_active_slave(struct bonding *bond);
void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/linux/if_bonding.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@
#define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */
#define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */
#define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */
#define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */
#define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */

typedef struct ifbond {
__s32 bond_mode;
Expand Down

0 comments on commit 32819dc

Please sign in to comment.