Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 327588
b: refs/heads/master
c: 6b923cb
h: refs/heads/master
v: v3
  • Loading branch information
John Eaglesham authored and David S. Miller committed Aug 23, 2012
1 parent 21fd5e9 commit c7e56cb
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 32 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: b87fb39e399137257a6db3224ea854117e9486e9
refs/heads/master: 6b923cb7188d46905f43fa84210c4c3e5f9cd8fb
30 changes: 25 additions & 5 deletions trunk/Documentation/networking/bonding.txt
Original file line number Diff line number Diff line change
Expand Up @@ -752,12 +752,22 @@ xmit_hash_policy
protocol information to generate the hash.

Uses XOR of hardware MAC addresses and IP addresses to
generate the hash. The formula is
generate the hash. The IPv4 formula is

(((source IP XOR dest IP) AND 0xffff) XOR
( source MAC XOR destination MAC ))
modulo slave count

The IPv6 formula is (where "quad N" denotes the Nth 32-bit
word of the 128-bit address)

hash = (source IP quad 2 XOR dest IP quad 2) XOR
(source IP quad 3 XOR dest IP quad 3) XOR
(source IP quad 4 XOR dest IP quad 4)

(((hash >> 24) XOR (hash >> 16) XOR (hash >> 8) XOR hash)
XOR (source MAC XOR destination MAC))
modulo slave count

This algorithm will place all traffic to a particular
network peer on the same slave. For non-IP traffic,
the formula is the same as for the layer2 transmit
Expand All @@ -778,19 +788,29 @@ xmit_hash_policy
slaves, although a single connection will not span
multiple slaves.

The formula for unfragmented TCP and UDP packets is
The formula for unfragmented IPv4 TCP and UDP packets is

((source port XOR dest port) XOR
((source IP XOR dest IP) AND 0xffff))
modulo slave count

For fragmented TCP or UDP packets and all other IP
protocol traffic, the source and destination port
The formula for unfragmented IPv6 TCP and UDP packets is

hash = (source port XOR dest port) XOR
((source IP quad 2 XOR dest IP quad 2) XOR
(source IP quad 3 XOR dest IP quad 3) XOR
(source IP quad 4 XOR dest IP quad 4))

((hash >> 24) XOR (hash >> 16) XOR (hash >> 8) XOR hash)
modulo slave count

For fragmented TCP or UDP packets and all other IPv4 and
IPv6 protocol traffic, the source and destination port
information is omitted. For non-IP traffic, the
formula is the same as for the layer2 transmit hash
policy.

This policy is intended to mimic the behavior of
The IPv4 policy is intended to mimic the behavior of
certain switches, notably Cisco switches with PFC2 as
well as some Foundry and IBM products.

Expand Down
89 changes: 63 additions & 26 deletions trunk/drivers/net/bonding/bond_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3351,57 +3351,94 @@ static struct notifier_block bond_netdev_notifier = {

/*---------------------------- Hashing Policies -----------------------------*/

/*
 * Layer 2 transmit hash.
 *
 * Returns (h_dest[5] ^ h_source[5]) % count, i.e. the XOR of the last
 * byte of the destination and source MAC addresses reduced modulo
 * count, provided the linear part of the skb is at least
 * offsetof(struct ethhdr, h_proto) bytes long.  Returns 0 otherwise.
 */
static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
{
	struct ethhdr *eth;

	/* Linear data ends before the h_proto offset: do not read the header. */
	if (skb_headlen(skb) < offsetof(struct ethhdr, h_proto))
		return 0;

	eth = (struct ethhdr *)skb->data;
	return (eth->h_dest[5] ^ eth->h_source[5]) % count;
}

/*
* Hash for the output device based upon layer 2 and layer 3 data. If
* the packet is not IP mimic bond_xmit_hash_policy_l2()
* the packet is not IP, fall back on bond_xmit_hash_policy_l2()
*
* Takes the skb being transmitted and the value the hash is reduced by
* (every return below ends in "% count" or defers to the layer 2 hash).
*
* NOTE(review): this listing appears to interleave the removed and the
* added lines of a diff without +/- markers (iph is declared twice,
* there are two "if" headers for ETH_P_IP and two final returns), so it
* is not compilable as shown. Confirm against the applied patch.
*/
static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
{
struct ethhdr *data = (struct ethhdr *)skb->data;
struct iphdr *iph = ip_hdr(skb);

if (skb->protocol == htons(ETH_P_IP)) {
struct iphdr *iph;
struct ipv6hdr *ipv6h;
u32 v6hash;
__be32 *s, *d;

/*
 * IPv4: the IP header is only read when the network header length is
 * at least sizeof(struct iphdr). The hash is the low 16 bits of
 * (saddr ^ daddr) in host byte order, XORed with the last byte of
 * the destination and source MAC addresses.
 */
if (skb->protocol == htons(ETH_P_IP) &&
skb_network_header_len(skb) >= sizeof(*iph)) {
iph = ip_hdr(skb);
return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
(data->h_dest[5] ^ data->h_source[5])) % count;
} else if (skb->protocol == htons(ETH_P_IPV6) &&
skb_network_header_len(skb) >= sizeof(*ipv6h)) {
/*
 * IPv6: only 32-bit words 1..3 of the source and destination
 * addresses take part in the hash; word 0 is not used.
 */
ipv6h = ipv6_hdr(skb);
s = &ipv6h->saddr.s6_addr32[0];
d = &ipv6h->daddr.s6_addr32[0];
/*
 * NOTE(review): s[] and d[] are __be32 and are XORed into a u32
 * without ntohl(), unlike the IPv4 branch above; the resulting
 * value presumably differs between little- and big-endian hosts.
 * Confirm whether that is intended.
 */
v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
/* Fold copies shifted right by 24, 16 and 8 bits into the value. */
v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
}

/* Neither branch matched: use the layer 2 hash only. */
return (data->h_dest[5] ^ data->h_source[5]) % count;
return bond_xmit_hash_policy_l2(skb, count);
}

/*
* Hash for the output device based upon layer 3 and layer 4 data. If
* the packet is a frag or not TCP or UDP, just use layer 3 data. If it is
* altogether not IP, mimic bond_xmit_hash_policy_l2()
* altogether not IP, fall back on bond_xmit_hash_policy_l2()
*
* Takes the skb being transmitted and the value the hash is reduced by
* (every return below ends in "% count").
*
* NOTE(review): this listing appears to interleave the removed and the
* added lines of a diff without +/- markers (duplicate declarations of
* iph, layer4hdr and layer4_xor, two tails for the inner IPv4 "if",
* unbalanced braces), so it is not compilable as shown. Confirm against
* the applied patch.
*/
static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
{
struct ethhdr *data = (struct ethhdr *)skb->data;
struct iphdr *iph = ip_hdr(skb);
/*
 * NOTE(review): this initializer reads iph->ihl before any protocol or
 * length check; the later assignment to layer4hdr is made only after
 * the length checks have passed.
 */
__be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
int layer4_xor = 0;

if (skb->protocol == htons(ETH_P_IP)) {
u32 layer4_xor = 0;
struct iphdr *iph;
struct ipv6hdr *ipv6h;
__be32 *s, *d;
__be16 *layer4hdr;

/*
 * IPv4: the IP header is only read when the network header length is
 * at least sizeof(struct iphdr).
 */
if (skb->protocol == htons(ETH_P_IP) &&
skb_network_header_len(skb) >= sizeof(*iph)) {
iph = ip_hdr(skb);
/*
 * The two 16-bit fields that follow the IP header (ihl 32-bit words
 * long) are XORed in only for unfragmented TCP or UDP, and only when
 * the linear data past the network offset is long enough to contain
 * them. Otherwise layer4_xor stays 0 and only the addresses are used.
 */
if (!ip_is_fragment(iph) &&
(iph->protocol == IPPROTO_TCP ||
iph->protocol == IPPROTO_UDP)) {
layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1)));
iph->protocol == IPPROTO_UDP) &&
(skb_headlen(skb) - skb_network_offset(skb) >=
iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) {
layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
}
/* Combine with the low 16 bits of (saddr ^ daddr) in host order. */
return (layer4_xor ^
((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;

} else if (skb->protocol == htons(ETH_P_IPV6) &&
skb_network_header_len(skb) >= sizeof(*ipv6h)) {
ipv6h = ipv6_hdr(skb);
/*
 * The layer 4 fields are read from directly behind the fixed IPv6
 * header, and only when nexthdr itself is TCP or UDP; extension
 * headers are not walked here.
 */
if ((ipv6h->nexthdr == IPPROTO_TCP ||
ipv6h->nexthdr == IPPROTO_UDP) &&
(skb_headlen(skb) - skb_network_offset(skb) >=
sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) {
layer4hdr = (__be16 *)(ipv6h + 1);
layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
}
/* Only 32-bit words 1..3 of each address are used; word 0 is not. */
s = &ipv6h->saddr.s6_addr32[0];
d = &ipv6h->daddr.s6_addr32[0];
/*
 * NOTE(review): layer4_xor is in host order (ntohs above) while s[]
 * and d[] are __be32 used without ntohl(); the result presumably
 * depends on host endianness. Confirm whether that is intended.
 */
layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
/* Fold copies shifted right by 24, 16 and 8 bits into the value. */
layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
(layer4_xor >> 8);
return layer4_xor % count;
}

/* Neither branch matched: hash on the last MAC address bytes only. */
return (data->h_dest[5] ^ data->h_source[5]) % count;
}

/*
* Hash for the output device based upon layer 2 data
*
* NOTE(review): bond_xmit_hash_policy_l2() is also defined earlier in this
* listing, with a skb_headlen() check. This copy is presumably the old
* definition removed by the patch, and the last return below presumably
* belongs to the end of bond_xmit_hash_policy_l34() in the patched file;
* read as written it would be an unreachable call of the function to
* itself. Confirm against the applied patch.
*/
static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
{
struct ethhdr *data = (struct ethhdr *)skb->data;

/* XOR of the last byte of the destination and source MAC, modulo count. */
return (data->h_dest[5] ^ data->h_source[5]) % count;
return bond_xmit_hash_policy_l2(skb, count);
}

/*-------------------------- Device entry points ----------------------------*/
Expand Down

0 comments on commit c7e56cb

Please sign in to comment.