Skip to content

Commit

Permalink
IB/mlx4: Add support for IBoE
Browse files Browse the repository at this point in the history
Add support for IBoE to mlx4_ib.  The bulk of the code is handling the
new address vector fields; mlx4 needs the MAC address of a remote node
to include it in a WQE (for datagrams) or in the QP context (for
connected QPs).  Address resolution is done by assuming all unicast
GIDs are either link-local IPv6 addresses.

Multicast group attach/detach needs to update the NIC's multicast
filters; but since attaching a QP to a multicast group can be done
before the QP is bound to a port, for IBoE we need to keep track of
all multicast groups that a QP is attached too before it transitions
from INIT to RTR (since it does not have a port in the INIT state).

Signed-off-by: Eli Cohen <eli@mellanox.co.il>

[ Many things cleaned up and otherwise monkeyed with; hope I didn't
  introduce too many bugs.  - Roland ]

Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Eli Cohen authored and Roland Dreier committed Oct 25, 2010
1 parent 7ac870e commit fa417f7
Show file tree
Hide file tree
Showing 6 changed files with 714 additions and 108 deletions.
153 changes: 120 additions & 33 deletions drivers/infiniband/hw/mlx4/ah.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,66 +30,153 @@
* SOFTWARE.
*/

#include <rdma/ib_addr.h>

#include <linux/slab.h>
#include <linux/inet.h>
#include <linux/string.h>

#include "mlx4_ib.h"

struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port)
{
struct mlx4_dev *dev = to_mdev(pd->device)->dev;
struct mlx4_ib_ah *ah;
struct in6_addr in6;

ah = kmalloc(sizeof *ah, GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
*is_mcast = 0;

memset(&ah->av, 0, sizeof ah->av);
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
if (rdma_link_local_addr(&in6))
rdma_get_ll_mac(&in6, mac);
else if (rdma_is_multicast_addr(&in6)) {
rdma_get_mcast_mac(&in6, mac);
*is_mcast = 1;
} else
return -EINVAL;

ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.g_slid = ah_attr->src_path_bits;
ah->av.dlid = cpu_to_be16(ah_attr->dlid);
if (ah_attr->static_rate) {
ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
--ah->av.stat_rate;
}
ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
return 0;
}

static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
struct mlx4_dev *dev = to_mdev(pd->device)->dev;

ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.ib.g_slid = ah_attr->src_path_bits;
if (ah_attr->ah_flags & IB_AH_GRH) {
ah->av.g_slid |= 0x80;
ah->av.gid_index = ah_attr->grh.sgid_index;
ah->av.hop_limit = ah_attr->grh.hop_limit;
ah->av.sl_tclass_flowlabel |=
ah->av.ib.g_slid |= 0x80;
ah->av.ib.gid_index = ah_attr->grh.sgid_index;
ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
ah->av.ib.sl_tclass_flowlabel |=
cpu_to_be32((ah_attr->grh.traffic_class << 20) |
ah_attr->grh.flow_label);
memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
}

ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid);
if (ah_attr->static_rate) {
ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
--ah->av.ib.stat_rate;
}
ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);

return &ah->ibah;
}

static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
u8 mac[6];
int err;
int is_mcast;

err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
if (err)
return ERR_PTR(err);

memcpy(ah->av.eth.mac, mac, 6);
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.eth.gid_index = ah_attr->grh.sgid_index;
if (ah_attr->static_rate) {
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
--ah->av.eth.stat_rate;
}

/*
* HW requires multicast LID so we just choose one.
*/
if (is_mcast)
ah->av.ib.dlid = cpu_to_be16(0xc000);

memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);

return &ah->ibah;
}

struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct mlx4_ib_ah *ah;
struct ib_ah *ret;

ah = kzalloc(sizeof *ah, GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);

if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
if (!(ah_attr->ah_flags & IB_AH_GRH)) {
ret = ERR_PTR(-EINVAL);
} else {
/*
* TBD: need to handle the case when we get
* called in an atomic context and there we
* might sleep. We don't expect this
* currently since we're working with link
* local addresses which we can translate
* without going to sleep.
*/
ret = create_iboe_ah(pd, ah_attr, ah);
}

if (IS_ERR(ret))
kfree(ah);

return ret;
} else
return create_ib_ah(pd, ah_attr, ah); /* never fails */
}

int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
struct mlx4_ib_ah *ah = to_mah(ibah);
enum rdma_link_layer ll;

memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(ah->av.dlid);
ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24;
if (ah->av.stat_rate)
ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
if (ah->av.ib.stat_rate)
ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;

if (mlx4_ib_ah_grh_present(ah)) {
ah_attr->ah_flags = IB_AH_GRH;

ah_attr->grh.traffic_class =
be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
ah_attr->grh.flow_label =
be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
ah_attr->grh.hop_limit = ah->av.hop_limit;
ah_attr->grh.sgid_index = ah->av.gid_index;
memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
ah_attr->grh.hop_limit = ah->av.ib.hop_limit;
ah_attr->grh.sgid_index = ah->av.ib.gid_index;
memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
}

return 0;
Expand Down
32 changes: 20 additions & 12 deletions drivers/infiniband/hw/mlx4/mad.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,19 +311,25 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
struct ib_mad_agent *agent;
int p, q;
int ret;
enum rdma_link_layer ll;

for (p = 0; p < dev->num_ports; ++p)
for (p = 0; p < dev->num_ports; ++p) {
ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1);
for (q = 0; q <= 1; ++q) {
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
NULL, NULL);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
}
dev->send_agent[p][q] = agent;
if (ll == IB_LINK_LAYER_INFINIBAND) {
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
NULL, NULL);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
}
dev->send_agent[p][q] = agent;
} else
dev->send_agent[p][q] = NULL;
}
}

return 0;

Expand All @@ -344,8 +350,10 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
for (p = 0; p < dev->num_ports; ++p) {
for (q = 0; q <= 1; ++q) {
agent = dev->send_agent[p][q];
dev->send_agent[p][q] = NULL;
ib_unregister_mad_agent(agent);
if (agent) {
dev->send_agent[p][q] = NULL;
ib_unregister_mad_agent(agent);
}
}

if (dev->sm_ah[p])
Expand Down
Loading

0 comments on commit fa417f7

Please sign in to comment.