Skip to content

Commit

Permalink
IB/core: Ethernet L2 attributes in verbs/cm structures
Browse files Browse the repository at this point in the history
This patch add the support for Ethernet L2 attributes in the
verbs/cm/cma structures.

When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority
in a similar manner that the IB L2 (and the L4 PKEY) attributes are used.

Thus, those attributes were added to the following structures:

* ib_ah_attr - added dmac
* ib_qp_attr - added smac and vlan_id, (sl remains vlan priority)
* ib_wc - added smac, vlan_id
* ib_sa_path_rec - added smac, dmac, vlan_id
* cm_av - added smac and vlan_id

For the path record structure, extra care was taken to avoid the new
fields when packing it into wire format, so we don't break the IB CM
and SA wire protocol.

On the active side, the CM fills. its internal structures from the
path provided by the ULP.  We add there taking the ETH L2 attributes
and placing them into the CM Address Handle (struct cm_av).

On the passive side, the CM fills its internal structures from the WC
associated with the REQ message.  We add there taking the ETH L2
attributes from the WC.

When the HW driver provides the required ETH L2 attributes in the WC,
they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core
code checks for the presence of these flags, and in their absence does
address resolution from the ib_init_ah_from_wc() helper function.

ib_modify_qp_is_ok is also updated to consider the link layer. Some
parameters are mandatory for Ethernet link layer, while they are
irrelevant for IB.  Vendor drivers are modified to support the new
function signature.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
  • Loading branch information
Matan Barak authored and Roland Dreier committed Jan 14, 2014
1 parent 7e22e91 commit dd5f03b
Show file tree
Hide file tree
Showing 18 changed files with 343 additions and 24 deletions.
97 changes: 94 additions & 3 deletions drivers/infiniband/core/addr.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ int rdma_addr_size(struct sockaddr *addr)
}
EXPORT_SYMBOL(rdma_addr_size);

static struct rdma_addr_client self;

void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
Expand Down Expand Up @@ -119,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
}
EXPORT_SYMBOL(rdma_copy_addr);

int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
Expand All @@ -142,6 +145,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
return ret;

ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;

Expand All @@ -153,6 +158,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
}
}
Expand Down Expand Up @@ -238,7 +245,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
src_in->sin_addr.s_addr = fl4.saddr;

if (rt->dst.dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
Expand Down Expand Up @@ -286,7 +293,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
}

if (dst->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
Expand Down Expand Up @@ -437,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
}
EXPORT_SYMBOL(rdma_addr_cancel);

struct resolve_cb_context {
struct rdma_dev_addr *addr;
struct completion comp;
};

static void resolve_cb(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context)
{
memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
rdma_dev_addr));
complete(&((struct resolve_cb_context *)context)->comp);
}

int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
u16 *vlan_id)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
struct resolve_cb_context ctx;
struct net_device *dev;

union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;


ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
if (ret)
return ret;

ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
if (ret)
return ret;

memset(&dev_addr, 0, sizeof(dev_addr));

ctx.addr = &dev_addr;
init_completion(&ctx.comp);
ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
&dev_addr, 1000, resolve_cb, &ctx);
if (ret)
return ret;

wait_for_completion(&ctx.comp);

memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
if (!dev)
return -ENODEV;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
return ret;
}
EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);

int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} gid_addr;

ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);

if (ret)
return ret;
memset(&dev_addr, 0, sizeof(dev_addr));
ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
if (ret)
return ret;

memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
return ret;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);

static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
Expand All @@ -461,11 +550,13 @@ static int __init addr_init(void)
return -ENOMEM;

register_netevent_notifier(&nb);
rdma_addr_register_client(&self);
return 0;
}

static void __exit addr_cleanup(void)
{
rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}
Expand Down
50 changes: 50 additions & 0 deletions drivers/infiniband/core/cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
Expand Down Expand Up @@ -177,6 +178,8 @@ struct cm_av {
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
u8 valid;
u8 smac[ETH_ALEN];
};

struct cm_work {
Expand Down Expand Up @@ -346,6 +349,23 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}

int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
{
struct cm_id_private *cm_id_priv;

cm_id_priv = container_of(id, struct cm_id_private, id);

if (smac != NULL)
memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac));

if (alt_smac != NULL)
memcpy(cm_id_priv->alt_av.smac, alt_smac,
sizeof(cm_id_priv->alt_av.smac));

return 0;
}
EXPORT_SYMBOL(ib_update_cm_av);

static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
struct cm_device *cm_dev;
Expand Down Expand Up @@ -376,6 +396,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
memcpy(av->smac, path->smac, sizeof(av->smac));

av->valid = 1;
return 0;
}

Expand Down Expand Up @@ -1554,6 +1577,9 @@ static int cm_req_handler(struct cm_work *work)

cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);

memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
Expand Down Expand Up @@ -3500,6 +3526,30 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
if (!cm_id_priv->av.valid)
return -EINVAL;
if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
*qp_attr_mask |= IB_QP_VID;
}
if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
memcpy(qp_attr->smac, cm_id_priv->av.smac,
sizeof(qp_attr->smac));
*qp_attr_mask |= IB_QP_SMAC;
}
if (cm_id_priv->alt_av.valid) {
if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
qp_attr->alt_vlan_id =
cm_id_priv->alt_av.ah_attr.vlan_id;
*qp_attr_mask |= IB_QP_ALT_VID;
}
if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
memcpy(qp_attr->alt_smac,
cm_id_priv->alt_av.smac,
sizeof(qp_attr->alt_smac));
*qp_attr_mask |= IB_QP_ALT_SMAC;
}
}
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
Expand Down
60 changes: 53 additions & 7 deletions drivers/infiniband/core/cma.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
int ret;

if (addr->sa_family != AF_IB) {
ret = rdma_translate_ip(addr, dev_addr);
ret = rdma_translate_ip(addr, dev_addr, NULL);
} else {
cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
ret = 0;
Expand Down Expand Up @@ -603,6 +603,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
union ib_gid sgid;

mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
Expand All @@ -625,6 +626,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
if (ret)
goto out;

ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
qp_attr.ah_attr.grh.sgid_index, &sgid);
if (ret)
goto out;

if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
== RDMA_TRANSPORT_IB &&
rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
== IB_LINK_LAYER_ETHERNET) {
ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);

if (ret)
goto out;
}
if (conn_param)
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
Expand Down Expand Up @@ -725,6 +740,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
else
ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
qp_attr_mask);

if (qp_attr->qp_state == IB_QPS_RTR)
qp_attr->rq_psn = id_priv->seq_num;
break;
Expand Down Expand Up @@ -1266,6 +1282,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int offset, ret;
u8 smac[ETH_ALEN];
u8 alt_smac[ETH_ALEN];
u8 *psmac = smac;
u8 *palt_smac = alt_smac;
int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
RDMA_TRANSPORT_IB) &&
(rdma_port_get_link_layer(cm_id->device,
ib_event->param.req_rcvd.port) ==
IB_LINK_LAYER_ETHERNET));

listen_id = cm_id->context;
if (!cma_check_req_qp_type(&listen_id->id, ib_event))
Expand Down Expand Up @@ -1310,12 +1335,29 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
if (ret)
goto err3;

if (is_iboe) {
if (ib_event->param.req_rcvd.primary_path != NULL)
rdma_addr_find_smac_by_sgid(
&ib_event->param.req_rcvd.primary_path->sgid,
psmac, NULL);
else
psmac = NULL;
if (ib_event->param.req_rcvd.alternate_path != NULL)
rdma_addr_find_smac_by_sgid(
&ib_event->param.req_rcvd.alternate_path->sgid,
palt_smac, NULL);
else
palt_smac = NULL;
}
/*
* Acquire mutex to prevent user executing rdma_destroy_id()
* while we're accessing the cm_id.
*/
mutex_lock(&lock);
if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
if (is_iboe)
ib_update_cm_av(cm_id, psmac, palt_smac);
if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
(conn_id->id.qp_type != IB_QPT_UD))
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
Expand Down Expand Up @@ -1474,7 +1516,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;

ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
Expand Down Expand Up @@ -1873,7 +1915,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
u16 vid;


work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
Expand All @@ -1897,10 +1939,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}

vid = rdma_vlan_dev_vlan_id(ndev);
route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);

iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr,
route->path_rec->vlan_id);
iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr,
route->path_rec->vlan_id);

route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
Expand Down
Loading

0 comments on commit dd5f03b

Please sign in to comment.