Skip to content

Commit

Permalink
IB/mlx4: Add VLAN support for IBoE
Browse files Browse the repository at this point in the history
This patch allows IBoE traffic to be encapsulated in 802.1Q tagged
VLAN frames.  The VLAN tag is encoded in the GID and derived from it
by a simple computation.

The netdev notifier callback is modified to catch VLAN device
addition/removal and the port's GID table is updated to reflect the
change, so that for each netdevice there is an entry in the GID table.
When the port's GID table is exhausted, GID entries will not be added.
Only children of the main interfaces can add to the GID table; if a
VLAN interface is added on another VLAN interface (e.g. "vconfig add
eth2.6 8"), then that interfaces will not add an entry to the GID
table.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Eli Cohen authored and Roland Dreier committed Oct 25, 2010
1 parent af7bd46 commit 4c3eb3c
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 28 deletions.
10 changes: 10 additions & 0 deletions drivers/infiniband/hw/mlx4/ah.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
*/

#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

#include <linux/slab.h>
#include <linux/inet.h>
Expand Down Expand Up @@ -91,17 +92,26 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
union ib_gid sgid;
u8 mac[6];
int err;
int is_mcast;
u16 vlan_tag;

err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
if (err)
return ERR_PTR(err);

memcpy(ah->av.eth.mac, mac, 6);
err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid);
if (err)
return ERR_PTR(err);
vlan_tag = rdma_get_vlan_id(&sgid);
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.eth.gid_index = ah_attr->grh.sgid_index;
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
if (ah_attr->static_rate) {
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
Expand Down
99 changes: 87 additions & 12 deletions drivers/infiniband/hw/mlx4/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>

#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
Expand Down Expand Up @@ -79,6 +80,8 @@ static void init_query_mad(struct ib_smp *mad)
mad->method = IB_MGMT_METHOD_GET;
}

static union ib_gid zgid;

static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
Expand Down Expand Up @@ -755,12 +758,17 @@ static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_board_id
};

static void mlx4_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
{
memcpy(eui, dev->dev_addr, 3);
memcpy(eui + 5, dev->dev_addr + 3, 3);
eui[3] = 0xFF;
eui[4] = 0xFE;
if (vlan_id < 0x1000) {
eui[3] = vlan_id >> 8;
eui[4] = vlan_id & 0xff;
} else {
eui[3] = 0xff;
eui[4] = 0xfe;
}
eui[0] ^= 2;
}

Expand Down Expand Up @@ -802,28 +810,93 @@ static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
{
struct net_device *ndev = dev->iboe.netdevs[port - 1];
struct update_gid_work *work;
struct net_device *tmp;
int i;
u8 *hits;
int ret;
union ib_gid gid;
int free;
int found;
int need_update = 0;
u16 vid;

work = kzalloc(sizeof *work, GFP_ATOMIC);
if (!work)
return -ENOMEM;

if (!clear) {
mlx4_addrconf_ifid_eui48(&work->gids[0].raw[8], ndev);
work->gids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
hits = kzalloc(128, GFP_ATOMIC);
if (!hits) {
ret = -ENOMEM;
goto out;
}

read_lock(&dev_base_lock);
for_each_netdev(&init_net, tmp) {
if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
vid = rdma_vlan_dev_vlan_id(tmp);
mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
found = 0;
free = -1;
for (i = 0; i < 128; ++i) {
if (free < 0 &&
!memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
free = i;
if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
hits[i] = 1;
found = 1;
break;
}
}

if (!found) {
if (tmp == ndev &&
(memcmp(&dev->iboe.gid_table[port - 1][0],
&gid, sizeof gid) ||
!memcmp(&dev->iboe.gid_table[port - 1][0],
&zgid, sizeof gid))) {
dev->iboe.gid_table[port - 1][0] = gid;
++need_update;
hits[0] = 1;
} else if (free >= 0) {
dev->iboe.gid_table[port - 1][free] = gid;
hits[free] = 1;
++need_update;
}
}
}
}
read_unlock(&dev_base_lock);

for (i = 0; i < 128; ++i)
if (!hits[i]) {
if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
++need_update;
dev->iboe.gid_table[port - 1][i] = zgid;
}

INIT_WORK(&work->work, update_gids_task);
work->port = port;
work->dev = dev;
queue_work(wq, &work->work);
if (need_update) {
memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
INIT_WORK(&work->work, update_gids_task);
work->port = port;
work->dev = dev;
queue_work(wq, &work->work);
} else
kfree(work);

kfree(hits);
return 0;

out:
kfree(work);
return ret;
}

static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
{
switch (event) {
case NETDEV_UP:
case NETDEV_CHANGEADDR:
update_ipv6_gids(dev, port, 0);
break;

Expand Down Expand Up @@ -871,9 +944,11 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event
}
}

if (dev == iboe->netdevs[0])
if (dev == iboe->netdevs[0] ||
(iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
handle_en_event(ibdev, 1, event);
else if (dev == iboe->netdevs[1])
else if (dev == iboe->netdevs[1]
|| (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
handle_en_event(ibdev, 2, event);

spin_unlock(&iboe->lock);
Expand Down
79 changes: 64 additions & 15 deletions drivers/infiniband/hw/mlx4/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>

#include <linux/mlx4/qp.h>

Expand All @@ -57,10 +58,11 @@ enum {
enum {
/*
* Largest possible UD header: send with GRH and immediate
* data plus 14 bytes for an Ethernet header. (LRH would only
* use 8 bytes, so Ethernet is the biggest case)
* data plus 18 bytes for an Ethernet header with VLAN/802.1Q
* tag. (LRH would only use 8 bytes, so Ethernet is the
* biggest case)
*/
MLX4_IB_UD_HEADER_SIZE = 78,
MLX4_IB_UD_HEADER_SIZE = 82,
MLX4_IB_LSO_HEADER_SPARE = 128,
};

Expand Down Expand Up @@ -879,6 +881,8 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
IB_LINK_LAYER_ETHERNET;
u8 mac[6];
int is_mcast;
u16 vlan_tag;
int vidx;

path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
Expand Down Expand Up @@ -907,10 +911,10 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
memcpy(path->rgid, ah->grh.dgid.raw, 16);
}

path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);

if (is_eth) {
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 7) << 3) | ((ah->sl & 8) >> 1);

if (!(ah->ah_flags & IB_AH_GRH))
return -1;

Expand All @@ -922,7 +926,18 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
path->ackto = MLX4_IB_LINK_TYPE_ETH;
/* use index 0 into MAC table for IBoE */
path->grh_mylmc &= 0x80;
}

vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
if (vlan_tag < 0x1000) {
if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
return -ENOENT;

path->vlan_index = vidx;
path->fl = 1 << 6;
}
} else
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);

return 0;
}
Expand Down Expand Up @@ -1277,21 +1292,30 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
union ib_gid sgid;
u16 pkey;
int send_size;
int header_size;
int spc;
int i;
int is_eth;
int is_vlan = 0;
int is_grh;
u16 vlan;

send_size = 0;
for (i = 0; i < wr->num_sge; ++i)
send_size += wr->sg_list[i].length;

is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
ib_ud_header_init(send_size, !is_eth, is_eth, 0, is_grh, 0, &sqp->ud_header);
if (is_eth) {
ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sgid);
vlan = rdma_get_vlan_id(&sgid);
is_vlan = vlan < 0x1000;
}
ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);

if (!is_eth) {
sqp->ud_header.lrh.service_level =
Expand Down Expand Up @@ -1345,7 +1369,15 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
if (!is_vlan) {
sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
} else {
u16 pcp;

sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13;
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
Expand Down Expand Up @@ -1507,13 +1539,14 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
}

static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
struct ib_send_wr *wr)
struct ib_send_wr *wr, __be16 *vlan)
{
memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
*vlan = dseg->vlan;
}

static void set_mlx_icrc_seg(void *dseg)
Expand Down Expand Up @@ -1616,6 +1649,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
__be32 uninitialized_var(lso_hdr_sz);
__be32 blh;
int i;
__be16 vlan = cpu_to_be16(0xffff);

spin_lock_irqsave(&qp->sq.lock, flags);

Expand Down Expand Up @@ -1719,7 +1753,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;

case IB_QPT_UD:
set_datagram_seg(wqe, wr);
set_datagram_seg(wqe, wr, &vlan);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;

Expand Down Expand Up @@ -1797,6 +1831,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;

if (be16_to_cpu(vlan) < 0x1000) {
ctrl->ins_vlan = 1 << 6;
ctrl->vlan_tag = vlan;
}

stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);

Expand Down Expand Up @@ -1946,17 +1985,27 @@ static int to_ib_qp_access_flags(int mlx4_flags)
return ib_flags;
}

static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
struct mlx4_qp_path *path)
{
struct mlx4_dev *dev = ibdev->dev;
int is_eth;

memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;

if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
return;

is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
IB_LINK_LAYER_ETHERNET;
if (is_eth)
ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
((path->sched_queue & 4) << 1);
else
ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;

ib_ah_attr->dlid = be16_to_cpu(path->rlid);
ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
Expand Down Expand Up @@ -2009,8 +2058,8 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
to_ib_qp_access_flags(be32_to_cpu(context.params2));

if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
}
Expand Down
Loading

0 comments on commit 4c3eb3c

Please sign in to comment.