Skip to content

Commit

Permalink
RDMA/netdev: Fix netlink support in IPoIB
Browse files Browse the repository at this point in the history
IPoIB netlink support was broken by the below commit since integrating
the rdma_netdev support relies on an allocation flow for netdevs that
was controlled by the ipoib driver while netdev's rtnl_newlink
implementation assumes that the netdev will be allocated by netlink.
Such situation leads to crash in __ipoib_device_add, once trying to
reuse netlink device.

This patch fixes the kernel oops for both mlx4 and mlx5
devices triggered by the following command:

Fixes: cd565b4 ("IB/IPoIB: Support acceleration options callbacks")
Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
  • Loading branch information
Denis Drozdov authored and Saeed Mahameed committed Oct 11, 2018
1 parent f6a8a19 commit 5d6b0cb
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 72 deletions.
28 changes: 21 additions & 7 deletions drivers/infiniband/core/verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -2643,13 +2643,27 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
if (!netdev)
return ERR_PTR(-ENOMEM);

rc = params.initialize_rdma_netdev(device, port_num, netdev,
params.param);
if (rc) {
free_netdev(netdev);
return ERR_PTR(rc);
}

return netdev;
}
EXPORT_SYMBOL(rdma_alloc_netdev);

int rdma_init_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
struct net_device *netdev)
{
struct rdma_netdev_alloc_params params;
int rc;

if (!device->rdma_netdev_get_params)
return -EOPNOTSUPP;

rc = device->rdma_netdev_get_params(device, port_num, type, &params);
if (rc)
return rc;

return params.initialize_rdma_netdev(device, port_num,
netdev, params.param);
}
EXPORT_SYMBOL(rdma_init_netdev);
8 changes: 6 additions & 2 deletions drivers/infiniband/ulp/ipoib/ipoib.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,8 +499,10 @@ void ipoib_reap_ah(struct work_struct *work);
struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
const char *format);
struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port,
const char *format);
int ipoib_intf_init(struct ib_device *hca, u8 port, const char *format,
struct net_device *dev);
void ipoib_ib_tx_timer_func(struct timer_list *t);
void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work);
Expand Down Expand Up @@ -531,6 +533,8 @@ int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv,
struct ipoib_tx_buf *tx_req);

struct rtnl_link_ops *ipoib_get_link_ops(void);

static inline void ipoib_build_sge(struct ipoib_dev_priv *priv,
struct ipoib_tx_buf *tx_req)
{
Expand Down
123 changes: 70 additions & 53 deletions drivers/infiniband/ulp/ipoib/ipoib_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2115,77 +2115,58 @@ static const struct net_device_ops ipoib_netdev_default_pf = {
.ndo_stop = ipoib_ib_dev_stop_default,
};

static struct net_device
*ipoib_create_netdev_default(struct ib_device *hca,
const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *))
{
struct net_device *dev;
struct rdma_netdev *rn;

dev = alloc_netdev((int)sizeof(struct rdma_netdev),
name,
name_assign_type, setup);
if (!dev)
return NULL;

rn = netdev_priv(dev);

rn->send = ipoib_send;
rn->attach_mcast = ipoib_mcast_attach;
rn->detach_mcast = ipoib_mcast_detach;
rn->hca = hca;
dev->netdev_ops = &ipoib_netdev_default_pf;

return dev;
}

static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
const char *name)
static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port,
const char *name)
{
struct net_device *dev;

dev = rdma_alloc_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
NET_NAME_UNKNOWN, ipoib_setup_common);
if (!IS_ERR(dev))
if (!IS_ERR(dev) || PTR_ERR(dev) != -EOPNOTSUPP)
return dev;
if (PTR_ERR(dev) != -EOPNOTSUPP)
return NULL;

return ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
ipoib_setup_common);
dev = alloc_netdev(sizeof(struct rdma_netdev), name, NET_NAME_UNKNOWN,
ipoib_setup_common);
if (!dev)
return ERR_PTR(-ENOMEM);
return dev;
}

struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
const char *name)
int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name,
struct net_device *dev)
{
struct net_device *dev;
struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_dev_priv *priv;
struct rdma_netdev *rn;
int rc;

priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return NULL;
return -ENOMEM;

priv->ca = hca;
priv->port = port;

dev = ipoib_get_netdev(hca, port, name);
if (!dev)
goto free_priv;
rc = rdma_init_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
NET_NAME_UNKNOWN, ipoib_setup_common, dev);
if (rc) {
if (rc != -EOPNOTSUPP)
goto out;

dev->netdev_ops = &ipoib_netdev_default_pf;
rn->send = ipoib_send;
rn->attach_mcast = ipoib_mcast_attach;
rn->detach_mcast = ipoib_mcast_detach;
rn->hca = hca;
}

priv->rn_ops = dev->netdev_ops;

/* fixme : should be after the query_cap */
if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
dev->netdev_ops = &ipoib_netdev_ops_vf;
else
dev->netdev_ops = &ipoib_netdev_ops_pf;

rn = netdev_priv(dev);
rn->clnt_priv = priv;

/*
* Only the child register_netdev flows can handle priv_destructor
* being set, so we force it to NULL here and handle manually until it
Expand All @@ -2196,10 +2177,35 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,

ipoib_build_priv(dev);

return priv;
free_priv:
return 0;

out:
kfree(priv);
return NULL;
return rc;
}

struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port,
const char *name)
{
struct net_device *dev;
int rc;

dev = ipoib_alloc_netdev(hca, port, name);
if (IS_ERR(dev))
return dev;

rc = ipoib_intf_init(hca, port, name, dev);
if (rc) {
free_netdev(dev);
return ERR_PTR(rc);
}

/*
* Upon success the caller must ensure ipoib_intf_free is called or
* register_netdevice succeed'd and priv_destructor is set to
* ipoib_intf_free.
*/
return dev;
}

void ipoib_intf_free(struct net_device *dev)
Expand Down Expand Up @@ -2382,16 +2388,19 @@ int ipoib_add_pkey_attr(struct net_device *dev)
static struct net_device *ipoib_add_port(const char *format,
struct ib_device *hca, u8 port)
{
struct rtnl_link_ops *ops = ipoib_get_link_ops();
struct rdma_netdev_alloc_params params;
struct ipoib_dev_priv *priv;
struct net_device *ndev;
int result;

priv = ipoib_intf_alloc(hca, port, format);
if (!priv) {
pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
return ERR_PTR(-ENOMEM);
ndev = ipoib_intf_alloc(hca, port, format);
if (IS_ERR(ndev)) {
pr_warn("%s, %d: ipoib_intf_alloc failed %ld\n", hca->name, port,
PTR_ERR(ndev));
return ndev;
}
ndev = priv->dev;
priv = ipoib_priv(ndev);

INIT_IB_EVENT_HANDLER(&priv->event_handler,
priv->ca, ipoib_event);
Expand All @@ -2412,6 +2421,14 @@ static struct net_device *ipoib_add_port(const char *format,
return ERR_PTR(result);
}

if (hca->rdma_netdev_get_params) {
int rc = hca->rdma_netdev_get_params(hca, port,
RDMA_NETDEV_IPOIB,
&params);

if (!rc && ops->priv_size < params.sizeof_priv)
ops->priv_size = params.sizeof_priv;
}
/*
* We cannot set priv_destructor before register_netdev because we
* need priv to be always valid during the error flow to execute
Expand Down
23 changes: 21 additions & 2 deletions drivers/infiniband/ulp/ipoib/ipoib_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,26 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
} else
child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]);

err = ipoib_intf_init(ppriv->ca, ppriv->port, dev->name, dev);
if (err) {
ipoib_warn(ppriv, "failed to initialize pkey device\n");
return err;
}

err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
child_pkey, IPOIB_RTNL_CHILD);
if (err)
return err;

if (!err && data)
if (data) {
err = ipoib_changelink(dev, tb, data, extack);
return err;
if (err) {
unregister_netdevice(dev);
return err;
}
}

return 0;
}

static size_t ipoib_get_size(const struct net_device *dev)
Expand All @@ -149,6 +163,11 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.fill_info = ipoib_fill_info,
};

struct rtnl_link_ops *ipoib_get_link_ops(void)
{
return &ipoib_link_ops;
}

int __init ipoib_netlink_init(void)
{
return rtnl_link_register(&ipoib_link_ops);
Expand Down
19 changes: 11 additions & 8 deletions drivers/infiniband/ulp/ipoib/ipoib_vlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ static bool is_child_unique(struct ipoib_dev_priv *ppriv,

/*
* NOTE: If this function fails then the priv->dev will remain valid, however
* priv can have been freed and must not be touched by caller in the error
* priv will have been freed and must not be touched by caller in the error
* case.
*
* If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to
Expand All @@ -100,6 +100,12 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,

ASSERT_RTNL();

/*
* We do not need to touch priv if register_netdevice fails, so just
* always use this flow.
*/
ndev->priv_destructor = ipoib_intf_free;

/*
* Racing with unregister of the parent must be prevented by the
* caller.
Expand All @@ -120,9 +126,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto out_early;
}

/* We do not need to touch priv if register_netdevice fails */
ndev->priv_destructor = ipoib_intf_free;

result = register_netdevice(ndev);
if (result) {
ipoib_warn(priv, "failed to initialize; error %i", result);
Expand Down Expand Up @@ -182,12 +185,12 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
snprintf(intf_name, sizeof(intf_name), "%s.%04x",
ppriv->dev->name, pkey);

priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv) {
result = -ENOMEM;
ndev = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (IS_ERR(ndev)) {
result = PTR_ERR(ndev);
goto out;
}
ndev = priv->dev;
priv = ipoib_priv(ndev);

result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);

Expand Down
7 changes: 7 additions & 0 deletions include/rdma/ib_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -4198,4 +4198,11 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *));

int rdma_init_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
struct net_device *netdev);

#endif /* IB_VERBS_H */

0 comments on commit 5d6b0cb

Please sign in to comment.