Skip to content

Commit

Permalink
IB/core: Avoid deadlock during netlink message handling
Browse files Browse the repository at this point in the history
When rdmacm module is not loaded, and when netlink message is received to
get char device info, it results into a deadlock due to recursive locking
of rdma_nl_mutex with the below call sequence.

[..]
  rdma_nl_rcv()
  mutex_lock()
   [..]
   rdma_nl_rcv_msg()
      ib_get_client_nl_info()
         request_module()
           iw_cm_init()
             rdma_nl_register()
               mutex_lock(); <- Deadlock, acquiring mutex again

Due to above call sequence, following call trace and deadlock is observed.

  kernel: __mutex_lock+0x35e/0x860
  kernel: ? __mutex_lock+0x129/0x860
  kernel: ? rdma_nl_register+0x1a/0x90 [ib_core]
  kernel: rdma_nl_register+0x1a/0x90 [ib_core]
  kernel: ? 0xffffffffc029b000
  kernel: iw_cm_init+0x34/0x1000 [iw_cm]
  kernel: do_one_initcall+0x67/0x2d4
  kernel: ? kmem_cache_alloc_trace+0x1ec/0x2a0
  kernel: do_init_module+0x5a/0x223
  kernel: load_module+0x1998/0x1e10
  kernel: ? __symbol_put+0x60/0x60
  kernel: __do_sys_finit_module+0x94/0xe0
  kernel: do_syscall_64+0x5a/0x270
  kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe

  process stack trace:
  [<0>] __request_module+0x1c9/0x460
  [<0>] ib_get_client_nl_info+0x5e/0xb0 [ib_core]
  [<0>] nldev_get_chardev+0x1ac/0x320 [ib_core]
  [<0>] rdma_nl_rcv_msg+0xeb/0x1d0 [ib_core]
  [<0>] rdma_nl_rcv+0xcd/0x120 [ib_core]
  [<0>] netlink_unicast+0x179/0x220
  [<0>] netlink_sendmsg+0x2f6/0x3f0
  [<0>] sock_sendmsg+0x30/0x40
  [<0>] ___sys_sendmsg+0x27a/0x290
  [<0>] __sys_sendmsg+0x58/0xa0
  [<0>] do_syscall_64+0x5a/0x270
  [<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe

To overcome this deadlock and to allow multiple netlink messages to
progress in parallel, following scheme is implemented.

1. Split the lock protecting the cb_table into a per-index lock, and make
   it a rwlock. This lock is used to ensure no callbacks are running after
   unregistration returns. Since a module will not be registered once it
   is already running callbacks, this avoids the deadlock.

2. Use smp_store_release() to update the cb_table during registration so
   that no lock is required. This avoids lockdep problems with thinking
   all the rwsems are the same lock class.

Fixes: 0e2d00e ("RDMA: Add NLDEV_GET_CHARDEV to allow char dev discovery and autoload")
Link: https://lore.kernel.org/r/20191015080733.18625-1-leon@kernel.org
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
  • Loading branch information
Parav Pandit authored and Jason Gunthorpe committed Oct 24, 2019
1 parent a15542b commit 549af00
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 54 deletions.
1 change: 1 addition & 0 deletions drivers/infiniband/core/core_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);

void rdma_nl_init(void);
void rdma_nl_exit(void);

int ib_nl_handle_resolve_resp(struct sk_buff *skb,
Expand Down
2 changes: 2 additions & 0 deletions drivers/infiniband/core/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -2716,6 +2716,8 @@ static int __init ib_core_init(void)
goto err_comp_unbound;
}

rdma_nl_init();

ret = addr_init();
if (ret) {
pr_warn("Could't init IB address resolution\n");
Expand Down
107 changes: 53 additions & 54 deletions drivers/infiniband/core/netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,12 @@
#include <linux/module.h>
#include "core_priv.h"

static DEFINE_MUTEX(rdma_nl_mutex);
static struct {
const struct rdma_nl_cbs *cb_table;
const struct rdma_nl_cbs *cb_table;
/* Synchronizes between ongoing netlink commands and netlink client
* unregistration.
*/
struct rw_semaphore sem;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];

bool rdma_nl_chk_listeners(unsigned int group)
Expand Down Expand Up @@ -75,70 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
return (op < max_num_ops[type]) ? true : false;
}

static bool
is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op)
static const struct rdma_nl_cbs *
get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
{
const struct rdma_nl_cbs *cb_table;

if (!is_nl_msg_valid(type, op))
return false;

/*
* Currently only NLDEV client is supporting netlink commands in
* non init_net net namespace.
*/
if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
return false;
return NULL;

if (!rdma_nl_types[type].cb_table) {
mutex_unlock(&rdma_nl_mutex);
request_module("rdma-netlink-subsys-%d", type);
mutex_lock(&rdma_nl_mutex);
}
cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
if (!cb_table) {
/*
* Didn't get valid reference of the table, attempt module
* load once.
*/
up_read(&rdma_nl_types[type].sem);

cb_table = rdma_nl_types[type].cb_table;
request_module("rdma-netlink-subsys-%d", type);

down_read(&rdma_nl_types[type].sem);
cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
}
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
return false;
return true;
return NULL;
return cb_table;
}

void rdma_nl_register(unsigned int index,
const struct rdma_nl_cbs cb_table[])
{
mutex_lock(&rdma_nl_mutex);
if (!is_nl_msg_valid(index, 0)) {
/*
* All clients are not interesting in success/failure of
* this call. They want to see the print to error log and
* continue their initialization. Print warning for them,
* because it is programmer's error to be here.
*/
mutex_unlock(&rdma_nl_mutex);
WARN(true,
"The not-valid %u index was supplied to RDMA netlink\n",
index);
if (WARN_ON(!is_nl_msg_valid(index, 0)) ||
WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table)))
return;
}

if (rdma_nl_types[index].cb_table) {
mutex_unlock(&rdma_nl_mutex);
WARN(true,
"The %u index is already registered in RDMA netlink\n",
index);
return;
}

rdma_nl_types[index].cb_table = cb_table;
mutex_unlock(&rdma_nl_mutex);
/* Pairs with the READ_ONCE in is_nl_valid() */
smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
}
EXPORT_SYMBOL(rdma_nl_register);

void rdma_nl_unregister(unsigned int index)
{
mutex_lock(&rdma_nl_mutex);
down_write(&rdma_nl_types[index].sem);
rdma_nl_types[index].cb_table = NULL;
mutex_unlock(&rdma_nl_mutex);
up_write(&rdma_nl_types[index].sem);
}
EXPORT_SYMBOL(rdma_nl_unregister);

Expand Down Expand Up @@ -170,39 +156,46 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int index = RDMA_NL_GET_CLIENT(type);
unsigned int op = RDMA_NL_GET_OP(type);
const struct rdma_nl_cbs *cb_table;
int err = -EINVAL;

if (!is_nl_valid(skb, index, op))
if (!is_nl_msg_valid(index, op))
return -EINVAL;

cb_table = rdma_nl_types[index].cb_table;
down_read(&rdma_nl_types[index].sem);
cb_table = get_cb_table(skb, index, op);
if (!cb_table)
goto done;

if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
!netlink_capable(skb, CAP_NET_ADMIN)) {
err = -EPERM;
goto done;
}

/*
* LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't
* mistakenly call the .dump() function.
*/
if (index == RDMA_NL_LS) {
if (cb_table[op].doit)
return cb_table[op].doit(skb, nlh, extack);
return -EINVAL;
err = cb_table[op].doit(skb, nlh, extack);
goto done;
}
/* FIXME: Convert IWCM to properly handle doit callbacks */
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
struct netlink_dump_control c = {
.dump = cb_table[op].dump,
};
if (c.dump)
return netlink_dump_start(skb->sk, skb, nlh, &c);
return -EINVAL;
err = netlink_dump_start(skb->sk, skb, nlh, &c);
goto done;
}

if (cb_table[op].doit)
return cb_table[op].doit(skb, nlh, extack);

return 0;
err = cb_table[op].doit(skb, nlh, extack);
done:
up_read(&rdma_nl_types[index].sem);
return err;
}

/*
Expand Down Expand Up @@ -263,9 +256,7 @@ static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,

static void rdma_nl_rcv(struct sk_buff *skb)
{
mutex_lock(&rdma_nl_mutex);
rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
mutex_unlock(&rdma_nl_mutex);
}

int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
Expand Down Expand Up @@ -297,6 +288,14 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
}
EXPORT_SYMBOL(rdma_nl_multicast);

void rdma_nl_init(void)
{
int idx;

for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
init_rwsem(&rdma_nl_types[idx].sem);
}

void rdma_nl_exit(void)
{
int idx;
Expand Down

0 comments on commit 549af00

Please sign in to comment.