Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "Most are minor to important fixes.

  There is one performance enhancement that I took on the grounds that
  failing to check if other processes can run before running what's
  intended to be a background, idle-time task is a bug, even though the
  primary effect of the fix is to improve performance (and it was a very
  simple patch)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/mlx5: Postpone remove_keys under knowledge of coming preemption
  IB/mlx4: Use vmalloc for WR buffers when needed
  IB/mlx4: Use correct order of variables in log message
  iser-target: Remove explicit mlx4 work-around
  mlx4: Expose correct max_sge_rd limit
  IB/mad: Require CM send method for everything except ClassPortInfo
  IB/cma: Add a missing rcu_read_unlock()
  IB core: Fix ib_sg_to_pages()
  IB/srp: Fix srp_map_sg_fr()
  IB/srp: Fix indirect data buffer rkey endianness
  IB/srp: Initialize dma_length in srp_map_idb
  IB/srp: Fix possible send queue overflow
  IB/srp: Fix a memory leak
  IB/sa: Put netlink request into the request list before sending
  IB/iser: use sector_div instead of do_div
  IB/core: use RCU for uverbs id lookup
  IB/qib: Minor fixes to qib per SFF 8636
  IB/core: Fix user mode post wr corruption
  IB/qib: Fix qib_mr structure
Linus Torvalds committed Dec 10, 2015
2 parents a80c47d + ab5cdc3 commit 0bd0f1e
Showing 19 changed files with 146 additions and 102 deletions.
5 changes: 1 addition & 4 deletions drivers/infiniband/core/cma.c
@@ -1126,10 +1126,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
 
         rcu_read_lock();
         err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
-        if (err)
-                return false;
-
-        ret = FIB_RES_DEV(res) == net_dev;
+        ret = err == 0 && FIB_RES_DEV(res) == net_dev;
         rcu_read_unlock();
 
         return ret;
5 changes: 5 additions & 0 deletions drivers/infiniband/core/mad.c
@@ -1811,6 +1811,11 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr,
                 if (qp_num == 0)
                         valid = 1;
         } else {
+                /* CM attributes other than ClassPortInfo only use Send method */
+                if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
+                    (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
+                    (mad_hdr->method != IB_MGMT_METHOD_SEND))
+                        goto out;
                 /* Filter GSI packets sent to QP0 */
                 if (qp_num != 0)
                         valid = 1;
32 changes: 17 additions & 15 deletions drivers/infiniband/core/sa_query.c
@@ -512,7 +512,7 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
         return len;
 }
 
-static int ib_nl_send_msg(struct ib_sa_query *query)
+static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
 {
         struct sk_buff *skb = NULL;
         struct nlmsghdr *nlh;
@@ -526,7 +526,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
         if (len <= 0)
                 return -EMSGSIZE;
 
-        skb = nlmsg_new(len, GFP_KERNEL);
+        skb = nlmsg_new(len, gfp_mask);
         if (!skb)
                 return -ENOMEM;
 
@@ -544,7 +544,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
         /* Repair the nlmsg header length */
         nlmsg_end(skb, nlh);
 
-        ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
+        ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask);
         if (!ret)
                 ret = len;
         else
@@ -553,7 +553,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
         return ret;
 }
 
-static int ib_nl_make_request(struct ib_sa_query *query)
+static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
 {
         unsigned long flags;
         unsigned long delay;
@@ -562,25 +562,27 @@ static int ib_nl_make_request(struct ib_sa_query *query)
         INIT_LIST_HEAD(&query->list);
         query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
 
+        /* Put the request on the list first.*/
         spin_lock_irqsave(&ib_nl_request_lock, flags);
-        ret = ib_nl_send_msg(query);
-        if (ret <= 0) {
-                ret = -EIO;
-                goto request_out;
-        } else {
-                ret = 0;
-        }
-
         delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
         query->timeout = delay + jiffies;
         list_add_tail(&query->list, &ib_nl_request_list);
         /* Start the timeout if this is the only request */
         if (ib_nl_request_list.next == &query->list)
                 queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
-
-request_out:
         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
 
+        ret = ib_nl_send_msg(query, gfp_mask);
+        if (ret <= 0) {
+                ret = -EIO;
+                /* Remove the request */
+                spin_lock_irqsave(&ib_nl_request_lock, flags);
+                list_del(&query->list);
+                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
+        } else {
+                ret = 0;
+        }
+
         return ret;
 }
 
@@ -1108,7 +1110,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
 
         if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
                 if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
-                        if (!ib_nl_make_request(query))
+                        if (!ib_nl_make_request(query, gfp_mask))
                                 return id;
                 }
                 ib_sa_disable_local_svc(query);
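The ordering change above matters because the netlink response can arrive as soon as the message is sent; if the request is only added to the request list afterwards, the response handler may not find it. A minimal sketch of the pattern, using illustrative names (req_list, req_lock and send_netlink_msg() are not the driver's identifiers):

#include <linux/list.h>
#include <linux/spinlock.h>

struct request {
        struct list_head list;
        /* ... request state ... */
};

static LIST_HEAD(req_list);
static DEFINE_SPINLOCK(req_lock);

static int send_netlink_msg(struct request *req);      /* hypothetical sender */

/*
 * Publish the request before sending so a response that arrives
 * immediately can still find it; unpublish it again if the send fails.
 */
static int make_request(struct request *req)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&req_lock, flags);
        list_add_tail(&req->list, &req_list);
        spin_unlock_irqrestore(&req_lock, flags);

        ret = send_netlink_msg(req);
        if (ret <= 0) {
                spin_lock_irqsave(&req_lock, flags);
                list_del(&req->list);
                spin_unlock_irqrestore(&req_lock, flags);
                return -EIO;
        }

        return 0;
}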
27 changes: 17 additions & 10 deletions drivers/infiniband/core/uverbs_cmd.c
@@ -62,9 +62,11 @@ static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
  * The ib_uobject locking scheme is as follows:
  *
  * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- *   needs to be held during all idr operations.  When an object is
+ *   needs to be held during all idr write operations.  When an object is
  *   looked up, a reference must be taken on the object's kref before
- *   dropping this lock.
+ *   dropping this lock.  For read operations, the rcu_read_lock()
+ *   and rcu_write_lock() but similarly the kref reference is grabbed
+ *   before the rcu_read_unlock().
  *
  * - Each object also has an rwsem.  This rwsem must be held for
  *   reading while an operation that uses the object is performed.
@@ -96,7 +98,7 @@ static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
 
 static void release_uobj(struct kref *kref)
 {
-        kfree(container_of(kref, struct ib_uobject, ref));
+        kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
 }
 
 static void put_uobj(struct ib_uobject *uobj)
@@ -145,15 +147,15 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 {
         struct ib_uobject *uobj;
 
-        spin_lock(&ib_uverbs_idr_lock);
+        rcu_read_lock();
         uobj = idr_find(idr, id);
         if (uobj) {
                 if (uobj->context == context)
                         kref_get(&uobj->ref);
                 else
                         uobj = NULL;
         }
-        spin_unlock(&ib_uverbs_idr_lock);
+        rcu_read_unlock();
 
         return uobj;
 }
@@ -2446,6 +2448,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
         int i, sg_ind;
         int is_ud;
         ssize_t ret = -EINVAL;
+        size_t next_size;
 
         if (copy_from_user(&cmd, buf, sizeof cmd))
                 return -EFAULT;
@@ -2490,7 +2493,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                                 goto out_put;
                         }
 
-                        ud = alloc_wr(sizeof(*ud), user_wr->num_sge);
+                        next_size = sizeof(*ud);
+                        ud = alloc_wr(next_size, user_wr->num_sge);
                         if (!ud) {
                                 ret = -ENOMEM;
                                 goto out_put;
@@ -2511,7 +2515,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                            user_wr->opcode == IB_WR_RDMA_READ) {
                         struct ib_rdma_wr *rdma;
 
-                        rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge);
+                        next_size = sizeof(*rdma);
+                        rdma = alloc_wr(next_size, user_wr->num_sge);
                         if (!rdma) {
                                 ret = -ENOMEM;
                                 goto out_put;
@@ -2525,7 +2530,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                            user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
                         struct ib_atomic_wr *atomic;
 
-                        atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge);
+                        next_size = sizeof(*atomic);
+                        atomic = alloc_wr(next_size, user_wr->num_sge);
                         if (!atomic) {
                                 ret = -ENOMEM;
                                 goto out_put;
@@ -2540,7 +2546,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                 } else if (user_wr->opcode == IB_WR_SEND ||
                            user_wr->opcode == IB_WR_SEND_WITH_IMM ||
                            user_wr->opcode == IB_WR_SEND_WITH_INV) {
-                        next = alloc_wr(sizeof(*next), user_wr->num_sge);
+                        next_size = sizeof(*next);
+                        next = alloc_wr(next_size, user_wr->num_sge);
                         if (!next) {
                                 ret = -ENOMEM;
                                 goto out_put;
@@ -2572,7 +2579,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 
                 if (next->num_sge) {
                         next->sg_list = (void *) next +
-                                ALIGN(sizeof *next, sizeof (struct ib_sge));
+                                ALIGN(next_size, sizeof(struct ib_sge));
                         if (copy_from_user(next->sg_list,
                                            buf + sizeof cmd +
                                            cmd.wr_count * cmd.wqe_size +
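To see why tracking next_size matters: each kernel work request is allocated as the opcode-specific struct followed by its scatter/gather array, so sg_list must be placed past the size that was actually allocated, not past the generic ib_send_wr header. A small user-space illustration with mock struct layouts (the real ib_* definitions are larger; the sizes here are only for demonstration):

#include <stdint.h>
#include <stdio.h>

#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

struct ib_sge     { uint64_t addr; uint32_t length; uint32_t lkey; };
struct ib_send_wr { uint64_t wr_id; void *sg_list; int num_sge; };
struct ib_rdma_wr { struct ib_send_wr wr; uint64_t remote_addr; uint32_t rkey; };

int main(void)
{
        /* Offset used before the fix: based on the generic send WR only. */
        size_t old_off = ALIGN(sizeof(struct ib_send_wr), sizeof(struct ib_sge));
        /* Offset used after the fix: based on what was really allocated. */
        size_t new_off = ALIGN(sizeof(struct ib_rdma_wr), sizeof(struct ib_sge));

        printf("allocated RDMA WR size: %zu\n", sizeof(struct ib_rdma_wr));
        printf("old sg_list offset    : %zu (inside the WR -> corruption)\n", old_off);
        printf("new sg_list offset    : %zu (past the WR -> safe)\n", new_off);
        return 0;
}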
43 changes: 22 additions & 21 deletions drivers/infiniband/core/verbs.c
@@ -1516,7 +1516,7 @@ EXPORT_SYMBOL(ib_map_mr_sg);
  * @sg_nents: number of entries in sg
  * @set_page: driver page assignment function pointer
  *
- * Core service helper for drivers to covert the largest
+ * Core service helper for drivers to convert the largest
  * prefix of given sg list to a page vector. The sg list
  * prefix converted is the prefix that meet the requirements
  * of ib_map_mr_sg.
@@ -1533,7 +1533,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
         u64 last_end_dma_addr = 0, last_page_addr = 0;
         unsigned int last_page_off = 0;
         u64 page_mask = ~((u64)mr->page_size - 1);
-        int i;
+        int i, ret;
 
         mr->iova = sg_dma_address(&sgl[0]);
         mr->length = 0;
@@ -1544,27 +1544,29 @@ int ib_sg_to_pages(struct ib_mr *mr,
                 u64 end_dma_addr = dma_addr + dma_len;
                 u64 page_addr = dma_addr & page_mask;
 
-                if (i && page_addr != dma_addr) {
-                        if (last_end_dma_addr != dma_addr) {
-                                /* gap */
-                                goto done;
-
-                        } else if (last_page_off + dma_len <= mr->page_size) {
-                                /* chunk this fragment with the last */
-                                mr->length += dma_len;
-                                last_end_dma_addr += dma_len;
-                                last_page_off += dma_len;
-                                continue;
-                        } else {
-                                /* map starting from the next page */
-                                page_addr = last_page_addr + mr->page_size;
-                                dma_len -= mr->page_size - last_page_off;
-                        }
+                /*
+                 * For the second and later elements, check whether either the
+                 * end of element i-1 or the start of element i is not aligned
+                 * on a page boundary.
+                 */
+                if (i && (last_page_off != 0 || page_addr != dma_addr)) {
+                        /* Stop mapping if there is a gap. */
+                        if (last_end_dma_addr != dma_addr)
+                                break;
+
+                        /*
+                         * Coalesce this element with the last. If it is small
+                         * enough just update mr->length. Otherwise start
+                         * mapping from the next page.
+                         */
+                        goto next_page;
                 }
 
                 do {
-                        if (unlikely(set_page(mr, page_addr)))
-                                goto done;
+                        ret = set_page(mr, page_addr);
+                        if (unlikely(ret < 0))
+                                return i ? : ret;
+next_page:
                         page_addr += mr->page_size;
                 } while (page_addr < end_dma_addr);
 
@@ -1574,7 +1576,6 @@ int ib_sg_to_pages(struct ib_mr *mr,
                 last_page_off = end_dma_addr & ~page_mask;
         }
 
-done:
         return i;
 }
 EXPORT_SYMBOL(ib_sg_to_pages);
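For context, ib_sg_to_pages() is driven by a per-driver set_page callback and, with this fix, reports how many sg elements it consumed (or the callback's error if nothing was mapped). A hedged sketch of the driver side, with hypothetical mydrv_* names that are not in the tree:

#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* Hypothetical driver-private MR bookkeeping. */
struct mydrv_mr {
        struct ib_mr    ibmr;
        u64             *pages;
        int             npages;
        int             max_pages;
};

/* Record one page address; refuse it once the descriptor list is full. */
static int mydrv_set_page(struct ib_mr *ibmr, u64 dma_addr)
{
        struct mydrv_mr *mr = container_of(ibmr, struct mydrv_mr, ibmr);

        if (unlikely(mr->npages == mr->max_pages))
                return -ENOMEM;

        mr->pages[mr->npages++] = dma_addr;
        return 0;
}

/*
 * map_mr_sg hook: returns how many sg elements were fully mapped, or the
 * set_page() error if nothing could be mapped at all.
 */
static int mydrv_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                           int sg_nents)
{
        struct mydrv_mr *mr = container_of(ibmr, struct mydrv_mr, ibmr);

        mr->npages = 0;
        return ib_sg_to_pages(ibmr, sg, sg_nents, mydrv_set_page);
}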
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/mlx4/main.c
@@ -456,7 +456,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
         props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
         props->max_sge = min(dev->dev->caps.max_sq_sg,
                              dev->dev->caps.max_rq_sg);
-        props->max_sge_rd = props->max_sge;
+        props->max_sge_rd = MLX4_MAX_SGE_RD;
         props->max_cq = dev->dev->quotas.cq;
         props->max_cqe = dev->dev->caps.max_cqes;
         props->max_mr = dev->dev->quotas.mpt;
19 changes: 13 additions & 6 deletions drivers/infiniband/hw/mlx4/qp.c
@@ -34,6 +34,7 @@
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
+#include <linux/vmalloc.h>
 
 #include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
@@ -795,8 +796,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                 if (err)
                         goto err_mtt;
 
-                qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
-                qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+                qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+                if (!qp->sq.wrid)
+                        qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+                                                gfp, PAGE_KERNEL);
+                qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+                if (!qp->rq.wrid)
+                        qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
+                                                gfp, PAGE_KERNEL);
                 if (!qp->sq.wrid || !qp->rq.wrid) {
                         err = -ENOMEM;
                         goto err_wrid;
@@ -886,8 +893,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                 if (qp_has_rq(init_attr))
                         mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
         } else {
-                kfree(qp->sq.wrid);
-                kfree(qp->rq.wrid);
+                kvfree(qp->sq.wrid);
+                kvfree(qp->rq.wrid);
         }
 
 err_mtt:
@@ -1062,8 +1069,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
                               &qp->db);
                 ib_umem_release(qp->umem);
         } else {
-                kfree(qp->sq.wrid);
-                kfree(qp->rq.wrid);
+                kvfree(qp->sq.wrid);
+                kvfree(qp->rq.wrid);
                 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
                     MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
                         free_proxy_bufs(&dev->ib_dev, qp);
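The allocation pattern above (try kmalloc, fall back to vmalloc, free either one with kvfree) can be expressed as a small helper. This is only an illustration of the idiom, not part of the patch; the wrid_alloc() name is hypothetical:

#include <linux/slab.h>
#include <linux/vmalloc.h>

/*
 * Try a physically contiguous allocation first; for large WR-id arrays
 * fall back to vmalloc so the allocation can still succeed under memory
 * fragmentation.  The result must be freed with kvfree().
 */
static void *wrid_alloc(size_t size, gfp_t gfp)
{
        void *buf = kmalloc(size, gfp);

        if (!buf)
                buf = __vmalloc(size, gfp, PAGE_KERNEL);
        return buf;
}

With such a helper, the paired kmalloc/__vmalloc call sites above would reduce to single wrid_alloc() calls.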
11 changes: 8 additions & 3 deletions drivers/infiniband/hw/mlx4/srq.c
@@ -34,6 +34,7 @@
 #include <linux/mlx4/qp.h>
 #include <linux/mlx4/srq.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include "mlx4_ib.h"
 #include "user.h"
@@ -172,8 +173,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 
                 srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
                 if (!srq->wrid) {
-                        err = -ENOMEM;
-                        goto err_mtt;
+                        srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
+                                              GFP_KERNEL, PAGE_KERNEL);
+                        if (!srq->wrid) {
+                                err = -ENOMEM;
+                                goto err_mtt;
+                        }
                 }
         }
 
@@ -204,7 +209,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
         if (pd->uobject)
                 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
         else
-                kfree(srq->wrid);
+                kvfree(srq->wrid);
 
 err_mtt:
         mlx4_mtt_cleanup(dev->dev, &srq->mtt);
14 changes: 13 additions & 1 deletion drivers/infiniband/hw/mlx5/mr.c
@@ -381,7 +381,19 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
                         }
                 }
         } else if (ent->cur > 2 * ent->limit) {
-                if (!someone_adding(cache) &&
+                /*
+                 * The remove_keys() logic is performed as garbage collection
+                 * task. Such task is intended to be run when no other active
+                 * processes are running.
+                 *
+                 * The need_resched() will return TRUE if there are user tasks
+                 * to be activated in near future.
+                 *
+                 * In such case, we don't execute remove_keys() and postpone
+                 * the garbage collection work to try to run in next cycle,
+                 * in order to free CPU resources to other tasks.
+                 */
+                if (!need_resched() && !someone_adding(cache) &&
                     time_after(jiffies, cache->last_add + 300 * HZ)) {
                         remove_keys(dev, i, 1);
                         if (ent->cur > ent->limit)
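The comment above describes the general pattern of a low-priority housekeeping job stepping aside when runnable tasks exist. A generic sketch of that pattern, with illustrative names (gc_dwork and do_garbage_collection() are not mlx5 symbols):

#include <linux/sched.h>
#include <linux/workqueue.h>

static void do_garbage_collection(void);        /* hypothetical expensive cleanup */

static void gc_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(gc_dwork, gc_work_fn);

static void gc_work_fn(struct work_struct *work)
{
        /*
         * Runnable user tasks are waiting: postpone the idle-time cleanup
         * for another cycle instead of competing with them for the CPU.
         */
        if (need_resched()) {
                queue_delayed_work(system_wq, &gc_dwork, HZ);
                return;
        }

        do_garbage_collection();
}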
(Diffs for the remaining 10 of the 19 changed files are not shown here.)
