From e217b30452a4e51f3091afdb704d940670464a41 Mon Sep 17 00:00:00 2001
From: David Brownell
Date: Tue, 27 Sep 2005 10:19:39 -0700
Subject: [PATCH]

--- yaml ---
r: 10748
b: refs/heads/master
c: 72f30b6f2f5f68dd426e9fe83817b882c2d04e50
h: refs/heads/master
v: v3
---
 [refs]                                         |    2 +-
 trunk/Documentation/networking/ip-sysctl.txt   |    2 +-
 trunk/drivers/infiniband/core/agent.c          |  293 ++++--
 trunk/drivers/infiniband/core/agent.h          |   13 +-
 trunk/drivers/infiniband/core/agent_priv.h     |   62 ++
 trunk/drivers/infiniband/core/cm.c             |  217 +++--
 trunk/drivers/infiniband/core/cm_msgs.h        |    1 -
 trunk/drivers/infiniband/core/device.c         |   12 -
 trunk/drivers/infiniband/core/mad.c            |  335 +++----
 trunk/drivers/infiniband/core/mad_priv.h       |    8 +-
 trunk/drivers/infiniband/core/mad_rmpp.c       |  114 +--
 trunk/drivers/infiniband/core/mad_rmpp.h       |    2 -
 trunk/drivers/infiniband/core/sa_query.c       |  270 +++---
 trunk/drivers/infiniband/core/smi.h            |    2 -
 trunk/drivers/infiniband/core/sysfs.c          |   16 -
 trunk/drivers/infiniband/core/ucm.c            |  267 ++----
 trunk/drivers/infiniband/core/ucm.h            |   83 ++
 trunk/drivers/infiniband/core/user_mad.c       |  399 ++++----
 trunk/drivers/infiniband/core/uverbs.h         |   62 +-
 trunk/drivers/infiniband/core/uverbs_cmd.c     |  858 ++++-------------
 trunk/drivers/infiniband/core/uverbs_main.c    |  503 ++++------
 trunk/drivers/infiniband/core/verbs.c          |   18 +-
 trunk/drivers/infiniband/hw/mthca/Makefile     |    3 +-
 .../drivers/infiniband/hw/mthca/mthca_catas.c  |  153 ----
 trunk/drivers/infiniband/hw/mthca/mthca_cmd.c  |   11 +-
 trunk/drivers/infiniband/hw/mthca/mthca_dev.h  |   22 -
 trunk/drivers/infiniband/hw/mthca/mthca_eq.c   |   21 +-
 trunk/drivers/infiniband/hw/mthca/mthca_mad.c  |   72 +-
 .../drivers/infiniband/hw/mthca/mthca_main.c   |   11 -
 trunk/drivers/infiniband/hw/mthca/mthca_mcg.c  |   11 +-
 .../infiniband/hw/mthca/mthca_memfree.c        |    3 +-
 .../infiniband/hw/mthca/mthca_memfree.h        |    3 +-
 .../infiniband/hw/mthca/mthca_provider.c       |   49 +-
 trunk/drivers/infiniband/hw/mthca/mthca_qp.c   |   16 +-
 trunk/drivers/infiniband/hw/mthca/mthca_srq.c  |   43 +-
 .../drivers/infiniband/hw/mthca/mthca_user.h   |    6 -
 trunk/drivers/infiniband/ulp/ipoib/ipoib.h     |   23 +-
 trunk/drivers/infiniband/ulp/ipoib/ipoib_ib.c  |  120 +--
 .../drivers/infiniband/ulp/ipoib/ipoib_main.c  |   15 +-
 .../infiniband/ulp/ipoib/ipoib_verbs.c         |    9 +-
 trunk/drivers/usb/host/ehci-hcd.c              |    8 +
 trunk/include/linux/etherdevice.h              |   16 -
 trunk/include/net/sctp/user.h                  |    8 +-
 trunk/include/rdma/ib_cm.h                     |   10 +-
 trunk/include/rdma/ib_mad.h                    |   66 +-
 trunk/include/rdma/ib_user_cm.h                |   10 +-
 trunk/include/rdma/ib_user_verbs.h             |  222 +---
 trunk/include/rdma/ib_verbs.h                  |    6 +-
 trunk/net/ethernet/eth.c                       |   17 +-
 trunk/net/ipv4/fib_frontend.c                  |    2 +-
 trunk/net/sctp/sm_make_chunk.c                 |    2 +-
 trunk/net/sctp/socket.c                        |   90 +-
 trunk/net/sctp/ulpevent.c                      |    6 +-
 53 files changed, 1837 insertions(+), 2756 deletions(-)
 create mode 100644 trunk/drivers/infiniband/core/agent_priv.h
 create mode 100644 trunk/drivers/infiniband/core/ucm.h
 delete mode 100644 trunk/drivers/infiniband/hw/mthca/mthca_catas.c

diff --git a/[refs] b/[refs]
index 701b63ae078e..b10da5cf5d3e 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 9fcc2e8a752f7d3d889114221b67c459557823e9
+refs/heads/master: 72f30b6f2f5f68dd426e9fe83817b882c2d04e50

diff --git a/trunk/Documentation/networking/ip-sysctl.txt b/trunk/Documentation/networking/ip-sysctl.txt
index 65895bb51414..b433c8a27e2d 100644
--- a/trunk/Documentation/networking/ip-sysctl.txt
+++ b/trunk/Documentation/networking/ip-sysctl.txt
@@ -309,7 +309,7 @@ tcp_tso_win_divisor - INTEGER
 	can be consumed by a single TSO frame.
 	The setting of this parameter is a choice between burstiness and
 	building larger TSO frames.
-	Default: 3
+	Default: 8
 
 tcp_frto - BOOLEAN
 	Enables F-RTO, an enhanced recovery algorithm for TCP retransmission

diff --git a/trunk/drivers/infiniband/core/agent.c b/trunk/drivers/infiniband/core/agent.c
index 0c3c6952faae..5ac86f566dc0 100644
--- a/trunk/drivers/infiniband/core/agent.c
+++ b/trunk/drivers/infiniband/core/agent.c
@@ -37,41 +37,58 @@
  * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
  */
 
-#include "agent.h"
-#include "smi.h"
+#include <linux/dma-mapping.h>
+
+#include <asm/bug.h>
 
-#define SPFX "ib_agent: "
+#include <rdma/ib_smi.h>
 
-struct ib_agent_port_private {
-	struct list_head port_list;
-	struct ib_mad_agent *agent[2];
-};
+#include "smi.h"
+#include "agent_priv.h"
+#include "mad_priv.h"
+#include "agent.h"
 
-static DEFINE_SPINLOCK(ib_agent_port_list_lock);
+spinlock_t ib_agent_port_list_lock;
 static LIST_HEAD(ib_agent_port_list);
 
-static struct ib_agent_port_private *
-__ib_get_agent_port(struct ib_device *device, int port_num)
+/*
+ * Caller must hold ib_agent_port_list_lock
+ */
+static inline struct ib_agent_port_private *
+__ib_get_agent_port(struct ib_device *device, int port_num,
+		    struct ib_mad_agent *mad_agent)
 {
 	struct ib_agent_port_private *entry;
 
-	list_for_each_entry(entry, &ib_agent_port_list, port_list) {
-		if (entry->agent[0]->device == device &&
-		    entry->agent[0]->port_num == port_num)
-			return entry;
+	BUG_ON(!(!!device ^ !!mad_agent));  /* Exactly one MUST be (!NULL) */
+
+	if (device) {
+		list_for_each_entry(entry, &ib_agent_port_list, port_list) {
+			if (entry->smp_agent->device == device &&
+			    entry->port_num == port_num)
+				return entry;
+		}
+	} else {
+		list_for_each_entry(entry, &ib_agent_port_list, port_list) {
+			if ((entry->smp_agent == mad_agent) ||
+			    (entry->perf_mgmt_agent == mad_agent))
+				return entry;
+		}
 	}
 	return NULL;
 }
 
-static struct ib_agent_port_private *
-ib_get_agent_port(struct ib_device *device, int port_num)
+static inline struct ib_agent_port_private *
+ib_get_agent_port(struct ib_device *device, int port_num,
+		  struct ib_mad_agent *mad_agent)
 {
 	struct ib_agent_port_private *entry;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ib_agent_port_list_lock, flags);
-	entry = __ib_get_agent_port(device, port_num);
+	entry = __ib_get_agent_port(device, port_num, mad_agent);
 	spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+
 	return entry;
 }
 
@@ -83,76 +100,192 @@ int smi_check_local_dr_smp(struct ib_smp *smp,
 
 	if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
 		return 1;
-
-	port_priv = ib_get_agent_port(device, port_num);
+	port_priv = ib_get_agent_port(device, port_num, NULL);
 	if (!port_priv) {
 		printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
-		       "not open\n", device->name, port_num);
+		       "not open\n",
+		       device->name, port_num);
 		return 1;
 	}
 
-	return smi_check_local_smp(port_priv->agent[0], smp);
+	return smi_check_local_smp(port_priv->smp_agent, smp);
 }
 
-int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
-			struct ib_wc *wc, struct ib_device *device,
-			int port_num, int qpn)
+static int agent_mad_send(struct ib_mad_agent *mad_agent,
+			  struct ib_agent_port_private *port_priv,
+			  struct ib_mad_private *mad_priv,
+			  struct ib_grh *grh,
+			  struct ib_wc *wc)
 {
-	struct ib_agent_port_private *port_priv;
-	struct ib_mad_agent *agent;
-	struct ib_mad_send_buf *send_buf;
-	struct ib_ah *ah;
-	int ret;
+	struct ib_agent_send_wr *agent_send_wr;
+	struct ib_sge gather_list;
+	struct ib_send_wr send_wr;
+	struct ib_send_wr *bad_send_wr;
+	struct ib_ah_attr ah_attr;
+	unsigned long flags;
+	int ret = 1;
 
-	port_priv = ib_get_agent_port(device, port_num);
-	if (!port_priv) {
-		printk(KERN_ERR SPFX "Unable to find port agent\n");
-		return -ENODEV;
+	agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL);
+	if (!agent_send_wr)
+		goto out;
+	agent_send_wr->mad = mad_priv;
+
+	gather_list.addr = dma_map_single(mad_agent->device->dma_device,
+					  &mad_priv->mad,
+					  sizeof(mad_priv->mad),
+					  DMA_TO_DEVICE);
+	gather_list.length = sizeof(mad_priv->mad);
+	gather_list.lkey = mad_agent->mr->lkey;
+
+	send_wr.next = NULL;
+	send_wr.opcode = IB_WR_SEND;
+	send_wr.sg_list = &gather_list;
+	send_wr.num_sge = 1;
+	send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */
+	send_wr.wr.ud.timeout_ms = 0;
+	send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+
+	ah_attr.dlid = wc->slid;
+	ah_attr.port_num = mad_agent->port_num;
+	ah_attr.src_path_bits = wc->dlid_path_bits;
+	ah_attr.sl = wc->sl;
+	ah_attr.static_rate = 0;
+	ah_attr.ah_flags = 0; /* No GRH */
+	if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
+		if (wc->wc_flags & IB_WC_GRH) {
+			ah_attr.ah_flags = IB_AH_GRH;
+			/* Should sgid be looked up ? */
+			ah_attr.grh.sgid_index = 0;
+			ah_attr.grh.hop_limit = grh->hop_limit;
+			ah_attr.grh.flow_label = be32_to_cpu(
+				grh->version_tclass_flow) & 0xfffff;
+			ah_attr.grh.traffic_class = (be32_to_cpu(
+				grh->version_tclass_flow) >> 20) & 0xff;
+			memcpy(ah_attr.grh.dgid.raw,
+			       grh->sgid.raw,
+			       sizeof(ah_attr.grh.dgid));
+		}
 	}
 
-	agent = port_priv->agent[qpn];
-	ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
-	if (IS_ERR(ah)) {
-		ret = PTR_ERR(ah);
-		printk(KERN_ERR SPFX "ib_create_ah_from_wc error:%d\n", ret);
-		return ret;
+	agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr);
+	if (IS_ERR(agent_send_wr->ah)) {
+		printk(KERN_ERR SPFX "No memory for address handle\n");
+		kfree(agent_send_wr);
+		goto out;
 	}
 
-	send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
-				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
-				      GFP_KERNEL);
-	if (IS_ERR(send_buf)) {
-		ret = PTR_ERR(send_buf);
-		printk(KERN_ERR SPFX "ib_create_send_mad error:%d\n", ret);
-		goto err1;
+	send_wr.wr.ud.ah = agent_send_wr->ah;
+	if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
+		send_wr.wr.ud.pkey_index = wc->pkey_index;
+		send_wr.wr.ud.remote_qkey = IB_QP1_QKEY;
+	} else {	/* for SMPs */
+		send_wr.wr.ud.pkey_index = 0;
+		send_wr.wr.ud.remote_qkey = 0;
 	}
+	send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr;
+	send_wr.wr_id = (unsigned long)agent_send_wr;
 
-	memcpy(send_buf->mad, mad, sizeof *mad);
-	send_buf->ah = ah;
-	if ((ret = ib_post_send_mad(send_buf, NULL))) {
-		printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret);
-		goto err2;
+	pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr);
+
+	/* Send */
+	spin_lock_irqsave(&port_priv->send_list_lock, flags);
+	if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) {
+		spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+		dma_unmap_single(mad_agent->device->dma_device,
+				 pci_unmap_addr(agent_send_wr, mapping),
+				 sizeof(mad_priv->mad),
+				 DMA_TO_DEVICE);
+		ib_destroy_ah(agent_send_wr->ah);
+		kfree(agent_send_wr);
+	} else {
+		list_add_tail(&agent_send_wr->send_list,
+			      &port_priv->send_posted_list);
+		spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+		ret = 0;
 	}
-	return 0;
-err2:
-	ib_free_send_mad(send_buf);
-err1:
-	ib_destroy_ah(ah);
+
+out:
 	return ret;
 }
 
+int agent_send(struct ib_mad_private *mad,
+	       struct ib_grh *grh,
+	       struct ib_wc *wc,
+	       struct ib_device *device,
+	       int port_num)
+{
+	struct ib_agent_port_private *port_priv;
+	struct ib_mad_agent *mad_agent;
+
+	port_priv = ib_get_agent_port(device, port_num, NULL);
+	if (!port_priv) {
+		printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n",
+		       device->name, port_num);
+		return 1;
+	}
+
+	/* Get mad agent based on mgmt_class in MAD */
+	switch (mad->mad.mad.mad_hdr.mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+		mad_agent = port_priv->smp_agent;
+		break;
+	case IB_MGMT_CLASS_PERF_MGMT:
+		mad_agent = port_priv->perf_mgmt_agent;
+		break;
+	default:
+		return 1;
+	}
+
+	return agent_mad_send(mad_agent, port_priv, mad, grh, wc);
+}
+
 static void agent_send_handler(struct ib_mad_agent *mad_agent,
 			       struct ib_mad_send_wc *mad_send_wc)
 {
-	ib_destroy_ah(mad_send_wc->send_buf->ah);
-	ib_free_send_mad(mad_send_wc->send_buf);
+	struct ib_agent_port_private *port_priv;
+	struct ib_agent_send_wr *agent_send_wr;
+	unsigned long flags;
+
+	/* Find matching MAD agent */
+	port_priv = ib_get_agent_port(NULL, 0, mad_agent);
+	if (!port_priv) {
+		printk(KERN_ERR SPFX "agent_send_handler: no matching MAD "
+		       "agent %p\n", mad_agent);
+		return;
+	}
+
+	agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id;
+	spin_lock_irqsave(&port_priv->send_list_lock, flags);
+	/* Remove completed send from posted send MAD list */
+	list_del(&agent_send_wr->send_list);
+	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+
+	dma_unmap_single(mad_agent->device->dma_device,
+			 pci_unmap_addr(agent_send_wr, mapping),
+			 sizeof(agent_send_wr->mad->mad),
+			 DMA_TO_DEVICE);
+
+	ib_destroy_ah(agent_send_wr->ah);
+
+	/* Release allocated memory */
+	kmem_cache_free(ib_mad_cache, agent_send_wr->mad);
+	kfree(agent_send_wr);
 }
 
 int ib_agent_port_open(struct ib_device *device, int port_num)
 {
+	int ret;
 	struct ib_agent_port_private *port_priv;
 	unsigned long flags;
-	int ret;
+
+	/* First, check if port already open for SMI */
+	port_priv = ib_get_agent_port(device, port_num, NULL);
+	if (port_priv) {
+		printk(KERN_DEBUG SPFX "%s port %d already open\n",
+		       device->name, port_num);
+		return 0;
+	}
 
 	/* Create new device info */
 	port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
@@ -161,25 +294,32 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 		ret = -ENOMEM;
 		goto error1;
 	}
+	memset(port_priv, 0, sizeof *port_priv);
+	port_priv->port_num = port_num;
+	spin_lock_init(&port_priv->send_list_lock);
+	INIT_LIST_HEAD(&port_priv->send_posted_list);
 
-	/* Obtain send only MAD agent for SMI QP */
-	port_priv->agent[0] = ib_register_mad_agent(device, port_num,
-						    IB_QPT_SMI, NULL, 0,
+	/* Obtain send only MAD agent for SM class (SMI QP) */
+	port_priv->smp_agent = ib_register_mad_agent(device, port_num,
+						     IB_QPT_SMI,
+						     NULL, 0,
 						    &agent_send_handler,
-						    NULL, NULL);
-	if (IS_ERR(port_priv->agent[0])) {
-		ret = PTR_ERR(port_priv->agent[0]);
+						     NULL, NULL);
+
+	if (IS_ERR(port_priv->smp_agent)) {
+		ret = PTR_ERR(port_priv->smp_agent);
 		goto error2;
 	}
 
-	/* Obtain send only MAD agent for GSI QP */
-	port_priv->agent[1] = ib_register_mad_agent(device, port_num,
-						    IB_QPT_GSI, NULL, 0,
-						    &agent_send_handler,
-						    NULL, NULL);
-	if (IS_ERR(port_priv->agent[1])) {
-		ret = PTR_ERR(port_priv->agent[1]);
+	/* Obtain send only MAD agent for PerfMgmt class (GSI QP) */
+	port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num,
+							   IB_QPT_GSI,
+							   NULL, 0,
+							   &agent_send_handler,
+							   NULL, NULL);
+	if (IS_ERR(port_priv->perf_mgmt_agent)) {
+		ret = PTR_ERR(port_priv->perf_mgmt_agent);
 		goto error3;
 	}
 
@@ -190,7 +330,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 	return 0;
 
 error3:
-	ib_unregister_mad_agent(port_priv->agent[0]);
+	ib_unregister_mad_agent(port_priv->smp_agent);
 error2:
 	kfree(port_priv);
 error1:
@@ -203,7 +343,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
 	unsigned long flags;
 
 	spin_lock_irqsave(&ib_agent_port_list_lock, flags);
-	port_priv = __ib_get_agent_port(device, port_num);
+	port_priv = __ib_get_agent_port(device, port_num, NULL);
 	if (port_priv == NULL) {
 		spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
 		printk(KERN_ERR SPFX "Port %d not found\n", port_num);
@@ -212,8 +352,9 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
 	list_del(&port_priv->port_list);
 	spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
 
-	ib_unregister_mad_agent(port_priv->agent[1]);
-	ib_unregister_mad_agent(port_priv->agent[0]);
+	ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
+	ib_unregister_mad_agent(port_priv->smp_agent);
 	kfree(port_priv);
+
 	return 0;
 }

diff --git a/trunk/drivers/infiniband/core/agent.h b/trunk/drivers/infiniband/core/agent.h
index c5f3cfec942a..d9426842254a 100644
--- a/trunk/drivers/infiniband/core/agent.h
+++ b/trunk/drivers/infiniband/core/agent.h
@@ -39,14 +39,17 @@
 #ifndef __AGENT_H_
 #define __AGENT_H_
 
-#include <rdma/ib_mad.h>
+extern spinlock_t ib_agent_port_list_lock;
 
-extern int ib_agent_port_open(struct ib_device *device, int port_num);
+extern int ib_agent_port_open(struct ib_device *device,
+			      int port_num);
 
 extern int ib_agent_port_close(struct ib_device *device, int port_num);
 
-extern int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
-			       struct ib_wc *wc, struct ib_device *device,
-			       int port_num, int qpn);
+extern int agent_send(struct ib_mad_private *mad,
+		      struct ib_grh *grh,
+		      struct ib_wc *wc,
+		      struct ib_device *device,
+		      int port_num);
 
 #endif	/* __AGENT_H_ */

diff --git a/trunk/drivers/infiniband/core/agent_priv.h b/trunk/drivers/infiniband/core/agent_priv.h
new file mode 100644
index 000000000000..2ec6d7f1b7d0
--- /dev/null
+++ b/trunk/drivers/infiniband/core/agent_priv.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2004, 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $
+ */
+
+#ifndef __IB_AGENT_PRIV_H__
+#define __IB_AGENT_PRIV_H__
+
+#include <linux/pci.h>
+
+#define SPFX "ib_agent: "
+
+struct ib_agent_send_wr {
+	struct list_head send_list;
+	struct ib_ah *ah;
+	struct ib_mad_private *mad;
+	DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+struct ib_agent_port_private {
+	struct list_head port_list;
+	struct list_head send_posted_list;
+	spinlock_t send_list_lock;
+	int port_num;
+	struct ib_mad_agent *smp_agent;	      /* SM class */
+	struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */
+};
+
+#endif	/* __IB_AGENT_PRIV_H__ */

diff --git a/trunk/drivers/infiniband/core/cm.c b/trunk/drivers/infiniband/core/cm.c
index 580c3a2bb102..54db6d4831f1 100644
--- a/trunk/drivers/infiniband/core/cm.c
+++ b/trunk/drivers/infiniband/core/cm.c
@@ -135,7 +135,6 @@ struct cm_id_private {
 	__be64 tid;
 	__be32 local_qpn;
 	__be32 remote_qpn;
-	enum ib_qp_type qp_type;
 	__be32 sq_psn;
 	__be32 rq_psn;
 	int timeout_ms;
@@ -176,7 +175,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 
 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
 			       cm_id_priv->av.pkey_index,
-			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+			       ah, 0, sizeof(struct ib_mad_hdr),
+			       sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
 			       GFP_ATOMIC);
 	if (IS_ERR(m)) {
 		ib_destroy_ah(ah);
@@ -184,8 +184,7 @@ static int cm_alloc_msg(struct cm_id_priv *cm_id_priv,
 	}
 
 	/* Timeout set by caller if response is expected.
*/ - m->ah = ah; - m->retries = cm_id_priv->max_cm_retries; + m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries; atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; @@ -206,20 +205,20 @@ static int cm_alloc_response_msg(struct cm_port *port, return PTR_ERR(ah); m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, - 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, + ah, 0, sizeof(struct ib_mad_hdr), + sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), GFP_ATOMIC); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); } - m->ah = ah; *msg = m; return 0; } static void cm_free_msg(struct ib_mad_send_buf *msg) { - ib_destroy_ah(msg->ah); + ib_destroy_ah(msg->send_wr.wr.ud.ah); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); @@ -367,15 +366,9 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device)) - return cur_cm_id_priv; - - if (cm_id_priv->id.device < cur_cm_id_priv->id.device) - link = &(*link)->rb_left; - else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) - link = &(*link)->rb_right; - else if (service_id < cur_cm_id_priv->id.service_id) + (service_mask & cur_cm_id_priv->id.service_id)) + return cm_id_priv; + if (service_id < cur_cm_id_priv->id.service_id) link = &(*link)->rb_left; else link = &(*link)->rb_right; @@ -385,8 +378,7 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) return NULL; } -static struct cm_id_private * cm_find_listen(struct ib_device *device, - __be64 service_id) +static struct cm_id_private * cm_find_listen(__be64 service_id) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -394,15 +386,9 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device, while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); if ((cm_id_priv->id.service_mask & service_id) == - cm_id_priv->id.service_id && - (cm_id_priv->id.device == device)) + (cm_id_priv->id.service_mask & cm_id_priv->id.service_id)) return cm_id_priv; - - if (device < cm_id_priv->id.device) - node = node->rb_left; - else if (device > cm_id_priv->id.device) - node = node->rb_right; - else if (service_id < cm_id_priv->id.service_id) + if (service_id < cm_id_priv->id.service_id) node = node->rb_left; else node = node->rb_right; @@ -537,8 +523,7 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); } -struct ib_cm_id *ib_create_cm_id(struct ib_device *device, - ib_cm_handler cm_handler, +struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, void *context) { struct cm_id_private *cm_id_priv; @@ -550,7 +535,6 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, memset(cm_id_priv, 0, sizeof *cm_id_priv); cm_id_priv->id.state = IB_CM_IDLE; - cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; @@ -678,7 +662,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id) break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); break; case IB_CM_SIDR_REQ_RCVD: @@ 
-689,7 +674,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id) case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); /* Fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -706,7 +692,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id) ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); break; @@ -880,6 +867,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { struct cm_id_private *cm_id_priv; + struct ib_send_wr *bad_send_wr; struct cm_req_msg *req_msg; unsigned long flags; int ret; @@ -923,7 +911,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->retry_count = param->retry_count; cm_id_priv->path_mtu = param->primary_path->mtu; - cm_id_priv->qp_type = param->qp_type; ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); if (ret) @@ -932,7 +919,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; + cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); @@ -941,7 +928,8 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_req_get_primary_local_ack_timeout(req_msg); spin_lock_irqsave(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &cm_id_priv->msg->send_wr, &bad_send_wr); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); goto error2; @@ -964,6 +952,7 @@ static int cm_issue_rej(struct cm_port *port, void *ari, u8 ari_length) { struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; struct cm_rej_msg *rej_msg, *rcv_msg; int ret; @@ -986,7 +975,7 @@ static int cm_issue_rej(struct cm_port *port, memcpy(rej_msg->ari, ari, ari_length); } - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr); if (ret) cm_free_msg(msg); @@ -1058,6 +1047,7 @@ static void cm_format_req_event(struct cm_work *work, req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.req_rcvd; param->listen_id = listen_id; + param->device = cm_id_priv->av.port->mad_agent->device; param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; if (req_msg->alt_local_lid) @@ -1166,6 +1156,7 @@ static void cm_dup_req_handler(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1194,7 +1185,8 @@ static void cm_dup_req_handler(struct cm_work *work, } spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, + &bad_send_wr); if (ret) goto free; return; @@ -1234,8 +1226,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, } /* Find matching listen request. 
*/ - listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, - req_msg->service_id); + listen_cm_id_priv = cm_find_listen(req_msg->service_id); if (!listen_cm_id_priv) { spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, @@ -1263,7 +1254,7 @@ static int cm_req_handler(struct cm_work *work) req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); + cm_id = ib_create_cm_id(NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); @@ -1314,7 +1305,6 @@ static int cm_req_handler(struct cm_work *work) cm_req_get_primary_local_ack_timeout(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); - cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); cm_process_work(cm_id_priv, work); @@ -1359,6 +1349,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; struct cm_rep_msg *rep_msg; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1380,10 +1371,11 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); - msg->timeout_ms = cm_id_priv->timeout_ms; + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1421,6 +1413,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; unsigned long flags; void *data; int ret; @@ -1447,7 +1440,8 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1492,6 +1486,7 @@ static void cm_dup_rep_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1519,7 +1514,8 @@ static void cm_dup_rep_handler(struct cm_work *work) goto unlock; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, + &bad_send_wr); if (ret) goto free; goto deref; @@ -1587,7 +1583,8 @@ static int cm_rep_handler(struct cm_work *work) /* todo: handle peer_to_peer */ - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1621,7 +1618,8 @@ static int cm_establish_handler(struct cm_work *work) goto out; } - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1660,7 +1658,8 @@ static int cm_rtu_handler(struct cm_work 
*work) } cm_id_priv->id.state = IB_CM_ESTABLISHED; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1697,6 +1696,7 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1718,10 +1718,11 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - msg->timeout_ms = cm_id_priv->timeout_ms; + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) { cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1755,6 +1756,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; unsigned long flags; void *data; int ret; @@ -1784,7 +1786,8 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, + &bad_send_wr); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1801,6 +1804,7 @@ static int cm_dreq_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1819,7 +1823,8 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); break; case IB_CM_ESTABLISHED: case IB_CM_MRA_REP_RCVD: @@ -1833,7 +1838,8 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (ib_post_send_mad(msg, NULL)) + if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr)) cm_free_msg(msg); goto deref; default: @@ -1880,7 +1886,8 @@ static int cm_drep_handler(struct cm_work *work) } cm_enter_timewait(cm_id_priv); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1905,6 +1912,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1948,7 +1956,8 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, if (ret) goto out; - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) cm_free_msg(msg); @@ -2024,7 +2033,8 @@ static int cm_rej_handler(struct cm_work *work) case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + 
ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -2034,7 +2044,8 @@ static int cm_rej_handler(struct cm_work *work) cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); /* fall through */ case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -2069,6 +2080,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; void *data; unsigned long flags; int ret; @@ -2092,7 +2104,8 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REQ, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) goto error2; cm_id->state = IB_CM_MRA_REQ_SENT; @@ -2105,7 +2118,8 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REP, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) goto error2; cm_id->state = IB_CM_MRA_REP_SENT; @@ -2118,7 +2132,8 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_OTHER, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) goto error2; cm_id->lap_state = IB_CM_MRA_LAP_SENT; @@ -2180,14 +2195,14 @@ static int cm_mra_handler(struct cm_work *work) case IB_CM_REQ_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + (unsigned long) cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + (unsigned long) cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; @@ -2195,7 +2210,7 @@ static int cm_mra_handler(struct cm_work *work) if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + (unsigned long) cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; @@ -2258,6 +2273,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2278,10 +2294,11 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, alternate_path, private_data, private_data_len); - msg->timeout_ms = cm_id_priv->timeout_ms; + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2325,6 +2342,7 @@ 
static int cm_lap_handler(struct cm_work *work) struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2358,7 +2376,8 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (ib_post_send_mad(msg, NULL)) + if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr)) cm_free_msg(msg); goto deref; default: @@ -2414,6 +2433,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2436,7 +2456,8 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, info, info_length, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2475,7 +2496,8 @@ static int cm_apr_handler(struct cm_work *work) goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); cm_id_priv->msg = NULL; ret = atomic_inc_and_test(&cm_id_priv->work_count); @@ -2550,6 +2572,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2572,12 +2595,13 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, param); - msg->timeout_ms = cm_id_priv->timeout_ms; + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_IDLE) - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); else ret = -EINVAL; @@ -2605,6 +2629,7 @@ static void cm_format_sidr_req_event(struct cm_work *work, param = &work->cm_event.param.sidr_req_rcvd; param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; + param->device = work->port->mad_agent->device; param->port = work->port->port_num; work->cm_event.private_data = &sidr_req_msg->private_data; } @@ -2617,7 +2642,7 @@ static int cm_sidr_req_handler(struct cm_work *work) struct ib_wc *wc; unsigned long flags; - cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); + cm_id = ib_create_cm_id(NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -2641,8 +2666,7 @@ static int cm_sidr_req_handler(struct cm_work *work) spin_unlock_irqrestore(&cm.lock, flags); goto out; /* Duplicate message. 
*/ } - cur_cm_id_priv = cm_find_listen(cm_id->device, - sidr_req_msg->service_id); + cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id); if (!cur_cm_id_priv) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irqrestore(&cm.lock, flags); @@ -2691,6 +2715,7 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2712,7 +2737,8 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); - ret = ib_post_send_mad(msg, NULL); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2765,7 +2791,8 @@ static int cm_sidr_rep_handler(struct cm_work *work) goto out; } cm_id_priv->id.state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_format_sidr_rep_event(work); @@ -2833,7 +2860,9 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_mad_send_buf *msg = mad_send_wc->send_buf; + struct ib_mad_send_buf *msg; + + msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id; switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -3035,10 +3064,10 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; if (cm_id_priv->responder_resources) - qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; @@ -3068,18 +3097,14 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | - IB_QP_DEST_QPN | IB_QP_RQ_PSN; + IB_QP_DEST_QPN | IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; qp_attr->ah_attr = cm_id_priv->av.ah_attr; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { - *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | - IB_QP_MIN_RNR_TIMER; - qp_attr->max_dest_rd_atomic = - cm_id_priv->responder_resources; - qp_attr->min_rnr_timer = 0; - } + qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; + qp_attr->min_rnr_timer = 0; if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; @@ -3108,17 +3133,14 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: - *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; + *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC; + qp_attr->timeout = cm_id_priv->local_ack_timeout; + qp_attr->retry_cnt = cm_id_priv->retry_count; + 
qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { - *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | - IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->local_ack_timeout; - qp_attr->retry_cnt = cm_id_priv->retry_count; - qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; - qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; - } + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; qp_attr->path_mig_state = IB_MIG_REARM; @@ -3301,7 +3323,6 @@ static void __exit ib_cm_cleanup(void) flush_workqueue(cm.wq); destroy_workqueue(cm.wq); ib_unregister_client(&cm_client); - idr_destroy(&cm.local_id_table); } module_init(ib_cm_init); diff --git a/trunk/drivers/infiniband/core/cm_msgs.h b/trunk/drivers/infiniband/core/cm_msgs.h index 4d3aee90c249..813ab70bf6d5 100644 --- a/trunk/drivers/infiniband/core/cm_msgs.h +++ b/trunk/drivers/infiniband/core/cm_msgs.h @@ -186,7 +186,6 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, req_msg->offset40 = cpu_to_be32((be32_to_cpu( req_msg->offset40) & 0xFFFFFFF9) | 0x2); - break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & diff --git a/trunk/drivers/infiniband/core/device.c b/trunk/drivers/infiniband/core/device.c index 5a6e44976405..d3cf84e01587 100644 --- a/trunk/drivers/infiniband/core/device.c +++ b/trunk/drivers/infiniband/core/device.c @@ -514,12 +514,6 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { - if (device->node_type == IB_NODE_SWITCH) { - if (port_num) - return -EINVAL; - } else if (port_num < 1 || port_num > device->phys_port_cnt) - return -EINVAL; - return device->query_port(device, port_num, port_attr); } EXPORT_SYMBOL(ib_query_port); @@ -589,12 +583,6 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { - if (device->node_type == IB_NODE_SWITCH) { - if (port_num) - return -EINVAL; - } else if (port_num < 1 || port_num > device->phys_port_cnt) - return -EINVAL; - return device->modify_port(device, port_num, port_modify_mask, port_modify); } diff --git a/trunk/drivers/infiniband/core/mad.c b/trunk/drivers/infiniband/core/mad.c index 88f9f8c9eacc..a14ca87fda18 100644 --- a/trunk/drivers/infiniband/core/mad.c +++ b/trunk/drivers/infiniband/core/mad.c @@ -579,7 +579,7 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) } static void snoop_send(struct ib_mad_qp_info *qp_info, - struct ib_mad_send_buf *send_buf, + struct ib_send_wr *send_wr, struct ib_mad_send_wc *mad_send_wc, int mad_snoop_flags) { @@ -597,7 +597,7 @@ static void snoop_send(struct ib_mad_qp_info *qp_info, atomic_inc(&mad_snoop_priv->refcount); spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, - send_buf, mad_send_wc); + send_wr, mad_send_wc); if (atomic_dec_and_test(&mad_snoop_priv->refcount)) wake_up(&mad_snoop_priv->wait); spin_lock_irqsave(&qp_info->snoop_lock, flags); @@ -654,10 +654,10 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, * Return < 0 if error */ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *mad_send_wr) + struct ib_smp *smp, + struct ib_send_wr *send_wr) { int ret; - struct ib_smp *smp = mad_send_wr->send_buf.mad; unsigned long flags; struct 
ib_mad_local_private *local; struct ib_mad_private *mad_priv; @@ -666,7 +666,6 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_device *device = mad_agent_priv->agent.device; u8 port_num = mad_agent_priv->agent.port_num; struct ib_wc mad_wc; - struct ib_send_wr *send_wr = &mad_send_wr->send_wr; if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) { ret = -EINVAL; @@ -746,7 +745,13 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } - local->mad_send_wr = mad_send_wr; + local->send_wr = *send_wr; + local->send_wr.sg_list = local->sg_list; + memcpy(local->sg_list, send_wr->sg_list, + sizeof *send_wr->sg_list * send_wr->num_sge); + local->send_wr.next = NULL; + local->tid = send_wr->wr.ud.mad_hdr->tid; + local->wr_id = send_wr->wr_id; /* Reference MAD agent until send side of local completion handled */ atomic_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ @@ -776,17 +781,17 @@ static int get_buf_length(int hdr_len, int data_len) struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - int rmpp_active, + struct ib_ah *ah, int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_buf *send_buf; int buf_size; void *buf; - mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, - agent); + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, agent); buf_size = get_buf_length(hdr_len, data_len); if ((!mad_agent->rmpp_version && @@ -794,40 +799,45 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, (!rmpp_active && buf_size > sizeof(struct ib_mad))) return ERR_PTR(-EINVAL); - buf = kmalloc(sizeof *mad_send_wr + buf_size, gfp_mask); + buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); - memset(buf, 0, sizeof *mad_send_wr + buf_size); - - mad_send_wr = buf + buf_size; - mad_send_wr->send_buf.mad = buf; - - mad_send_wr->mad_agent_priv = mad_agent_priv; - mad_send_wr->sg_list[0].length = buf_size; - mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; - - mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; - mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.num_sge = 1; - mad_send_wr->send_wr.opcode = IB_WR_SEND; - mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; - mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; - mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; + memset(buf, 0, sizeof *send_buf + buf_size); + + send_buf = buf + buf_size; + send_buf->mad = buf; + + send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device, + buf, buf_size, DMA_TO_DEVICE); + pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr); + send_buf->sge.length = buf_size; + send_buf->sge.lkey = mad_agent->mr->lkey; + + send_buf->send_wr.wr_id = (unsigned long) send_buf; + send_buf->send_wr.sg_list = &send_buf->sge; + send_buf->send_wr.num_sge = 1; + send_buf->send_wr.opcode = IB_WR_SEND; + send_buf->send_wr.send_flags = IB_SEND_SIGNALED; + send_buf->send_wr.wr.ud.ah = ah; + send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr; + send_buf->send_wr.wr.ud.remote_qpn = remote_qpn; + send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; + send_buf->send_wr.wr.ud.pkey_index = pkey_index; if (rmpp_active) { - struct ib_rmpp_mad *rmpp_mad 
= mad_send_wr->send_buf.mad; + struct ib_rmpp_mad *rmpp_mad; + rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad; rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - - IB_MGMT_RMPP_HDR + data_len); + offsetof(struct ib_rmpp_mad, data) + data_len); rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); } - mad_send_wr->send_buf.mad_agent = mad_agent; + send_buf->mad_agent = mad_agent; atomic_inc(&mad_agent_priv->refcount); - return &mad_send_wr->send_buf; + return send_buf; } EXPORT_SYMBOL(ib_create_send_mad); @@ -837,6 +847,10 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf) mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); + + dma_unmap_single(send_buf->mad_agent->device->dma_device, + pci_unmap_addr(send_buf, mapping), + send_buf->sge.length, DMA_TO_DEVICE); kfree(send_buf->mad); if (atomic_dec_and_test(&mad_agent_priv->refcount)) @@ -847,10 +861,8 @@ EXPORT_SYMBOL(ib_free_send_mad); int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; - struct list_head *list; struct ib_send_wr *bad_send_wr; - struct ib_mad_agent *mad_agent; - struct ib_sge *sge; + struct list_head *list; unsigned long flags; int ret; @@ -859,17 +871,10 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; - mad_agent = mad_send_wr->send_buf.mad_agent; - sge = mad_send_wr->sg_list; - sge->addr = dma_map_single(mad_agent->device->dma_device, - mad_send_wr->send_buf.mad, sge->length, - DMA_TO_DEVICE); - pci_unmap_addr_set(mad_send_wr, mapping, sge->addr); - spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, - &bad_send_wr); + ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp, + &mad_send_wr->send_wr, &bad_send_wr); list = &qp_info->send_queue.list; } else { ret = 0; @@ -881,11 +886,6 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) list_add_tail(&mad_send_wr->mad_list.list, list); } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); - if (ret) - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, mapping), - sge->length, DMA_TO_DEVICE); - return ret; } @@ -893,28 +893,45 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client */ -int ib_post_send_mad(struct ib_mad_send_buf *send_buf, - struct ib_mad_send_buf **bad_send_buf) +int ib_post_send_mad(struct ib_mad_agent *mad_agent, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) { - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_buf *next_send_buf; - struct ib_mad_send_wr_private *mad_send_wr; - unsigned long flags; int ret = -EINVAL; + struct ib_mad_agent_private *mad_agent_priv; + + /* Validate supplied parameters */ + if (!bad_send_wr) + goto error1; + + if (!mad_agent || !send_wr) + goto error2; + + if (!mad_agent->send_handler) + goto error2; + + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, + agent); /* Walk list of send WRs and post each on send list */ - for (; send_buf; send_buf = next_send_buf) { + while (send_wr) { + unsigned long flags; + struct ib_send_wr *next_send_wr; + 
struct ib_mad_send_wr_private *mad_send_wr; + struct ib_smp *smp; + + /* Validate more parameters */ + if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG) + goto error2; - mad_send_wr = container_of(send_buf, - struct ib_mad_send_wr_private, - send_buf); - mad_agent_priv = mad_send_wr->mad_agent_priv; - - if (!send_buf->mad_agent->send_handler || - (send_buf->timeout_ms && - !send_buf->mad_agent->recv_handler)) { - ret = -EINVAL; - goto error; + if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler) + goto error2; + + if (!send_wr->wr.ud.mad_hdr) { + printk(KERN_ERR PFX "MAD header must be supplied " + "in WR %p\n", send_wr); + goto error2; } /* @@ -922,24 +939,40 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, * current one completes, and the user modifies the work * request associated with the completion */ - next_send_buf = send_buf->next; - mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; + next_send_wr = (struct ib_send_wr *)send_wr->next; - if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - ret = handle_outgoing_dr_smp(mad_agent_priv, - mad_send_wr); + smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + ret = handle_outgoing_dr_smp(mad_agent_priv, smp, + send_wr); if (ret < 0) /* error */ - goto error; + goto error2; else if (ret == 1) /* locally consumed */ - continue; + goto next; } - mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; + /* Allocate MAD send WR tracking structure */ + mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC); + if (!mad_send_wr) { + printk(KERN_ERR PFX "No memory for " + "ib_mad_send_wr_private\n"); + ret = -ENOMEM; + goto error2; + } + memset(mad_send_wr, 0, sizeof *mad_send_wr); + + mad_send_wr->send_wr = *send_wr; + mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; + memcpy(mad_send_wr->sg_list, send_wr->sg_list, + sizeof *send_wr->sg_list * send_wr->num_sge); + mad_send_wr->wr_id = send_wr->wr_id; + mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; + mad_send_wr->mad_agent_priv = mad_agent_priv; /* Timeout will be updated after send completes */ - mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); - mad_send_wr->retries = send_buf->retries; - /* Reference for work request to QP + response */ + mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. + ud.timeout_ms); + mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; + /* One reference for each work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -962,13 +995,16 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, list_del(&mad_send_wr->agent_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); - goto error; + goto error2; } +next: + send_wr = next_send_wr; } return 0; -error: - if (bad_send_buf) - *bad_send_buf = send_buf; + +error2: + *bad_send_wr = send_wr; +error1: return ret; } EXPORT_SYMBOL(ib_post_send_mad); @@ -1411,7 +1447,8 @@ find_mad_agent(struct ib_mad_port_private *port_priv, * of MAD. 
*/ hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, agent_list) { + list_for_each_entry(entry, &port_priv->agent_list, + agent_list) { if (entry->agent.hi_tid == hi_tid) { mad_agent = entry; break; @@ -1534,7 +1571,8 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid) */ list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && + if (is_data_mad(mad_agent_priv, + mad_send_wr->send_wr.wr.ud.mad_hdr) && mad_send_wr->tid == tid && mad_send_wr->timeout) { /* Verify request has not been canceled */ return (mad_send_wr->status == IB_WC_SUCCESS) ? @@ -1590,14 +1628,14 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ - mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; + mad_recv_wc->wc->wr_id = mad_send_wr->wr_id; mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, mad_recv_wc); atomic_dec(&mad_agent_priv->refcount); mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; - mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_send_wc.wr_id = mad_send_wr->wr_id; ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, @@ -1690,11 +1728,11 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { - agent_send_response(&response->mad.mad, - &recv->grh, wc, - port_priv->device, - port_priv->port_num, - qp_info->qp->qp_num); + /* Send response */ + if (!agent_send(response, &recv->grh, wc, + port_priv->device, + port_priv->port_num)) + response = NULL; goto out; } } @@ -1828,15 +1866,15 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->status != IB_WC_SUCCESS ) mad_send_wc->status = mad_send_wr->status; - if (ret == IB_RMPP_RESULT_INTERNAL) - ib_rmpp_send_handler(mad_send_wc); - else + if (ret != IB_RMPP_RESULT_INTERNAL) mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, mad_send_wc); /* Release reference on agent taken when sending */ if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); + + kfree(mad_send_wr); return; done: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -1850,7 +1888,6 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; struct ib_send_wr *bad_send_wr; - struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; @@ -1861,9 +1898,6 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, qp_info = send_queue->qp_info; retry: - dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, mapping), - mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); @@ -1880,17 +1914,17 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, } spin_unlock_irqrestore(&send_queue->lock, flags); - mad_send_wc.send_buf = &mad_send_wr->send_buf; - mad_send_wc.status = wc->status; - mad_send_wc.vendor_err = wc->vendor_err; + /* Restore client wr_id in WC and complete send */ + wc->wr_id = mad_send_wr->wr_id; if (atomic_read(&qp_info->snoop_count)) - 
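/*
 * Sketch of the TID convention the response dispatch above depends on:
 * the top 32 bits of a request TID carry the sending agent's hi_tid, so
 * find_mad_agent() can route a response back by shifting them out again
 * (user_mad.c below applies the same rule when fixing up client TIDs).
 * example_build_tid() is a hypothetical helper, not part of the patch.
 */
static __be64 example_build_tid(struct ib_mad_agent *agent, u32 user_tid)
{
	return cpu_to_be64(((u64) agent->hi_tid) << 32 | user_tid);
}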
snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, + snoop_send(qp_info, &mad_send_wr->send_wr, + (struct ib_mad_send_wc *)wc, IB_MAD_SNOOP_SEND_COMPLETIONS); - ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc); if (queued_send_wr) { ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, - &bad_send_wr); + &bad_send_wr); if (ret) { printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); mad_send_wr = queued_send_wr; @@ -2032,37 +2066,38 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &cancel_list, agent_list) { - mad_send_wc.send_buf = &mad_send_wr->send_buf; - list_del(&mad_send_wr->agent_list); + mad_send_wc.wr_id = mad_send_wr->wr_id; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); + + list_del(&mad_send_wr->agent_list); + kfree(mad_send_wr); atomic_dec(&mad_agent_priv->refcount); } } static struct ib_mad_send_wr_private* -find_send_wr(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_buf *send_buf) +find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id) { struct ib_mad_send_wr_private *mad_send_wr; list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, agent_list) { - if (&mad_send_wr->send_buf == send_buf) + if (mad_send_wr->wr_id == wr_id) return mad_send_wr; } list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && - &mad_send_wr->send_buf == send_buf) + if (is_data_mad(mad_agent_priv, + mad_send_wr->send_wr.wr.ud.mad_hdr) && + mad_send_wr->wr_id == wr_id) return mad_send_wr; } return NULL; } -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms) +int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; @@ -2072,7 +2107,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = find_send_wr(mad_agent_priv, send_buf); + mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id); if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return -EINVAL; @@ -2084,7 +2119,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } - mad_send_wr->send_buf.timeout_ms = timeout_ms; + mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms; if (active) mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); else @@ -2095,10 +2130,9 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, } EXPORT_SYMBOL(ib_modify_mad); -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf) +void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id) { - ib_modify_mad(mad_agent, send_buf, 0); + ib_modify_mad(mad_agent, wr_id, 0); } EXPORT_SYMBOL(ib_cancel_mad); @@ -2132,9 +2166,10 @@ static void local_completions(void *data) * Defined behavior is to complete response * before request */ - build_smp_wc((unsigned long) local->mad_send_wr, + build_smp_wc(local->wr_id, be16_to_cpu(IB_LID_PERMISSIVE), - 0, recv_mad_agent->agent.port_num, &wc); + 0 /* pkey index */, + recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; local->mad_priv->header.recv_wc.mad_len = @@ 
-2161,11 +2196,11 @@ static void local_completions(void *data) /* Complete send */ mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; - mad_send_wc.send_buf = &local->mad_send_wr->send_buf; + mad_send_wc.wr_id = local->wr_id; if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) - snoop_send(mad_agent_priv->qp_info, - &local->mad_send_wr->send_buf, - &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); + snoop_send(mad_agent_priv->qp_info, &local->send_wr, + &mad_send_wc, + IB_MAD_SNOOP_SEND_COMPLETIONS); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); @@ -2186,7 +2221,8 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) if (!mad_send_wr->retries--) return -ETIMEDOUT; - mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr. + wr.ud.timeout_ms); if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { ret = ib_retry_rmpp(mad_send_wr); @@ -2249,10 +2285,11 @@ static void timeout_sends(void *data) mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; else mad_send_wc.status = mad_send_wr->status; - mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_send_wc.wr_id = mad_send_wr->wr_id; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); + kfree(mad_send_wr); atomic_dec(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); } @@ -2646,47 +2683,40 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int start, end, i; + int num_ports, cur_port, i; if (device->node_type == IB_NODE_SWITCH) { - start = 0; - end = 0; + num_ports = 1; + cur_port = 0; } else { - start = 1; - end = device->phys_port_cnt; + num_ports = device->phys_port_cnt; + cur_port = 1; } - - for (i = start; i <= end; i++) { - if (ib_mad_port_open(device, i)) { + for (i = 0; i < num_ports; i++, cur_port++) { + if (ib_mad_port_open(device, cur_port)) { printk(KERN_ERR PFX "Couldn't open %s port %d\n", - device->name, i); - goto error; + device->name, cur_port); + goto error_device_open; } - if (ib_agent_port_open(device, i)) { + if (ib_agent_port_open(device, cur_port)) { printk(KERN_ERR PFX "Couldn't open %s port %d " "for agents\n", - device->name, i); - goto error_agent; + device->name, cur_port); + goto error_device_open; } } return; -error_agent: - if (ib_mad_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, i); - -error: - i--; - - while (i >= start) { - if (ib_agent_port_close(device, i)) +error_device_open: + while (i > 0) { + cur_port--; + if (ib_agent_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d " "for agents\n", - device->name, i); - if (ib_mad_port_close(device, i)) + device->name, cur_port); + if (ib_mad_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, i); + device->name, cur_port); i--; } } @@ -2724,6 +2754,7 @@ static int __init ib_mad_init_module(void) int ret; spin_lock_init(&ib_mad_port_list_lock); + spin_lock_init(&ib_agent_port_list_lock); ib_mad_cache = kmem_cache_create("ib_mad", sizeof(struct ib_mad_private), diff --git a/trunk/drivers/infiniband/core/mad_priv.h b/trunk/drivers/infiniband/core/mad_priv.h index 570f78682af3..f1ba794e0daa 100644 --- a/trunk/drivers/infiniband/core/mad_priv.h +++ b/trunk/drivers/infiniband/core/mad_priv.h @@ -118,10 +118,9 @@ struct ib_mad_send_wr_private { struct ib_mad_list_head mad_list; struct list_head 
agent_list; struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_buf send_buf; - DECLARE_PCI_UNMAP_ADDR(mapping) struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; + u64 wr_id; /* client WR ID */ __be64 tid; unsigned long timeout; int retries; @@ -142,7 +141,10 @@ struct ib_mad_local_private { struct list_head completion_list; struct ib_mad_private *mad_priv; struct ib_mad_agent_private *recv_mad_agent; - struct ib_mad_send_wr_private *mad_send_wr; + struct ib_send_wr send_wr; + struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; + u64 wr_id; /* client WR ID */ + __be64 tid; }; struct ib_mad_mgmt_method_table { diff --git a/trunk/drivers/infiniband/core/mad_rmpp.c b/trunk/drivers/infiniband/core/mad_rmpp.c index 3249e1d8c07b..e23836d0e21b 100644 --- a/trunk/drivers/infiniband/core/mad_rmpp.c +++ b/trunk/drivers/infiniband/core/mad_rmpp.c @@ -103,12 +103,12 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) static int data_offset(u8 mgmt_class) { if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) - return IB_MGMT_SA_HDR; + return offsetof(struct ib_sa_mad, data); else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) - return IB_MGMT_VENDOR_HDR; + return offsetof(struct ib_vendor_mad, data); else - return IB_MGMT_RMPP_HDR; + return offsetof(struct ib_rmpp_mad, data); } static void format_ack(struct ib_rmpp_mad *ack, @@ -135,52 +135,55 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, struct ib_mad_recv_wc *recv_wc) { struct ib_mad_send_buf *msg; - int ret; + struct ib_send_wr *bad_send_wr; + int hdr_len, ret; + hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR, - IB_MGMT_RMPP_DATA, GFP_KERNEL); + recv_wc->wc->pkey_index, rmpp_recv->ah, 1, + hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len, + GFP_KERNEL); if (!msg) return; - format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, - rmpp_recv); - msg->ah = rmpp_recv->ah; - ret = ib_post_send_mad(msg, NULL); + format_ack((struct ib_rmpp_mad *) msg->mad, + (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); + ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr, + &bad_send_wr); if (ret) ib_free_send_mad(msg); } -static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, - struct ib_mad_recv_wc *recv_wc) +static int alloc_response_msg(struct ib_mad_agent *agent, + struct ib_mad_recv_wc *recv_wc, + struct ib_mad_send_buf **msg) { - struct ib_mad_send_buf *msg; + struct ib_mad_send_buf *m; struct ib_ah *ah; + int hdr_len; ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, recv_wc->recv_buf.grh, agent->port_num); if (IS_ERR(ah)) - return (void *) ah; - - msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, 1, - IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, - GFP_KERNEL); - if (IS_ERR(msg)) + return PTR_ERR(ah); + + hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); + m = ib_create_send_mad(agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, ah, 1, hdr_len, + sizeof(struct ib_rmpp_mad) - hdr_len, + GFP_KERNEL); + if (IS_ERR(m)) { ib_destroy_ah(ah); - else - msg->ah = ah; - - return msg; + return PTR_ERR(m); + } + *msg = m; + return 0; } -void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) +static void free_msg(struct ib_mad_send_buf *msg) { - struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad; - - if 
(rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_ACK) - ib_destroy_ah(mad_send_wc->send_buf->ah); - ib_free_send_mad(mad_send_wc->send_buf); + ib_destroy_ah(msg->send_wr.wr.ud.ah); + ib_free_send_mad(msg); } static void nack_recv(struct ib_mad_agent_private *agent, @@ -188,13 +191,14 @@ static void nack_recv(struct ib_mad_agent_private *agent, { struct ib_mad_send_buf *msg; struct ib_rmpp_mad *rmpp_mad; + struct ib_send_wr *bad_send_wr; int ret; - msg = alloc_response_msg(&agent->agent, recv_wc); - if (IS_ERR(msg)) + ret = alloc_response_msg(&agent->agent, recv_wc, &msg); + if (ret) return; - rmpp_mad = msg->mad; + rmpp_mad = (struct ib_rmpp_mad *) msg->mad; memcpy(rmpp_mad, recv_wc->recv_buf.mad, data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class)); @@ -206,11 +210,9 @@ static void nack_recv(struct ib_mad_agent_private *agent, rmpp_mad->rmpp_hdr.seg_num = 0; rmpp_mad->rmpp_hdr.paylen_newwin = 0; - ret = ib_post_send_mad(msg, NULL); - if (ret) { - ib_destroy_ah(msg->ah); - ib_free_send_mad(msg); - } + ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr); + if (ret) + free_msg(msg); } static void recv_timeout_handler(void *data) @@ -583,7 +585,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) int timeout; u32 paylen; - rmpp_mad = mad_send_wr->send_buf.mad; + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); @@ -610,7 +612,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) } /* 2 seconds for an ACK until we can find the packet lifetime */ - timeout = mad_send_wr->send_buf.timeout_ms; + timeout = mad_send_wr->send_wr.wr.ud.timeout_ms; if (!timeout || timeout > 2000) mad_send_wr->timeout = msecs_to_jiffies(2000); mad_send_wr->seg_num++; @@ -638,7 +640,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid, wc.status = IB_WC_REM_ABORT_ERR; wc.vendor_err = rmpp_status; - wc.send_buf = &mad_send_wr->send_buf; + wc.wr_id = mad_send_wr->wr_id; ib_mad_complete_send_wr(mad_send_wr, &wc); return; out: @@ -692,12 +694,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (seg_num > mad_send_wr->last_ack) { mad_send_wr->last_ack = seg_num; - mad_send_wr->retries = mad_send_wr->send_buf.retries; + mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; } mad_send_wr->newwin = newwin; if (mad_send_wr->last_ack == mad_send_wr->total_seg) { /* If no response is expected, the ACK completes the send */ - if (!mad_send_wr->send_buf.timeout_ms) { + if (!mad_send_wr->send_wr.wr.ud.timeout_ms) { struct ib_mad_send_wc wc; ib_mark_mad_done(mad_send_wr); @@ -705,13 +707,13 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, wc.status = IB_WC_SUCCESS; wc.vendor_err = 0; - wc.send_buf = &mad_send_wr->send_buf; + wc.wr_id = mad_send_wr->wr_id; ib_mad_complete_send_wr(mad_send_wr, &wc); return; } if (mad_send_wr->refcount == 1) - ib_reset_mad_timeout(mad_send_wr, - mad_send_wr->send_buf.timeout_ms); + ib_reset_mad_timeout(mad_send_wr, mad_send_wr-> + send_wr.wr.ud.timeout_ms); } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && mad_send_wr->seg_num <= mad_send_wr->total_seg) { @@ -840,7 +842,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) struct ib_rmpp_mad *rmpp_mad; int i, total_len, ret; - rmpp_mad = mad_send_wr->send_buf.mad; + rmpp_mad = (struct ib_rmpp_mad 
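/*
 * Ownership rule behind free_msg() above: ack_recv() borrows the AH held
 * by the rmpp_recv, so an ACK is released with ib_free_send_mad() alone,
 * while NACKs own the AH created in alloc_response_msg() and must go
 * through free_msg(); ib_process_rmpp_send_wc() below applies exactly
 * this split. Hypothetical helper, for illustration only:
 */
static void example_release_rmpp_msg(struct ib_mad_send_buf *msg, u8 rmpp_type)
{
	if (rmpp_type == IB_MGMT_RMPP_TYPE_ACK)
		ib_free_send_mad(msg);	/* AH belongs to the rmpp_recv */
	else
		free_msg(msg);		/* destroys the AH, then frees */
}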
*)mad_send_wr->send_wr.wr.ud.mad_hdr; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; @@ -861,7 +863,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) / (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset); - mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR - + mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); /* We need to wait for the final ACK even if there isn't a response */ @@ -876,15 +878,23 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc) { struct ib_rmpp_mad *rmpp_mad; + struct ib_mad_send_buf *msg; int ret; - rmpp_mad = mad_send_wr->send_buf.mad; + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ - if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { + msg = (struct ib_mad_send_buf *) (unsigned long) + mad_send_wc->wr_id; + if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK) + ib_free_send_mad(msg); + else + free_msg(msg); return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ + } if (mad_send_wc->status != IB_WC_SUCCESS || mad_send_wr->status != IB_WC_SUCCESS) @@ -895,7 +905,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->last_ack == mad_send_wr->total_seg) { mad_send_wr->timeout = - msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); + msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms); return IB_RMPP_RESULT_PROCESSED; /* Send done */ } @@ -916,7 +926,7 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) struct ib_rmpp_mad *rmpp_mad; int ret; - rmpp_mad = mad_send_wr->send_buf.mad; + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ diff --git a/trunk/drivers/infiniband/core/mad_rmpp.h b/trunk/drivers/infiniband/core/mad_rmpp.h index f0616fd22494..c4924dfb8e75 100644 --- a/trunk/drivers/infiniband/core/mad_rmpp.h +++ b/trunk/drivers/infiniband/core/mad_rmpp.h @@ -51,8 +51,6 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); -void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc); - void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent); int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr); diff --git a/trunk/drivers/infiniband/core/sa_query.c b/trunk/drivers/infiniband/core/sa_query.c index 89ce9dc210d4..262618210c1c 100644 --- a/trunk/drivers/infiniband/core/sa_query.c +++ b/trunk/drivers/infiniband/core/sa_query.c @@ -73,10 +73,11 @@ struct ib_sa_device { struct ib_sa_query { void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); void (*release)(struct ib_sa_query *); - struct ib_sa_port *port; - struct ib_mad_send_buf *mad_buf; - struct ib_sa_sm_ah *sm_ah; - int id; + struct ib_sa_port *port; + struct ib_sa_mad *mad; + struct ib_sa_sm_ah *sm_ah; + DECLARE_PCI_UNMAP_ADDR(mapping) + int id; }; struct ib_sa_service_query { @@ -425,7 +426,6 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) { unsigned long flags; 
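/*
 * With the mad_buf member gone from ib_sa_query, the idr id doubles as
 * the WR id, so the completion handlers (further down) can recover the
 * query from a bare wr_id. Sketch of the lookup, assuming the
 * query_idr/idr_lock globals this file already defines:
 */
static struct ib_sa_query *example_query_from_wr_id(u64 wr_id)
{
	struct ib_sa_query *query;
	unsigned long flags;

	spin_lock_irqsave(&idr_lock, flags);
	query = idr_find(&query_idr, wr_id);
	spin_unlock_irqrestore(&idr_lock, flags);

	return query;
}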
struct ib_mad_agent *agent; - struct ib_mad_send_buf *mad_buf; spin_lock_irqsave(&idr_lock, flags); if (idr_find(&query_idr, id) != query) { @@ -433,10 +433,9 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) return; } agent = query->port->agent; - mad_buf = query->mad_buf; spin_unlock_irqrestore(&idr_lock, flags); - ib_cancel_mad(agent, mad_buf); + ib_cancel_mad(agent, id); } EXPORT_SYMBOL(ib_sa_cancel_query); @@ -458,46 +457,71 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms) { + struct ib_sa_port *port = query->port; unsigned long flags; - int ret, id; + int ret; + struct ib_sge gather_list; + struct ib_send_wr *bad_wr, wr = { + .opcode = IB_WR_SEND, + .sg_list = &gather_list, + .num_sge = 1, + .send_flags = IB_SEND_SIGNALED, + .wr = { + .ud = { + .mad_hdr = &query->mad->mad_hdr, + .remote_qpn = 1, + .remote_qkey = IB_QP1_QKEY, + .timeout_ms = timeout_ms, + } + } + }; retry: if (!idr_pre_get(&query_idr, GFP_ATOMIC)) return -ENOMEM; spin_lock_irqsave(&idr_lock, flags); - ret = idr_get_new(&query_idr, query, &id); + ret = idr_get_new(&query_idr, query, &query->id); spin_unlock_irqrestore(&idr_lock, flags); if (ret == -EAGAIN) goto retry; if (ret) return ret; - query->mad_buf->timeout_ms = timeout_ms; - query->mad_buf->context[0] = query; - query->id = id; + wr.wr_id = query->id; - spin_lock_irqsave(&query->port->ah_lock, flags); - kref_get(&query->port->sm_ah->ref); - query->sm_ah = query->port->sm_ah; - spin_unlock_irqrestore(&query->port->ah_lock, flags); + spin_lock_irqsave(&port->ah_lock, flags); + kref_get(&port->sm_ah->ref); + query->sm_ah = port->sm_ah; + wr.wr.ud.ah = port->sm_ah->ah; + spin_unlock_irqrestore(&port->ah_lock, flags); - query->mad_buf->ah = query->sm_ah->ah; + gather_list.addr = dma_map_single(port->agent->device->dma_device, + query->mad, + sizeof (struct ib_sa_mad), + DMA_TO_DEVICE); + gather_list.length = sizeof (struct ib_sa_mad); + gather_list.lkey = port->agent->mr->lkey; + pci_unmap_addr_set(query, mapping, gather_list.addr); - ret = ib_post_send_mad(query->mad_buf, NULL); + ret = ib_post_send_mad(port->agent, &wr, &bad_wr); if (ret) { + dma_unmap_single(port->agent->device->dma_device, + pci_unmap_addr(query, mapping), + sizeof (struct ib_sa_mad), + DMA_TO_DEVICE); + kref_put(&query->sm_ah->ref, free_sm_ah); spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, id); + idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); - - kref_put(&query->sm_ah->ref, free_sm_ah); } /* * It's not safe to dereference query any more, because the * send may already have completed and freed the query in - * another context. + * another context. So use wr.wr_id, which has a copy of the + * query's id. */ - return ret ? ret : id; + return ret ? 
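/*
 * DMA lifetime implied by send_mad() below: the MAD is mapped once at
 * post time and the bus address is stashed with pci_unmap_addr_set();
 * the matching unmap in send_handler() reuses that cookie. Sketch of the
 * pairing, assuming the field names used in this file:
 */
static void example_unmap_query(struct ib_mad_agent *agent,
				struct ib_sa_query *query)
{
	dma_unmap_single(agent->device->dma_device,
			 pci_unmap_addr(query, mapping),
			 sizeof (struct ib_sa_mad), DMA_TO_DEVICE);
}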
ret : wr.wr_id; } static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, @@ -519,6 +543,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { + kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); } @@ -558,58 +583,43 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, { struct ib_sa_path_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - struct ib_mad_agent *agent; - struct ib_sa_mad *mad; + struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; + struct ib_mad_agent *agent = port->agent; int ret; - if (!sa_dev) - return -ENODEV; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - agent = port->agent; - query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - - query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, - 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, gfp_mask); - if (!query->sa_query.mad_buf) { - ret = -ENOMEM; - goto err1; + query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); + if (!query->sa_query.mad) { + kfree(query); + return -ENOMEM; } query->callback = callback; query->context = context; - mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); + init_mad(query->sa_query.mad, agent); - query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; - query->sa_query.release = ib_sa_path_rec_release; - query->sa_query.port = port; - mad->mad_hdr.method = IB_MGMT_METHOD_GET; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); - mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; + query->sa_query.release = ib_sa_path_rec_release; + query->sa_query.port = port; + query->sa_query.mad->mad_hdr.method = IB_MGMT_METHOD_GET; + query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); + query->sa_query.mad->sa_hdr.comp_mask = comp_mask; - ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); + ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), + rec, query->sa_query.mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) - goto err2; - - return ret; - -err2: - *sa_query = NULL; - ib_free_send_mad(query->sa_query.mad_buf); + if (ret < 0) { + *sa_query = NULL; + kfree(query->sa_query.mad); + kfree(query); + } -err1: - kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_path_rec_get); @@ -633,6 +643,7 @@ static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) { + kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); } @@ -674,17 +685,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, { struct ib_sa_service_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - struct ib_mad_agent *agent; - struct ib_sa_mad *mad; + struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; + struct ib_mad_agent *agent = port->agent; int ret; - if (!sa_dev) - return -ENODEV; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - agent = port->agent; - if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) @@ -693,45 +697,37 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, query = kmalloc(sizeof *query, 
gfp_mask); if (!query) return -ENOMEM; - - query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, - 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, gfp_mask); - if (!query->sa_query.mad_buf) { - ret = -ENOMEM; - goto err1; + query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); + if (!query->sa_query.mad) { + kfree(query); + return -ENOMEM; } query->callback = callback; query->context = context; - mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); + init_mad(query->sa_query.mad, agent); - query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; - query->sa_query.release = ib_sa_service_rec_release; - query->sa_query.port = port; - mad->mad_hdr.method = method; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); - mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; + query->sa_query.release = ib_sa_service_rec_release; + query->sa_query.port = port; + query->sa_query.mad->mad_hdr.method = method; + query->sa_query.mad->mad_hdr.attr_id = + cpu_to_be16(IB_SA_ATTR_SERVICE_REC); + query->sa_query.mad->sa_hdr.comp_mask = comp_mask; ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), - rec, mad->data); + rec, query->sa_query.mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) - goto err2; - - return ret; - -err2: - *sa_query = NULL; - ib_free_send_mad(query->sa_query.mad_buf); + if (ret < 0) { + *sa_query = NULL; + kfree(query->sa_query.mad); + kfree(query); + } -err1: - kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_service_rec_query); @@ -755,6 +751,7 @@ static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) { + kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } @@ -771,59 +768,43 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, { struct ib_sa_mcmember_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - struct ib_mad_agent *agent; - struct ib_sa_mad *mad; + struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; + struct ib_mad_agent *agent = port->agent; int ret; - if (!sa_dev) - return -ENODEV; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - agent = port->agent; - query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - - query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, - 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, gfp_mask); - if (!query->sa_query.mad_buf) { - ret = -ENOMEM; - goto err1; + query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); + if (!query->sa_query.mad) { + kfree(query); + return -ENOMEM; } query->callback = callback; query->context = context; - mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); + init_mad(query->sa_query.mad, agent); - query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; - query->sa_query.release = ib_sa_mcmember_rec_release; - query->sa_query.port = port; - mad->mad_hdr.method = method; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); - mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? 
ib_sa_mcmember_rec_callback : NULL; + query->sa_query.release = ib_sa_mcmember_rec_release; + query->sa_query.port = port; + query->sa_query.mad->mad_hdr.method = method; + query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); + query->sa_query.mad->sa_hdr.comp_mask = comp_mask; ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), - rec, mad->data); + rec, query->sa_query.mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) - goto err2; - - return ret; - -err2: - *sa_query = NULL; - ib_free_send_mad(query->sa_query.mad_buf); + if (ret < 0) { + *sa_query = NULL; + kfree(query->sa_query.mad); + kfree(query); + } -err1: - kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_mcmember_rec_query); @@ -831,9 +812,16 @@ EXPORT_SYMBOL(ib_sa_mcmember_rec_query); static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; + struct ib_sa_query *query; unsigned long flags; + spin_lock_irqsave(&idr_lock, flags); + query = idr_find(&query_idr, mad_send_wc->wr_id); + spin_unlock_irqrestore(&idr_lock, flags); + + if (!query) + return; + if (query->callback) switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -850,25 +838,30 @@ static void send_handler(struct ib_mad_agent *agent, break; } - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, query->id); - spin_unlock_irqrestore(&idr_lock, flags); - - ib_free_send_mad(mad_send_wc->send_buf); + dma_unmap_single(agent->device->dma_device, + pci_unmap_addr(query, mapping), + sizeof (struct ib_sa_mad), + DMA_TO_DEVICE); kref_put(&query->sm_ah->ref, free_sm_ah); + query->release(query); + + spin_lock_irqsave(&idr_lock, flags); + idr_remove(&query_idr, mad_send_wc->wr_id); + spin_unlock_irqrestore(&idr_lock, flags); } static void recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_query *query; - struct ib_mad_send_buf *mad_buf; + unsigned long flags; - mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id; - query = mad_buf->context[0]; + spin_lock_irqsave(&idr_lock, flags); + query = idr_find(&query_idr, mad_recv_wc->wc->wr_id); + spin_unlock_irqrestore(&idr_lock, flags); - if (query->callback) { + if (query && query->callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? 
@@ -982,7 +975,6 @@ static int __init ib_sa_init(void) static void __exit ib_sa_cleanup(void) { ib_unregister_client(&sa_client); - idr_destroy(&query_idr); } module_init(ib_sa_init); diff --git a/trunk/drivers/infiniband/core/smi.h b/trunk/drivers/infiniband/core/smi.h index 2b3c40198f81..db25503a0736 100644 --- a/trunk/drivers/infiniband/core/smi.h +++ b/trunk/drivers/infiniband/core/smi.h @@ -39,8 +39,6 @@ #ifndef __SMI_H_ #define __SMI_H_ -#include - int smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, int port_num, diff --git a/trunk/drivers/infiniband/core/sysfs.c b/trunk/drivers/infiniband/core/sysfs.c index 7ce7a6c782fa..211ba3223f65 100644 --- a/trunk/drivers/infiniband/core/sysfs.c +++ b/trunk/drivers/infiniband/core/sysfs.c @@ -65,11 +65,6 @@ struct port_table_attribute { int index; }; -static inline int ibdev_is_alive(const struct ib_device *dev) -{ - return dev->reg_state == IB_DEV_REGISTERED; -} - static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -79,8 +74,6 @@ static ssize_t port_attr_show(struct kobject *kobj, if (!port_attr->show) return -EIO; - if (!ibdev_is_alive(p->ibdev)) - return -ENODEV; return port_attr->show(p, port_attr, buf); } @@ -588,9 +581,6 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf) { struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); - if (!ibdev_is_alive(dev)) - return -ENODEV; - switch (dev->node_type) { case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); @@ -605,9 +595,6 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; - if (!ibdev_is_alive(dev)) - return -ENODEV; - ret = ib_query_device(dev, &attr); if (ret) return ret; @@ -625,9 +612,6 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; - if (!ibdev_is_alive(dev)) - return -ENODEV; - ret = ib_query_device(dev, &attr); if (ret) return ret; diff --git a/trunk/drivers/infiniband/core/ucm.c b/trunk/drivers/infiniband/core/ucm.c index 28477565ecba..021b8f1d36d3 100644 --- a/trunk/drivers/infiniband/core/ucm.c +++ b/trunk/drivers/infiniband/core/ucm.c @@ -41,81 +41,37 @@ #include #include #include -#include #include -#include -#include +#include "ucm.h" MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); -struct ib_ucm_device { - int devnum; - struct cdev dev; - struct class_device class_dev; - struct ib_device *ib_dev; -}; - -struct ib_ucm_file { - struct semaphore mutex; - struct file *filp; - struct ib_ucm_device *device; - - struct list_head ctxs; - struct list_head events; - wait_queue_head_t poll_wait; -}; - -struct ib_ucm_context { - int id; - wait_queue_head_t wait; - atomic_t ref; - int events_reported; - - struct ib_ucm_file *file; - struct ib_cm_id *cm_id; - __u64 uid; - - struct list_head events; /* list of pending events. 
*/ - struct list_head file_list; /* member in file ctx list */ -}; - -struct ib_ucm_event { - struct ib_ucm_context *ctx; - struct list_head file_list; /* member in file event list */ - struct list_head ctx_list; /* member in ctx event list */ +static int ucm_debug_level; - struct ib_cm_id *cm_id; - struct ib_ucm_event_resp resp; - void *data; - void *info; - int data_len; - int info_len; -}; +module_param_named(debug_level, ucm_debug_level, int, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); enum { IB_UCM_MAJOR = 231, - IB_UCM_BASE_MINOR = 224, - IB_UCM_MAX_DEVICES = 32 + IB_UCM_MINOR = 255 }; -#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) +#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR) -static void ib_ucm_add_one(struct ib_device *device); -static void ib_ucm_remove_one(struct ib_device *device); +#define PFX "UCM: " -static struct ib_client ucm_client = { - .name = "ucm", - .add = ib_ucm_add_one, - .remove = ib_ucm_remove_one -}; +#define ucm_dbg(format, arg...) \ + do { \ + if (ucm_debug_level > 0) \ + printk(KERN_DEBUG PFX format, ## arg); \ + } while (0) -static DECLARE_MUTEX(ctx_id_mutex); -static DEFINE_IDR(ctx_id_table); -static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); +static struct semaphore ctx_id_mutex; +static struct idr ctx_id_table; static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { @@ -196,13 +152,17 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) goto error; list_add_tail(&ctx->file_list, &file->ctxs); + ucm_dbg("Allocated CM ID <%d>\n", ctx->id); return ctx; error: kfree(ctx); return NULL; } - +/* + * Event portion of the API, handle CM events + * and allow event polling. + */ static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, struct ib_sa_path_rec *kpath) { @@ -249,7 +209,6 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, ureq->retry_count = kreq->retry_count; ureq->rnr_retry_count = kreq->rnr_retry_count; ureq->srq = kreq->srq; - ureq->port = kreq->port; ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); @@ -336,8 +295,6 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, case IB_CM_SIDR_REQ_RECEIVED: uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; - uvt->resp.u.sidr_req_resp.port = - evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: @@ -430,7 +387,9 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - + /* + * wait + */ down(&file->mutex); while (list_empty(&file->events)) { @@ -512,6 +471,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, return result; } + static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) @@ -534,27 +494,29 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = ib_create_cm_id(file->device->ib_dev, - ib_ucm_event_handler, ctx); + ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); - goto err1; + goto err; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; - goto err2; + goto err; } + return 0; -err2: - ib_destroy_cm_id(ctx->cm_id); -err1: +err: down(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); 
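/*
 * The id being removed in this error path came from the classic two-step
 * idr allocation this file pairs with ctx_id_mutex (the same retry shape
 * send_mad() in sa_query.c uses). Sketch, with a hypothetical name:
 */
static int example_alloc_ctx_id(struct ib_ucm_context *ctx)
{
	int ret;

	do {
		if (!idr_pre_get(&ctx_id_table, GFP_KERNEL))
			return -ENOMEM;
		down(&ctx_id_mutex);
		ret = idr_get_new(&ctx_id_table, ctx, &ctx->id);
		up(&ctx_id_mutex);
	} while (ret == -EAGAIN);

	return ret;
}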
up(&ctx_id_mutex); + + if (!IS_ERR(ctx->cm_id)) + ib_destroy_cm_id(ctx->cm_id); + kfree(ctx); return result; } @@ -1222,6 +1184,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; + ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n", + hdr.cmd, hdr.in, hdr.out, len); + if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; @@ -1266,7 +1231,8 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; - file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev); + + ucm_dbg("Created struct\n"); return 0; } @@ -1297,17 +1263,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } -static void ib_ucm_release_class_dev(struct class_device *class_dev) -{ - struct ib_ucm_device *dev; - - dev = container_of(class_dev, struct ib_ucm_device, class_dev); - cdev_del(&dev->dev); - clear_bit(dev->devnum, dev_map); - kfree(dev); -} - -static struct file_operations ucm_fops = { +static struct file_operations ib_ucm_fops = { .owner = THIS_MODULE, .open = ib_ucm_open, .release = ib_ucm_close, @@ -1315,142 +1271,55 @@ static struct file_operations ucm_fops = { .poll = ib_ucm_poll, }; -static struct class ucm_class = { - .name = "infiniband_cm", - .release = ib_ucm_release_class_dev -}; - -static ssize_t show_dev(struct class_device *class_dev, char *buf) -{ - struct ib_ucm_device *dev; - - dev = container_of(class_dev, struct ib_ucm_device, class_dev); - return print_dev_t(buf, dev->dev.dev); -} -static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); -static ssize_t show_ibdev(struct class_device *class_dev, char *buf) -{ - struct ib_ucm_device *dev; - - dev = container_of(class_dev, struct ib_ucm_device, class_dev); - return sprintf(buf, "%s\n", dev->ib_dev->name); -} -static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static struct class *ib_ucm_class; +static struct cdev ib_ucm_cdev; -static void ib_ucm_add_one(struct ib_device *device) +static int __init ib_ucm_init(void) { - struct ib_ucm_device *ucm_dev; - - if (!device->alloc_ucontext) - return; - - ucm_dev = kmalloc(sizeof *ucm_dev, GFP_KERNEL); - if (!ucm_dev) - return; - - memset(ucm_dev, 0, sizeof *ucm_dev); - ucm_dev->ib_dev = device; - - ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); - if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) - goto err; + int result; - set_bit(ucm_dev->devnum, dev_map); + result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm"); + if (result) { + ucm_dbg("Error <%d> registering dev\n", result); + goto err_chr; + } - cdev_init(&ucm_dev->dev, &ucm_fops); - ucm_dev->dev.owner = THIS_MODULE; - kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum); - if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) - goto err; + cdev_init(&ib_ucm_cdev, &ib_ucm_fops); - ucm_dev->class_dev.class = &ucm_class; - ucm_dev->class_dev.dev = device->dma_device; - snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d", - ucm_dev->devnum); - if (class_device_register(&ucm_dev->class_dev)) + result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1); + if (result) { + ucm_dbg("Error <%d> adding cdev\n", result); goto err_cdev; + } - if (class_device_create_file(&ucm_dev->class_dev, - &class_device_attr_dev)) - goto err_class; - if (class_device_create_file(&ucm_dev->class_dev, - &class_device_attr_ibdev)) + ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm"); + if (IS_ERR(ib_ucm_class)) { + result = 
PTR_ERR(ib_ucm_class); + ucm_dbg("Error <%d> creating class\n", result); goto err_class; - - ib_set_client_data(device, &ucm_client, ucm_dev); - return; - -err_class: - class_device_unregister(&ucm_dev->class_dev); -err_cdev: - cdev_del(&ucm_dev->dev); - clear_bit(ucm_dev->devnum, dev_map); -err: - kfree(ucm_dev); - return; -} - -static void ib_ucm_remove_one(struct ib_device *device) -{ - struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); - - if (!ucm_dev) - return; - - class_device_unregister(&ucm_dev->class_dev); -} - -static ssize_t show_abi_version(struct class *class, char *buf) -{ - return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); -} -static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); - -static int __init ib_ucm_init(void) -{ - int ret; - - ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, - "infiniband_cm"); - if (ret) { - printk(KERN_ERR "ucm: couldn't register device number\n"); - goto err; } - ret = class_register(&ucm_class); - if (ret) { - printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n"); - goto err_chrdev; - } + class_device_create(ib_ucm_class, NULL, IB_UCM_DEV, NULL, "ucm"); - ret = class_create_file(&ucm_class, &class_attr_abi_version); - if (ret) { - printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); - goto err_class; - } + idr_init(&ctx_id_table); + init_MUTEX(&ctx_id_mutex); - ret = ib_register_client(&ucm_client); - if (ret) { - printk(KERN_ERR "ucm: couldn't register client\n"); - goto err_class; - } return 0; - err_class: - class_unregister(&ucm_class); -err_chrdev: - unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); -err: - return ret; + cdev_del(&ib_ucm_cdev); +err_cdev: + unregister_chrdev_region(IB_UCM_DEV, 1); +err_chr: + return result; } static void __exit ib_ucm_cleanup(void) { - ib_unregister_client(&ucm_client); - class_unregister(&ucm_class); - unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); - idr_destroy(&ctx_id_table); + class_device_destroy(ib_ucm_class, IB_UCM_DEV); + class_destroy(ib_ucm_class); + cdev_del(&ib_ucm_cdev); + unregister_chrdev_region(IB_UCM_DEV, 1); } module_init(ib_ucm_init); diff --git a/trunk/drivers/infiniband/core/ucm.h b/trunk/drivers/infiniband/core/ucm.h new file mode 100644 index 000000000000..f46f37bc1201 --- /dev/null +++ b/trunk/drivers/infiniband/core/ucm.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $ + */ + +#ifndef UCM_H +#define UCM_H + +#include +#include +#include +#include + +#include +#include + +struct ib_ucm_file { + struct semaphore mutex; + struct file *filp; + + struct list_head ctxs; /* list of active connections */ + struct list_head events; /* list of pending events */ + wait_queue_head_t poll_wait; +}; + +struct ib_ucm_context { + int id; + wait_queue_head_t wait; + atomic_t ref; + int events_reported; + + struct ib_ucm_file *file; + struct ib_cm_id *cm_id; + __u64 uid; + + struct list_head events; /* list of pending events. */ + struct list_head file_list; /* member in file ctx list */ +}; + +struct ib_ucm_event { + struct ib_ucm_context *ctx; + struct list_head file_list; /* member in file event list */ + struct list_head ctx_list; /* member in ctx event list */ + + struct ib_cm_id *cm_id; + struct ib_ucm_event_resp resp; + void *data; + void *info; + int data_len; + int info_len; +}; + +#endif /* UCM_H */ diff --git a/trunk/drivers/infiniband/core/user_mad.c b/trunk/drivers/infiniband/core/user_mad.c index 97128e25f78b..a64d6b4dcc16 100644 --- a/trunk/drivers/infiniband/core/user_mad.c +++ b/trunk/drivers/infiniband/core/user_mad.c @@ -64,39 +64,18 @@ enum { IB_UMAD_MINOR_BASE = 0 }; -/* - * Our lifetime rules for these structs are the following: each time a - * device special file is opened, we look up the corresponding struct - * ib_umad_port by minor in the umad_port[] table while holding the - * port_lock. If this lookup succeeds, we take a reference on the - * ib_umad_port's struct ib_umad_device while still holding the - * port_lock; if the lookup fails, we fail the open(). We drop these - * references in the corresponding close(). - * - * In addition to references coming from open character devices, there - * is one more reference to each ib_umad_device representing the - * module's reference taken when allocating the ib_umad_device in - * ib_umad_add_one(). - * - * When destroying an ib_umad_device, we clear all of its - * ib_umad_ports from umad_port[] while holding port_lock before - * dropping the module's reference to the ib_umad_device. This is - * always safe because any open() calls will either succeed and obtain - * a reference before we clear the umad_port[] entries, or fail after - * we clear the umad_port[] entries. 
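/*
 * Replacing that lifetime scheme: the cdev is embedded directly in
 * ib_umad_port (struct below), so open() recovers the port with
 * container_of() on inode->i_cdev instead of indexing umad_port[] by
 * minor. Sketch of the lookup, matching ib_umad_open() and
 * ib_umad_sm_open() further down:
 */
static struct ib_umad_port *example_port_from_inode(struct inode *inode)
{
	return container_of(inode->i_cdev, struct ib_umad_port, dev);
}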
- */ - struct ib_umad_port { - struct cdev *dev; - struct class_device *class_dev; + int devnum; + struct cdev dev; + struct class_device class_dev; - struct cdev *sm_dev; - struct class_device *sm_class_dev; + int sm_devnum; + struct cdev sm_dev; + struct class_device sm_class_dev; struct semaphore sm_sem; struct ib_device *ib_dev; struct ib_umad_device *umad_dev; - int dev_num; u8 port_num; }; @@ -117,31 +96,21 @@ struct ib_umad_file { }; struct ib_umad_packet { + struct ib_ah *ah; struct ib_mad_send_buf *msg; struct list_head list; int length; + DECLARE_PCI_UNMAP_ADDR(mapping) struct ib_user_mad mad; }; -static struct class *umad_class; - static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); - -static DEFINE_SPINLOCK(port_lock); -static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; +static spinlock_t map_lock; static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); -static void ib_umad_release_dev(struct kref *ref) -{ - struct ib_umad_device *dev = - container_of(ref, struct ib_umad_device, ref); - - kfree(dev); -} - static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, struct ib_umad_packet *packet) @@ -170,19 +139,22 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *timeout; - struct ib_umad_packet *packet = send_wc->send_buf->context[0]; + struct ib_umad_packet *timeout, *packet = + (void *) (unsigned long) send_wc->wr_id; - ib_destroy_ah(packet->msg->ah); + ib_destroy_ah(packet->msg->send_wr.wr.ud.ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL); + timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr), + GFP_KERNEL); if (!timeout) goto out; - timeout->length = IB_MGMT_MAD_HDR; - timeout->mad.hdr.id = packet->mad.hdr.id; + memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr)); + + timeout->length = sizeof (struct ib_mad_hdr); + timeout->mad.hdr.id = packet->mad.hdr.id; timeout->mad.hdr.status = ETIMEDOUT; memcpy(timeout->mad.data, packet->mad.data, sizeof (struct ib_mad_hdr)); @@ -205,10 +177,11 @@ static void recv_handler(struct ib_mad_agent *agent, goto out; length = mad_recv_wc->mad_len; - packet = kzalloc(sizeof *packet + length, GFP_KERNEL); + packet = kmalloc(sizeof *packet + length, GFP_KERNEL); if (!packet) goto out; + memset(packet, 0, sizeof *packet + length); packet->length = length; ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); @@ -274,7 +247,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, else ret = -ENOSPC; } else if (copy_to_user(buf, &packet->mad, - packet->length + sizeof (struct ib_user_mad))) + packet->length + sizeof (struct ib_user_mad))) ret = -EFAULT; else ret = packet->length + sizeof (struct ib_user_mad); @@ -295,23 +268,26 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; - struct ib_ah *ah; + struct ib_send_wr *bad_wr; struct ib_rmpp_mad *rmpp_mad; u8 method; __be64 *tid; - int ret, length, hdr_len, copy_offset; + int ret, length, hdr_len, data_len, rmpp_hdr_size; int rmpp_active = 0; if (count < sizeof (struct ib_user_mad)) return -EINVAL; length = count - sizeof (struct ib_user_mad); - packet = kmalloc(sizeof *packet + 
IB_MGMT_RMPP_HDR, GFP_KERNEL); + packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr), GFP_KERNEL); if (!packet) return -ENOMEM; if (copy_from_user(&packet->mad, buf, - sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) { + sizeof (struct ib_user_mad) + + sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr))) { ret = -EFAULT; goto err; } @@ -322,6 +298,8 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } + packet->length = length; + down_read(&file->agent_mutex); agent = file->agent[packet->mad.hdr.id]; @@ -343,9 +321,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } - ah = ib_create_ah(agent->qp->pd, &ah_attr); - if (IS_ERR(ah)) { - ret = PTR_ERR(ah); + packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); + if (IS_ERR(packet->ah)) { + ret = PTR_ERR(packet->ah); goto err_up; } @@ -359,44 +337,64 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, /* Validate that the management class can support RMPP */ if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { - hdr_len = IB_MGMT_SA_HDR; + hdr_len = offsetof(struct ib_sa_mad, data); + data_len = length - hdr_len; } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) { - hdr_len = IB_MGMT_VENDOR_HDR; + hdr_len = offsetof(struct ib_vendor_mad, data); + data_len = length - hdr_len; } else { ret = -EINVAL; goto err_ah; } rmpp_active = 1; - copy_offset = IB_MGMT_RMPP_HDR; } else { - hdr_len = IB_MGMT_MAD_HDR; - copy_offset = IB_MGMT_MAD_HDR; + if (length > sizeof(struct ib_mad)) { + ret = -EINVAL; + goto err_ah; + } + hdr_len = offsetof(struct ib_mad, data); + data_len = length - hdr_len; } packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), - 0, rmpp_active, - hdr_len, length - hdr_len, + 0, packet->ah, rmpp_active, + hdr_len, data_len, GFP_KERNEL); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; } - packet->msg->ah = ah; - packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; - packet->msg->retries = packet->mad.hdr.retries; - packet->msg->context[0] = packet; + packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms; + packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries; - /* Copy MAD headers (RMPP header in place) */ - memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); - /* Now, copy rest of message from user into send buffer */ - if (copy_from_user(packet->msg->mad + copy_offset, - buf + sizeof (struct ib_user_mad) + copy_offset, - length - copy_offset)) { - ret = -EFAULT; - goto err_msg; + /* Override send WR WRID initialized in ib_create_send_mad */ + packet->msg->send_wr.wr_id = (unsigned long) packet; + + if (!rmpp_active) { + /* Copy message from user into send buffer */ + if (copy_from_user(packet->msg->mad, + buf + sizeof(struct ib_user_mad), length)) { + ret = -EFAULT; + goto err_msg; + } + } else { + rmpp_hdr_size = sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr); + + /* Only copy MAD headers (RMPP header in place) */ + memcpy(packet->msg->mad, packet->mad.data, + sizeof(struct ib_mad_hdr)); + + /* Now, copy rest of message from user into send buffer */ + if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data, + buf + sizeof (struct ib_user_mad) + rmpp_hdr_size, + length - rmpp_hdr_size)) { + ret = -EFAULT; + goto err_msg; + } } /* @@ -405,29 +403,29 @@ static 
ssize_t ib_umad_write(struct file *filp, const char __user *buf, * transaction ID matches the agent being used to send the * MAD. */ - method = ((struct ib_mad_hdr *) packet->msg->mad)->method; + method = packet->msg->mad->mad_hdr.method; if (!(method & IB_MGMT_METHOD_RESP) && method != IB_MGMT_METHOD_TRAP_REPRESS && method != IB_MGMT_METHOD_SEND) { - tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; + tid = &packet->msg->mad->mad_hdr.tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); } - ret = ib_post_send_mad(packet->msg, NULL); + ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr); if (ret) goto err_msg; up_read(&file->agent_mutex); - return count; + return sizeof (struct ib_user_mad_hdr) + packet->length; err_msg: ib_free_send_mad(packet->msg); err_ah: - ib_destroy_ah(ah); + ib_destroy_ah(packet->ah); err_up: up_read(&file->agent_mutex); @@ -567,23 +565,15 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd, static int ib_umad_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port; + struct ib_umad_port *port = + container_of(inode->i_cdev, struct ib_umad_port, dev); struct ib_umad_file *file; - spin_lock(&port_lock); - port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; - if (port) - kref_get(&port->umad_dev->ref); - spin_unlock(&port_lock); - - if (!port) - return -ENXIO; - - file = kzalloc(sizeof *file, GFP_KERNEL); - if (!file) { - kref_put(&port->umad_dev->ref, ib_umad_release_dev); + file = kmalloc(sizeof *file, GFP_KERNEL); + if (!file) return -ENOMEM; - } + + memset(file, 0, sizeof *file); spin_lock_init(&file->recv_lock); init_rwsem(&file->agent_mutex); @@ -599,7 +589,6 @@ static int ib_umad_open(struct inode *inode, struct file *filp) static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; - struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; int i; @@ -614,8 +603,6 @@ static int ib_umad_close(struct inode *inode, struct file *filp) kfree(file); - kref_put(&dev->ref, ib_umad_release_dev); - return 0; } @@ -632,46 +619,30 @@ static struct file_operations umad_fops = { static int ib_umad_sm_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port; + struct ib_umad_port *port = + container_of(inode->i_cdev, struct ib_umad_port, sm_dev); struct ib_port_modify props = { .set_port_cap_mask = IB_PORT_SM }; int ret; - spin_lock(&port_lock); - port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; - if (port) - kref_get(&port->umad_dev->ref); - spin_unlock(&port_lock); - - if (!port) - return -ENXIO; - if (filp->f_flags & O_NONBLOCK) { - if (down_trylock(&port->sm_sem)) { - ret = -EAGAIN; - goto fail; - } + if (down_trylock(&port->sm_sem)) + return -EAGAIN; } else { - if (down_interruptible(&port->sm_sem)) { - ret = -ERESTARTSYS; - goto fail; - } + if (down_interruptible(&port->sm_sem)) + return -ERESTARTSYS; } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) { up(&port->sm_sem); - goto fail; + return ret; } filp->private_data = port; return 0; - -fail: - kref_put(&port->umad_dev->ref, ib_umad_release_dev); - return ret; } static int ib_umad_sm_close(struct inode *inode, struct file *filp) @@ -685,8 +656,6 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); up(&port->sm_sem); - kref_put(&port->umad_dev->ref, ib_umad_release_dev); - return ret; } @@ -702,12 +671,20 @@ 
static struct ib_client umad_client = { .remove = ib_umad_remove_one }; -static ssize_t show_ibdev(struct class_device *class_dev, char *buf) +static ssize_t show_dev(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); - if (!port) - return -ENODEV; + if (class_dev == &port->class_dev) + return print_dev_t(buf, port->dev.dev); + else + return print_dev_t(buf, port->sm_dev.dev); +} +static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); + +static ssize_t show_ibdev(struct class_device *class_dev, char *buf) +{ + struct ib_umad_port *port = class_get_devdata(class_dev); return sprintf(buf, "%s\n", port->ib_dev->name); } @@ -717,13 +694,38 @@ static ssize_t show_port(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); - if (!port) - return -ENODEV; - return sprintf(buf, "%d\n", port->port_num); } static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); +static void ib_umad_release_dev(struct kref *ref) +{ + struct ib_umad_device *dev = + container_of(ref, struct ib_umad_device, ref); + + kfree(dev); +} + +static void ib_umad_release_port(struct class_device *class_dev) +{ + struct ib_umad_port *port = class_get_devdata(class_dev); + + if (class_dev == &port->class_dev) { + cdev_del(&port->dev); + clear_bit(port->devnum, dev_map); + } else { + cdev_del(&port->sm_dev); + clear_bit(port->sm_devnum, dev_map); + } + + kref_put(&port->umad_dev->ref, ib_umad_release_dev); +} + +static struct class umad_class = { + .name = "infiniband_mad", + .release = ib_umad_release_port +}; + static ssize_t show_abi_version(struct class *class, char *buf) { return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); @@ -733,102 +735,91 @@ static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_port *port) { - spin_lock(&port_lock); - port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (port->dev_num >= IB_UMAD_MAX_PORTS) { - spin_unlock(&port_lock); + spin_lock(&map_lock); + port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (port->devnum >= IB_UMAD_MAX_PORTS) { + spin_unlock(&map_lock); return -1; } - set_bit(port->dev_num, dev_map); - spin_unlock(&port_lock); + port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS); + if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) { + spin_unlock(&map_lock); + return -1; + } + set_bit(port->devnum, dev_map); + set_bit(port->sm_devnum, dev_map); + spin_unlock(&map_lock); port->ib_dev = device; port->port_num = port_num; init_MUTEX(&port->sm_sem); - port->dev = cdev_alloc(); - if (!port->dev) + cdev_init(&port->dev, &umad_fops); + port->dev.owner = THIS_MODULE; + kobject_set_name(&port->dev.kobj, "umad%d", port->devnum); + if (cdev_add(&port->dev, base_dev + port->devnum, 1)) return -1; - port->dev->owner = THIS_MODULE; - port->dev->ops = &umad_fops; - kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num); - if (cdev_add(port->dev, base_dev + port->dev_num, 1)) - goto err_cdev; - port->class_dev = class_device_create(umad_class, NULL, port->dev->dev, - device->dma_device, - "umad%d", port->dev_num); - if (IS_ERR(port->class_dev)) + port->class_dev.class = &umad_class; + port->class_dev.dev = device->dma_device; + + snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum); + + if (class_device_register(&port->class_dev)) goto err_cdev; - if (class_device_create_file(port->class_dev, 
&class_device_attr_ibdev)) + class_set_devdata(&port->class_dev, port); + kref_get(&port->umad_dev->ref); + + if (class_device_create_file(&port->class_dev, &class_device_attr_dev)) goto err_class; - if (class_device_create_file(port->class_dev, &class_device_attr_port)) + if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev)) goto err_class; - - port->sm_dev = cdev_alloc(); - if (!port->sm_dev) + if (class_device_create_file(&port->class_dev, &class_device_attr_port)) goto err_class; - port->sm_dev->owner = THIS_MODULE; - port->sm_dev->ops = &umad_sm_fops; - kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num); - if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) - goto err_sm_cdev; - port->sm_class_dev = class_device_create(umad_class, NULL, port->sm_dev->dev, - device->dma_device, - "issm%d", port->dev_num); - if (IS_ERR(port->sm_class_dev)) + cdev_init(&port->sm_dev, &umad_sm_fops); + port->sm_dev.owner = THIS_MODULE; + kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); + if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1)) + return -1; + + port->sm_class_dev.class = &umad_class; + port->sm_class_dev.dev = device->dma_device; + + snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); + + if (class_device_register(&port->sm_class_dev)) goto err_sm_cdev; - class_set_devdata(port->class_dev, port); - class_set_devdata(port->sm_class_dev, port); + class_set_devdata(&port->sm_class_dev, port); + kref_get(&port->umad_dev->ref); - if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev)) + if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev)) goto err_sm_class; - if (class_device_create_file(port->sm_class_dev, &class_device_attr_port)) + if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev)) + goto err_sm_class; + if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port)) goto err_sm_class; - - spin_lock(&port_lock); - umad_port[port->dev_num] = port; - spin_unlock(&port_lock); return 0; err_sm_class: - class_device_destroy(umad_class, port->sm_dev->dev); + class_device_unregister(&port->sm_class_dev); err_sm_cdev: - cdev_del(port->sm_dev); + cdev_del(&port->sm_dev); err_class: - class_device_destroy(umad_class, port->dev->dev); + class_device_unregister(&port->class_dev); err_cdev: - cdev_del(port->dev); - clear_bit(port->dev_num, dev_map); + cdev_del(&port->dev); + clear_bit(port->devnum, dev_map); return -1; } -static void ib_umad_kill_port(struct ib_umad_port *port) -{ - class_set_devdata(port->class_dev, NULL); - class_set_devdata(port->sm_class_dev, NULL); - - class_device_destroy(umad_class, port->dev->dev); - class_device_destroy(umad_class, port->sm_dev->dev); - - cdev_del(port->dev); - cdev_del(port->sm_dev); - - spin_lock(&port_lock); - umad_port[port->dev_num] = NULL; - spin_unlock(&port_lock); - - clear_bit(port->dev_num, dev_map); -} - static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; @@ -841,12 +832,15 @@ static void ib_umad_add_one(struct ib_device *device) e = device->phys_port_cnt; } - umad_dev = kzalloc(sizeof *umad_dev + + umad_dev = kmalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), GFP_KERNEL); if (!umad_dev) return; + memset(umad_dev, 0, sizeof *umad_dev + + (e - s + 1) * sizeof (struct ib_umad_port)); + kref_init(&umad_dev->ref); umad_dev->start_port = s; @@ -864,8 +858,10 @@ static void 
ib_umad_add_one(struct ib_device *device) return; err: - while (--i >= s) - ib_umad_kill_port(&umad_dev->port[i]); + while (--i >= s) { + class_device_unregister(&umad_dev->port[i - s].class_dev); + class_device_unregister(&umad_dev->port[i - s].sm_class_dev); + } kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -878,8 +874,10 @@ static void ib_umad_remove_one(struct ib_device *device) if (!umad_dev) return; - for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) - ib_umad_kill_port(&umad_dev->port[i]); + for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) { + class_device_unregister(&umad_dev->port[i].class_dev); + class_device_unregister(&umad_dev->port[i].sm_class_dev); + } kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -888,6 +886,8 @@ static int __init ib_umad_init(void) { int ret; + spin_lock_init(&map_lock); + ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { @@ -895,14 +895,13 @@ static int __init ib_umad_init(void) goto out; } - umad_class = class_create(THIS_MODULE, "infiniband_mad"); - if (IS_ERR(umad_class)) { - ret = PTR_ERR(umad_class); + ret = class_register(&umad_class); + if (ret) { printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); goto out_chrdev; } - ret = class_create_file(umad_class, &class_attr_abi_version); + ret = class_create_file(&umad_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); goto out_class; @@ -917,7 +916,7 @@ static int __init ib_umad_init(void) return 0; out_class: - class_destroy(umad_class); + class_unregister(&umad_class); out_chrdev: unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); @@ -929,7 +928,7 @@ static int __init ib_umad_init(void) static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); - class_destroy(umad_class); + class_unregister(&umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); } diff --git a/trunk/drivers/infiniband/core/uverbs.h b/trunk/drivers/infiniband/core/uverbs.h index 031cdf3c066d..cc124344dd2c 100644 --- a/trunk/drivers/infiniband/core/uverbs.h +++ b/trunk/drivers/infiniband/core/uverbs.h @@ -3,7 +3,6 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -39,47 +38,29 @@ #ifndef UVERBS_H #define UVERBS_H +/* Include device.h and fs.h until cdev.h is self-sufficient */ +#include +#include +#include #include #include #include #include -/* - * Our lifetime rules for these structs are the following: - * - * struct ib_uverbs_device: One reference is held by the module and - * released in ib_uverbs_remove_one(). Another reference is taken by - * ib_uverbs_open() each time the character special file is opened, - * and released in ib_uverbs_release_file() when the file is released. - * - * struct ib_uverbs_file: One reference is held by the VFS and - * released when the file is closed. Another reference is taken when - * an asynchronous event queue file is created and released when the - * event file is closed. - * - * struct ib_uverbs_event_file: One reference is held by the VFS and - * released when the file is closed. 
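[Illustrative aside, not part of the patch: these lifetime rules translate directly into paired kref calls. Condensed from the code this patch removes, the device object's references looked roughly like this:

	kref_init(&uverbs_dev->ref);	/* module's reference, taken in add_one() */
	kref_get(&uverbs_dev->ref);	/* per-open reference, taken in open() */
	/* ... */
	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); /* each holder drops its ref */

The object is freed only when the last holder calls kref_put().]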
For asynchronous event files, - * another reference is held by the corresponding main context file - * and released when that file is closed. For completion event files, - * a reference is taken when a CQ is created that uses the file, and - * released when the CQ is destroyed. - */ - struct ib_uverbs_device { - struct kref ref; int devnum; - struct cdev *dev; - struct class_device *class_dev; + struct cdev dev; + struct class_device class_dev; struct ib_device *ib_dev; - int num_comp_vectors; + int num_comp; }; struct ib_uverbs_event_file { struct kref ref; - struct file *file; struct ib_uverbs_file *uverbs_file; spinlock_t lock; + int fd; int is_async; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; @@ -92,7 +73,8 @@ struct ib_uverbs_file { struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; - struct ib_uverbs_event_file *async_file; + struct ib_uverbs_event_file async_file; + struct ib_uverbs_event_file comp_file[1]; }; struct ib_uverbs_event { @@ -128,23 +110,10 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - int is_async, int *fd); -void ib_uverbs_release_event_file(struct kref *ref); -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); - -void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_event_file *ev_file, - struct ib_ucq_object *uobj); -void ib_uverbs_release_uevent(struct ib_uverbs_file *file, - struct ib_uevent_object *uobj); - void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); -void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event); int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, void *addr, size_t size, int write); @@ -156,26 +125,21 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); const char __user *buf, int in_len, \ int out_len) +IB_UVERBS_DECLARE_CMD(query_params); IB_UVERBS_DECLARE_CMD(get_context); IB_UVERBS_DECLARE_CMD(query_device); IB_UVERBS_DECLARE_CMD(query_port); +IB_UVERBS_DECLARE_CMD(query_gid); +IB_UVERBS_DECLARE_CMD(query_pkey); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); -IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); -IB_UVERBS_DECLARE_CMD(poll_cq); -IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); -IB_UVERBS_DECLARE_CMD(post_send); -IB_UVERBS_DECLARE_CMD(post_recv); -IB_UVERBS_DECLARE_CMD(post_srq_recv); -IB_UVERBS_DECLARE_CMD(create_ah); -IB_UVERBS_DECLARE_CMD(destroy_ah); IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); IB_UVERBS_DECLARE_CMD(create_srq); diff --git a/trunk/drivers/infiniband/core/uverbs_cmd.c b/trunk/drivers/infiniband/core/uverbs_cmd.c index 8c89abc8c764..562445165d2b 100644 --- a/trunk/drivers/infiniband/core/uverbs_cmd.c +++ b/trunk/drivers/infiniband/core/uverbs_cmd.c @@ -1,7 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. 
- * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,9 +33,6 @@ * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ */ -#include -#include - #include #include "uverbs.h" @@ -49,6 +45,29 @@ (udata)->outlen = (olen); \ } while (0) +ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_params cmd; + struct ib_uverbs_query_params_resp resp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + resp.num_cq_events = file->device->num_comp; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -58,7 +77,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_udata udata; struct ib_device *ibdev = file->device->ib_dev; struct ib_ucontext *ucontext; - struct file *filp; + int i; int ret; if (out_len < sizeof resp) @@ -91,42 +110,26 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); - resp.num_comp_vectors = file->device->num_comp_vectors; - - filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); - if (IS_ERR(filp)) { - ret = PTR_ERR(filp); - goto err_free; - } + resp.async_fd = file->async_file.fd; + for (i = 0; i < file->device->num_comp; ++i) + if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + + i * sizeof (__u32), + &file->comp_file[i].fd, sizeof (__u32))) { + ret = -EFAULT; + goto err_free; + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_file; + goto err_free; } - file->async_file = filp->private_data; - - INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev, - ib_uverbs_event_handler); - ret = ib_register_event_handler(&file->event_handler); - if (ret) - goto err_file; - - kref_get(&file->async_file->ref); - kref_get(&file->ref); file->ucontext = ucontext; - - fd_install(resp.async_fd, filp); - up(&file->mutex); return in_len; -err_file: - put_unused_fd(resp.async_fd); - fput(filp); - err_free: ibdev->dealloc_ucontext(ucontext); @@ -252,6 +255,62 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, return in_len; } +ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_gid cmd; + struct ib_uverbs_query_gid_resp resp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index, + (union ib_gid *) resp.gid); + if (ret) + return ret; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_pkey cmd; + struct ib_uverbs_query_pkey_resp resp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + ret = 
ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index, + &resp.pkey); + if (ret) + return ret; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -290,20 +349,24 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->uobject = uobj; atomic_set(&pd->usecnt, 0); - down(&ib_uverbs_idr_mutex); - retry: if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_up; + goto err_pd; } + down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); + up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_up; + goto err_pd; + + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->pd_list); + up(&file->mutex); memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -311,22 +374,21 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_list; } - down(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - up(&file->mutex); - - up(&ib_uverbs_idr_mutex); - return in_len; -err_idr: - idr_remove(&ib_uverbs_pd_idr, uobj->id); +err_list: + down(&file->mutex); + list_del(&uobj->list); + up(&file->mutex); -err_up: + down(&ib_uverbs_idr_mutex); + idr_remove(&ib_uverbs_pd_idr, uobj->id); up(&ib_uverbs_idr_mutex); + +err_pd: ib_dealloc_pd(pd); err: @@ -397,14 +459,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; - /* - * Local write permission is required if remote write or - * remote atomic permission is also requested. - */ - if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && - !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) - return -EINVAL; - obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; @@ -470,22 +524,24 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, resp.mr_handle = obj->uobject.id; + down(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + up(&file->mutex); + if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_list; } - down(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); - up(&file->mutex); - up(&ib_uverbs_idr_mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); +err_list: + down(&file->mutex); + list_del(&obj->uobject.list); + up(&file->mutex); err_unreg: ib_dereg_mr(mr); @@ -539,35 +595,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, return ret ? 
ret : in_len; } -ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_comp_channel cmd; - struct ib_uverbs_create_comp_channel_resp resp; - struct file *filp; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); - if (IS_ERR(filp)) - return PTR_ERR(filp); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - put_unused_fd(resp.fd); - fput(filp); - return -EFAULT; - } - - fd_install(resp.fd, filp); - return in_len; -} - ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -576,7 +603,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; struct ib_ucq_object *uobj; - struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; @@ -590,12 +616,9 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - if (cmd.comp_vector >= file->device->num_comp_vectors) + if (cmd.event_handler >= file->device->num_comp) return -EINVAL; - if (cmd.comp_channel >= 0) - ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; @@ -618,23 +641,27 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cq->uobject = &uobj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = ev_file; + cq->cq_context = file; atomic_set(&cq->usecnt, 0); - down(&ib_uverbs_idr_mutex); - retry: if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_up; + goto err_cq; } + down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id); + up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_up; + goto err_cq; + + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); + up(&file->mutex); memset(&resp, 0, sizeof resp); resp.cq_handle = uobj->uobject.id; @@ -643,22 +670,21 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_list; } - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); - up(&file->mutex); - - up(&ib_uverbs_idr_mutex); - return in_len; -err_idr: - idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); +err_list: + down(&file->mutex); + list_del(&uobj->uobject.list); + up(&file->mutex); -err_up: + down(&ib_uverbs_idr_mutex); + idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); up(&ib_uverbs_idr_mutex); + +err_cq: ib_destroy_cq(cq); err: @@ -666,93 +692,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, return ret; } -ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_poll_cq cmd; - struct ib_uverbs_poll_cq_resp *resp; - struct ib_cq *cq; - struct ib_wc *wc; - int ret = 0; - int i; - int rsize; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL); - if (!wc) - return -ENOMEM; - - rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc); - resp = kmalloc(rsize, GFP_KERNEL); - if (!resp) { - ret = -ENOMEM; - 
goto out_wc; - } - - down(&ib_uverbs_idr_mutex); - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (!cq || cq->uobject->context != file->ucontext) { - ret = -EINVAL; - goto out; - } - - resp->count = ib_poll_cq(cq, cmd.ne, wc); - - for (i = 0; i < resp->count; i++) { - resp->wc[i].wr_id = wc[i].wr_id; - resp->wc[i].status = wc[i].status; - resp->wc[i].opcode = wc[i].opcode; - resp->wc[i].vendor_err = wc[i].vendor_err; - resp->wc[i].byte_len = wc[i].byte_len; - resp->wc[i].imm_data = wc[i].imm_data; - resp->wc[i].qp_num = wc[i].qp_num; - resp->wc[i].src_qp = wc[i].src_qp; - resp->wc[i].wc_flags = wc[i].wc_flags; - resp->wc[i].pkey_index = wc[i].pkey_index; - resp->wc[i].slid = wc[i].slid; - resp->wc[i].sl = wc[i].sl; - resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; - resp->wc[i].port_num = wc[i].port_num; - } - - if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize)) - ret = -EFAULT; - -out: - up(&ib_uverbs_idr_mutex); - kfree(resp); - -out_wc: - kfree(wc); - return ret ? ret : in_len; -} - -ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_req_notify_cq cmd; - struct ib_cq *cq; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - down(&ib_uverbs_idr_mutex); - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (cq && cq->uobject->context == file->ucontext) { - ib_req_notify_cq(cq, cmd.solicited_only ? - IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - ret = in_len; - } - up(&ib_uverbs_idr_mutex); - - return ret; -} - ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -761,7 +700,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_cq_resp resp; struct ib_cq *cq; struct ib_ucq_object *uobj; - struct ib_uverbs_event_file *ev_file; + struct ib_uverbs_event *evt, *tmp; u64 user_handle; int ret = -EINVAL; @@ -777,8 +716,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, goto out; user_handle = cq->uobject->user_handle; - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); - ev_file = cq->cq_context; + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); ret = ib_destroy_cq(cq); if (ret) @@ -790,7 +728,19 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - ib_uverbs_release_ucq(file, ev_file, uobj); + spin_lock_irq(&file->comp_file[0].lock); + list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->comp_file[0].lock); + + spin_lock_irq(&file->async_file.lock); + list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file.lock); resp.comp_events_reported = uobj->comp_events_reported; resp.async_events_reported = uobj->async_events_reported; @@ -909,22 +859,24 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, resp.qp_handle = uobj->uobject.id; + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); + up(&file->mutex); + if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_list; } - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); - up(&file->mutex); - up(&ib_uverbs_idr_mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_qp_idr, uobj->uobject.id); +err_list: 
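[Illustrative aside, not part of the patch: the handle allocations throughout these handlers use the two-step idr API of this kernel era. idr_pre_get() preallocates memory, but idr_get_new() can still fail with -EAGAIN if another allocator wins the race, so callers loop. A minimal sketch under a semaphore, mirroring the surrounding code (my_idr, my_sem, and obj are hypothetical):

	int id, ret;

retry:
	if (!idr_pre_get(&my_idr, GFP_KERNEL))
		return -ENOMEM;			/* preallocation failed */

	down(&my_sem);				/* serializes the idr table */
	ret = idr_get_new(&my_idr, obj, &id);
	up(&my_sem);

	if (ret == -EAGAIN)
		goto retry;			/* lost the race; try again */
	if (ret)
		return ret;
	/* id is now a handle that maps back to obj via idr_find() */

Note also the ordering change this patch makes repeatedly: the object is linked onto the per-context list before copy_to_user(), so the error path unwinds the list first (err_list) and removes the idr entry afterwards.]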
+ down(&file->mutex); + list_del(&uobj->uobject.list); + up(&file->mutex); err_destroy: ib_destroy_qp(qp); @@ -1027,6 +979,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_uverbs_destroy_qp_resp resp; struct ib_qp *qp; struct ib_uevent_object *uobj; + struct ib_uverbs_event *evt, *tmp; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1052,7 +1005,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - ib_uverbs_release_uevent(file, uobj); + spin_lock_irq(&file->async_file.lock); + list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file.lock); resp.events_reported = uobj->events_reported; @@ -1068,468 +1026,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, return ret ? ret : in_len; } -ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_post_send cmd; - struct ib_uverbs_post_send_resp resp; - struct ib_uverbs_send_wr *user_wr; - struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; - struct ib_qp *qp; - int i, sg_ind; - ssize_t ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + - cmd.sge_count * sizeof (struct ib_uverbs_sge)) - return -EINVAL; - - if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) - return -EINVAL; - - user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); - if (!user_wr) - return -ENOMEM; - - down(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) - goto out; - - sg_ind = 0; - last = NULL; - for (i = 0; i < cmd.wr_count; ++i) { - if (copy_from_user(user_wr, - buf + sizeof cmd + i * cmd.wqe_size, - cmd.wqe_size)) { - ret = -EFAULT; - goto out; - } - - if (user_wr->num_sge + sg_ind > cmd.sge_count) { - ret = -EINVAL; - goto out; - } - - next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + - user_wr->num_sge * sizeof (struct ib_sge), - GFP_KERNEL); - if (!next) { - ret = -ENOMEM; - goto out; - } - - if (!last) - wr = next; - else - last->next = next; - last = next; - - next->next = NULL; - next->wr_id = user_wr->wr_id; - next->num_sge = user_wr->num_sge; - next->opcode = user_wr->opcode; - next->send_flags = user_wr->send_flags; - next->imm_data = user_wr->imm_data; - - if (qp->qp_type == IB_QPT_UD) { - next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, - user_wr->wr.ud.ah); - if (!next->wr.ud.ah) { - ret = -EINVAL; - goto out; - } - next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; - next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; - } else { - switch (next->opcode) { - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - case IB_WR_RDMA_READ: - next->wr.rdma.remote_addr = - user_wr->wr.rdma.remote_addr; - next->wr.rdma.rkey = - user_wr->wr.rdma.rkey; - break; - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - next->wr.atomic.remote_addr = - user_wr->wr.atomic.remote_addr; - next->wr.atomic.compare_add = - user_wr->wr.atomic.compare_add; - next->wr.atomic.swap = user_wr->wr.atomic.swap; - next->wr.atomic.rkey = user_wr->wr.atomic.rkey; - break; - default: - break; - } - } - - if (next->num_sge) { - next->sg_list = (void *) next + - ALIGN(sizeof *next, sizeof (struct ib_sge)); - if (copy_from_user(next->sg_list, - buf + sizeof cmd + - cmd.wr_count * cmd.wqe_size + - sg_ind * sizeof (struct ib_sge), - 
next->num_sge * sizeof (struct ib_sge))) { - ret = -EFAULT; - goto out; - } - sg_ind += next->num_sge; - } else - next->sg_list = NULL; - } - - resp.bad_wr = 0; - ret = qp->device->post_send(qp, wr, &bad_wr); - if (ret) - for (next = wr; next; next = next->next) { - ++resp.bad_wr; - if (next == bad_wr) - break; - } - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - -out: - up(&ib_uverbs_idr_mutex); - - while (wr) { - next = wr->next; - kfree(wr); - wr = next; - } - - kfree(user_wr); - - return ret ? ret : in_len; -} - -static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, - int in_len, - u32 wr_count, - u32 sge_count, - u32 wqe_size) -{ - struct ib_uverbs_recv_wr *user_wr; - struct ib_recv_wr *wr = NULL, *last, *next; - int sg_ind; - int i; - int ret; - - if (in_len < wqe_size * wr_count + - sge_count * sizeof (struct ib_uverbs_sge)) - return ERR_PTR(-EINVAL); - - if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) - return ERR_PTR(-EINVAL); - - user_wr = kmalloc(wqe_size, GFP_KERNEL); - if (!user_wr) - return ERR_PTR(-ENOMEM); - - sg_ind = 0; - last = NULL; - for (i = 0; i < wr_count; ++i) { - if (copy_from_user(user_wr, buf + i * wqe_size, - wqe_size)) { - ret = -EFAULT; - goto err; - } - - if (user_wr->num_sge + sg_ind > sge_count) { - ret = -EINVAL; - goto err; - } - - next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + - user_wr->num_sge * sizeof (struct ib_sge), - GFP_KERNEL); - if (!next) { - ret = -ENOMEM; - goto err; - } - - if (!last) - wr = next; - else - last->next = next; - last = next; - - next->next = NULL; - next->wr_id = user_wr->wr_id; - next->num_sge = user_wr->num_sge; - - if (next->num_sge) { - next->sg_list = (void *) next + - ALIGN(sizeof *next, sizeof (struct ib_sge)); - if (copy_from_user(next->sg_list, - buf + wr_count * wqe_size + - sg_ind * sizeof (struct ib_sge), - next->num_sge * sizeof (struct ib_sge))) { - ret = -EFAULT; - goto err; - } - sg_ind += next->num_sge; - } else - next->sg_list = NULL; - } - - kfree(user_wr); - return wr; - -err: - kfree(user_wr); - - while (wr) { - next = wr->next; - kfree(wr); - wr = next; - } - - return ERR_PTR(ret); -} - -ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_post_recv cmd; - struct ib_uverbs_post_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; - struct ib_qp *qp; - ssize_t ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); - if (IS_ERR(wr)) - return PTR_ERR(wr); - - down(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) - goto out; - - resp.bad_wr = 0; - ret = qp->device->post_recv(qp, wr, &bad_wr); - if (ret) - for (next = wr; next; next = next->next) { - ++resp.bad_wr; - if (next == bad_wr) - break; - } - - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - -out: - up(&ib_uverbs_idr_mutex); - - while (wr) { - next = wr->next; - kfree(wr); - wr = next; - } - - return ret ? 
ret : in_len; -} - -ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_post_srq_recv cmd; - struct ib_uverbs_post_srq_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; - struct ib_srq *srq; - ssize_t ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); - if (IS_ERR(wr)) - return PTR_ERR(wr); - - down(&ib_uverbs_idr_mutex); - - srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); - if (!srq || srq->uobject->context != file->ucontext) - goto out; - - resp.bad_wr = 0; - ret = srq->device->post_srq_recv(srq, wr, &bad_wr); - if (ret) - for (next = wr; next; next = next->next) { - ++resp.bad_wr; - if (next == bad_wr) - break; - } - - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - -out: - up(&ib_uverbs_idr_mutex); - - while (wr) { - next = wr->next; - kfree(wr); - wr = next; - } - - return ret ? ret : in_len; -} - -ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_ah cmd; - struct ib_uverbs_create_ah_resp resp; - struct ib_uobject *uobj; - struct ib_pd *pd; - struct ib_ah *ah; - struct ib_ah_attr attr; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - down(&ib_uverbs_idr_mutex); - - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - if (!pd || pd->uobject->context != file->ucontext) { - ret = -EINVAL; - goto err_up; - } - - uobj->user_handle = cmd.user_handle; - uobj->context = file->ucontext; - - attr.dlid = cmd.attr.dlid; - attr.sl = cmd.attr.sl; - attr.src_path_bits = cmd.attr.src_path_bits; - attr.static_rate = cmd.attr.static_rate; - attr.port_num = cmd.attr.port_num; - attr.grh.flow_label = cmd.attr.grh.flow_label; - attr.grh.sgid_index = cmd.attr.grh.sgid_index; - attr.grh.hop_limit = cmd.attr.grh.hop_limit; - attr.grh.traffic_class = cmd.attr.grh.traffic_class; - memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); - - ah = ib_create_ah(pd, &attr); - if (IS_ERR(ah)) { - ret = PTR_ERR(ah); - goto err_up; - } - - ah->uobject = uobj; - -retry: - if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) { - ret = -ENOMEM; - goto err_destroy; - } - - ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id); - - if (ret == -EAGAIN) - goto retry; - if (ret) - goto err_destroy; - - resp.ah_handle = uobj->id; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_idr; - } - - down(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->ah_list); - up(&file->mutex); - - up(&ib_uverbs_idr_mutex); - - return in_len; - -err_idr: - idr_remove(&ib_uverbs_ah_idr, uobj->id); - -err_destroy: - ib_destroy_ah(ah); - -err_up: - up(&ib_uverbs_idr_mutex); - - kfree(uobj); - return ret; -} - -ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, - const char __user *buf, int in_len, int out_len) -{ - struct ib_uverbs_destroy_ah cmd; - struct ib_ah *ah; - struct ib_uobject *uobj; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - down(&ib_uverbs_idr_mutex); - - ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle); - if (!ah || ah->uobject->context != file->ucontext) - goto out; - - uobj = 
ah->uobject; - - ret = ib_destroy_ah(ah); - if (ret) - goto out; - - idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle); - - down(&file->mutex); - list_del(&uobj->list); - up(&file->mutex); - - kfree(uobj); - -out: - up(&ib_uverbs_idr_mutex); - - return ret ? ret : in_len; -} - ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1652,22 +1148,24 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, resp.srq_handle = uobj->uobject.id; + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); + up(&file->mutex); + if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_list; } - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); - up(&file->mutex); - up(&ib_uverbs_idr_mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id); +err_list: + down(&file->mutex); + list_del(&uobj->uobject.list); + up(&file->mutex); err_destroy: ib_destroy_srq(srq); @@ -1719,6 +1217,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_srq_resp resp; struct ib_srq *srq; struct ib_uevent_object *uobj; + struct ib_uverbs_event *evt, *tmp; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1744,7 +1243,12 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - ib_uverbs_release_uevent(file, uobj); + spin_lock_irq(&file->async_file.lock); + list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file.lock); resp.events_reported = uobj->events_reported; diff --git a/trunk/drivers/infiniband/core/uverbs_main.c b/trunk/drivers/infiniband/core/uverbs_main.c index 0eb38f479b39..12511808de21 100644 --- a/trunk/drivers/infiniband/core/uverbs_main.c +++ b/trunk/drivers/infiniband/core/uverbs_main.c @@ -3,7 +3,6 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -44,7 +43,6 @@ #include #include #include -#include #include @@ -64,8 +62,6 @@ enum { #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) -static struct class *uverbs_class; - DECLARE_MUTEX(ib_uverbs_idr_mutex); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); @@ -76,37 +72,31 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); static spinlock_t map_lock; -static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, - [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, - [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, - [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, - [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, - [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, + [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; static struct vfsmount *uverbs_event_mnt; @@ -114,54 +104,7 @@ static struct vfsmount *uverbs_event_mnt; static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); -static void ib_uverbs_release_dev(struct kref *ref) 
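[Illustrative aside, not part of the patch: the kref release callback being removed in this hunk follows the canonical pattern: recover the enclosing object with container_of() and free it exactly once, when the final reference is dropped. A generic sketch with hypothetical names (my_obj, my_obj_release):

	struct my_obj {
		struct kref ref;
		/* ... payload ... */
	};

	static void my_obj_release(struct kref *ref)
	{
		/* runs exactly once, when the count reaches zero */
		struct my_obj *obj = container_of(ref, struct my_obj, ref);

		kfree(obj);
	}

	/* usage */
	kref_init(&obj->ref);			/* count = 1 */
	kref_get(&obj->ref);			/* an additional holder */
	kref_put(&obj->ref, my_obj_release);	/* drop one reference */
]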
-{ - struct ib_uverbs_device *dev = - container_of(ref, struct ib_uverbs_device, ref); - - kfree(dev); -} - -void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_event_file *ev_file, - struct ib_ucq_object *uobj) -{ - struct ib_uverbs_event *evt, *tmp; - - if (ev_file) { - spin_lock_irq(&ev_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&ev_file->lock); - - kref_put(&ev_file->ref, ib_uverbs_release_event_file); - } - - spin_lock_irq(&file->async_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->lock); -} - -void ib_uverbs_release_uevent(struct ib_uverbs_file *file, - struct ib_uevent_object *uobj) -{ - struct ib_uverbs_event *evt, *tmp; - - spin_lock_irq(&file->async_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->lock); -} - -static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, - struct ib_ucontext *context) +static int ib_dealloc_ucontext(struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; @@ -170,46 +113,30 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, down(&ib_uverbs_idr_mutex); - list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { - struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id); - idr_remove(&ib_uverbs_ah_idr, uobj->id); - ib_destroy_ah(ah); - list_del(&uobj->list); - kfree(uobj); - } + /* XXX Free AHs */ list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); - struct ib_uevent_object *uevent = - container_of(uobj, struct ib_uevent_object, uobject); idr_remove(&ib_uverbs_qp_idr, uobj->id); ib_destroy_qp(qp); list_del(&uobj->list); - ib_uverbs_release_uevent(file, uevent); - kfree(uevent); + kfree(container_of(uobj, struct ib_uevent_object, uobject)); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); - struct ib_uverbs_event_file *ev_file = cq->cq_context; - struct ib_ucq_object *ucq = - container_of(uobj, struct ib_ucq_object, uobject); idr_remove(&ib_uverbs_cq_idr, uobj->id); ib_destroy_cq(cq); list_del(&uobj->list); - ib_uverbs_release_ucq(file, ev_file, ucq); - kfree(ucq); + kfree(container_of(uobj, struct ib_ucq_object, uobject)); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); - struct ib_uevent_object *uevent = - container_of(uobj, struct ib_uevent_object, uobject); idr_remove(&ib_uverbs_srq_idr, uobj->id); ib_destroy_srq(srq); list_del(&uobj->list); - ib_uverbs_release_uevent(file, uevent); - kfree(uevent); + kfree(container_of(uobj, struct ib_uevent_object, uobject)); } /* XXX Free MWs */ @@ -248,8 +175,6 @@ static void ib_uverbs_release_file(struct kref *ref) container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); - kref_put(&file->device->ref, ib_uverbs_release_dev); - kfree(file); } @@ -263,19 +188,25 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, spin_lock_irq(&file->lock); - while (list_empty(&file->event_list)) { + while (list_empty(&file->event_list) && file->fd >= 0) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, - 
!list_empty(&file->event_list))) + !list_empty(&file->event_list) || + file->fd < 0)) return -ERESTARTSYS; spin_lock_irq(&file->lock); } + if (file->fd < 0) { + spin_unlock_irq(&file->lock); + return -ENODEV; + } + event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) @@ -317,19 +248,26 @@ static unsigned int ib_uverbs_event_poll(struct file *filp, poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); - if (!list_empty(&file->event_list)) + if (file->fd < 0) + pollflags = POLLERR; + else if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } -void ib_uverbs_release_event_file(struct kref *ref) +static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) { - struct ib_uverbs_event_file *file = - container_of(ref, struct ib_uverbs_event_file, ref); + struct ib_uverbs_event *entry, *tmp; - kfree(file); + spin_lock_irq(&file->lock); + if (file->fd != -1) { + file->fd = -1; + list_for_each_entry_safe(entry, tmp, &file->event_list, list) + kfree(entry); + } + spin_unlock_irq(&file->lock); } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) @@ -342,30 +280,21 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; - struct ib_uverbs_event *entry, *tmp; - - spin_lock_irq(&file->lock); - file->file = NULL; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) { - if (entry->counter) - list_del(&entry->obj_list); - kfree(entry); - } - spin_unlock_irq(&file->lock); + ib_uverbs_event_release(file); ib_uverbs_event_fasync(-1, filp, 0); - - if (file->is_async) { - ib_unregister_event_handler(&file->uverbs_file->event_handler); - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); - } - kref_put(&file->ref, ib_uverbs_release_event_file); + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); return 0; } static struct file_operations uverbs_event_fops = { - .owner = THIS_MODULE, + /* + * No .owner field since we artificially create event files, + * so there is no increment to the module reference count in + * the open path. All event files come from a uverbs command + * file, which already takes a module reference, so this is OK. 
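[Illustrative aside, not part of the patch: for contrast with the ownerless event fops introduced here, the usual pattern sets .owner so that the VFS open path pins the module via fops_get(). A generic sketch, not from this file:

	static struct file_operations my_fops = {
		.owner = THIS_MODULE,	/* open() takes a module reference */
		.read  = my_read,
	};

Omitting .owner is safe only when, as argued in this comment, something else already guarantees the module cannot be unloaded while the file is open.]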
+ */ .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, @@ -374,37 +303,27 @@ static struct file_operations uverbs_event_fops = { void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { - struct ib_uverbs_event_file *file = cq_context; - struct ib_ucq_object *uobj; - struct ib_uverbs_event *entry; - unsigned long flags; - - if (!file) - return; - - spin_lock_irqsave(&file->lock, flags); - if (!file->file) { - spin_unlock_irqrestore(&file->lock, flags); - return; - } + struct ib_uverbs_file *file = cq_context; + struct ib_ucq_object *uobj; + struct ib_uverbs_event *entry; + unsigned long flags; entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) { - spin_unlock_irqrestore(&file->lock, flags); + if (!entry) return; - } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; - list_add_tail(&entry->list, &file->event_list); + spin_lock_irqsave(&file->comp_file[0].lock, flags); + list_add_tail(&entry->list, &file->comp_file[0].event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); - spin_unlock_irqrestore(&file->lock, flags); + spin_unlock_irqrestore(&file->comp_file[0].lock, flags); - wake_up_interruptible(&file->poll_wait); - kill_fasync(&file->async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->comp_file[0].poll_wait); + kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, @@ -415,40 +334,32 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, struct ib_uverbs_event *entry; unsigned long flags; - spin_lock_irqsave(&file->async_file->lock, flags); - if (!file->async_file->file) { - spin_unlock_irqrestore(&file->async_file->lock, flags); - return; - } - entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) { - spin_unlock_irqrestore(&file->async_file->lock, flags); + if (!entry) return; - } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->counter = counter; - list_add_tail(&entry->list, &file->async_file->event_list); + spin_lock_irqsave(&file->async_file.lock, flags); + list_add_tail(&entry->list, &file->async_file.event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file->lock, flags); + spin_unlock_irqrestore(&file->async_file.lock, flags); - wake_up_interruptible(&file->async_file->poll_wait); - kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->async_file.poll_wait); + kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uverbs_event_file *ev_file = context_ptr; struct ib_ucq_object *uobj; uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); - ib_uverbs_async_handler(ev_file->uverbs_file, uobj->uobject.user_handle, + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); @@ -478,8 +389,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } -void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) +static void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); @@ -488,90 +399,38 @@ 
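[Illustrative aside, not part of the patch: the completion and async handlers above all follow the same delivery scheme: allocate the event with GFP_ATOMIC (these handlers may run in interrupt context), queue it under the event file's lock, then wake poll()/read() sleepers and notify SIGIO subscribers. A condensed sketch, where ev and entry are hypothetical locals:

	struct ib_uverbs_event *entry;
	unsigned long flags;

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);	/* may be in IRQ context */
	if (!entry)
		return;					/* event is silently dropped */

	spin_lock_irqsave(&ev->lock, flags);
	list_add_tail(&entry->list, &ev->event_list);
	spin_unlock_irqrestore(&ev->lock, flags);

	wake_up_interruptible(&ev->poll_wait);		/* unblock read()/poll() */
	kill_fasync(&ev->async_queue, SIGIO, POLL_IN);	/* async notification */
]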
void ib_uverbs_event_handler(struct ib_event_handler *handler, NULL, NULL); } -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - int is_async, int *fd) +static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, + struct ib_uverbs_file *uverbs_file) { - struct ib_uverbs_event_file *ev_file; struct file *filp; - int ret; - ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); - if (!ev_file) - return ERR_PTR(-ENOMEM); - - kref_init(&ev_file->ref); - spin_lock_init(&ev_file->lock); - INIT_LIST_HEAD(&ev_file->event_list); - init_waitqueue_head(&ev_file->poll_wait); - ev_file->uverbs_file = uverbs_file; - ev_file->async_queue = NULL; - ev_file->is_async = is_async; - - *fd = get_unused_fd(); - if (*fd < 0) { - ret = *fd; - goto err; - } + spin_lock_init(&file->lock); + INIT_LIST_HEAD(&file->event_list); + init_waitqueue_head(&file->poll_wait); + file->uverbs_file = uverbs_file; + file->async_queue = NULL; + + file->fd = get_unused_fd(); + if (file->fd < 0) + return file->fd; filp = get_empty_filp(); if (!filp) { - ret = -ENFILE; - goto err_fd; + put_unused_fd(file->fd); + return -ENFILE; } - ev_file->file = filp; - - /* - * fops_get() can't fail here, because we're coming from a - * system call on a uverbs file, which will already have a - * module reference. - */ - filp->f_op = fops_get(&uverbs_event_fops); + filp->f_op = &uverbs_event_fops; filp->f_vfsmnt = mntget(uverbs_event_mnt); filp->f_dentry = dget(uverbs_event_mnt->mnt_root); filp->f_mapping = filp->f_dentry->d_inode->i_mapping; filp->f_flags = O_RDONLY; filp->f_mode = FMODE_READ; - filp->private_data = ev_file; - - return filp; - -err_fd: - put_unused_fd(*fd); - -err: - kfree(ev_file); - return ERR_PTR(ret); -} - -/* - * Look up a completion event file by FD. If lookup is successful, - * takes a ref to the event file struct that it returns; if - * unsuccessful, returns NULL. 
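[Illustrative aside, not part of the patch: the lookup helper being removed here implemented the standard fd-to-private-data pattern: pin the struct file with fget(), verify f_op to make sure the descriptor really belongs to this driver, take a reference of our own, then release the file pin. A minimal sketch with hypothetical names (my_event_file, my_event_fops):

	static struct my_event_file *my_lookup_by_fd(int fd)
	{
		struct my_event_file *ev = NULL;
		struct file *filp = fget(fd);		/* pins the struct file */

		if (!filp)
			return NULL;

		if (filp->f_op == &my_event_fops) {	/* reject foreign fds */
			ev = filp->private_data;
			kref_get(&ev->ref);		/* caller's own reference */
		}

		fput(filp);				/* drop the file pin */
		return ev;
	}

The f_op identity check matters: any integer can be passed in from userspace, so private_data must never be trusted before confirming the file uses our fops.]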
- */ -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) -{ - struct ib_uverbs_event_file *ev_file = NULL; - struct file *filp; - - filp = fget(fd); - if (!filp) - return NULL; - - if (filp->f_op != &uverbs_event_fops) - goto out; - - ev_file = filp->private_data; - if (ev_file->is_async) { - ev_file = NULL; - goto out; - } + filp->private_data = file; - kref_get(&ev_file->ref); + fd_install(file->fd, filp); -out: - fput(filp); - return ev_file; + return 0; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, @@ -591,11 +450,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[hdr.command] || - !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) + !uverbs_cmd_table[hdr.command]) return -EINVAL; - if (!file->ucontext && + if (!file->ucontext && + hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS && hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) return -EINVAL; @@ -615,57 +474,84 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) static int ib_uverbs_open(struct inode *inode, struct file *filp) { - struct ib_uverbs_device *dev; + struct ib_uverbs_device *dev = + container_of(inode->i_cdev, struct ib_uverbs_device, dev); struct ib_uverbs_file *file; + int i = 0; int ret; - spin_lock(&map_lock); - dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR]; - if (dev) - kref_get(&dev->ref); - spin_unlock(&map_lock); - - if (!dev) - return -ENXIO; - - if (!try_module_get(dev->ib_dev->owner)) { - ret = -ENODEV; - goto err; - } + if (!try_module_get(dev->ib_dev->owner)) + return -ENODEV; - file = kmalloc(sizeof *file, GFP_KERNEL); + file = kmalloc(sizeof *file + + (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), + GFP_KERNEL); if (!file) { ret = -ENOMEM; - goto err_module; + goto err; } - file->device = dev; - file->ucontext = NULL; - file->async_file = NULL; + file->device = dev; kref_init(&file->ref); init_MUTEX(&file->mutex); + file->ucontext = NULL; + + kref_get(&file->ref); + ret = ib_uverbs_event_init(&file->async_file, file); + if (ret) + goto err_kref; + + file->async_file.is_async = 1; + + for (i = 0; i < dev->num_comp; ++i) { + kref_get(&file->ref); + ret = ib_uverbs_event_init(&file->comp_file[i], file); + if (ret) + goto err_async; + file->comp_file[i].is_async = 0; + } + + filp->private_data = file; + INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev, + ib_uverbs_event_handler); + if (ib_register_event_handler(&file->event_handler)) + goto err_async; + return 0; -err_module: - module_put(dev->ib_dev->owner); +err_async: + while (i--) + ib_uverbs_event_release(&file->comp_file[i]); -err: - kref_put(&dev->ref, ib_uverbs_release_dev); + ib_uverbs_event_release(&file->async_file); + +err_kref: + /* + * One extra kref_put() because we took a reference before the + * event file creation that failed and got us here. 
+ */ + kref_put(&file->ref, ib_uverbs_release_file); + kref_put(&file->ref, ib_uverbs_release_file); +err: + module_put(dev->ib_dev->owner); return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; + int i; - ib_uverbs_cleanup_ucontext(file, file->ucontext); + ib_unregister_event_handler(&file->event_handler); + ib_uverbs_event_release(&file->async_file); + ib_dealloc_ucontext(file->ucontext); - if (file->async_file) - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + for (i = 0; i < file->device->num_comp; ++i) + ib_uverbs_event_release(&file->comp_file[i]); kref_put(&file->ref, ib_uverbs_release_file); @@ -695,25 +581,27 @@ static struct ib_client uverbs_client = { static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { - struct ib_uverbs_device *dev = class_get_devdata(class_dev); - - if (!dev) - return -ENODEV; + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); return sprintf(buf, "%s\n", dev->ib_dev->name); } static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); -static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf) +static void ib_uverbs_release_class_dev(struct class_device *class_dev) { - struct ib_uverbs_device *dev = class_get_devdata(class_dev); - - if (!dev) - return -ENODEV; + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); - return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); + cdev_del(&dev->dev); + clear_bit(dev->devnum, dev_map); + kfree(dev); } -static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); + +static struct class uverbs_class = { + .name = "infiniband_verbs", + .release = ib_uverbs_release_class_dev +}; static ssize_t show_abi_version(struct class *class, char *buf) { @@ -734,8 +622,6 @@ static void ib_uverbs_add_one(struct ib_device *device) memset(uverbs_dev, 0, sizeof *uverbs_dev); - kref_init(&uverbs_dev->ref); - spin_lock(&map_lock); uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { @@ -745,49 +631,41 @@ static void ib_uverbs_add_one(struct ib_device *device) set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); - uverbs_dev->ib_dev = device; - uverbs_dev->num_comp_vectors = 1; + uverbs_dev->ib_dev = device; + uverbs_dev->num_comp = 1; - uverbs_dev->dev = cdev_alloc(); - if (!uverbs_dev->dev) + if (device->mmap) + cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); + else + cdev_init(&uverbs_dev->dev, &uverbs_fops); + uverbs_dev->dev.owner = THIS_MODULE; + kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) goto err; - uverbs_dev->dev->owner = THIS_MODULE; - uverbs_dev->dev->ops = device->mmap ? 
&uverbs_mmap_fops : &uverbs_fops; - kobject_set_name(&uverbs_dev->dev->kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) - goto err_cdev; - uverbs_dev->class_dev = class_device_create(uverbs_class, NULL, - uverbs_dev->dev->dev, - device->dma_device, - "uverbs%d", uverbs_dev->devnum); - if (IS_ERR(uverbs_dev->class_dev)) + uverbs_dev->class_dev.class = &uverbs_class; + uverbs_dev->class_dev.dev = device->dma_device; + uverbs_dev->class_dev.devt = uverbs_dev->dev.dev; + snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum); + if (class_device_register(&uverbs_dev->class_dev)) goto err_cdev; - class_set_devdata(uverbs_dev->class_dev, uverbs_dev); - - if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_ibdev)) + if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) goto err_class; - if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_abi_version)) - goto err_class; - - spin_lock(&map_lock); - dev_table[uverbs_dev->devnum] = uverbs_dev; - spin_unlock(&map_lock); ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: - class_device_destroy(uverbs_class, uverbs_dev->dev->dev); + class_device_unregister(&uverbs_dev->class_dev); err_cdev: - cdev_del(uverbs_dev->dev); + cdev_del(&uverbs_dev->dev); clear_bit(uverbs_dev->devnum, dev_map); err: - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + kfree(uverbs_dev); return; } @@ -798,16 +676,7 @@ static void ib_uverbs_remove_one(struct ib_device *device) if (!uverbs_dev) return; - class_set_devdata(uverbs_dev->class_dev, NULL); - class_device_destroy(uverbs_class, uverbs_dev->dev->dev); - cdev_del(uverbs_dev->dev); - - spin_lock(&map_lock); - dev_table[uverbs_dev->devnum] = NULL; - spin_unlock(&map_lock); - - clear_bit(uverbs_dev->devnum, dev_map); - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + class_device_unregister(&uverbs_dev->class_dev); } static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, @@ -837,14 +706,13 @@ static int __init ib_uverbs_init(void) goto out; } - uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); - if (IS_ERR(uverbs_class)) { - ret = PTR_ERR(uverbs_class); + ret = class_register(&uverbs_class); + if (ret) { printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } - ret = class_create_file(uverbs_class, &class_attr_abi_version); + ret = class_create_file(&uverbs_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); goto out_class; @@ -878,7 +746,7 @@ static int __init ib_uverbs_init(void) unregister_filesystem(&uverbs_event_fs); out_class: - class_destroy(uverbs_class); + class_unregister(&uverbs_class); out_chrdev: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); @@ -892,15 +760,8 @@ static void __exit ib_uverbs_cleanup(void) ib_unregister_client(&uverbs_client); mntput(uverbs_event_mnt); unregister_filesystem(&uverbs_event_fs); - class_destroy(uverbs_class); + class_unregister(&uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); - idr_destroy(&ib_uverbs_pd_idr); - idr_destroy(&ib_uverbs_mr_idr); - idr_destroy(&ib_uverbs_mw_idr); - idr_destroy(&ib_uverbs_ah_idr); - idr_destroy(&ib_uverbs_cq_idr); - idr_destroy(&ib_uverbs_qp_idr); - idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); diff --git a/trunk/drivers/infiniband/core/verbs.c 
b/trunk/drivers/infiniband/core/verbs.c index 72d3ef786db5..5081d903e561 100644 --- a/trunk/drivers/infiniband/core/verbs.c +++ b/trunk/drivers/infiniband/core/verbs.c @@ -523,22 +523,16 @@ EXPORT_SYMBOL(ib_dealloc_fmr); int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - if (!qp->device->attach_mcast) - return -ENOSYS; - if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) - return -EINVAL; - - return qp->device->attach_mcast(qp, gid, lid); + return qp->device->attach_mcast ? + qp->device->attach_mcast(qp, gid, lid) : + -ENOSYS; } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - if (!qp->device->detach_mcast) - return -ENOSYS; - if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) - return -EINVAL; - - return qp->device->detach_mcast(qp, gid, lid); + return qp->device->detach_mcast ? + qp->device->detach_mcast(qp, gid, lid) : + -ENOSYS; } EXPORT_SYMBOL(ib_detach_mcast); diff --git a/trunk/drivers/infiniband/hw/mthca/Makefile b/trunk/drivers/infiniband/hw/mthca/Makefile index 47ec5a7cba0b..c44f7bae5424 100644 --- a/trunk/drivers/infiniband/hw/mthca/Makefile +++ b/trunk/drivers/infiniband/hw/mthca/Makefile @@ -7,5 +7,4 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ - mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \ - mthca_catas.o + mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_catas.c b/trunk/drivers/infiniband/hw/mthca/mthca_catas.c deleted file mode 100644 index 7ac52af43b99..000000000000 --- a/trunk/drivers/infiniband/hw/mthca/mthca_catas.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2005 Cisco Systems. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * $Id$ - */ - -#include "mthca_dev.h" - -enum { - MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, - - MTHCA_CATAS_TYPE_INTERNAL = 0, - MTHCA_CATAS_TYPE_UPLINK = 3, - MTHCA_CATAS_TYPE_DDR = 4, - MTHCA_CATAS_TYPE_PARITY = 5, -}; - -static DEFINE_SPINLOCK(catas_lock); - -static void handle_catas(struct mthca_dev *dev) -{ - struct ib_event event; - const char *type; - int i; - - event.device = &dev->ib_dev; - event.event = IB_EVENT_DEVICE_FATAL; - event.element.port_num = 0; - - ib_dispatch_event(&event); - - switch (swab32(readl(dev->catas_err.map)) >> 24) { - case MTHCA_CATAS_TYPE_INTERNAL: - type = "internal error"; - break; - case MTHCA_CATAS_TYPE_UPLINK: - type = "uplink bus error"; - break; - case MTHCA_CATAS_TYPE_DDR: - type = "DDR data error"; - break; - case MTHCA_CATAS_TYPE_PARITY: - type = "internal parity error"; - break; - default: - type = "unknown error"; - break; - } - - mthca_err(dev, "Catastrophic error detected: %s\n", type); - for (i = 0; i < dev->catas_err.size; ++i) - mthca_err(dev, " buf[%02x]: %08x\n", - i, swab32(readl(dev->catas_err.map + i))); -} - -static void poll_catas(unsigned long dev_ptr) -{ - struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; - unsigned long flags; - int i; - - for (i = 0; i < dev->catas_err.size; ++i) - if (readl(dev->catas_err.map + i)) { - handle_catas(dev); - return; - } - - spin_lock_irqsave(&catas_lock, flags); - if (dev->catas_err.stop) - mod_timer(&dev->catas_err.timer, - jiffies + MTHCA_CATAS_POLL_INTERVAL); - spin_unlock_irqrestore(&catas_lock, flags); - - return; -} - -void mthca_start_catas_poll(struct mthca_dev *dev) -{ - unsigned long addr; - - init_timer(&dev->catas_err.timer); - dev->catas_err.stop = 0; - dev->catas_err.map = NULL; - - addr = pci_resource_start(dev->pdev, 0) + - ((pci_resource_len(dev->pdev, 0) - 1) & - dev->catas_err.addr); - - if (!request_mem_region(addr, dev->catas_err.size * 4, - DRV_NAME)) { - mthca_warn(dev, "couldn't request catastrophic error region " - "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); - return; - } - - dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); - if (!dev->catas_err.map) { - mthca_warn(dev, "couldn't map catastrophic error region " - "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); - release_mem_region(addr, dev->catas_err.size * 4); - return; - } - - dev->catas_err.timer.data = (unsigned long) dev; - dev->catas_err.timer.function = poll_catas; - dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; - add_timer(&dev->catas_err.timer); -} - -void mthca_stop_catas_poll(struct mthca_dev *dev) -{ - spin_lock_irq(&catas_lock); - dev->catas_err.stop = 1; - spin_unlock_irq(&catas_lock); - - del_timer_sync(&dev->catas_err.timer); - - if (dev->catas_err.map) { - iounmap(dev->catas_err.map); - release_mem_region(pci_resource_start(dev->pdev, 0) + - ((pci_resource_len(dev->pdev, 0) - 1) & - dev->catas_err.addr), - dev->catas_err.size * 4); - } -} diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_cmd.c b/trunk/drivers/infiniband/hw/mthca/mthca_cmd.c index 49f211d55df7..378646b5a1b8 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/trunk/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1,7 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -707,13 +706,9 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); dev->cmd.max_cmds = 1 << lg; - MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); - MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); mthca_dbg(dev, "FW version %012llx, max commands %d\n", (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); - mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n", - (unsigned long long) dev->catas_err.addr, dev->catas_err.size); if (mthca_is_memfree(dev)) { MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); @@ -938,9 +933,9 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, goto out; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); - dev_lim->max_srq_sz = (1 << field) - 1; + dev_lim->max_srq_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); - dev_lim->max_qp_sz = (1 << field) - 1; + dev_lim->max_qp_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); dev_lim->reserved_qps = 1 << (field & 0xf); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); @@ -1050,8 +1045,6 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars); mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", dev_lim->max_pds, dev_lim->reserved_mgms); - mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", - dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz); mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_dev.h b/trunk/drivers/infiniband/hw/mthca/mthca_dev.h index 7e68bd4a3780..7bff5a8425f4 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/trunk/drivers/infiniband/hw/mthca/mthca_dev.h @@ -83,8 +83,6 @@ enum { /* Arbel FW gives us these, but we need them for Tavor */ MTHCA_MPT_ENTRY_SIZE = 0x40, MTHCA_MTT_SEG_SIZE = 0x40, - - MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) }; enum { @@ -130,16 +128,12 @@ struct mthca_limits { int num_uars; int max_sg; int num_qps; - int max_wqes; - int max_qp_init_rdma; int reserved_qps; int num_srqs; - int max_srq_wqes; int reserved_srqs; int num_eecs; int reserved_eecs; int num_cqs; - int max_cqes; int reserved_cqs; int num_eqs; int reserved_eqs; @@ -154,7 +148,6 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; - u32 flags; u8 port_width_cap; }; @@ -258,14 +251,6 @@ struct mthca_mcg_table { struct mthca_icm_table *table; }; -struct mthca_catas_err { - u64 addr; - u32 __iomem *map; - unsigned long stop; - u32 size; - struct timer_list timer; -}; - struct mthca_dev { struct ib_device ib_dev; struct pci_dev *pdev; @@ -326,8 +311,6 @@ struct mthca_dev { struct mthca_av_table av_table; struct mthca_mcg_table mcg_table; - struct mthca_catas_err catas_err; - struct mthca_uar driver_uar; struct mthca_db_table *db_tab; struct mthca_pd driver_pd; @@ -415,9 +398,6 @@ void mthca_cleanup_mcg_table(struct mthca_dev *dev); int mthca_register_device(struct mthca_dev *dev); void mthca_unregister_device(struct mthca_dev *dev); -void mthca_start_catas_poll(struct mthca_dev *dev); -void mthca_stop_catas_poll(struct mthca_dev *dev); - int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); @@ -467,8 +447,6 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, 
struct ib_srq_attr *attr, struct mthca_srq *srq); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); -int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_eq.c b/trunk/drivers/infiniband/hw/mthca/mthca_eq.c index e5a047a6dbeb..8dfafda5ed24 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/trunk/drivers/infiniband/hw/mthca/mthca_eq.c @@ -83,8 +83,7 @@ enum { MTHCA_EVENT_TYPE_PATH_MIG = 0x01, MTHCA_EVENT_TYPE_COMM_EST = 0x02, MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, - MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, - MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14, + MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13, MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, @@ -111,9 +110,8 @@ enum { (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT)) -#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ - (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ - (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT)) +#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE) #define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD) #define MTHCA_EQ_DB_INC_CI (1 << 24) @@ -143,9 +141,6 @@ struct mthca_eqe { struct { __be32 qpn; } __attribute__((packed)) qp; - struct { - __be32 srqn; - } __attribute__((packed)) srq; struct { __be32 cqn; u32 reserved1; @@ -310,16 +305,6 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) IB_EVENT_SQ_DRAINED); break; - case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: - mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, - IB_EVENT_QP_LAST_WQE_REACHED); - break; - - case MTHCA_EVENT_TYPE_SRQ_LIMIT: - mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff, - IB_EVENT_SRQ_LIMIT_REACHED); - break; - case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_QP_FATAL); diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_mad.c b/trunk/drivers/infiniband/hw/mthca/mthca_mad.c index 8561b297a19b..9804174f7f3c 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/trunk/drivers/infiniband/hw/mthca/mthca_mad.c @@ -46,6 +46,11 @@ enum { MTHCA_VENDOR_CLASS2 = 0xa }; +struct mthca_trap_mad { + struct ib_mad *mad; + DECLARE_PCI_UNMAP_ADDR(mapping) +}; + static void update_sm_ah(struct mthca_dev *dev, u8 port_num, u16 lid, u8 sl) { @@ -111,14 +116,49 @@ static void forward_trap(struct mthca_dev *dev, struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; - struct ib_mad_send_buf *send_buf; + struct mthca_trap_mad *tmad; + struct ib_sge gather_list; + struct ib_send_wr *bad_wr, wr = { + .opcode = IB_WR_SEND, + .sg_list = &gather_list, + .num_sge = 1, + .send_flags = IB_SEND_SIGNALED, + .wr = { + .ud = { + .remote_qpn = qpn, + .remote_qkey = qpn ? 
IB_QP1_QKEY : 0, + .timeout_ms = 0 + } + } + }; struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; int ret; unsigned long flags; if (agent) { - send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + tmad = kmalloc(sizeof *tmad, GFP_KERNEL); + if (!tmad) + return; + + tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL); + if (!tmad->mad) { + kfree(tmad); + return; + } + + memcpy(tmad->mad, mad, sizeof *mad); + + wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr; + wr.wr_id = (unsigned long) tmad; + + gather_list.addr = dma_map_single(agent->device->dma_device, + tmad->mad, + sizeof *tmad->mad, + DMA_TO_DEVICE); + gather_list.length = sizeof *tmad->mad; + gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey; + pci_unmap_addr_set(tmad, mapping, gather_list.addr); + /* * We rely here on the fact that MLX QPs don't use the * address handle after the send is posted (this is @@ -126,15 +166,21 @@ static void forward_trap(struct mthca_dev *dev, * it's OK for our devices). */ spin_lock_irqsave(&dev->sm_lock, flags); - memcpy(send_buf->mad, mad, sizeof *mad); - if ((send_buf->ah = dev->sm_ah[port_num - 1])) - ret = ib_post_send_mad(send_buf, NULL); + wr.wr.ud.ah = dev->sm_ah[port_num - 1]; + if (wr.wr.ud.ah) + ret = ib_post_send_mad(agent, &wr, &bad_wr); else ret = -EINVAL; spin_unlock_irqrestore(&dev->sm_lock, flags); - if (ret) - ib_free_send_mad(send_buf); + if (ret) { + dma_unmap_single(agent->device->dma_device, + pci_unmap_addr(tmad, mapping), + sizeof *tmad->mad, + DMA_TO_DEVICE); + kfree(tmad->mad); + kfree(tmad); + } } } @@ -221,7 +267,15 @@ int mthca_process_mad(struct ib_device *ibdev, static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { - ib_free_send_mad(mad_send_wc->send_buf); + struct mthca_trap_mad *tmad = + (void *) (unsigned long) mad_send_wc->wr_id; + + dma_unmap_single(agent->device->dma_device, + pci_unmap_addr(tmad, mapping), + sizeof *tmad->mad, + DMA_TO_DEVICE); + kfree(tmad->mad); + kfree(tmad); } int mthca_create_agents(struct mthca_dev *dev) diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_main.c b/trunk/drivers/infiniband/hw/mthca/mthca_main.c index 883d1e5a79bc..23a3f56c7899 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_main.c +++ b/trunk/drivers/infiniband/hw/mthca/mthca_main.c @@ -162,18 +162,9 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; mdev->limits.max_sg = dev_lim->max_sg; - mdev->limits.max_wqes = dev_lim->max_qp_sz; - mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; - mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; mdev->limits.reserved_srqs = dev_lim->reserved_srqs; mdev->limits.reserved_eecs = dev_lim->reserved_eecs; - /* - * Subtract 1 from the limit because we need to allocate a - * spare CQE so the HCA HW can tell the difference between an - * empty CQ and a full CQ. 
- */ - mdev->limits.max_cqes = dev_lim->max_cq_sz - 1; mdev->limits.reserved_cqs = dev_lim->reserved_cqs; mdev->limits.reserved_eqs = dev_lim->reserved_eqs; mdev->limits.reserved_mtts = dev_lim->reserved_mtts; @@ -181,7 +172,6 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; mdev->limits.port_width_cap = dev_lim->max_port_width; - mdev->limits.flags = dev_lim->flags; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. @@ -1196,7 +1186,6 @@ MODULE_DEVICE_TABLE(pci, mthca_pci_table); static struct pci_driver mthca_driver = { .name = DRV_NAME, - .owner = THIS_MODULE, .id_table = mthca_pci_table, .probe = mthca_init_one, .remove = __devexit_p(mthca_remove_one) diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_mcg.c b/trunk/drivers/infiniband/hw/mthca/mthca_mcg.c index b47ea7daf088..a2707605f4c8 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/trunk/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -37,6 +37,10 @@ #include "mthca_dev.h" #include "mthca_cmd.h" +enum { + MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) +}; + struct mthca_mgm { __be32 next_gid_index; u32 reserved[3]; @@ -185,12 +189,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } for (i = 0; i < MTHCA_QP_PER_MGM; ++i) - if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) { - mthca_dbg(dev, "QP %06x already a member of MGM\n", - ibqp->qp_num); - err = 0; - goto out; - } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { + if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31)); break; } diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_memfree.c b/trunk/drivers/infiniband/hw/mthca/mthca_memfree.c index d72fe95cba08..9ad8b3b6cfef 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/trunk/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -487,8 +487,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, } } -int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, - u32 qn, __be32 **db) +int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) { int group; int start, end, dir; diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_memfree.h b/trunk/drivers/infiniband/hw/mthca/mthca_memfree.h index 4fdca26eea85..29433f295253 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/trunk/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -173,8 +173,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); -int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, - u32 qn, __be32 **db); +int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); void mthca_free_db(struct mthca_dev *dev, int type, int db_index); #endif /* MTHCA_MEMFREE_H */ diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_provider.c b/trunk/drivers/infiniband/hw/mthca/mthca_provider.c index 1b9477edbd7b..3f5319a46577 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/trunk/drivers/infiniband/hw/mthca/mthca_provider.c @@ -37,7 +37,6 @@ */ #include -#include #include #include "mthca_dev.h" @@ -91,26 +90,15 @@ static int mthca_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; - 
props->max_qp_wr = mdev->limits.max_wqes; + props->max_qp_wr = 0xffff; props->max_sge = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; - props->max_cqe = mdev->limits.max_cqes; + props->max_cqe = 0xffff; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; - props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma; - props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; - props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; - props->max_srq_wr = mdev->limits.max_srq_wqes; - props->max_srq_sge = mdev->limits.max_sg; + props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift; props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; - props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? - IB_ATOMIC_HCA : IB_ATOMIC_NONE; - props->max_pkeys = mdev->limits.pkey_table_len; - props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; - props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; - props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * - props->max_mcast_grp; err = 0; out: @@ -162,13 +150,9 @@ static int mthca_query_port(struct ib_device *ibdev, props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; props->max_msg_sz = 0x80000000; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; - props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; - props->max_mtu = out_mad->data[41] & 0xf; - props->active_mtu = out_mad->data[36] >> 4; - props->subnet_timeout = out_mad->data[51] & 0x1f; out: kfree(in_mad); @@ -650,9 +634,6 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, int nent; int err; - if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) - return ERR_PTR(-EINVAL); - if (context) { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); @@ -1077,26 +1058,6 @@ int mthca_register_device(struct mthca_dev *dev) strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; - dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; @@ -1116,7 +1077,6 @@ int mthca_register_device(struct mthca_dev *dev) if (dev->mthca_flags & MTHCA_FLAG_SRQ) { dev->ib_dev.create_srq = mthca_create_srq; - dev->ib_dev.modify_srq = mthca_modify_srq; dev->ib_dev.destroy_srq = mthca_destroy_srq; if 
(mthca_is_memfree(dev)) @@ -1175,13 +1135,10 @@ int mthca_register_device(struct mthca_dev *dev) } } - mthca_start_catas_poll(dev); - return 0; } void mthca_unregister_device(struct mthca_dev *dev) { - mthca_stop_catas_poll(dev); ib_unregister_device(&dev->ib_dev); } diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_qp.c b/trunk/drivers/infiniband/hw/mthca/mthca_qp.c index 62ff091505da..5fa00669f9b8 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/trunk/drivers/infiniband/hw/mthca/mthca_qp.c @@ -338,7 +338,8 @@ static const struct { [UC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | - IB_QP_RQ_PSN), + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC), [RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | @@ -367,7 +368,8 @@ static const struct { .trans = MTHCA_TRANS_RTR2RTS, .req_param = { [UD] = IB_QP_SQ_PSN, - [UC] = IB_QP_SQ_PSN, + [UC] = (IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), [RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -444,6 +446,8 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [UC] = (IB_QP_AV | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -474,7 +478,7 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), - [UC] = IB_QP_CUR_STATE, + [UC] = (IB_QP_CUR_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_MIN_RNR_TIMER), [MLX] = (IB_QP_CUR_STATE | @@ -1108,10 +1112,8 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, struct mthca_qp *qp) { /* Sanity check QP size before proceeding */ - if (cap->max_send_wr > dev->limits.max_wqes || - cap->max_recv_wr > dev->limits.max_wqes || - cap->max_send_sge > dev->limits.max_sg || - cap->max_recv_sge > dev->limits.max_sg) + if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || + cap->max_send_sge > 64 || cap->max_recv_sge > 64) return -EINVAL; if (mthca_is_memfree(dev)) { diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_srq.c b/trunk/drivers/infiniband/hw/mthca/mthca_srq.c index 64f70aa1b3c0..18998d48c53e 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/trunk/drivers/infiniband/hw/mthca/mthca_srq.c @@ -186,8 +186,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, int err; /* Sanity check SRQ size before proceeding */ - if (attr->max_wr > dev->limits.max_srq_wqes || - attr->max_sge > dev->limits.max_sg) + if (attr->max_wr > 16 << 20 || attr->max_sge > 64) return -EINVAL; srq->max = attr->max_wr; @@ -333,29 +332,6 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) mthca_free_mailbox(dev, mailbox); } -int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask) -{ - struct mthca_dev *dev = to_mdev(ibsrq->device); - struct mthca_srq *srq = to_msrq(ibsrq); - int ret; - u8 status; - - /* We don't support resizing SRQs (yet?) 
*/ - if (attr_mask & IB_SRQ_MAX_WR) - return -EINVAL; - - if (attr_mask & IB_SRQ_LIMIT) { - ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); - if (ret) - return ret; - if (status) - return -EINVAL; - } - - return 0; -} - void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type) { @@ -378,7 +354,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn, event.device = &dev->ib_dev; event.event = event_type; - event.element.srq = &srq->ibsrq; + event.element.srq = &srq->ibsrq; srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); out: @@ -439,14 +415,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); - - if (next_ind < 0) { - mthca_err(dev, "SRQ %06x full\n", srq->srqn); - err = -ENOMEM; - *bad_wr = wr; - break; - } - prev_wqe = srq->last; srq->last = wqe; @@ -538,13 +506,6 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); - if (next_ind < 0) { - mthca_err(dev, "SRQ %06x full\n", srq->srqn); - err = -ENOMEM; - *bad_wr = wr; - break; - } - ((struct mthca_next_seg *) wqe)->nda_op = cpu_to_be32((next_ind << srq->wqe_shift) | 1); ((struct mthca_next_seg *) wqe)->ee_nds = 0; diff --git a/trunk/drivers/infiniband/hw/mthca/mthca_user.h b/trunk/drivers/infiniband/hw/mthca/mthca_user.h index bb015c6494c4..41613ec8a04e 100644 --- a/trunk/drivers/infiniband/hw/mthca/mthca_user.h +++ b/trunk/drivers/infiniband/hw/mthca/mthca_user.h @@ -37,12 +37,6 @@ #include -/* - * Increment this value if any changes that break userspace ABI - * compatibility are made. - */ -#define MTHCA_UVERBS_ABI_VERSION 1 - /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to diff --git a/trunk/drivers/infiniband/ulp/ipoib/ipoib.h b/trunk/drivers/infiniband/ulp/ipoib/ipoib.h index c994a916a58a..4ea1c1ca85bc 100644 --- a/trunk/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/trunk/drivers/infiniband/ulp/ipoib/ipoib.h @@ -100,12 +100,7 @@ struct ipoib_pseudoheader { struct ipoib_mcast; -struct ipoib_rx_buf { - struct sk_buff *skb; - dma_addr_t mapping; -}; - -struct ipoib_tx_buf { +struct ipoib_buf { struct sk_buff *skb; DECLARE_PCI_UNMAP_ADDR(mapping) }; @@ -155,14 +150,14 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_rx_buf *rx_ring; + struct ipoib_buf *rx_ring; - spinlock_t tx_lock; - struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; - struct ib_sge tx_sge; - struct ib_send_wr tx_wr; + spinlock_t tx_lock; + struct ipoib_buf *tx_ring; + unsigned tx_head; + unsigned tx_tail; + struct ib_sge tx_sge; + struct ib_send_wr tx_wr; struct ib_wc ibwc[IPOIB_NUM_WC]; @@ -282,7 +277,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid); -int ipoib_init_qp(struct net_device *dev); +int ipoib_qp_create(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); void ipoib_transport_dev_cleanup(struct net_device *dev); diff --git a/trunk/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/trunk/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 192fef884e21..f7440096b5ed 100644 --- a/trunk/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/trunk/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -95,65 +95,57 @@ void ipoib_free_ah(struct kref *kref) } } -static int ipoib_ib_post_receive(struct 
net_device *dev, int id) +static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv, + unsigned int wr_id, + dma_addr_t addr) { - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sge list; - struct ib_recv_wr param; + struct ib_sge list = { + .addr = addr, + .length = IPOIB_BUF_SIZE, + .lkey = priv->mr->lkey, + }; + struct ib_recv_wr param = { + .wr_id = wr_id | IPOIB_OP_RECV, + .sg_list = &list, + .num_sge = 1, + }; struct ib_recv_wr *bad_wr; - int ret; - - list.addr = priv->rx_ring[id].mapping; - list.length = IPOIB_BUF_SIZE; - list.lkey = priv->mr->lkey; - - param.next = NULL; - param.wr_id = id | IPOIB_OP_RECV; - param.sg_list = &list; - param.num_sge = 1; - - ret = ib_post_recv(priv->qp, &param, &bad_wr); - if (unlikely(ret)) { - ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); - dma_unmap_single(priv->ca->dma_device, - priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - dev_kfree_skb_any(priv->rx_ring[id].skb); - priv->rx_ring[id].skb = NULL; - } - return ret; + return ib_post_recv(priv->qp, &param, &bad_wr); } -static int ipoib_alloc_rx_skb(struct net_device *dev, int id) +static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; dma_addr_t addr; + int ret; skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); - if (!skb) - return -ENOMEM; - - /* - * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte - * header. So we need 4 more bytes to get to 48 and align the - * IP header to a multiple of 16. - */ - skb_reserve(skb, 4); + if (!skb) { + ipoib_warn(priv, "failed to allocate receive buffer\n"); + priv->rx_ring[id].skb = NULL; + return -ENOMEM; + } + skb_reserve(skb, 4); /* 16 byte align IP header */ + priv->rx_ring[id].skb = skb; addr = dma_map_single(priv->ca->dma_device, skb->data, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(addr))) { + pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr); + + ret = ipoib_ib_receive(priv, id, addr); + if (ret) { + ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n", + id, ret); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); - return -EIO; + priv->rx_ring[id].skb = NULL; } - priv->rx_ring[id].skb = skb; - priv->rx_ring[id].mapping = addr; - - return 0; + return ret; } static int ipoib_ib_post_receives(struct net_device *dev) @@ -162,10 +154,6 @@ static int ipoib_ib_post_receives(struct net_device *dev) int i; for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { - if (ipoib_alloc_rx_skb(dev, i)) { - ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); - return -ENOMEM; - } if (ipoib_ib_post_receive(dev, i)) { ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); return -EIO; @@ -188,36 +176,28 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id &= ~IPOIB_OP_RECV; if (wr_id < IPOIB_RX_RING_SIZE) { - struct sk_buff *skb = priv->rx_ring[wr_id].skb; - dma_addr_t addr = priv->rx_ring[wr_id].mapping; + struct sk_buff *skb = priv->rx_ring[wr_id].skb; + + priv->rx_ring[wr_id].skb = NULL; - if (unlikely(wc->status != IB_WC_SUCCESS)) { + dma_unmap_single(priv->ca->dma_device, + pci_unmap_addr(&priv->rx_ring[wr_id], + mapping), + IPOIB_BUF_SIZE, + DMA_FROM_DEVICE); + + if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
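/*
 * A minimal sketch of the wr_id convention the completion path above
 * relies on: IPoIB demultiplexes receive and send completions by
 * stashing the IPOIB_OP_RECV tag bit in wr_id and using the remaining
 * bits as a ring index.  The function name below is hypothetical; the
 * field accesses mirror the surrounding code.
 */
static void ipoib_demux_wc_sketch(struct ipoib_dev_priv *priv,
				  struct ib_wc *wc)
{
	u64 wr_id = wc->wr_id;

	if (wr_id & IPOIB_OP_RECV) {
		/* receive completion: low bits index priv->rx_ring */
		wr_id &= ~IPOIB_OP_RECV;
		/* unmap rx_ring[wr_id], hand the skb up, repost the slot */
	} else {
		/* send completion: wr_id indexes priv->tx_ring */
		/* unmap and free tx_ring[wr_id].skb, wake the queue */
	}
}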
dev_kfree_skb_any(skb); - priv->rx_ring[wr_id].skb = NULL; return; } - /* - * If we can't allocate a new RX buffer, dump - * this packet and reuse the old buffer. - */ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { - ++priv->stats.rx_dropped; - goto repost; - } - ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - skb_put(skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); @@ -240,8 +220,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev, dev_kfree_skb_any(skb); } - repost: - if (unlikely(ipoib_ib_post_receive(dev, wr_id))) + /* repost receive */ + if (ipoib_ib_post_receive(dev, wr_id)) ipoib_warn(priv, "ipoib_ib_post_receive failed " "for buf %d\n", wr_id); } else @@ -249,7 +229,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id); } else { - struct ipoib_tx_buf *tx_req; + struct ipoib_buf *tx_req; unsigned long flags; if (wr_id >= IPOIB_TX_RING_SIZE) { @@ -322,7 +302,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_tx_buf *tx_req; + struct ipoib_buf *tx_req; dma_addr_t addr; if (skb->len > dev->mtu + INFINIBAND_ALEN) { @@ -407,9 +387,9 @@ int ipoib_ib_dev_open(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - ret = ipoib_init_qp(dev); + ret = ipoib_qp_create(dev); if (ret) { - ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); + ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret); return -1; } @@ -488,7 +468,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) struct ib_qp_attr qp_attr; int attr_mask; unsigned long begin; - struct ipoib_tx_buf *tx_req; + struct ipoib_buf *tx_req; int i; /* Kill the existing QP and allocate a new one */ diff --git a/trunk/drivers/infiniband/ulp/ipoib/ipoib_main.c b/trunk/drivers/infiniband/ulp/ipoib/ipoib_main.c index cd4f42328dbe..6c5bf07489f4 100644 --- a/trunk/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/trunk/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -637,11 +637,8 @@ static void ipoib_timeout(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_warn(priv, "transmit timeout: latency %d msecs\n", - jiffies_to_msecs(jiffies - dev->trans_start)); - ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", - netif_queue_stopped(dev), - priv->tx_head, priv->tx_tail); + ipoib_warn(priv, "transmit timeout: latency %ld\n", + jiffies - dev->trans_start); /* XXX reset QP, etc. 
*/ } @@ -732,7 +729,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* Allocate RX/TX "rings" to hold queued skbs */ - priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf), + priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", @@ -740,9 +737,9 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out; } memset(priv->rx_ring, 0, - IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf)); + IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); - priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf), + priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", @@ -750,7 +747,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out_rx_ring_cleanup; } memset(priv->tx_ring, 0, - IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf)); + IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); /* priv->tx_head & tx_tail are already 0 */ diff --git a/trunk/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/trunk/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index b5902a7ec240..79f59d0563ed 100644 --- a/trunk/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/trunk/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -92,7 +92,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) return ret; } -int ipoib_init_qp(struct net_device *dev) +int ipoib_qp_create(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; @@ -149,11 +149,10 @@ int ipoib_init_qp(struct net_device *dev) return 0; out_fail: - qp_attr.qp_state = IB_QPS_RESET; - if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) - ipoib_warn(priv, "Failed to modify QP to RESET state\n"); + ib_destroy_qp(priv->qp); + priv->qp = NULL; - return ret; + return -EINVAL; } int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) diff --git a/trunk/drivers/usb/host/ehci-hcd.c b/trunk/drivers/usb/host/ehci-hcd.c index f5eb9e7b5b18..b3eb02613bff 100644 --- a/trunk/drivers/usb/host/ehci-hcd.c +++ b/trunk/drivers/usb/host/ehci-hcd.c @@ -182,6 +182,9 @@ static int ehci_halt (struct ehci_hcd *ehci) { u32 temp = readl (&ehci->regs->status); + /* disable any irqs left enabled by previous code */ + writel (0, &ehci->regs->intr_enable); + if ((temp & STS_HALT) != 0) return 0; @@ -335,12 +338,17 @@ static int bios_handoff (struct ehci_hcd *ehci, int where, u32 cap) #endif +/* Reboot notifiers kick in for silicon on any bus (not just pci, etc). + * This forcibly disables dma and IRQs, helping kexec and other cases + * where the next system software may expect clean state. 
+ */ static int ehci_reboot (struct notifier_block *self, unsigned long code, void *null) { struct ehci_hcd *ehci; ehci = container_of (self, struct ehci_hcd, reboot_notifier); + (void) ehci_halt (ehci); /* make BIOS/etc use companion controller during reboot */ writel (0, &ehci->regs->configured_flag); diff --git a/trunk/include/linux/etherdevice.h b/trunk/include/linux/etherdevice.h index cc84934f9059..4522c7186bf3 100644 --- a/trunk/include/linux/etherdevice.h +++ b/trunk/include/linux/etherdevice.h @@ -104,22 +104,6 @@ static inline void random_ether_addr(u8 *addr) addr [0] &= 0xfe; /* clear multicast bit */ addr [0] |= 0x02; /* set local assignment bit (IEEE802) */ } - -/** - * compare_ether_addr - Compare two Ethernet addresses - * @addr1: Pointer to a six-byte array containing the Ethernet address - * @addr2 Pointer other six-byte array containing the Ethernet address - * - * Compare two ethernet addresses, returns 0 if equal - */ -static inline unsigned compare_ether_addr(const u8 *_a, const u8 *_b) -{ - const u16 *a = (const u16 *) _a; - const u16 *b = (const u16 *) _b; - - BUILD_BUG_ON(ETH_ALEN != 6); - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; -} #endif /* __KERNEL__ */ #endif /* _LINUX_ETHERDEVICE_H */ diff --git a/trunk/include/net/sctp/user.h b/trunk/include/net/sctp/user.h index f1c3bc54526a..1c5f19f995ad 100644 --- a/trunk/include/net/sctp/user.h +++ b/trunk/include/net/sctp/user.h @@ -171,10 +171,10 @@ struct sctp_sndrcvinfo { */ enum sctp_sinfo_flags { - SCTP_UNORDERED = 1, /* Send/receive message unordered. */ - SCTP_ADDR_OVER = 2, /* Override the primary destination. */ - SCTP_ABORT=4, /* Send an ABORT message to the peer. */ - SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ + MSG_UNORDERED = 1, /* Send/receive message unordered. */ + MSG_ADDR_OVER = 2, /* Override the primary destination. */ + MSG_ABORT=4, /* Send an ABORT message to the peer. */ + /* MSG_EOF is already defined per socket.h */ }; diff --git a/trunk/include/rdma/ib_cm.h b/trunk/include/rdma/ib_cm.h index 0a9fcd59eb43..5308683c8c41 100644 --- a/trunk/include/rdma/ib_cm.h +++ b/trunk/include/rdma/ib_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -109,6 +109,7 @@ struct ib_cm_id; struct ib_cm_req_event_param { struct ib_cm_id *listen_id; + struct ib_device *device; u8 port; struct ib_sa_path_rec *primary_path; @@ -219,6 +220,7 @@ struct ib_cm_apr_event_param { struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; + struct ib_device *device; u8 port; u16 pkey; }; @@ -282,7 +284,6 @@ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, struct ib_cm_id { ib_cm_handler cm_handler; void *context; - struct ib_device *device; __be64 service_id; __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ @@ -294,8 +295,6 @@ struct ib_cm_id { /** * ib_create_cm_id - Allocate a communication identifier. - * @device: Device associated with the cm_id. All related communication will - * be associated with the specified device. * @cm_handler: Callback invoked to notify the user of CM events. * @context: User specified context associated with the communication * identifier. 
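/*
 * Usage sketch for the two-argument ib_create_cm_id() restored by the
 * hunk just below.  This is a minimal sketch, assuming the handler's
 * second parameter is struct ib_cm_event * (per the ib_cm_handler
 * typedef in this header) and that failure is reported via ERR_PTR;
 * the handler and wrapper names are hypothetical.
 */
static int example_cm_handler(struct ib_cm_id *cm_id,
			      struct ib_cm_event *event)
{
	/* dispatch on event->event; a nonzero return destroys cm_id */
	return 0;
}

static struct ib_cm_id *example_cm_open(void *ctx)
{
	struct ib_cm_id *id = ib_create_cm_id(example_cm_handler, ctx);

	return IS_ERR(id) ? NULL : id;	/* ERR_PTR on failure is an assumption */
}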
@@ -303,8 +302,7 @@ struct ib_cm_id { * Communication identifiers are used to track connection states, service * ID resolution requests, and listen requests. */ -struct ib_cm_id *ib_create_cm_id(struct ib_device *device, - ib_cm_handler cm_handler, +struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, void *context); /** diff --git a/trunk/include/rdma/ib_mad.h b/trunk/include/rdma/ib_mad.h index 2c133506742b..4172e6841e3d 100644 --- a/trunk/include/rdma/ib_mad.h +++ b/trunk/include/rdma/ib_mad.h @@ -109,14 +109,10 @@ #define IB_QP_SET_QKEY 0x80000000 enum { - IB_MGMT_MAD_HDR = 24, IB_MGMT_MAD_DATA = 232, - IB_MGMT_RMPP_HDR = 36, IB_MGMT_RMPP_DATA = 220, - IB_MGMT_VENDOR_HDR = 40, IB_MGMT_VENDOR_DATA = 216, - IB_MGMT_SA_HDR = 56, - IB_MGMT_SA_DATA = 200, + IB_MGMT_SA_DATA = 200 }; struct ib_mad_hdr { @@ -207,25 +203,26 @@ struct ib_class_port_info /** * ib_mad_send_buf - MAD data buffer and work request for sends. - * @next: A pointer used to chain together MADs for posting. - * @mad: References an allocated MAD data buffer. + * @mad: References an allocated MAD data buffer. The size of the data + * buffer is specified in the @send_wr.length field. + * @mapping: DMA mapping information. * @mad_agent: MAD agent that allocated the buffer. - * @ah: The address handle to use when sending the MAD. * @context: User-controlled context fields. - * @timeout_ms: Time to wait for a response. - * @retries: Number of times to retry a request for a response. + * @send_wr: An initialized work request structure used when sending the MAD. + * The wr_id field of the work request is initialized to reference this + * data structure. + * @sge: A scatter-gather list referenced by the work request. * * Users are responsible for initializing the MAD buffer itself, with the * exception of specifying the payload length field in any RMPP MAD. */ struct ib_mad_send_buf { - struct ib_mad_send_buf *next; - void *mad; + struct ib_mad *mad; + DECLARE_PCI_UNMAP_ADDR(mapping) struct ib_mad_agent *mad_agent; - struct ib_ah *ah; void *context[2]; - int timeout_ms; - int retries; + struct ib_send_wr send_wr; + struct ib_sge sge; }; /** @@ -290,7 +287,7 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, * or @mad_send_wc. */ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, + struct ib_send_wr *send_wr, struct ib_mad_send_wc *mad_send_wc); /** @@ -337,13 +334,13 @@ struct ib_mad_agent { /** * ib_mad_send_wc - MAD send completion information. - * @send_buf: Send MAD data buffer associated with the send MAD request. + * @wr_id: Work request identifier associated with the send MAD request. * @status: Completion status. * @vendor_err: Optional vendor error information returned with a failed * request. */ struct ib_mad_send_wc { - struct ib_mad_send_buf *send_buf; + u64 wr_id; enum ib_wc_status status; u32 vendor_err; }; @@ -369,7 +366,7 @@ struct ib_mad_recv_buf { * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. * @mad_len: The length of the received MAD, without duplicated headers. * - * For received response, the wr_id contains a pointer to the ib_mad_send_buf + * For received response, the wr_id field of the wc is set to the wr_id * for the corresponding send request. */ struct ib_mad_recv_wc { @@ -466,9 +463,9 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); /** * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client. 
- * @send_buf: Specifies the information needed to send the MAD(s). - * @bad_send_buf: Specifies the MAD on which an error was encountered. This - * parameter is optional if only a single MAD is posted. + * @mad_agent: Specifies the associated registration to post the send to. + * @send_wr: Specifies the information needed to send the MAD(s). + * @bad_send_wr: Specifies the MAD on which an error was encountered. * * Sent MADs are not guaranteed to complete in the order that they were posted. * @@ -482,8 +479,9 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); * defined data being transferred. The paylen_newwin field should be * specified in network-byte order. */ -int ib_post_send_mad(struct ib_mad_send_buf *send_buf, - struct ib_mad_send_buf **bad_send_buf); +int ib_post_send_mad(struct ib_mad_agent *mad_agent, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); /** * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. @@ -509,25 +507,23 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); /** * ib_cancel_mad - Cancels an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. - * @send_buf: Indicates the MAD to cancel. + * @wr_id: Indicates the work request identifier of the MAD to cancel. * * MADs will be returned to the user through the corresponding * ib_mad_send_handler. */ -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf); +void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id); /** * ib_modify_mad - Modifies an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. - * @send_buf: Indicates the MAD to modify. + * @wr_id: Indicates the work request identifier of the MAD to modify. * @timeout_ms: New timeout value for sent MAD. * * This call will reset the timeout value for a sent MAD to the specified * value. */ -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms); +int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms); /** * ib_redirect_mad_qp - Registers a QP for MAD services. @@ -576,6 +572,7 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * @remote_qpn: Specifies the QPN of the receiving node. * @pkey_index: Specifies which PKey the MAD will be sent using. This field * is valid only if the remote_qpn is QP 1. + * @ah: References the address handle used to transfer to the remote node. * @rmpp_active: Indicates if the send will enable RMPP. * @hdr_len: Indicates the size of the data header of the MAD. This length * should include the common MAD header, RMPP header, plus any class @@ -585,10 +582,11 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * additional padding that may be necessary. * @gfp_mask: GFP mask used for the memory allocation. * - * This routine allocates a MAD for sending. The returned MAD send buffer - * will reference a data buffer usable for sending a MAD, along + * This is a helper routine that may be used to allocate a MAD. Users are + * not required to allocate outbound MADs using this call. The returned + * MAD send buffer will reference a data buffer usable for sending a MAD, along * with an initialized work request structure. Users may modify the returned - * MAD data buffer before posting the send. + * MAD data buffer or work request before posting the send. * * The returned data buffer will be cleared. 
Users are responsible for * initializing the common MAD and any class specific headers. If @rmpp_active @@ -596,7 +594,7 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, */ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - int rmpp_active, + struct ib_ah *ah, int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask); diff --git a/trunk/include/rdma/ib_user_cm.h b/trunk/include/rdma/ib_user_cm.h index 3037588b8464..e4d1654276ad 100644 --- a/trunk/include/rdma/ib_user_cm.h +++ b/trunk/include/rdma/ib_user_cm.h @@ -38,7 +38,7 @@ #include -#define IB_USER_CM_ABI_VERSION 3 +#define IB_USER_CM_ABI_VERSION 2 enum { IB_USER_CM_CMD_CREATE_ID, @@ -299,6 +299,8 @@ struct ib_ucm_event_get { }; struct ib_ucm_req_event_resp { + /* device */ + /* port */ struct ib_ucm_path_rec primary_path; struct ib_ucm_path_rec alternate_path; __be64 remote_ca_guid; @@ -314,7 +316,6 @@ struct ib_ucm_req_event_resp { __u8 retry_count; __u8 rnr_retry_count; __u8 srq; - __u8 port; }; struct ib_ucm_rep_event_resp { @@ -352,9 +353,10 @@ struct ib_ucm_apr_event_resp { }; struct ib_ucm_sidr_req_event_resp { + /* device */ + /* port */ __u16 pkey; - __u8 port; - __u8 reserved; + __u8 reserved[2]; }; struct ib_ucm_sidr_rep_event_resp { diff --git a/trunk/include/rdma/ib_user_verbs.h b/trunk/include/rdma/ib_user_verbs.h index 072f3a2edace..fd85725391a4 100644 --- a/trunk/include/rdma/ib_user_verbs.h +++ b/trunk/include/rdma/ib_user_verbs.h @@ -1,7 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. - * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -43,12 +42,15 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 3 +#define IB_USER_VERBS_ABI_VERSION 2 enum { + IB_USER_VERBS_CMD_QUERY_PARAMS, IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_CMD_QUERY_PORT, + IB_USER_VERBS_CMD_QUERY_GID, + IB_USER_VERBS_CMD_QUERY_PKEY, IB_USER_VERBS_CMD_ALLOC_PD, IB_USER_VERBS_CMD_DEALLOC_PD, IB_USER_VERBS_CMD_CREATE_AH, @@ -63,7 +65,6 @@ enum { IB_USER_VERBS_CMD_ALLOC_MW, IB_USER_VERBS_CMD_BIND_MW, IB_USER_VERBS_CMD_DEALLOC_MW, - IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_CMD_RESIZE_CQ, IB_USER_VERBS_CMD_DESTROY_CQ, @@ -89,11 +90,8 @@ enum { * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * Specifically: - * - Do not use pointer types -- pass pointers in __u64 instead. - * - Make sure that any structure larger than 4 bytes is padded to a - * multiple of 8 bytes. Otherwise the structure size will be - * different between 32-bit and 64-bit architectures. + * In particular do not use pointer types -- pass pointers in __u64 + * instead. */ struct ib_uverbs_async_event_desc { @@ -120,14 +118,27 @@ struct ib_uverbs_cmd_hdr { __u16 out_words; }; +/* + * No driver_data for "query params" command, since this is intended + * to be a core function with no possible device dependence. 
diff --git a/trunk/include/rdma/ib_user_verbs.h b/trunk/include/rdma/ib_user_verbs.h
index 072f3a2edace..fd85725391a4 100644
--- a/trunk/include/rdma/ib_user_verbs.h
+++ b/trunk/include/rdma/ib_user_verbs.h
@@ -1,7 +1,6 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -43,12 +42,15 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IB_USER_VERBS_ABI_VERSION	3
+#define IB_USER_VERBS_ABI_VERSION	2
 
 enum {
+	IB_USER_VERBS_CMD_QUERY_PARAMS,
 	IB_USER_VERBS_CMD_GET_CONTEXT,
 	IB_USER_VERBS_CMD_QUERY_DEVICE,
 	IB_USER_VERBS_CMD_QUERY_PORT,
+	IB_USER_VERBS_CMD_QUERY_GID,
+	IB_USER_VERBS_CMD_QUERY_PKEY,
 	IB_USER_VERBS_CMD_ALLOC_PD,
 	IB_USER_VERBS_CMD_DEALLOC_PD,
 	IB_USER_VERBS_CMD_CREATE_AH,
@@ -63,7 +65,6 @@ enum {
 	IB_USER_VERBS_CMD_ALLOC_MW,
 	IB_USER_VERBS_CMD_BIND_MW,
 	IB_USER_VERBS_CMD_DEALLOC_MW,
-	IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
 	IB_USER_VERBS_CMD_CREATE_CQ,
 	IB_USER_VERBS_CMD_RESIZE_CQ,
 	IB_USER_VERBS_CMD_DESTROY_CQ,
@@ -89,11 +90,8 @@ enum {
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * Specifically:
- *  - Do not use pointer types -- pass pointers in __u64 instead.
- *  - Make sure that any structure larger than 4 bytes is padded to a
- *    multiple of 8 bytes.  Otherwise the structure size will be
- *    different between 32-bit and 64-bit architectures.
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
  */
 
 struct ib_uverbs_async_event_desc {
@@ -120,14 +118,27 @@ struct ib_uverbs_cmd_hdr {
 	__u16 out_words;
 };
 
+/*
+ * No driver_data for "query params" command, since this is intended
+ * to be a core function with no possible device dependence.
+ */
+struct ib_uverbs_query_params {
+	__u64 response;
+};
+
+struct ib_uverbs_query_params_resp {
+	__u32 num_cq_events;
+};
+
 struct ib_uverbs_get_context {
 	__u64 response;
+	__u64 cq_fd_tab;
 	__u64 driver_data[0];
 };
 
 struct ib_uverbs_get_context_resp {
 	__u32 async_fd;
-	__u32 num_comp_vectors;
+	__u32 reserved;
 };
 
 struct ib_uverbs_query_device {
@@ -209,6 +220,31 @@ struct ib_uverbs_query_port_resp {
 	__u8  reserved[3];
 };
 
+struct ib_uverbs_query_gid {
+	__u64 response;
+	__u8  port_num;
+	__u8  index;
+	__u8  reserved[6];
+	__u64 driver_data[0];
+};
+
+struct ib_uverbs_query_gid_resp {
+	__u8  gid[16];
+};
+
+struct ib_uverbs_query_pkey {
+	__u64 response;
+	__u8  port_num;
+	__u8  index;
+	__u8  reserved[6];
+	__u64 driver_data[0];
+};
+
+struct ib_uverbs_query_pkey_resp {
+	__u16 pkey;
+	__u16 reserved;
+};
+
 struct ib_uverbs_alloc_pd {
 	__u64 response;
 	__u64 driver_data[0];
@@ -242,21 +278,11 @@ struct ib_uverbs_dereg_mr {
 	__u32 mr_handle;
 };
 
-struct ib_uverbs_create_comp_channel {
-	__u64 response;
-};
-
-struct ib_uverbs_create_comp_channel_resp {
-	__u32 fd;
-};
-
 struct ib_uverbs_create_cq {
 	__u64 response;
 	__u64 user_handle;
 	__u32 cqe;
-	__u32 comp_vector;
-	__s32 comp_channel;
-	__u32 reserved;
+	__u32 event_handler;
 	__u64 driver_data[0];
 };
 
@@ -265,41 +291,6 @@ struct ib_uverbs_create_cq_resp {
 	__u32 cqe;
 };
 
-struct ib_uverbs_poll_cq {
-	__u64 response;
-	__u32 cq_handle;
-	__u32 ne;
-};
-
-struct ib_uverbs_wc {
-	__u64 wr_id;
-	__u32 status;
-	__u32 opcode;
-	__u32 vendor_err;
-	__u32 byte_len;
-	__u32 imm_data;
-	__u32 qp_num;
-	__u32 src_qp;
-	__u32 wc_flags;
-	__u16 pkey_index;
-	__u16 slid;
-	__u8 sl;
-	__u8 dlid_path_bits;
-	__u8 port_num;
-	__u8 reserved;
-};
-
-struct ib_uverbs_poll_cq_resp {
-	__u32 count;
-	__u32 reserved;
-	struct ib_uverbs_wc wc[0];
-};
-
-struct ib_uverbs_req_notify_cq {
-	__u32 cq_handle;
-	__u32 solicited_only;
-};
-
 struct ib_uverbs_destroy_cq {
 	__u64 response;
 	__u32 cq_handle;
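The packing rule retained in the comment above is the convention every command struct in this file follows. A purely illustrative struct (the name is invented) showing the two common idioms, pointers carried as __u64 and explicit padding to an 8-byte multiple:

	struct ib_uverbs_example_cmd {
		__u64 response;		/* userspace buffer address, passed as __u64 */
		__u32 handle;
		__u32 reserved;		/* pad so sizeof () matches on 32/64-bit */
	};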
@@ -397,127 +388,6 @@ struct ib_uverbs_destroy_qp_resp {
 	__u32 events_reported;
 };
 
-/*
- * The ib_uverbs_sge structure isn't used anywhere, since we assume
- * the ib_sge structure is packed the same way on 32-bit and 64-bit
- * architectures in both kernel and user space.  It's just here to
- * document the ABI.
- */
-struct ib_uverbs_sge {
-	__u64 addr;
-	__u32 length;
-	__u32 lkey;
-};
-
-struct ib_uverbs_send_wr {
-	__u64 wr_id;
-	__u32 num_sge;
-	__u32 opcode;
-	__u32 send_flags;
-	__u32 imm_data;
-	union {
-		struct {
-			__u64 remote_addr;
-			__u32 rkey;
-			__u32 reserved;
-		} rdma;
-		struct {
-			__u64 remote_addr;
-			__u64 compare_add;
-			__u64 swap;
-			__u32 rkey;
-			__u32 reserved;
-		} atomic;
-		struct {
-			__u32 ah;
-			__u32 remote_qpn;
-			__u32 remote_qkey;
-			__u32 reserved;
-		} ud;
-	} wr;
-};
-
-struct ib_uverbs_post_send {
-	__u64 response;
-	__u32 qp_handle;
-	__u32 wr_count;
-	__u32 sge_count;
-	__u32 wqe_size;
-	struct ib_uverbs_send_wr send_wr[0];
-};
-
-struct ib_uverbs_post_send_resp {
-	__u32 bad_wr;
-};
-
-struct ib_uverbs_recv_wr {
-	__u64 wr_id;
-	__u32 num_sge;
-	__u32 reserved;
-};
-
-struct ib_uverbs_post_recv {
-	__u64 response;
-	__u32 qp_handle;
-	__u32 wr_count;
-	__u32 sge_count;
-	__u32 wqe_size;
-	struct ib_uverbs_recv_wr recv_wr[0];
-};
-
-struct ib_uverbs_post_recv_resp {
-	__u32 bad_wr;
-};
-
-struct ib_uverbs_post_srq_recv {
-	__u64 response;
-	__u32 srq_handle;
-	__u32 wr_count;
-	__u32 sge_count;
-	__u32 wqe_size;
-	struct ib_uverbs_recv_wr recv[0];
-};
-
-struct ib_uverbs_post_srq_recv_resp {
-	__u32 bad_wr;
-};
-
-struct ib_uverbs_global_route {
-	__u8  dgid[16];
-	__u32 flow_label;
-	__u8  sgid_index;
-	__u8  hop_limit;
-	__u8  traffic_class;
-	__u8  reserved;
-};
-
-struct ib_uverbs_ah_attr {
-	struct ib_uverbs_global_route grh;
-	__u16 dlid;
-	__u8  sl;
-	__u8  src_path_bits;
-	__u8  static_rate;
-	__u8  is_global;
-	__u8  port_num;
-	__u8  reserved;
-};
-
-struct ib_uverbs_create_ah {
-	__u64 response;
-	__u64 user_handle;
-	__u32 pd_handle;
-	__u32 reserved;
-	struct ib_uverbs_ah_attr attr;
-};
-
-struct ib_uverbs_create_ah_resp {
-	__u32 ah_handle;
-};
-
-struct ib_uverbs_destroy_ah {
-	__u32 ah_handle;
-};
-
 struct ib_uverbs_attach_mcast {
 	__u8  gid[16];
 	__u32 qp_handle;
diff --git a/trunk/include/rdma/ib_verbs.h b/trunk/include/rdma/ib_verbs.h
index f72d46d54e0a..e6f4c9e55df7 100644
--- a/trunk/include/rdma/ib_verbs.h
+++ b/trunk/include/rdma/ib_verbs.h
@@ -595,8 +595,11 @@ struct ib_send_wr {
 		} atomic;
 		struct {
 			struct ib_ah *ah;
+			struct ib_mad_hdr *mad_hdr;
 			u32	remote_qpn;
 			u32	remote_qkey;
+			int	timeout_ms; /* valid for MADs only */
+			int	retries;    /* valid for MADs only */
 			u16	pkey_index; /* valid for GSI only */
 			u8	port_num;   /* valid for DR SMPs on switch only */
 		} ud;
@@ -948,9 +951,6 @@ struct ib_device {
 		IB_DEV_UNREGISTERED
 	} reg_state;
 
-	u64			uverbs_cmd_mask;
-	int			uverbs_abi_ver;
-
 	u8			node_type;
 	u8			phys_port_cnt;
 };
diff --git a/trunk/net/ethernet/eth.c b/trunk/net/ethernet/eth.c
index e24577367274..68a5ca866442 100644
--- a/trunk/net/ethernet/eth.c
+++ b/trunk/net/ethernet/eth.c
@@ -146,6 +146,19 @@ int eth_rebuild_header(struct sk_buff *skb)
 
 	return 0;
 }
+static inline unsigned int compare_eth_addr(const unsigned char *__a, const unsigned char *__b)
+{
+	const unsigned short *dest    = (unsigned short *) __a;
+	const unsigned short *devaddr = (unsigned short *) __b;
+	unsigned int res;
+
+	BUILD_BUG_ON(ETH_ALEN != 6);
+	res = ((dest[0] ^ devaddr[0]) |
+	       (dest[1] ^ devaddr[1]) |
+	       (dest[2] ^ devaddr[2])) != 0;
+
+	return res;
+}
 
 /*
  * Determine the packet's protocol ID.  The rule here is that we
@@ -163,7 +176,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	eth = eth_hdr(skb);
 
 	if (*eth->h_dest&1) {
-		if (!compare_ether_addr(eth->h_dest, dev->broadcast))
+		if (!compare_eth_addr(eth->h_dest, dev->broadcast))
 			skb->pkt_type = PACKET_BROADCAST;
 		else
 			skb->pkt_type = PACKET_MULTICAST;
@@ -178,7 +191,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 */
 
 	else if(1 /*dev->flags&IFF_PROMISC*/) {
-		if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr)))
+		if (unlikely(compare_eth_addr(eth->h_dest, dev->dev_addr)))
 			skb->pkt_type = PACKET_OTHERHOST;
 	}
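Note that compare_eth_addr(), like memcmp(), returns nonzero on mismatch and 0 on match, which is why both call sites above negate the result; it also reads the addresses two 8-bit bytes at a time as u16 words, so both pointers must be 16-bit aligned (true for the header and device address fields used here). A hedged illustration, with an invented helper name:

	static inline int is_for_us(const struct ethhdr *eth,
				    const struct net_device *dev)
	{
		/* 0 means "equal"; both addresses are 2-byte aligned. */
		return compare_eth_addr(eth->h_dest, dev->dev_addr) == 0;
	}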
diff --git a/trunk/net/ipv4/fib_frontend.c b/trunk/net/ipv4/fib_frontend.c
index 990633c09dfe..e61bc7177eb1 100644
--- a/trunk/net/ipv4/fib_frontend.c
+++ b/trunk/net/ipv4/fib_frontend.c
@@ -591,7 +591,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa);
-		if (ifa->ifa_dev->ifa_list == NULL) {
+		if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
 			/* Last address was deleted from this interface.
 			   Disable IP.
 			 */
diff --git a/trunk/net/sctp/sm_make_chunk.c b/trunk/net/sctp/sm_make_chunk.c
index 660c61bdf164..10e82ec2ebd3 100644
--- a/trunk/net/sctp/sm_make_chunk.c
+++ b/trunk/net/sctp/sm_make_chunk.c
@@ -554,7 +554,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
 	dp.ppid    = sinfo->sinfo_ppid;
 
 	/* Set the flags for an unordered send.  */
-	if (sinfo->sinfo_flags & SCTP_UNORDERED) {
+	if (sinfo->sinfo_flags & MSG_UNORDERED) {
 		flags |= SCTP_DATA_UNORDERED;
 		dp.ssn = 0;
 	} else
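For the userspace view of these sinfo_flags, a hedged sketch of sending one unordered message on an existing SCTP socket under the restored MSG_* names (the function is invented for illustration; standard socket and SCTP headers, plus error handling, are omitted):

	static int sctp_send_unordered(int sd, const void *data, size_t len)
	{
		struct iovec iov = { .iov_base = (void *) data, .iov_len = len };
		char cbuf[CMSG_SPACE(sizeof (struct sctp_sndrcvinfo))] = { 0 };
		struct msghdr msgh = { 0 };
		struct cmsghdr *cmsg;
		struct sctp_sndrcvinfo *sinfo;

		msgh.msg_iov = &iov;
		msgh.msg_iovlen = 1;
		msgh.msg_control = cbuf;
		msgh.msg_controllen = sizeof (cbuf);

		/* sinfo_flags travel in a SCTP_SNDRCV ancillary message. */
		cmsg = CMSG_FIRSTHDR(&msgh);
		cmsg->cmsg_level = IPPROTO_SCTP;
		cmsg->cmsg_type = SCTP_SNDRCV;
		cmsg->cmsg_len = CMSG_LEN(sizeof (struct sctp_sndrcvinfo));

		sinfo = (struct sctp_sndrcvinfo *) CMSG_DATA(cmsg);
		memset(sinfo, 0, sizeof (*sinfo));
		sinfo->sinfo_flags = MSG_UNORDERED;

		return sendmsg(sd, &msgh, 0);
	}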
diff --git a/trunk/net/sctp/socket.c b/trunk/net/sctp/socket.c
index b529af5e6f2a..02e068d3450d 100644
--- a/trunk/net/sctp/socket.c
+++ b/trunk/net/sctp/socket.c
@@ -1010,19 +1010,6 @@ static int __sctp_connect(struct sock* sk,
 			err = -EAGAIN;
 			goto out_free;
 		}
-	} else {
-		/*
-		 * If an unprivileged user inherits a 1-many
-		 * style socket with open associations on a
-		 * privileged port, it MAY be permitted to
-		 * accept new associations, but it SHOULD NOT
-		 * be permitted to open new associations.
-		 */
-		if (ep->base.bind_addr.port < PROT_SOCK &&
-		    !capable(CAP_NET_BIND_SERVICE)) {
-			err = -EACCES;
-			goto out_free;
-		}
 	}
 
 	scope = sctp_scope(&to);
@@ -1402,27 +1389,27 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n",
 			  msg_len, sinfo_flags);
 
-	/* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
-	if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
+	/* MSG_EOF or MSG_ABORT cannot be set on a TCP-style socket. */
+	if (sctp_style(sk, TCP) && (sinfo_flags & (MSG_EOF | MSG_ABORT))) {
 		err = -EINVAL;
 		goto out_nounlock;
 	}
 
-	/* If SCTP_EOF is set, no data can be sent.  Disallow sending zero
-	 * length messages when SCTP_EOF|SCTP_ABORT is not set.
-	 * If SCTP_ABORT is set, the message length could be non zero with
+	/* If MSG_EOF is set, no data can be sent.  Disallow sending zero
+	 * length messages when MSG_EOF|MSG_ABORT is not set.
+	 * If MSG_ABORT is set, the message length could be non zero with
 	 * the msg_iov set to the user abort reason.
 	 */
-	if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
-	    (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
+	if (((sinfo_flags & MSG_EOF) && (msg_len > 0)) ||
+	    (!(sinfo_flags & (MSG_EOF|MSG_ABORT)) && (msg_len == 0))) {
 		err = -EINVAL;
 		goto out_nounlock;
 	}
 
-	/* If SCTP_ADDR_OVER is set, there must be an address
+	/* If MSG_ADDR_OVER is set, there must be an address
 	 * specified in msg_name.
 	 */
-	if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
+	if ((sinfo_flags & MSG_ADDR_OVER) && (!msg->msg_name)) {
 		err = -EINVAL;
 		goto out_nounlock;
 	}
@@ -1471,14 +1458,14 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			goto out_unlock;
 		}
 
-		if (sinfo_flags & SCTP_EOF) {
+		if (sinfo_flags & MSG_EOF) {
 			SCTP_DEBUG_PRINTK("Shutting down association: %p\n",
 					  asoc);
 			sctp_primitive_SHUTDOWN(asoc, NULL);
 			err = 0;
 			goto out_unlock;
 		}
-		if (sinfo_flags & SCTP_ABORT) {
+		if (sinfo_flags & MSG_ABORT) {
 			SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
 			sctp_primitive_ABORT(asoc, msg);
 			err = 0;
@@ -1490,7 +1477,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (!asoc) {
 		SCTP_DEBUG_PRINTK("There is no association yet.\n");
 
-		if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
+		if (sinfo_flags & (MSG_EOF | MSG_ABORT)) {
 			err = -EINVAL;
 			goto out_unlock;
 		}
@@ -1528,19 +1515,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 				err = -EAGAIN;
 				goto out_unlock;
 			}
-		} else {
-			/*
-			 * If an unprivileged user inherits a one-to-many
-			 * style socket with open associations on a privileged
-			 * port, it MAY be permitted to accept new associations,
-			 * but it SHOULD NOT be permitted to open new
-			 * associations.
-			 */
-			if (ep->base.bind_addr.port < PROT_SOCK &&
-			    !capable(CAP_NET_BIND_SERVICE)) {
-				err = -EACCES;
-				goto out_unlock;
-			}
 		}
 
 		scope = sctp_scope(&to);
@@ -1637,10 +1611,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 
 	/* If an address is passed with the sendto/sendmsg call, it is used
 	 * to override the primary destination address in the TCP model, or
-	 * when SCTP_ADDR_OVER flag is set in the UDP model.
+	 * when MSG_ADDR_OVER flag is set in the UDP model.
 	 */
 	if ((sctp_style(sk, TCP) && msg_name) ||
-	    (sinfo_flags & SCTP_ADDR_OVER)) {
+	    (sinfo_flags & MSG_ADDR_OVER)) {
 		chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
 		if (!chunk_tp) {
 			err = -EINVAL;
@@ -2332,14 +2306,16 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optlen)
 		return -EINVAL;
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
-	if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN)))
+	if ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))
 		return -EINVAL;
 	sp->user_frag = val;
 
-	/* Update the frag_point of the existing associations. */
-	list_for_each(pos, &(sp->ep->asocs)) {
-		asoc = list_entry(pos, struct sctp_association, asocs);
-		asoc->frag_point = sctp_frag_point(sp, asoc->pmtu);
+	if (val) {
+		/* Update the frag_point of the existing associations. */
+		list_for_each(pos, &(sp->ep->asocs)) {
+			asoc = list_entry(pos, struct sctp_association, asocs);
+			asoc->frag_point = sctp_frag_point(sp, asoc->pmtu);
+		}
 	}
 
 	return 0;
@@ -2408,14 +2384,14 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
 static int sctp_setsockopt_adaption_layer(struct sock *sk, char __user *optval,
 					  int optlen)
 {
-	struct sctp_setadaption adaption;
+	__u32 val;
 
-	if (optlen != sizeof(struct sctp_setadaption))
+	if (optlen < sizeof(__u32))
 		return -EINVAL;
-	if (copy_from_user(&adaption, optval, optlen))
+	if (copy_from_user(&val, optval, sizeof(__u32)))
 		return -EFAULT;
 
-	sctp_sk(sk)->adaption_ind = adaption.ssb_adaption_ind;
+	sctp_sk(sk)->adaption_ind = val;
 
 	return 0;
 }
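With this hunk the option value reverts to a bare __u32 rather than a struct. A hedged userspace sketch, assuming the SCTP_ADAPTION_LAYER option name of this era (spelled as in the kernel headers of the time):

	__u32 ind = 0x01020304;	/* illustrative adaption indication */

	if (setsockopt(sd, IPPROTO_SCTP, SCTP_ADAPTION_LAYER,
		       &ind, sizeof (ind)) < 0)
		perror("setsockopt");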
@@ -3696,15 +3672,17 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len,
 static int sctp_getsockopt_adaption_layer(struct sock *sk, int len,
 				  char __user *optval, int __user *optlen)
 {
-	struct sctp_setadaption adaption;
+	__u32 val;
 
-	if (len != sizeof(struct sctp_setadaption))
+	if (len < sizeof(__u32))
 		return -EINVAL;
 
-	adaption.ssb_adaption_ind = sctp_sk(sk)->adaption_ind;
-	if (copy_to_user(optval, &adaption, len))
+	len = sizeof(__u32);
+	val = sctp_sk(sk)->adaption_ind;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
 		return -EFAULT;
-
 	return 0;
 }
 
@@ -4662,8 +4640,8 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
 
 			/* Minimally, validate the sinfo_flags. */
 			if (cmsgs->info->sinfo_flags &
-			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
-			      SCTP_ABORT | SCTP_EOF))
+			    ~(MSG_UNORDERED | MSG_ADDR_OVER |
+			      MSG_ABORT | MSG_EOF))
 				return -EINVAL;
 			break;
 
diff --git a/trunk/net/sctp/ulpevent.c b/trunk/net/sctp/ulpevent.c
index e049f41faa47..057e7fac3af0 100644
--- a/trunk/net/sctp/ulpevent.c
+++ b/trunk/net/sctp/ulpevent.c
@@ -698,7 +698,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	event->ssn = ntohs(chunk->subh.data_hdr->ssn);
 	event->ppid = chunk->subh.data_hdr->ppid;
 	if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
-		event->flags |= SCTP_UNORDERED;
+		event->flags |= MSG_UNORDERED;
 		event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
 	}
 	event->tsn = ntohl(chunk->subh.data_hdr->tsn);
@@ -824,7 +824,7 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
 	 *
 	 * recvmsg() flags:
 	 *
-	 * SCTP_UNORDERED - This flag is present when the message was sent
+	 * MSG_UNORDERED - This flag is present when the message was sent
 	 * non-ordered.
 	 */
 	sinfo.sinfo_flags = event->flags;
@@ -839,7 +839,7 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
 	 * This field will hold the current cumulative TSN as
 	 * known by the underlying SCTP layer.  Note this field is
 	 * ignored when sending and only valid for a receive
-	 * operation when sinfo_flags are set to SCTP_UNORDERED.
+	 * operation when sinfo_flags are set to MSG_UNORDERED.
 	 */
 	sinfo.sinfo_cumtsn = event->cumtsn;
 	/* sinfo_assoc_id: sizeof (sctp_assoc_t)