From 5723e7e2d2e31290e26c2fda68a15cc8cc42d77c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 2 Feb 2010 19:23:54 +0000 Subject: [PATCH] --- yaml --- r: 184992 b: refs/heads/master c: 9c03dc9f19351edf25c1107e3cfd3cc538c7ab9e h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/drivers/infiniband/core/ucm.c | 63 +-- trunk/drivers/infiniband/core/user_mad.c | 173 +++--- trunk/drivers/infiniband/core/uverbs.h | 11 +- trunk/drivers/infiniband/core/uverbs_main.c | 175 +++--- trunk/drivers/infiniband/hw/cxgb3/cxio_hal.c | 15 +- trunk/drivers/infiniband/hw/cxgb3/cxio_hal.h | 4 +- trunk/drivers/infiniband/hw/cxgb3/cxio_wr.h | 17 +- trunk/drivers/infiniband/hw/cxgb3/iwch.c | 80 +-- trunk/drivers/infiniband/hw/cxgb3/iwch.h | 2 - .../infiniband/hw/cxgb3/iwch_provider.c | 2 +- trunk/drivers/infiniband/hw/cxgb3/iwch_qp.c | 9 +- trunk/drivers/infiniband/hw/ehca/ehca_irq.c | 5 +- trunk/drivers/infiniband/hw/ehca/ehca_qp.c | 4 +- trunk/drivers/infiniband/hw/ehca/ehca_sqp.c | 2 +- trunk/drivers/infiniband/hw/mlx4/qp.c | 2 +- trunk/drivers/infiniband/hw/nes/nes.c | 1 - trunk/drivers/infiniband/hw/nes/nes.h | 9 +- trunk/drivers/infiniband/hw/nes/nes_cm.c | 11 +- trunk/drivers/infiniband/hw/nes/nes_hw.c | 484 ++++++++--------- trunk/drivers/infiniband/hw/nes/nes_hw.h | 2 +- trunk/drivers/infiniband/hw/nes/nes_nic.c | 61 ++- trunk/drivers/infiniband/hw/nes/nes_verbs.c | 6 +- .../infiniband/ulp/ipoib/ipoib_ethtool.c | 10 +- .../drivers/infiniband/ulp/iser/iscsi_iser.c | 47 +- .../drivers/infiniband/ulp/iser/iscsi_iser.h | 97 ++-- .../infiniband/ulp/iser/iser_initiator.c | 506 ++++++++++++------ .../drivers/infiniband/ulp/iser/iser_memory.c | 64 ++- .../drivers/infiniband/ulp/iser/iser_verbs.c | 281 +++++----- trunk/drivers/infiniband/ulp/srp/ib_srp.c | 72 ++- trunk/drivers/infiniband/ulp/srp/ib_srp.h | 6 +- trunk/drivers/net/cxgb3/adapter.h | 5 - trunk/drivers/net/cxgb3/cxgb3_main.c | 57 +- trunk/drivers/net/cxgb3/cxgb3_offload.h | 5 +- trunk/drivers/net/cxgb3/regs.h | 16 - 
trunk/drivers/net/cxgb3/sge.c | 10 +- trunk/drivers/net/cxgb3/t3_hw.c | 5 +- trunk/include/rdma/ib_verbs.h | 4 +- trunk/include/rdma/rdma_cm.h | 1 + 39 files changed, 1160 insertions(+), 1166 deletions(-) diff --git a/[refs] b/[refs] index 4e7d1e733620..017e2db1c260 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 3bbddbada87f30da8bcc5de7b9d06d8f2495aba5 +refs/heads/master: 9c03dc9f19351edf25c1107e3cfd3cc538c7ab9e diff --git a/trunk/drivers/infiniband/core/ucm.c b/trunk/drivers/infiniband/core/ucm.c index 1b09b735c5a8..f504c9b00c1b 100644 --- a/trunk/drivers/infiniband/core/ucm.c +++ b/trunk/drivers/infiniband/core/ucm.c @@ -1215,18 +1215,15 @@ static void ib_ucm_release_dev(struct device *dev) ucm_dev = container_of(dev, struct ib_ucm_device, dev); cdev_del(&ucm_dev->cdev); - if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) - clear_bit(ucm_dev->devnum, dev_map); - else - clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map); + clear_bit(ucm_dev->devnum, dev_map); kfree(ucm_dev); } static const struct file_operations ucm_fops = { - .owner = THIS_MODULE, - .open = ib_ucm_open, + .owner = THIS_MODULE, + .open = ib_ucm_open, .release = ib_ucm_close, - .write = ib_ucm_write, + .write = ib_ucm_write, .poll = ib_ucm_poll, }; @@ -1240,32 +1237,8 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); -static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); -static int find_overflow_devnum(void) -{ - int ret; - - if (!overflow_maj) { - ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, - "infiniband_cm"); - if (ret) { - printk(KERN_ERR "ucm: couldn't register dynamic device number\n"); - return ret; - } - } - - ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES); - if (ret >= IB_UCM_MAX_DEVICES) - return -1; - - return ret; -} - static void ib_ucm_add_one(struct ib_device *device) { - int devnum; - dev_t base; struct 
ib_ucm_device *ucm_dev; if (!device->alloc_ucontext || @@ -1278,25 +1251,16 @@ static void ib_ucm_add_one(struct ib_device *device) ucm_dev->ib_dev = device; - devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); - if (devnum >= IB_UCM_MAX_DEVICES) { - devnum = find_overflow_devnum(); - if (devnum < 0) - goto err; - - ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES; - base = devnum + overflow_maj; - set_bit(devnum, overflow_map); - } else { - ucm_dev->devnum = devnum; - base = devnum + IB_UCM_BASE_DEV; - set_bit(devnum, dev_map); - } + ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); + if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) + goto err; + + set_bit(ucm_dev->devnum, dev_map); cdev_init(&ucm_dev->cdev, &ucm_fops); ucm_dev->cdev.owner = THIS_MODULE; kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); - if (cdev_add(&ucm_dev->cdev, base, 1)) + if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) goto err; ucm_dev->dev.class = &cm_class; @@ -1317,10 +1281,7 @@ static void ib_ucm_add_one(struct ib_device *device) device_unregister(&ucm_dev->dev); err_cdev: cdev_del(&ucm_dev->cdev); - if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) - clear_bit(devnum, dev_map); - else - clear_bit(devnum, overflow_map); + clear_bit(ucm_dev->devnum, dev_map); err: kfree(ucm_dev); return; @@ -1379,8 +1340,6 @@ static void __exit ib_ucm_cleanup(void) ib_unregister_client(&ucm_client); class_remove_file(&cm_class, &class_attr_abi_version); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); - if (overflow_maj) - unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } diff --git a/trunk/drivers/infiniband/core/user_mad.c b/trunk/drivers/infiniband/core/user_mad.c index 02d360cfc2f7..7de02969ed7d 100644 --- a/trunk/drivers/infiniband/core/user_mad.c +++ b/trunk/drivers/infiniband/core/user_mad.c @@ -65,9 +65,12 @@ enum { }; /* - * Our lifetime rules for these structs are the following: - * device special 
file is opened, we take a reference on the - * ib_umad_port's struct ib_umad_device. We drop these + * Our lifetime rules for these structs are the following: each time a + * device special file is opened, we look up the corresponding struct + * ib_umad_port by minor in the umad_port[] table while holding the + * port_lock. If this lookup succeeds, we take a reference on the + * ib_umad_port's struct ib_umad_device while still holding the + * port_lock; if the lookup fails, we fail the open(). We drop these * references in the corresponding close(). * * In addition to references coming from open character devices, there @@ -75,14 +78,19 @@ enum { * module's reference taken when allocating the ib_umad_device in * ib_umad_add_one(). * - * When destroying an ib_umad_device, we drop the module's reference. + * When destroying an ib_umad_device, we clear all of its + * ib_umad_ports from umad_port[] while holding port_lock before + * dropping the module's reference to the ib_umad_device. This is + * always safe because any open() calls will either succeed and obtain + * a reference before we clear the umad_port[] entries, or fail after + * we clear the umad_port[] entries. 
*/ struct ib_umad_port { - struct cdev cdev; + struct cdev *cdev; struct device *dev; - struct cdev sm_cdev; + struct cdev *sm_cdev; struct device *sm_dev; struct semaphore sm_sem; @@ -128,6 +136,7 @@ static struct class *umad_class; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static DEFINE_SPINLOCK(port_lock); +static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); @@ -487,8 +496,8 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, ah_attr.ah_flags = IB_AH_GRH; memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; - ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); - ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; + ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); + ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } @@ -519,9 +528,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_ah; } - packet->msg->ah = ah; + packet->msg->ah = ah; packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; - packet->msg->retries = packet->mad.hdr.retries; + packet->msg->retries = packet->mad.hdr.retries; packet->msg->context[0] = packet; /* Copy MAD header. Any RMPP header is already in place. 
*/ @@ -770,11 +779,15 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, /* * ib_umad_open() does not need the BKL: * - * - the ib_umad_port structures are properly reference counted, and + * - umad_port[] accesses are protected by port_lock, the + * ib_umad_port structures are properly reference counted, and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - the ioctl method does not affect any global state outside of the * file structure being operated on; + * - the port is added to umad_port[] as the last part of module + * initialization so the open method will either immediately run + * -ENXIO, or all required initialization will be done. */ static int ib_umad_open(struct inode *inode, struct file *filp) { @@ -782,10 +795,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp) struct ib_umad_file *file; int ret = 0; - port = container_of(inode->i_cdev, struct ib_umad_port, cdev); + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; if (port) kref_get(&port->umad_dev->ref); - else + spin_unlock(&port_lock); + + if (!port) return -ENXIO; mutex_lock(&port->file_mutex); @@ -856,16 +872,16 @@ static int ib_umad_close(struct inode *inode, struct file *filp) } static const struct file_operations umad_fops = { - .owner = THIS_MODULE, - .read = ib_umad_read, - .write = ib_umad_write, - .poll = ib_umad_poll, + .owner = THIS_MODULE, + .read = ib_umad_read, + .write = ib_umad_write, + .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = ib_umad_compat_ioctl, + .compat_ioctl = ib_umad_compat_ioctl, #endif - .open = ib_umad_open, - .release = ib_umad_close + .open = ib_umad_open, + .release = ib_umad_close }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) @@ -876,10 +892,13 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) }; int ret; - port = 
container_of(inode->i_cdev, struct ib_umad_port, sm_cdev); + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; if (port) kref_get(&port->umad_dev->ref); - else + spin_unlock(&port_lock); + + if (!port) return -ENXIO; if (filp->f_flags & O_NONBLOCK) { @@ -930,8 +949,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) } static const struct file_operations umad_sm_fops = { - .owner = THIS_MODULE, - .open = ib_umad_sm_open, + .owner = THIS_MODULE, + .open = ib_umad_sm_open, .release = ib_umad_sm_close }; @@ -971,51 +990,16 @@ static ssize_t show_abi_version(struct class *class, char *buf) } static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); -static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); -static int find_overflow_devnum(void) -{ - int ret; - - if (!overflow_maj) { - ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, - "infiniband_mad"); - if (ret) { - printk(KERN_ERR "user_mad: couldn't register dynamic device number\n"); - return ret; - } - } - - ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS); - if (ret >= IB_UMAD_MAX_PORTS) - return -1; - - return ret; -} - static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_port *port) { - int devnum; - dev_t base; - spin_lock(&port_lock); - devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (devnum >= IB_UMAD_MAX_PORTS) { + port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (port->dev_num >= IB_UMAD_MAX_PORTS) { spin_unlock(&port_lock); - devnum = find_overflow_devnum(); - if (devnum < 0) - return -1; - - spin_lock(&port_lock); - port->dev_num = devnum + IB_UMAD_MAX_PORTS; - base = devnum + overflow_maj; - set_bit(devnum, overflow_map); - } else { - port->dev_num = devnum; - base = devnum + base_dev; - set_bit(devnum, dev_map); + return -1; } + set_bit(port->dev_num, dev_map); spin_unlock(&port_lock); port->ib_dev = device; 
@@ -1024,14 +1008,17 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); - cdev_init(&port->cdev, &umad_fops); - port->cdev.owner = THIS_MODULE; - kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); - if (cdev_add(&port->cdev, base, 1)) + port->cdev = cdev_alloc(); + if (!port->cdev) + return -1; + port->cdev->owner = THIS_MODULE; + port->cdev->ops = &umad_fops; + kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num); + if (cdev_add(port->cdev, base_dev + port->dev_num, 1)) goto err_cdev; port->dev = device_create(umad_class, device->dma_device, - port->cdev.dev, port, + port->cdev->dev, port, "umad%d", port->dev_num); if (IS_ERR(port->dev)) goto err_cdev; @@ -1041,15 +1028,17 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (device_create_file(port->dev, &dev_attr_port)) goto err_dev; - base += IB_UMAD_MAX_PORTS; - cdev_init(&port->sm_cdev, &umad_sm_fops); - port->sm_cdev.owner = THIS_MODULE; - kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); - if (cdev_add(&port->sm_cdev, base, 1)) + port->sm_cdev = cdev_alloc(); + if (!port->sm_cdev) + goto err_dev; + port->sm_cdev->owner = THIS_MODULE; + port->sm_cdev->ops = &umad_sm_fops; + kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num); + if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) goto err_sm_cdev; port->sm_dev = device_create(umad_class, device->dma_device, - port->sm_cdev.dev, port, + port->sm_cdev->dev, port, "issm%d", port->dev_num); if (IS_ERR(port->sm_dev)) goto err_sm_cdev; @@ -1059,23 +1048,24 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (device_create_file(port->sm_dev, &dev_attr_port)) goto err_sm_dev; + spin_lock(&port_lock); + umad_port[port->dev_num] = port; + spin_unlock(&port_lock); + return 0; err_sm_dev: - device_destroy(umad_class, port->sm_cdev.dev); + device_destroy(umad_class, 
port->sm_cdev->dev); err_sm_cdev: - cdev_del(&port->sm_cdev); + cdev_del(port->sm_cdev); err_dev: - device_destroy(umad_class, port->cdev.dev); + device_destroy(umad_class, port->cdev->dev); err_cdev: - cdev_del(&port->cdev); - if (port->dev_num < IB_UMAD_MAX_PORTS) - clear_bit(devnum, dev_map); - else - clear_bit(devnum, overflow_map); + cdev_del(port->cdev); + clear_bit(port->dev_num, dev_map); return -1; } @@ -1089,11 +1079,15 @@ static void ib_umad_kill_port(struct ib_umad_port *port) dev_set_drvdata(port->dev, NULL); dev_set_drvdata(port->sm_dev, NULL); - device_destroy(umad_class, port->cdev.dev); - device_destroy(umad_class, port->sm_cdev.dev); + device_destroy(umad_class, port->cdev->dev); + device_destroy(umad_class, port->sm_cdev->dev); - cdev_del(&port->cdev); - cdev_del(&port->sm_cdev); + cdev_del(port->cdev); + cdev_del(port->sm_cdev); + + spin_lock(&port_lock); + umad_port[port->dev_num] = NULL; + spin_unlock(&port_lock); mutex_lock(&port->file_mutex); @@ -1112,10 +1106,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port) mutex_unlock(&port->file_mutex); - if (port->dev_num < IB_UMAD_MAX_PORTS) - clear_bit(port->dev_num, dev_map); - else - clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map); + clear_bit(port->dev_num, dev_map); } static void ib_umad_add_one(struct ib_device *device) @@ -1223,8 +1214,6 @@ static void __exit ib_umad_cleanup(void) ib_unregister_client(&umad_client); class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); - if (overflow_maj) - unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2); } module_init(ib_umad_init); diff --git a/trunk/drivers/infiniband/core/uverbs.h b/trunk/drivers/infiniband/core/uverbs.h index e54d9ac6d1ca..b3ea9587dc80 100644 --- a/trunk/drivers/infiniband/core/uverbs.h +++ b/trunk/drivers/infiniband/core/uverbs.h @@ -41,7 +41,6 @@ #include #include #include -#include #include #include @@ -70,23 +69,23 @@ struct ib_uverbs_device { struct kref ref; - int 
num_comp_vectors; struct completion comp; + int devnum; + struct cdev *cdev; struct device *dev; struct ib_device *ib_dev; - int devnum; - struct cdev cdev; + int num_comp_vectors; }; struct ib_uverbs_event_file { struct kref ref; - int is_async; struct ib_uverbs_file *uverbs_file; spinlock_t lock; - int is_closed; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; struct list_head event_list; + int is_async; + int is_closed; }; struct ib_uverbs_file { diff --git a/trunk/drivers/infiniband/core/uverbs_main.c b/trunk/drivers/infiniband/core/uverbs_main.c index dbf04511cf0a..5f284ffd430e 100644 --- a/trunk/drivers/infiniband/core/uverbs_main.c +++ b/trunk/drivers/infiniband/core/uverbs_main.c @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -74,39 +75,40 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); static DEFINE_SPINLOCK(map_lock); +static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, - 
[IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, - [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, - [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, - [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, - [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, - [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, - [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, + [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, + [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, + [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, + [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, + [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, + [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + 
[IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; static struct vfsmount *uverbs_event_mnt; @@ -368,7 +370,7 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp) static const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, - .read = ib_uverbs_event_read, + .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, .fasync = ib_uverbs_event_fasync @@ -615,12 +617,14 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) /* * ib_uverbs_open() does not need the BKL: * - * - the ib_uverbs_device structures are properly reference counted and + * - dev_table[] accesses are protected by map_lock, the + * ib_uverbs_device structures are properly reference counted, and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; - * - the open method will either immediately run -ENXIO, or all - * required initialization will be done. + * - the device is added to dev_table[] as the last part of module + * initialization, the open method will either immediately run + * -ENXIO, or all required initialization will be done. 
*/ static int ib_uverbs_open(struct inode *inode, struct file *filp) { @@ -628,10 +632,13 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) struct ib_uverbs_file *file; int ret; - dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); + spin_lock(&map_lock); + dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR]; if (dev) kref_get(&dev->ref); - else + spin_unlock(&map_lock); + + if (!dev) return -ENXIO; if (!try_module_get(dev->ib_dev->owner)) { @@ -678,17 +685,17 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) } static const struct file_operations uverbs_fops = { - .owner = THIS_MODULE, - .write = ib_uverbs_write, - .open = ib_uverbs_open, + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .open = ib_uverbs_open, .release = ib_uverbs_close }; static const struct file_operations uverbs_mmap_fops = { - .owner = THIS_MODULE, - .write = ib_uverbs_write, + .owner = THIS_MODULE, + .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, - .open = ib_uverbs_open, + .open = ib_uverbs_open, .release = ib_uverbs_close }; @@ -728,38 +735,8 @@ static ssize_t show_abi_version(struct class *class, char *buf) } static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); -static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES); - -/* - * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by - * requesting a new major number and doubling the number of max devices we - * support. It's stupid, but simple. 
- */ -static int find_overflow_devnum(void) -{ - int ret; - - if (!overflow_maj) { - ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, - "infiniband_verbs"); - if (ret) { - printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n"); - return ret; - } - } - - ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES); - if (ret >= IB_UVERBS_MAX_DEVICES) - return -1; - - return ret; -} - static void ib_uverbs_add_one(struct ib_device *device) { - int devnum; - dev_t base; struct ib_uverbs_device *uverbs_dev; if (!device->alloc_ucontext) @@ -773,36 +750,28 @@ static void ib_uverbs_add_one(struct ib_device *device) init_completion(&uverbs_dev->comp); spin_lock(&map_lock); - devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); - if (devnum >= IB_UVERBS_MAX_DEVICES) { + uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); + if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { spin_unlock(&map_lock); - devnum = find_overflow_devnum(); - if (devnum < 0) - goto err; - - spin_lock(&map_lock); - uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; - base = devnum + overflow_maj; - set_bit(devnum, overflow_map); - } else { - uverbs_dev->devnum = devnum; - base = devnum + IB_UVERBS_BASE_DEV; - set_bit(devnum, dev_map); + goto err; } + set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); uverbs_dev->ib_dev = device; uverbs_dev->num_comp_vectors = device->num_comp_vectors; - cdev_init(&uverbs_dev->cdev, NULL); - uverbs_dev->cdev.owner = THIS_MODULE; - uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; - kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(&uverbs_dev->cdev, base, 1)) + uverbs_dev->cdev = cdev_alloc(); + if (!uverbs_dev->cdev) + goto err; + uverbs_dev->cdev->owner = THIS_MODULE; + uverbs_dev->cdev->ops = device->mmap ? 
&uverbs_mmap_fops : &uverbs_fops; + kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) goto err_cdev; uverbs_dev->dev = device_create(uverbs_class, device->dma_device, - uverbs_dev->cdev.dev, uverbs_dev, + uverbs_dev->cdev->dev, uverbs_dev, "uverbs%d", uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; @@ -812,19 +781,20 @@ static void ib_uverbs_add_one(struct ib_device *device) if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; + spin_lock(&map_lock); + dev_table[uverbs_dev->devnum] = uverbs_dev; + spin_unlock(&map_lock); + ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: - device_destroy(uverbs_class, uverbs_dev->cdev.dev); + device_destroy(uverbs_class, uverbs_dev->cdev->dev); err_cdev: - cdev_del(&uverbs_dev->cdev); - if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) - clear_bit(devnum, dev_map); - else - clear_bit(devnum, overflow_map); + cdev_del(uverbs_dev->cdev); + clear_bit(uverbs_dev->devnum, dev_map); err: kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); @@ -841,13 +811,14 @@ static void ib_uverbs_remove_one(struct ib_device *device) return; dev_set_drvdata(uverbs_dev->dev, NULL); - device_destroy(uverbs_class, uverbs_dev->cdev.dev); - cdev_del(&uverbs_dev->cdev); + device_destroy(uverbs_class, uverbs_dev->cdev->dev); + cdev_del(uverbs_dev->cdev); - if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) - clear_bit(uverbs_dev->devnum, dev_map); - else - clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); + spin_lock(&map_lock); + dev_table[uverbs_dev->devnum] = NULL; + spin_unlock(&map_lock); + + clear_bit(uverbs_dev->devnum, dev_map); kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); wait_for_completion(&uverbs_dev->comp); @@ -937,8 +908,6 @@ static void __exit ib_uverbs_cleanup(void) unregister_filesystem(&uverbs_event_fs); class_destroy(uverbs_class); 
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); - if (overflow_maj) - unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); idr_destroy(&ib_uverbs_mw_idr); diff --git a/trunk/drivers/infiniband/hw/cxgb3/cxio_hal.c b/trunk/drivers/infiniband/hw/cxgb3/cxio_hal.c index a28e862f2d68..0677fc7dfd51 100644 --- a/trunk/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/trunk/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -109,6 +109,7 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { udelay(1); if (i++ > 1000000) { + BUG_ON(1); printk(KERN_ERR "%s: stalled rnic\n", rdev_p->dev_name); return -EIO; @@ -154,7 +155,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); } -int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) +int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) { struct rdma_cq_setup setup; int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); @@ -162,12 +163,12 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); if (!cq->cqid) return -ENOMEM; - if (kernel) { - cq->sw_queue = kzalloc(size, GFP_KERNEL); - if (!cq->sw_queue) - return -ENOMEM; - } - cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size, + cq->sw_queue = kzalloc(size, GFP_KERNEL); + if (!cq->sw_queue) + return -ENOMEM; + cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), + (1UL << (cq->size_log2)) * + sizeof(struct t3_cqe), &(cq->dma_addr), GFP_KERNEL); if (!cq->queue) { kfree(cq->sw_queue); diff --git a/trunk/drivers/infiniband/hw/cxgb3/cxio_hal.h b/trunk/drivers/infiniband/hw/cxgb3/cxio_hal.h index 073373c2c560..f3d440cc68f2 100644 --- a/trunk/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/trunk/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -53,7 +53,7 
@@ #define T3_MAX_PBL_SIZE 256 #define T3_MAX_RQ_SIZE 1024 #define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) -#define T3_MAX_CQ_DEPTH 262144 +#define T3_MAX_CQ_DEPTH 8192 #define T3_MAX_NUM_STAG (1<<15) #define T3_MAX_MR_SIZE 0x100000000ULL #define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ @@ -157,7 +157,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev); void cxio_rdev_close(struct cxio_rdev *rdev); int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, enum t3_cq_opcode op, u32 credit); -int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel); +int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq); int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq); void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); diff --git a/trunk/drivers/infiniband/hw/cxgb3/cxio_wr.h b/trunk/drivers/infiniband/hw/cxgb3/cxio_wr.h index 15073b2da1c5..a197a5b7ac7f 100644 --- a/trunk/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/trunk/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -730,22 +730,7 @@ struct t3_cq { static inline void cxio_set_wq_in_error(struct t3_wq *wq) { - wq->queue->wq_in_err.err |= 1; -} - -static inline void cxio_disable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 2; -} - -static inline void cxio_enable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err &= ~2; -} - -static inline int cxio_wq_db_enabled(struct t3_wq *wq) -{ - return !(wq->queue->wq_in_err.err & 2); + wq->queue->wq_in_err.err = 1; } static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) diff --git a/trunk/drivers/infiniband/hw/cxgb3/iwch.c b/trunk/drivers/infiniband/hw/cxgb3/iwch.c index ee1d8b4d4541..b0ea0105ddf6 100644 --- a/trunk/drivers/infiniband/hw/cxgb3/iwch.c +++ b/trunk/drivers/infiniband/hw/cxgb3/iwch.c @@ -65,46 +65,6 @@ struct cxgb3_client t3c_client = { static LIST_HEAD(dev_list); static DEFINE_MUTEX(dev_mutex); -static int disable_qp_db(int id, void *p, void 
*data) -{ - struct iwch_qp *qhp = p; - - cxio_disable_wq_db(&qhp->wq); - return 0; -} - -static int enable_qp_db(int id, void *p, void *data) -{ - struct iwch_qp *qhp = p; - - if (data) - ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid); - cxio_enable_wq_db(&qhp->wq); - return 0; -} - -static void disable_dbs(struct iwch_dev *rnicp) -{ - spin_lock_irq(&rnicp->lock); - idr_for_each(&rnicp->qpidr, disable_qp_db, NULL); - spin_unlock_irq(&rnicp->lock); -} - -static void enable_dbs(struct iwch_dev *rnicp, int ring_db) -{ - spin_lock_irq(&rnicp->lock); - idr_for_each(&rnicp->qpidr, enable_qp_db, - (void *)(unsigned long)ring_db); - spin_unlock_irq(&rnicp->lock); -} - -static void iwch_db_drop_task(struct work_struct *work) -{ - struct iwch_dev *rnicp = container_of(work, struct iwch_dev, - db_drop_task.work); - enable_dbs(rnicp, 1); -} - static void rnic_init(struct iwch_dev *rnicp) { PDBG("%s iwch_dev %p\n", __func__, rnicp); @@ -112,7 +72,6 @@ static void rnic_init(struct iwch_dev *rnicp) idr_init(&rnicp->qpidr); idr_init(&rnicp->mmidr); spin_lock_init(&rnicp->lock); - INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; @@ -188,8 +147,6 @@ static void close_rnic_dev(struct t3cdev *tdev) mutex_lock(&dev_mutex); list_for_each_entry_safe(dev, tmp, &dev_list, entry) { if (dev->rdev.t3cdev_p == tdev) { - dev->rdev.flags = CXIO_ERROR_FATAL; - cancel_delayed_work_sync(&dev->db_drop_task); list_del(&dev->entry); iwch_unregister_device(dev); cxio_rdev_close(&dev->rdev); @@ -208,8 +165,7 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) struct cxio_rdev *rdev = tdev->ulp; struct iwch_dev *rnicp; struct ib_event event; - u32 portnum = port_id + 1; - int dispatch = 0; + u32 portnum = port_id + 1; if (!rdev) return; @@ -218,49 +174,21 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) case OFFLOAD_STATUS_DOWN: { rdev->flags = 
CXIO_ERROR_FATAL; event.event = IB_EVENT_DEVICE_FATAL; - dispatch = 1; break; } case OFFLOAD_PORT_DOWN: { event.event = IB_EVENT_PORT_ERR; - dispatch = 1; break; } case OFFLOAD_PORT_UP: { event.event = IB_EVENT_PORT_ACTIVE; - dispatch = 1; - break; - } - case OFFLOAD_DB_FULL: { - disable_dbs(rnicp); - break; - } - case OFFLOAD_DB_EMPTY: { - enable_dbs(rnicp, 1); - break; - } - case OFFLOAD_DB_DROP: { - unsigned long delay = 1000; - unsigned short r; - - disable_dbs(rnicp); - get_random_bytes(&r, 2); - delay += r & 1023; - - /* - * delay is between 1000-2023 usecs. - */ - schedule_delayed_work(&rnicp->db_drop_task, - usecs_to_jiffies(delay)); break; } } - if (dispatch) { - event.device = &rnicp->ibdev; - event.element.port_num = portnum; - ib_dispatch_event(&event); - } + event.device = &rnicp->ibdev; + event.element.port_num = portnum; + ib_dispatch_event(&event); return; } diff --git a/trunk/drivers/infiniband/hw/cxgb3/iwch.h b/trunk/drivers/infiniband/hw/cxgb3/iwch.h index a1c44578e039..84735506333f 100644 --- a/trunk/drivers/infiniband/hw/cxgb3/iwch.h +++ b/trunk/drivers/infiniband/hw/cxgb3/iwch.h @@ -36,7 +36,6 @@ #include #include #include -#include #include @@ -111,7 +110,6 @@ struct iwch_dev { struct idr mmidr; spinlock_t lock; struct list_head entry; - struct delayed_work db_drop_task; }; static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) diff --git a/trunk/drivers/infiniband/hw/cxgb3/iwch_provider.c b/trunk/drivers/infiniband/hw/cxgb3/iwch_provider.c index 47b35c6608d2..ed7175549ebd 100644 --- a/trunk/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/trunk/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -187,7 +187,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve entries = roundup_pow_of_two(entries); chp->cq.size_log2 = ilog2(entries); - if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) { + if (cxio_create_cq(&rhp->rdev, &chp->cq)) { kfree(chp); return ERR_PTR(-ENOMEM); } diff --git 
a/trunk/drivers/infiniband/hw/cxgb3/iwch_qp.c b/trunk/drivers/infiniband/hw/cxgb3/iwch_qp.c index b4d893de3650..3eb8cecf81d7 100644 --- a/trunk/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/trunk/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -452,8 +452,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ++(qhp->wq.sq_wptr); } spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); out: if (err) @@ -515,8 +514,7 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, num_wrs--; } spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); out: if (err) @@ -599,8 +597,7 @@ int iwch_bind_mw(struct ib_qp *qp, ++(qhp->wq.sq_wptr); spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); return err; } diff --git a/trunk/drivers/infiniband/hw/ehca/ehca_irq.c b/trunk/drivers/infiniband/hw/ehca/ehca_irq.c index b2b6fea2b141..42be0b15084b 100644 --- a/trunk/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/trunk/drivers/infiniband/hw/ehca/ehca_irq.c @@ -548,10 +548,11 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) struct ehca_eq *eq = &shca->eq; struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; u64 eqe_value, ret; + unsigned long flags; int eqe_cnt, i; int eq_empty = 0; - spin_lock(&eq->irq_spinlock); + spin_lock_irqsave(&eq->irq_spinlock, flags); if (is_irq) { const int max_query_cnt = 100; int query_cnt = 0; @@ -642,7 +643,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) } while (1); unlock_irq_spinlock: - spin_unlock(&eq->irq_spinlock); + spin_unlock_irqrestore(&eq->irq_spinlock, flags); } void ehca_tasklet_eq(unsigned long data) diff --git 
a/trunk/drivers/infiniband/hw/ehca/ehca_qp.c b/trunk/drivers/infiniband/hw/ehca/ehca_qp.c index b105f664d3ef..0338f1fabe8a 100644 --- a/trunk/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/trunk/drivers/infiniband/hw/ehca/ehca_qp.c @@ -55,7 +55,9 @@ static struct kmem_cache *qp_cache; /* * attributes not supported by query qp */ -#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \ + IB_QP_MAX_QP_RD_ATOMIC | \ + IB_QP_ACCESS_FLAGS | \ IB_QP_EN_SQD_ASYNC_NOTIFY) /* diff --git a/trunk/drivers/infiniband/hw/ehca/ehca_sqp.c b/trunk/drivers/infiniband/hw/ehca/ehca_sqp.c index dba8f9f8b996..8c1213f8916a 100644 --- a/trunk/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/trunk/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -222,7 +222,7 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, { int ret; - if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) + if (!port_num || port_num > ibdev->phys_port_cnt) return IB_MAD_RESULT_FAILURE; /* accept only pma request */ diff --git a/trunk/drivers/infiniband/hw/mlx4/qp.c b/trunk/drivers/infiniband/hw/mlx4/qp.c index b377671264e9..2a97c964b9ef 100644 --- a/trunk/drivers/infiniband/hw/mlx4/qp.c +++ b/trunk/drivers/infiniband/hw/mlx4/qp.c @@ -1214,7 +1214,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) { - struct ib_device *ib_dev = sqp->qp.ibqp.device; + struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); diff --git a/trunk/drivers/infiniband/hw/nes/nes.c b/trunk/drivers/infiniband/hw/nes/nes.c index 4272c52e38a4..b9d09bafd6c1 100644 --- a/trunk/drivers/infiniband/hw/nes/nes.c +++ b/trunk/drivers/infiniband/hw/nes/nes.c @@ -110,7 +110,6 @@ static unsigned int 
sysfs_idx_addr; static struct pci_device_id nes_pci_table[] = { {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID}, - {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR, PCI_ANY_ID, PCI_ANY_ID}, {0} }; diff --git a/trunk/drivers/infiniband/hw/nes/nes.h b/trunk/drivers/infiniband/hw/nes/nes.h index cc78fee1dd51..98840564bb2f 100644 --- a/trunk/drivers/infiniband/hw/nes/nes.h +++ b/trunk/drivers/infiniband/hw/nes/nes.h @@ -64,9 +64,8 @@ * NetEffect PCI vendor id and NE010 PCI device id. */ #ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */ -#define PCI_VENDOR_ID_NETEFFECT 0x1678 -#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 -#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110 +#define PCI_VENDOR_ID_NETEFFECT 0x1678 +#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 #endif #define NE020_REV 4 @@ -194,8 +193,8 @@ extern u32 cm_packets_created; extern u32 cm_packets_received; extern u32 cm_packets_dropped; extern u32 cm_packets_retrans; -extern atomic_t cm_listens_created; -extern atomic_t cm_listens_destroyed; +extern u32 cm_listens_created; +extern u32 cm_listens_destroyed; extern u32 cm_backlog_drops; extern atomic_t cm_loopbacks; extern atomic_t cm_nodes_created; diff --git a/trunk/drivers/infiniband/hw/nes/nes_cm.c b/trunk/drivers/infiniband/hw/nes/nes_cm.c index 2a49ee40b520..39468c277036 100644 --- a/trunk/drivers/infiniband/hw/nes/nes_cm.c +++ b/trunk/drivers/infiniband/hw/nes/nes_cm.c @@ -67,8 +67,8 @@ u32 cm_packets_dropped; u32 cm_packets_retrans; u32 cm_packets_created; u32 cm_packets_received; -atomic_t cm_listens_created; -atomic_t cm_listens_destroyed; +u32 cm_listens_created; +u32 cm_listens_destroyed; u32 cm_backlog_drops; atomic_t cm_loopbacks; atomic_t cm_nodes_created; @@ -1011,10 +1011,9 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, event.cm_info.loc_port = loopback->loc_port; event.cm_info.cm_id = loopback->cm_id; - add_ref_cm_node(loopback); - loopback->state = NES_CM_STATE_CLOSED; 
cm_event_connect_error(&event); cm_node->state = NES_CM_STATE_LISTENER_DESTROYED; + loopback->state = NES_CM_STATE_CLOSED; rem_ref_cm_node(cm_node->cm_core, cm_node); @@ -1043,7 +1042,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, kfree(listener); listener = NULL; ret = 0; - atomic_inc(&cm_listens_destroyed); + cm_listens_destroyed++; } else { spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); } @@ -3173,7 +3172,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); return err; } - atomic_inc(&cm_listens_created); + cm_listens_created++; } cm_id->add_ref(cm_id); diff --git a/trunk/drivers/infiniband/hw/nes/nes_hw.c b/trunk/drivers/infiniband/hw/nes/nes_hw.c index ce7f53833577..b1c2cbb88f09 100644 --- a/trunk/drivers/infiniband/hw/nes/nes_hw.c +++ b/trunk/drivers/infiniband/hw/nes/nes_hw.c @@ -748,28 +748,16 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, if (hw_rev != NE020_REV) { /* init serdes 0 */ - switch (nesadapter->phy_type[0]) { - case NES_PHY_TYPE_CX4: - if (wide_ppm_offset) - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA); - else - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - break; - case NES_PHY_TYPE_KR: - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000); - break; - case NES_PHY_TYPE_PUMA_1G: + if (wide_ppm_offset && (nesadapter->phy_type[0] == NES_PHY_TYPE_CX4)) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA); + else nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + + if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); sds |= 0x00000100; nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds); - break; - default: - nes_write_indexed(nesdev, 
NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - break; } - if (!OneG_Mode) nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); @@ -790,9 +778,6 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, if (wide_ppm_offset) nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA); break; - case NES_PHY_TYPE_KR: - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000); - break; case NES_PHY_TYPE_PUMA_1G: sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); sds |= 0x000000100; @@ -1294,115 +1279,115 @@ int nes_destroy_cqp(struct nes_device *nesdev) /** - * nes_init_1g_phy + * nes_init_phy */ -int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) +int nes_init_phy(struct nes_device *nesdev) { + struct nes_adapter *nesadapter = nesdev->nesadapter; u32 counter = 0; + u32 sds; + u32 mac_index = nesdev->mac_index; + u32 tx_config = 0; u16 phy_data; - int ret = 0; + u32 temp_phy_data = 0; + u32 temp_phy_data2 = 0; + u8 phy_type = nesadapter->phy_type[mac_index]; + u8 phy_index = nesadapter->phy_index[mac_index]; - nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000); + if ((nesadapter->OneG_Mode) && + (phy_type != NES_PHY_TYPE_PUMA_1G)) { + nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index); + if (phy_type == NES_PHY_TYPE_1G) { + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_config &= 0xFFFFFFE3; + tx_config |= 0x04; + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + } - /* Reset the PHY */ - nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000); - udelay(100); - counter = 0; - do { + nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000); + + /* Reset the PHY */ + nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000); + udelay(100); + counter = 0; + do { + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + if 
(counter++ > 100) + break; + } while (phy_data & 0x8000); + + /* Setting no phy loopback */ + phy_data &= 0xbfff; + phy_data |= 0x1140; + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data); nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - if (counter++ > 100) { - ret = -1; - break; - } - } while (phy_data & 0x8000); - - /* Setting no phy loopback */ - phy_data &= 0xbfff; - phy_data |= 0x1140; - nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data); - nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data); - nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data); - - /* Setting the interrupt mask */ - nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee); - nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); - - /* turning on flow control */ - nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00); - nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); - - /* Clear Half duplex */ - nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100)); - nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); - - nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300); - - return ret; -} + nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data); + nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data); + /* Setting the interrupt mask */ + nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee); + nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); -/** - * nes_init_2025_phy - */ -int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) -{ - u32 temp_phy_data = 0; - u32 temp_phy_data2 = 0; - u32 counter = 0; - u32 sds; - u32 mac_index = nesdev->mac_index; - int ret = 0; - 
unsigned int first_attempt = 1; + /* turning on flow control */ + nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00); + nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); - /* Check firmware heartbeat */ - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - udelay(1500); - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); - temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + /* Clear Half duplex */ + nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100)); + nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); - if (temp_phy_data != temp_phy_data2) { - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if ((temp_phy_data & 0xff) > 0x20) - return 0; - printk(PFX "Reinitialize external PHY\n"); + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300); + + return 0; } - /* no heartbeat, configure the PHY */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + if ((phy_type == NES_PHY_TYPE_IRIS) || + (phy_type == NES_PHY_TYPE_ARGUS) || + (phy_type == NES_PHY_TYPE_SFP_D)) { + /* setup 10G MDIO operation */ + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_config &= 0xFFFFFFE3; + tx_config |= 0x15; + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + } + if ((phy_type == NES_PHY_TYPE_ARGUS) || + (phy_type == NES_PHY_TYPE_SFP_D)) { + u32 first_time = 1; - switch (phy_type) { - case NES_PHY_TYPE_ARGUS: - nes_write_10G_phy_reg(nesdev, phy_index, 
0x1, 0xc316, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); + /* Check firmware heartbeat */ + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + udelay(1500); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - /* setup LEDs */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); - break; + if (temp_phy_data != temp_phy_data2) { + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if ((temp_phy_data & 0xff) > 0x20) + return 0; + printk(PFX "Reinitializing PHY\n"); + } - case NES_PHY_TYPE_SFP_D: + /* no heartbeat, configure the PHY */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000); nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013); + if (phy_type == NES_PHY_TYPE_ARGUS) { + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001); + } else { + 
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013); + } nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); @@ -1410,136 +1395,71 @@ int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); - break; - - case NES_PHY_TYPE_KR: - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); - - /* setup LEDs */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020); - break; - } - - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528); - /* Bring PHY out of reset */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002); + /* Bring PHY out of reset */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002); - /* Check for heartbeat */ - counter = 0; - mdelay(690); - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - do { - if (counter++ > 
150) { - printk(PFX "No PHY heartbeat\n"); - break; - } - mdelay(1); + /* Check for heartbeat */ + counter = 0; + mdelay(690); nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); - temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - } while ((temp_phy_data2 == temp_phy_data)); - - /* wait for tracking */ - counter = 0; - do { - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if (counter++ > 300) { - if (((temp_phy_data & 0xff) == 0x0) && first_attempt) { - first_attempt = 0; - counter = 0; - /* reset AMCC PHY and try again */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040); - continue; - } else { - ret = 1; + do { + if (counter++ > 150) { + printk(PFX "No PHY heartbeat\n"); break; } - } - mdelay(10); - } while ((temp_phy_data & 0xff) < 0x30); - - /* setup signal integrity */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032); - if (phy_type == NES_PHY_TYPE_KR) { - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C); - } else { + mdelay(1); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + } while ((temp_phy_data2 == temp_phy_data)); + + /* wait for tracking */ + counter = 0; + do { + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if (counter++ > 300) { + if (((temp_phy_data & 0xff) == 0x0) && first_time) { + first_time = 0; + counter = 0; + /* reset AMCC PHY and try again */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040); + continue; + } else { + printk(PFX "PHY did not track\n"); + 
break; + } + } + mdelay(10); + } while ((temp_phy_data & 0xff) < 0x30); + + /* setup signal integrity */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032); nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002); nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063); - } - - /* reset serdes */ - sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200); - sds |= 0x1; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds); - sds &= 0xfffffffe; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds); - - counter = 0; - while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) - && (counter++ < 5000)) - ; - - return ret; -} - -/** - * nes_init_phy - */ -int nes_init_phy(struct nes_device *nesdev) -{ - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 mac_index = nesdev->mac_index; - u32 tx_config = 0; - unsigned long flags; - u8 phy_type = nesadapter->phy_type[mac_index]; - u8 phy_index = nesadapter->phy_index[mac_index]; - int ret = 0; - - tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); - if (phy_type == NES_PHY_TYPE_1G) { - /* setup 1G MDIO operation */ - tx_config &= 0xFFFFFFE3; - tx_config |= 0x04; - } else { - /* setup 10G MDIO operation */ - tx_config &= 0xFFFFFFE3; - tx_config |= 0x15; + /* reset serdes */ + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + + mac_index * 0x200); + sds |= 0x1; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + + mac_index * 0x200, sds); + sds &= 0xfffffffe; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + + mac_index * 0x200, sds); + + counter = 0; + while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) + && (counter++ < 5000)) + ; } - 
nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); - - spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); - - switch (phy_type) { - case NES_PHY_TYPE_1G: - ret = nes_init_1g_phy(nesdev, phy_type, phy_index); - break; - case NES_PHY_TYPE_ARGUS: - case NES_PHY_TYPE_SFP_D: - case NES_PHY_TYPE_KR: - ret = nes_init_2025_phy(nesdev, phy_type, phy_index); - break; - } - - spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); - - return ret; + return 0; } @@ -2540,9 +2460,23 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) } } else { switch (nesadapter->phy_type[mac_index]) { + case NES_PHY_TYPE_IRIS: + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + u32temp = 20; + do { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __func__, phy_data, nesadapter->mac_link_down[mac_index] ? 
"DOWN" : "UP"); + break; + case NES_PHY_TYPE_ARGUS: case NES_PHY_TYPE_SFP_D: - case NES_PHY_TYPE_KR: /* clear the alarms */ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008); nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001); @@ -3418,6 +3352,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, u16 async_event_id; u8 tcp_state; u8 iwarp_state; + int must_disconn = 1; + int must_terminate = 0; struct ib_event ibevent; nes_debug(NES_DBG_AEQ, "\n"); @@ -3431,8 +3367,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, BUG_ON(!context); } - /* context is nesqp unless async_event_id == CQ ERROR */ - nesqp = (struct nes_qp *)(unsigned long)context; async_event_id = (u16)aeq_info; tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; @@ -3444,6 +3378,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, switch (async_event_id) { case NES_AEQE_AEID_LLP_FIN_RECEIVED: + nesqp = (struct nes_qp *)(unsigned long)context; + if (nesqp->term_flags) return; /* Ignore it, wait for close complete */ @@ -3458,48 +3394,79 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, async_event_id, nesqp->last_aeq, tcp_state); } - break; + if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) || + (nesqp->ibqp_state != IB_QPS_RTS)) { + /* FIN Received but tcp state or IB state moved on, + should expect a close complete */ + return; + } + case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: + nesqp = (struct nes_qp *)(unsigned long)context; if (nesqp->term_flags) { nes_terminate_done(nesqp, 0); return; } - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_CLOSING, 0, 0); - nes_cm_disconn(nesqp); - break; + case NES_AEQE_AEID_LLP_CONNECTION_RESET: case 
NES_AEQE_AEID_RESET_SENT: - tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp = (struct nes_qp *)(unsigned long)context; + if (async_event_id == NES_AEQE_AEID_RESET_SENT) { + tcp_state = NES_AEQE_TCP_STATE_CLOSED; + } spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = iwarp_state; nesqp->hw_tcp_state = tcp_state; nesqp->last_aeq = async_event_id; - nesqp->hte_added = 0; - spin_unlock_irqrestore(&nesqp->lock, flags); - next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE; - nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); - nes_cm_disconn(nesqp); - break; - case NES_AEQE_AEID_LLP_CONNECTION_RESET: - if (atomic_read(&nesqp->close_timer_started)) - return; - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; + if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) || + (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) { + nesqp->hte_added = 0; + next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE; + } + + if ((nesqp->ibqp_state == IB_QPS_RTS) && + ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || + (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + switch (nesqp->hw_iwarp_state) { + case NES_AEQE_IWARP_STATE_RTS: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; + break; + case NES_AEQE_IWARP_STATE_TERMINATE: + must_disconn = 0; /* terminate path takes care of disconn */ + if (nesqp->term_flags == 0) + must_terminate = 1; + break; + } + } else { + if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) { + /* FIN Received but ib state not RTS, + close complete will be on its way */ + must_disconn = 0; + } + } spin_unlock_irqrestore(&nesqp->lock, flags); - nes_cm_disconn(nesqp); + + if (must_terminate) + nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); + else if (must_disconn) { + if (next_iwarp_state) { + nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for 
QP%u. next state = 0x%08X\n", + nesqp->hwqp.qp_id, next_iwarp_state); + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); + } + nes_cm_disconn(nesqp); + } break; case NES_AEQE_AEID_TERMINATE_SENT: + nesqp = (struct nes_qp *)(unsigned long)context; nes_terminate_send_fin(nesdev, nesqp, aeqe); break; case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED: + nesqp = (struct nes_qp *)(unsigned long)context; nes_terminate_received(nesdev, nesqp, aeqe); break; @@ -3513,8 +3480,7 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: case NES_AEQE_AEID_AMP_TO_WRAP: - printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n", - nesqp->hwqp.qp_id, async_event_id); + nesqp = (struct nes_qp *)(unsigned long)context; nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR); break; @@ -3522,6 +3488,7 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL: case NES_AEQE_AEID_DDP_UBE_INVALID_MO: case NES_AEQE_AEID_DDP_UBE_INVALID_QN: + nesqp = (struct nes_qp *)(unsigned long)context; if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) { aeq_info &= 0xffff0000; aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE; @@ -3563,8 +3530,7 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, case NES_AEQE_AEID_STAG_ZERO_INVALID: case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST: case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: - printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n", - nesqp->hwqp.qp_id, async_event_id); + nesqp = (struct nes_qp *)(unsigned long)context; nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); break; diff --git a/trunk/drivers/infiniband/hw/nes/nes_hw.h b/trunk/drivers/infiniband/hw/nes/nes_hw.h index 9b1e7f869d83..084be0ee689b 100644 --- a/trunk/drivers/infiniband/hw/nes/nes_hw.h +++ 
b/trunk/drivers/infiniband/hw/nes/nes_hw.h @@ -37,12 +37,12 @@ #define NES_PHY_TYPE_CX4 1 #define NES_PHY_TYPE_1G 2 +#define NES_PHY_TYPE_IRIS 3 #define NES_PHY_TYPE_ARGUS 4 #define NES_PHY_TYPE_PUMA_1G 5 #define NES_PHY_TYPE_PUMA_10G 6 #define NES_PHY_TYPE_GLADIUS 7 #define NES_PHY_TYPE_SFP_D 8 -#define NES_PHY_TYPE_KR 9 #define NES_MULTICAST_PF_MAX 8 diff --git a/trunk/drivers/infiniband/hw/nes/nes_nic.c b/trunk/drivers/infiniband/hw/nes/nes_nic.c index 7dd6ce6e7b99..ab1102780186 100644 --- a/trunk/drivers/infiniband/hw/nes/nes_nic.c +++ b/trunk/drivers/infiniband/hw/nes/nes_nic.c @@ -1230,8 +1230,8 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, target_stat_values[++index] = cm_packets_received; target_stat_values[++index] = cm_packets_dropped; target_stat_values[++index] = cm_packets_retrans; - target_stat_values[++index] = atomic_read(&cm_listens_created); - target_stat_values[++index] = atomic_read(&cm_listens_destroyed); + target_stat_values[++index] = cm_listens_created; + target_stat_values[++index] = cm_listens_destroyed; target_stat_values[++index] = cm_backlog_drops; target_stat_values[++index] = atomic_read(&cm_loopbacks); target_stat_values[++index] = atomic_read(&cm_nodes_created); @@ -1461,9 +1461,9 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd } return 0; } - if ((phy_type == NES_PHY_TYPE_ARGUS) || - (phy_type == NES_PHY_TYPE_SFP_D) || - (phy_type == NES_PHY_TYPE_KR)) { + if ((phy_type == NES_PHY_TYPE_IRIS) || + (phy_type == NES_PHY_TYPE_ARGUS) || + (phy_type == NES_PHY_TYPE_SFP_D)) { et_cmd->transceiver = XCVR_EXTERNAL; et_cmd->port = PORT_FIBRE; et_cmd->supported = SUPPORTED_FIBRE; @@ -1583,7 +1583,8 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, struct net_device *netdev; struct nic_qp_map *curr_qp_map; u32 u32temp; - u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index]; + u16 phy_data; + u16 temp_phy_data; netdev = alloc_etherdev(sizeof(struct nes_vnic)); if 
(!netdev) { @@ -1691,23 +1692,65 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, if ((nesdev->netdev_count == 0) && ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) || - ((phy_type == NES_PHY_TYPE_PUMA_1G) && + ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) && (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { + /* + * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n", + * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1))); + */ u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesdev->mac_index & 1))); - if (phy_type != NES_PHY_TYPE_PUMA_1G) { + if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) { u32temp |= 0x00200000; nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesdev->mac_index & 1)), u32temp); } + u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1))); + + if ((u32temp&0x0f1f0000) == 0x0f0f0000) { + if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) { + nes_init_phy(nesdev); + nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1); + temp_phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + u32temp = 20; + do { + nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1); + phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + if (phy_data & 4) { + nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); + nesvnic->linkup = 1; + } else { + nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n"); + } + } else { + nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); + nesvnic->linkup = 1; + } + } else if 
(nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) { + nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n", + nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn)); + if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) || + ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) { + nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); + nesvnic->linkup = 1; + } + } /* clear the MAC interrupt status, assumes direct logical to physical mapping */ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp); - nes_init_phy(nesdev); + if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_IRIS) + nes_init_phy(nesdev); } diff --git a/trunk/drivers/infiniband/hw/nes/nes_verbs.c b/trunk/drivers/infiniband/hw/nes/nes_verbs.c index 815725f886c4..64d3136e3747 100644 --- a/trunk/drivers/infiniband/hw/nes/nes_verbs.c +++ b/trunk/drivers/infiniband/hw/nes/nes_verbs.c @@ -228,7 +228,7 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, /* Check for SQ overflow */ if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { spin_unlock_irqrestore(&nesqp->lock, flags); - return -ENOMEM; + return -EINVAL; } wqe = &nesqp->hwqp.sq_vbase[head]; @@ -3294,7 +3294,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, /* Check for SQ overflow */ if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { - err = -ENOMEM; + err = -EINVAL; break; } @@ -3577,7 +3577,7 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, } /* Check for RQ overflow */ if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) { - err = -ENOMEM; + err = -EINVAL; break; } diff --git 
a/trunk/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/trunk/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index d10b4ec68d28..e9795f60e5d6 100644 --- a/trunk/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/trunk/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -55,7 +55,9 @@ static int ipoib_get_coalesce(struct net_device *dev, struct ipoib_dev_priv *priv = netdev_priv(dev); coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs; + coal->tx_coalesce_usecs = priv->ethtool.coalesce_usecs; coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames; + coal->tx_max_coalesced_frames = priv->ethtool.max_coalesced_frames; return 0; } @@ -67,8 +69,10 @@ static int ipoib_set_coalesce(struct net_device *dev, int ret; /* - * These values are saved in the private data and returned - * when ipoib_get_coalesce() is called + * Since IPoIB uses a single CQ for both rx and tx, we assume + * that rx params dictate the configuration. These values are + * saved in the private data and returned when ipoib_get_coalesce() + * is called. 
*/ if (coal->rx_coalesce_usecs > 0xffff || coal->rx_max_coalesced_frames > 0xffff) @@ -81,6 +85,8 @@ static int ipoib_set_coalesce(struct net_device *dev, return ret; } + coal->tx_coalesce_usecs = coal->rx_coalesce_usecs; + coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames; priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs; priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames; diff --git a/trunk/drivers/infiniband/ulp/iser/iscsi_iser.c b/trunk/drivers/infiniband/ulp/iser/iscsi_iser.c index 71237f8f78f7..5f7a6fca0a4d 100644 --- a/trunk/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/trunk/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -128,28 +128,6 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) return 0; } -int iser_initialize_task_headers(struct iscsi_task *task, - struct iser_tx_desc *tx_desc) -{ - struct iscsi_iser_conn *iser_conn = task->conn->dd_data; - struct iser_device *device = iser_conn->ib_conn->device; - struct iscsi_iser_task *iser_task = task->dd_data; - u64 dma_addr; - - dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc, - ISER_HEADERS_LEN, DMA_TO_DEVICE); - if (ib_dma_mapping_error(device->ib_device, dma_addr)) - return -ENOMEM; - - tx_desc->dma_addr = dma_addr; - tx_desc->tx_sg[0].addr = tx_desc->dma_addr; - tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; - tx_desc->tx_sg[0].lkey = device->mr->lkey; - - iser_task->headers_initialized = 1; - iser_task->iser_conn = iser_conn; - return 0; -} /** * iscsi_iser_task_init - Initialize task * @task: iscsi task @@ -159,17 +137,17 @@ int iser_initialize_task_headers(struct iscsi_task *task, static int iscsi_iser_task_init(struct iscsi_task *task) { + struct iscsi_iser_conn *iser_conn = task->conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; - if (!iser_task->headers_initialized) - if (iser_initialize_task_headers(task, &iser_task->desc)) - return -ENOMEM; - /* mgmt task */ - if (!task->sc) + if (!task->sc) { + 
iser_task->desc.data = task->data; return 0; + } iser_task->command_sent = 0; + iser_task->iser_conn = iser_conn; iser_task_rdma_init(iser_task); return 0; } @@ -190,7 +168,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) { int error = 0; - iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt); + iser_dbg("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); error = iser_send_control(conn, task); @@ -200,6 +178,9 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) * - if yes, the task is recycled at iscsi_complete_pdu * - if no, the task is recycled at iser_snd_completion */ + if (error && error != -ENOBUFS) + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + return error; } @@ -251,7 +232,7 @@ iscsi_iser_task_xmit(struct iscsi_task *task) task->imm_count, task->unsol_r2t.data_length); } - iser_dbg("ctask xmit [cid %d itt 0x%x]\n", + iser_dbg("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); /* Send the cmd PDU */ @@ -267,6 +248,8 @@ iscsi_iser_task_xmit(struct iscsi_task *task) error = iscsi_iser_task_xmit_unsol_data(conn, task); iscsi_iser_task_xmit_exit: + if (error && error != -ENOBUFS) + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; } @@ -300,7 +283,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) * due to issues with the login code re iser sematics * this not set in iscsi_conn_setup - FIXME */ - conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN; + conn->max_recv_dlength = 128; iser_conn = conn->dd_data; conn->dd_data = iser_conn; @@ -418,7 +401,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct Scsi_Host *shost; struct iser_conn *ib_conn; - shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); + shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 1); if (!shost) return NULL; shost->transportt = iscsi_iser_scsi_transport; @@ -692,7 +675,7 @@ static int __init iser_init(void) memset(&ig, 0, sizeof(struct iser_global)); ig.desc_cache = 
kmem_cache_create("iser_descriptors", - sizeof(struct iser_tx_desc), + sizeof (struct iser_desc), 0, SLAB_HWCACHE_ALIGN, NULL); if (ig.desc_cache == NULL) diff --git a/trunk/drivers/infiniband/ulp/iser/iscsi_iser.h b/trunk/drivers/infiniband/ulp/iser/iscsi_iser.h index 036934cdcb92..9d529cae1f0d 100644 --- a/trunk/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/trunk/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -102,9 +102,9 @@ #define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * * SCSI_TMFUNC(2), LOGOUT(1) */ -#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX) - -#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2) +#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \ + ISER_MAX_RX_MISC_PDUS + \ + ISER_MAX_TX_MISC_PDUS) /* the max TX (send) WR supported by the iSER QP is defined by * * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * @@ -132,12 +132,6 @@ struct iser_hdr { __be64 read_va; } __attribute__((packed)); -/* Constant PDU lengths calculations */ -#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr)) - -#define ISER_RECV_DATA_SEG_LEN 128 -#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN) -#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN) /* Length of an object name string */ #define ISER_OBJECT_NAME_SIZE 64 @@ -193,43 +187,51 @@ struct iser_regd_buf { struct iser_mem_reg reg; /* memory registration info */ void *virt_addr; struct iser_device *device; /* device->device for dma_unmap */ + u64 dma_addr; /* if non zero, addr for dma_unmap */ enum dma_data_direction direction; /* direction for dma_unmap */ unsigned int data_size; + atomic_t ref_count; /* refcount, freed when dec to 0 */ +}; + +#define MAX_REGD_BUF_VECTOR_LEN 2 + +struct iser_dto { + struct iscsi_iser_task *task; + struct iser_conn *ib_conn; + int notify_enable; + + /* vector of registered buffers */ + unsigned int regd_vector_len; + struct iser_regd_buf 
*regd[MAX_REGD_BUF_VECTOR_LEN]; + + /* offset into the registered buffer may be specified */ + unsigned int offset[MAX_REGD_BUF_VECTOR_LEN]; + + /* a smaller size may be specified, if 0, then full size is used */ + unsigned int used_sz[MAX_REGD_BUF_VECTOR_LEN]; }; enum iser_desc_type { + ISCSI_RX, ISCSI_TX_CONTROL , ISCSI_TX_SCSI_COMMAND, ISCSI_TX_DATAOUT }; -struct iser_tx_desc { +struct iser_desc { struct iser_hdr iser_header; struct iscsi_hdr iscsi_header; + struct iser_regd_buf hdr_regd_buf; + void *data; /* used by RX & TX_CONTROL */ + struct iser_regd_buf data_regd_buf; /* used by RX & TX_CONTROL */ enum iser_desc_type type; - u64 dma_addr; - /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either - of immediate data, unsolicited data-out or control (login,text) */ - struct ib_sge tx_sg[2]; - int num_sge; + struct iser_dto dto; }; -#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ - sizeof(u64) + sizeof(struct ib_sge))) -struct iser_rx_desc { - struct iser_hdr iser_header; - struct iscsi_hdr iscsi_header; - char data[ISER_RECV_DATA_SEG_LEN]; - u64 dma_addr; - struct ib_sge rx_sg; - char pad[ISER_RX_PAD_SIZE]; -} __attribute__((packed)); - struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; - struct ib_cq *rx_cq; - struct ib_cq *tx_cq; + struct ib_cq *cq; struct ib_mr *mr; struct tasklet_struct cq_tasklet; struct list_head ig_list; /* entry in ig devices list */ @@ -248,18 +250,15 @@ struct iser_conn { struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ int disc_evt_flag; /* disconn event delivered */ wait_queue_head_t wait; /* waitq for conn/disconn */ - int post_recv_buf_count; /* posted rx count */ + atomic_t post_recv_buf_count; /* posted rx count */ atomic_t post_send_buf_count; /* posted tx count */ + atomic_t unexpected_pdu_count;/* count of received * + * unexpected pdus * + * not yet retired */ char name[ISER_OBJECT_NAME_SIZE]; struct iser_page_vec *page_vec; /* represents SG to fmr maps* * maps serialized as 
tx is*/ struct list_head conn_list; /* entry in ig conn list */ - - char *login_buf; - u64 login_dma; - unsigned int rx_desc_head; - struct iser_rx_desc *rx_descs; - struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; }; struct iscsi_iser_conn { @@ -268,7 +267,7 @@ struct iscsi_iser_conn { }; struct iscsi_iser_task { - struct iser_tx_desc desc; + struct iser_desc desc; struct iscsi_iser_conn *iser_conn; enum iser_task_status status; int command_sent; /* set if command sent */ @@ -276,7 +275,6 @@ struct iscsi_iser_task { struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ - int headers_initialized; }; struct iser_page_vec { @@ -324,17 +322,22 @@ void iser_conn_put(struct iser_conn *ib_conn); void iser_conn_terminate(struct iser_conn *ib_conn); -void iser_rcv_completion(struct iser_rx_desc *desc, - unsigned long dto_xfer_len, - struct iser_conn *ib_conn); +void iser_rcv_completion(struct iser_desc *desc, + unsigned long dto_xfer_len); -void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn); +void iser_snd_completion(struct iser_desc *desc); void iser_task_rdma_init(struct iscsi_iser_task *task); void iser_task_rdma_finalize(struct iscsi_iser_task *task); -void iser_free_rx_descriptors(struct iser_conn *ib_conn); +void iser_dto_buffs_release(struct iser_dto *dto); + +int iser_regd_buff_release(struct iser_regd_buf *regd_buf); + +void iser_reg_single(struct iser_device *device, + struct iser_regd_buf *regd_buf, + enum dma_data_direction direction); void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); @@ -353,9 +356,11 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, void iser_unreg_mem(struct iser_mem_reg *mem_reg); -int iser_post_recvl(struct iser_conn *ib_conn); -int iser_post_recvm(struct iser_conn *ib_conn, int count); -int iser_post_send(struct iser_conn *ib_conn, 
struct iser_tx_desc *tx_desc); +int iser_post_recv(struct iser_desc *rx_desc); +int iser_post_send(struct iser_desc *tx_desc); + +int iser_conn_state_comp(struct iser_conn *ib_conn, + enum iser_ib_conn_state comp); int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data, @@ -363,6 +368,4 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, enum dma_data_direction dma_dir); void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); -int iser_initialize_task_headers(struct iscsi_task *task, - struct iser_tx_desc *tx_desc); #endif diff --git a/trunk/drivers/infiniband/ulp/iser/iser_initiator.c b/trunk/drivers/infiniband/ulp/iser/iser_initiator.c index 0b9ef0716588..9de640200ad3 100644 --- a/trunk/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/trunk/drivers/infiniband/ulp/iser/iser_initiator.c @@ -39,6 +39,29 @@ #include "iscsi_iser.h" +/* Constant PDU lengths calculations */ +#define ISER_TOTAL_HEADERS_LEN (sizeof (struct iser_hdr) + \ + sizeof (struct iscsi_hdr)) + +/* iser_dto_add_regd_buff - increments the reference count for * + * the registered buffer & adds it to the DTO object */ +static void iser_dto_add_regd_buff(struct iser_dto *dto, + struct iser_regd_buf *regd_buf, + unsigned long use_offset, + unsigned long use_size) +{ + int add_idx; + + atomic_inc(&regd_buf->ref_count); + + add_idx = dto->regd_vector_len; + dto->regd[add_idx] = regd_buf; + dto->used_sz[add_idx] = use_size; + dto->offset[add_idx] = use_offset; + + dto->regd_vector_len++; +} + /* Register user buffer memory and initialize passive rdma * dto descriptor. 
Total data size is stored in * iser_task->data[ISER_DIR_IN].data_len @@ -99,9 +122,9 @@ iser_prepare_write_cmd(struct iscsi_task *task, struct iscsi_iser_task *iser_task = task->dd_data; struct iser_regd_buf *regd_buf; int err; + struct iser_dto *send_dto = &iser_task->desc.dto; struct iser_hdr *hdr = &iser_task->desc.iser_header; struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; - struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1]; err = iser_dma_map_task_data(iser_task, buf_out, @@ -140,100 +163,135 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (imm_sz > 0) { iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", task->itt, imm_sz); - tx_dsg->addr = regd_buf->reg.va; - tx_dsg->length = imm_sz; - tx_dsg->lkey = regd_buf->reg.lkey; - iser_task->desc.num_sge = 2; + iser_dto_add_regd_buff(send_dto, + regd_buf, + 0, + imm_sz); } return 0; } -/* creates a new tx descriptor and adds header regd buffer */ -static void iser_create_send_desc(struct iser_conn *ib_conn, - struct iser_tx_desc *tx_desc) +/** + * iser_post_receive_control - allocates, initializes and posts receive DTO. 
+ */ +static int iser_post_receive_control(struct iscsi_conn *conn) { - struct iser_device *device = ib_conn->device; + struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_desc *rx_desc; + struct iser_regd_buf *regd_hdr; + struct iser_regd_buf *regd_data; + struct iser_dto *recv_dto = NULL; + struct iser_device *device = iser_conn->ib_conn->device; + int rx_data_size, err; + int posts, outstanding_unexp_pdus; + + /* for the login sequence we must support rx of upto 8K; login is done + * after conn create/bind (connect) and conn stop/bind (reconnect), + * what's common for both schemes is that the connection is not started + */ + if (conn->c_stage != ISCSI_CONN_STARTED) + rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN; + else /* FIXME till user space sets conn->max_recv_dlength correctly */ + rx_data_size = 128; - ib_dma_sync_single_for_cpu(device->ib_device, - tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); + outstanding_unexp_pdus = + atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0); - memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); - tx_desc->iser_header.flags = ISER_VER; + /* + * in addition to the response buffer, replace those consumed by + * unexpected pdus. 
+ */ + for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) { + rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); + if (rx_desc == NULL) { + iser_err("Failed to alloc desc for post recv %d\n", + posts); + err = -ENOMEM; + goto post_rx_cache_alloc_failure; + } + rx_desc->type = ISCSI_RX; + rx_desc->data = kmalloc(rx_data_size, GFP_NOIO); + if (rx_desc->data == NULL) { + iser_err("Failed to alloc data buf for post recv %d\n", + posts); + err = -ENOMEM; + goto post_rx_kmalloc_failure; + } - tx_desc->num_sge = 1; + recv_dto = &rx_desc->dto; + recv_dto->ib_conn = iser_conn->ib_conn; + recv_dto->regd_vector_len = 0; - if (tx_desc->tx_sg[0].lkey != device->mr->lkey) { - tx_desc->tx_sg[0].lkey = device->mr->lkey; - iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc); - } -} + regd_hdr = &rx_desc->hdr_regd_buf; + memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); + regd_hdr->device = device; + regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */ + regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; + iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); -int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) -{ - int i, j; - u64 dma_addr; - struct iser_rx_desc *rx_desc; - struct ib_sge *rx_sg; - struct iser_device *device = ib_conn->device; - - ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS * - sizeof(struct iser_rx_desc), GFP_KERNEL); - if (!ib_conn->rx_descs) - goto rx_desc_alloc_fail; - - rx_desc = ib_conn->rx_descs; - - for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) { - dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(device->ib_device, dma_addr)) - goto rx_desc_dma_map_failed; - - rx_desc->dma_addr = dma_addr; - - rx_sg = &rx_desc->rx_sg; - rx_sg->addr = rx_desc->dma_addr; - rx_sg->length = ISER_RX_PAYLOAD_SIZE; - rx_sg->lkey = device->mr->lkey; - } + iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0); - ib_conn->rx_desc_head = 0; + regd_data = 
&rx_desc->data_regd_buf; + memset(regd_data, 0, sizeof(struct iser_regd_buf)); + regd_data->device = device; + regd_data->virt_addr = rx_desc->data; + regd_data->data_size = rx_data_size; + + iser_reg_single(device, regd_data, DMA_FROM_DEVICE); + + iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0); + + err = iser_post_recv(rx_desc); + if (err) { + iser_err("Failed iser_post_recv for post %d\n", posts); + goto post_rx_post_recv_failure; + } + } + /* all posts successful */ return 0; -rx_desc_dma_map_failed: - rx_desc = ib_conn->rx_descs; - for (j = 0; j < i; j++, rx_desc++) - ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->rx_descs); - ib_conn->rx_descs = NULL; -rx_desc_alloc_fail: - iser_err("failed allocating rx descriptors / data buffers\n"); - return -ENOMEM; +post_rx_post_recv_failure: + iser_dto_buffs_release(recv_dto); + kfree(rx_desc->data); +post_rx_kmalloc_failure: + kmem_cache_free(ig.desc_cache, rx_desc); +post_rx_cache_alloc_failure: + if (posts > 0) { + /* + * response buffer posted, but did not replace all unexpected + * pdu recv bufs. 
Ignore error, retry occurs next send + */ + outstanding_unexp_pdus -= (posts - 1); + err = 0; + } + atomic_add(outstanding_unexp_pdus, + &iser_conn->ib_conn->unexpected_pdu_count); + + return err; } -void iser_free_rx_descriptors(struct iser_conn *ib_conn) +/* creates a new tx descriptor and adds header regd buffer */ +static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn, + struct iser_desc *tx_desc) { - int i; - struct iser_rx_desc *rx_desc; - struct iser_device *device = ib_conn->device; + struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf; + struct iser_dto *send_dto = &tx_desc->dto; - if (ib_conn->login_buf) { - ib_dma_unmap_single(device->ib_device, ib_conn->login_dma, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->login_buf); - } + memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); + regd_hdr->device = iser_conn->ib_conn->device; + regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ + regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; - if (!ib_conn->rx_descs) - return; + send_dto->ib_conn = iser_conn->ib_conn; + send_dto->notify_enable = 1; + send_dto->regd_vector_len = 0; - rx_desc = ib_conn->rx_descs; - for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) - ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->rx_descs); + memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); + tx_desc->iser_header.flags = ISER_VER; + + iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0); } /** @@ -243,23 +301,46 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX); + int i; + /* + * FIXME this value should be declared to the target during login with + * the MaxOutstandingUnexpectedPDUs key when supported + */ + int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS; + + iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num); /* Check that there is 
no posted recv or send buffers left - */ /* they must be consumed during the login phase */ - BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0); + BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0); BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); - if (iser_alloc_rx_descriptors(iser_conn->ib_conn)) - return -ENOMEM; - /* Initial post receive buffers */ - if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX)) - return -ENOMEM; + for (i = 0; i < initial_post_recv_bufs_num; i++) { + if (iser_post_receive_control(conn) != 0) { + iser_err("Failed to post recv bufs at:%d conn:0x%p\n", + i, conn); + return -ENOMEM; + } + } + iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn); + return 0; +} + +static int +iser_check_xmit(struct iscsi_conn *conn, void *task) +{ + struct iscsi_iser_conn *iser_conn = conn->dd_data; + if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == + ISER_QP_MAX_REQ_DTOS) { + iser_dbg("%ld can't xmit task %p\n",jiffies,task); + return -ENOBUFS; + } return 0; } + /** * iser_send_command - send command PDU */ @@ -268,18 +349,27 @@ int iser_send_command(struct iscsi_conn *conn, { struct iscsi_iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_dto *send_dto = NULL; unsigned long edtl; - int err; + int err = 0; struct iser_data_buf *data_buf; struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr; struct scsi_cmnd *sc = task->sc; - struct iser_tx_desc *tx_desc = &iser_task->desc; + + if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { + iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); + return -EPERM; + } + if (iser_check_xmit(conn, task)) + return -ENOBUFS; edtl = ntohl(hdr->data_length); /* build the tx desc regd header and add it to the tx desc dto */ - tx_desc->type = ISCSI_TX_SCSI_COMMAND; - iser_create_send_desc(iser_conn->ib_conn, tx_desc); + iser_task->desc.type = ISCSI_TX_SCSI_COMMAND; + send_dto = 
&iser_task->desc.dto; + send_dto->task = iser_task; + iser_create_send_desc(iser_conn, &iser_task->desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) data_buf = &iser_task->data[ISER_DIR_IN]; @@ -308,13 +398,23 @@ int iser_send_command(struct iscsi_conn *conn, goto send_command_error; } + iser_reg_single(iser_conn->ib_conn->device, + send_dto->regd[0], DMA_TO_DEVICE); + + if (iser_post_receive_control(conn) != 0) { + iser_err("post_recv failed!\n"); + err = -ENOMEM; + goto send_command_error; + } + iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(iser_conn->ib_conn, tx_desc); + err = iser_post_send(&iser_task->desc); if (!err) return 0; send_command_error: + iser_dto_buffs_release(send_dto); iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err); return err; } @@ -328,13 +428,20 @@ int iser_send_data_out(struct iscsi_conn *conn, { struct iscsi_iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_tx_desc *tx_desc = NULL; - struct iser_regd_buf *regd_buf; + struct iser_desc *tx_desc = NULL; + struct iser_dto *send_dto = NULL; unsigned long buf_offset; unsigned long data_seg_len; uint32_t itt; int err = 0; - struct ib_sge *tx_dsg; + + if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { + iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); + return -EPERM; + } + + if (iser_check_xmit(conn, task)) + return -ENOBUFS; itt = (__force uint32_t)hdr->itt; data_seg_len = ntoh24(hdr->dlength); @@ -343,25 +450,28 @@ int iser_send_data_out(struct iscsi_conn *conn, iser_dbg("%s itt %d dseg_len %d offset %d\n", __func__,(int)itt,(int)data_seg_len,(int)buf_offset); - tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC); + tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); if (tx_desc == NULL) { iser_err("Failed to alloc desc for post dataout\n"); return -ENOMEM; } tx_desc->type = ISCSI_TX_DATAOUT; - tx_desc->iser_header.flags = ISER_VER; memcpy(&tx_desc->iscsi_header, 
hdr, sizeof(struct iscsi_hdr)); - /* build the tx desc */ - iser_initialize_task_headers(task, tx_desc); + /* build the tx desc regd header and add it to the tx desc dto */ + send_dto = &tx_desc->dto; + send_dto->task = iser_task; + iser_create_send_desc(iser_conn, tx_desc); - regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; - tx_dsg = &tx_desc->tx_sg[1]; - tx_dsg->addr = regd_buf->reg.va + buf_offset; - tx_dsg->length = data_seg_len; - tx_dsg->lkey = regd_buf->reg.lkey; - tx_desc->num_sge = 2; + iser_reg_single(iser_conn->ib_conn->device, + send_dto->regd[0], DMA_TO_DEVICE); + + /* all data was registered for RDMA, we can use the lkey */ + iser_dto_add_regd_buff(send_dto, + &iser_task->rdma_regd[ISER_DIR_OUT], + buf_offset, + data_seg_len); if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { iser_err("Offset:%ld & DSL:%ld in Data-Out " @@ -375,11 +485,12 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(iser_conn->ib_conn, tx_desc); + err = iser_post_send(tx_desc); if (!err) return 0; send_data_out_error: + iser_dto_buffs_release(send_dto); kmem_cache_free(ig.desc_cache, tx_desc); iser_err("conn %p failed err %d\n",conn, err); return err; @@ -390,44 +501,64 @@ int iser_send_control(struct iscsi_conn *conn, { struct iscsi_iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_tx_desc *mdesc = &iser_task->desc; + struct iser_desc *mdesc = &iser_task->desc; + struct iser_dto *send_dto = NULL; unsigned long data_seg_len; int err = 0; + struct iser_regd_buf *regd_buf; struct iser_device *device; + unsigned char opcode; + + if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { + iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); + return -EPERM; + } + + if (iser_check_xmit(conn, task)) + return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; - 
iser_create_send_desc(iser_conn->ib_conn, mdesc); + send_dto = &mdesc->dto; + send_dto->task = NULL; + iser_create_send_desc(iser_conn, mdesc); device = iser_conn->ib_conn->device; + iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE); + data_seg_len = ntoh24(task->hdr->dlength); if (data_seg_len > 0) { - struct ib_sge *tx_dsg = &mdesc->tx_sg[1]; - if (task != conn->login_task) { - iser_err("data present on non login task!!!\n"); - goto send_control_error; - } - memcpy(iser_conn->ib_conn->login_buf, task->data, - task->data_count); - tx_dsg->addr = iser_conn->ib_conn->login_dma; - tx_dsg->length = data_seg_len; - tx_dsg->lkey = device->mr->lkey; - mdesc->num_sge = 2; + regd_buf = &mdesc->data_regd_buf; + memset(regd_buf, 0, sizeof(struct iser_regd_buf)); + regd_buf->device = device; + regd_buf->virt_addr = task->data; + regd_buf->data_size = task->data_count; + iser_reg_single(device, regd_buf, + DMA_TO_DEVICE); + iser_dto_add_regd_buff(send_dto, regd_buf, + 0, + data_seg_len); } - if (task == conn->login_task) { - err = iser_post_recvl(iser_conn->ib_conn); - if (err) + opcode = task->hdr->opcode & ISCSI_OPCODE_MASK; + + /* post recv buffer for response if one is expected */ + if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) { + if (iser_post_receive_control(conn) != 0) { + iser_err("post_rcv_buff failed!\n"); + err = -ENOMEM; goto send_control_error; + } } - err = iser_post_send(iser_conn->ib_conn, mdesc); + err = iser_post_send(mdesc); if (!err) return 0; send_control_error: + iser_dto_buffs_release(send_dto); iser_err("conn %p failed err %d\n",conn, err); return err; } @@ -435,71 +566,104 @@ int iser_send_control(struct iscsi_conn *conn, /** * iser_rcv_dto_completion - recv DTO completion */ -void iser_rcv_completion(struct iser_rx_desc *rx_desc, - unsigned long rx_xfer_len, - struct iser_conn *ib_conn) +void iser_rcv_completion(struct iser_desc *rx_desc, + unsigned long dto_xfer_len) { - struct iscsi_iser_conn *conn = 
ib_conn->iser_conn; + struct iser_dto *dto = &rx_desc->dto; + struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; + struct iscsi_task *task; + struct iscsi_iser_task *iser_task; struct iscsi_hdr *hdr; - u64 rx_dma; - int rx_buflen, outstanding, count, err; - - /* differentiate between login to all other PDUs */ - if ((char *)rx_desc == ib_conn->login_buf) { - rx_dma = ib_conn->login_dma; - rx_buflen = ISER_RX_LOGIN_SIZE; - } else { - rx_dma = rx_desc->dma_addr; - rx_buflen = ISER_RX_PAYLOAD_SIZE; - } - - ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + char *rx_data = NULL; + int rx_data_len = 0; + unsigned char opcode; hdr = &rx_desc->iscsi_header; - iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, - hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); + iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt); + + if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */ + rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN; + rx_data = dto->regd[1]->virt_addr; + rx_data += dto->offset[1]; + } + + opcode = hdr->opcode & ISCSI_OPCODE_MASK; + + if (opcode == ISCSI_OP_SCSI_CMD_RSP) { + spin_lock(&conn->iscsi_conn->session->lock); + task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt); + if (task) + __iscsi_get_task(task); + spin_unlock(&conn->iscsi_conn->session->lock); + + if (!task) + iser_err("itt can't be matched to task!!! 
" + "conn %p opcode %d itt %d\n", + conn->iscsi_conn, opcode, hdr->itt); + else { + iser_task = task->dd_data; + iser_dbg("itt %d task %p\n",hdr->itt, task); + iser_task->status = ISER_TASK_STATUS_COMPLETED; + iser_task_rdma_finalize(iser_task); + iscsi_put_task(task); + } + } + iser_dto_buffs_release(dto); - iscsi_iser_recv(conn->iscsi_conn, hdr, - rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN); + iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); - ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + kfree(rx_desc->data); + kmem_cache_free(ig.desc_cache, rx_desc); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task eliminates the need to worry on tasks which are completed in * * parallel to the execution of iser_conn_term. So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ - conn->ib_conn->post_recv_buf_count--; + atomic_dec(&conn->ib_conn->post_recv_buf_count); - if (rx_dma == ib_conn->login_dma) - return; - - outstanding = ib_conn->post_recv_buf_count; - if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) { - count = min(ISER_QP_MAX_RECV_DTOS - outstanding, - ISER_MIN_POSTED_RX); - err = iser_post_recvm(ib_conn, count); - if (err) - iser_err("posting %d rx bufs err %d\n", count, err); + /* + * if an unexpected PDU was received then the recv wr consumed must + * be replaced, this is done in the next send of a control-type PDU + */ + if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) { + /* nop-in with itt = 0xffffffff */ + atomic_inc(&conn->ib_conn->unexpected_pdu_count); } + else if (opcode == ISCSI_OP_ASYNC_EVENT) { + /* asyncronous message */ + atomic_inc(&conn->ib_conn->unexpected_pdu_count); + } + /* a reject PDU consumes the recv buf posted for the response */ } -void iser_snd_completion(struct iser_tx_desc *tx_desc, - struct iser_conn *ib_conn) +void iser_snd_completion(struct iser_desc *tx_desc) { + struct 
iser_dto *dto = &tx_desc->dto; + struct iser_conn *ib_conn = dto->ib_conn; + struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; + struct iscsi_conn *conn = iser_conn->iscsi_conn; struct iscsi_task *task; - struct iser_device *device = ib_conn->device; + int resume_tx = 0; + + iser_dbg("Initiator, Data sent dto=0x%p\n", dto); + + iser_dto_buffs_release(dto); - if (tx_desc->type == ISCSI_TX_DATAOUT) { - ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, - ISER_HEADERS_LEN, DMA_TO_DEVICE); + if (tx_desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, tx_desc); - } + + if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == + ISER_QP_MAX_REQ_DTOS) + resume_tx = 1; atomic_dec(&ib_conn->post_send_buf_count); + if (resume_tx) { + iser_dbg("%ld resuming tx\n",jiffies); + iscsi_conn_queue_work(conn); + } + if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ task = (void *) ((long)(void *)tx_desc - @@ -528,6 +692,7 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { + int deferred; int is_rdma_aligned = 1; struct iser_regd_buf *regd; @@ -545,17 +710,32 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) if (iser_task->dir[ISER_DIR_IN]) { regd = &iser_task->rdma_regd[ISER_DIR_IN]; - if (regd->reg.is_fmr) - iser_unreg_mem(®d->reg); + deferred = iser_regd_buff_release(regd); + if (deferred) { + iser_err("%d references remain for BUF-IN rdma reg\n", + atomic_read(®d->ref_count)); + } } if (iser_task->dir[ISER_DIR_OUT]) { regd = &iser_task->rdma_regd[ISER_DIR_OUT]; - if (regd->reg.is_fmr) - iser_unreg_mem(®d->reg); + deferred = iser_regd_buff_release(regd); + if (deferred) { + iser_err("%d references remain for BUF-OUT rdma reg\n", + atomic_read(®d->ref_count)); + } } /* if the data was unaligned, it was already unmapped and then copied */ if (is_rdma_aligned) iser_dma_unmap_task_data(iser_task); } + +void 
iser_dto_buffs_release(struct iser_dto *dto) +{ + int i; + + for (i = 0; i < dto->regd_vector_len; i++) + iser_regd_buff_release(dto->regd[i]); +} + diff --git a/trunk/drivers/infiniband/ulp/iser/iser_memory.c b/trunk/drivers/infiniband/ulp/iser/iser_memory.c index fb88d6896b67..274c883ef3ea 100644 --- a/trunk/drivers/infiniband/ulp/iser/iser_memory.c +++ b/trunk/drivers/infiniband/ulp/iser/iser_memory.c @@ -40,6 +40,62 @@ #define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ +/** + * Decrements the reference count for the + * registered buffer & releases it + * + * returns 0 if released, 1 if deferred + */ +int iser_regd_buff_release(struct iser_regd_buf *regd_buf) +{ + struct ib_device *dev; + + if ((atomic_read(®d_buf->ref_count) == 0) || + atomic_dec_and_test(®d_buf->ref_count)) { + /* if we used the dma mr, unreg is just NOP */ + if (regd_buf->reg.is_fmr) + iser_unreg_mem(®d_buf->reg); + + if (regd_buf->dma_addr) { + dev = regd_buf->device->ib_device; + ib_dma_unmap_single(dev, + regd_buf->dma_addr, + regd_buf->data_size, + regd_buf->direction); + } + /* else this regd buf is associated with task which we */ + /* dma_unmap_single/sg later */ + return 0; + } else { + iser_dbg("Release deferred, regd.buff: 0x%p\n", regd_buf); + return 1; + } +} + +/** + * iser_reg_single - fills registered buffer descriptor with + * registration information + */ +void iser_reg_single(struct iser_device *device, + struct iser_regd_buf *regd_buf, + enum dma_data_direction direction) +{ + u64 dma_addr; + + dma_addr = ib_dma_map_single(device->ib_device, + regd_buf->virt_addr, + regd_buf->data_size, direction); + BUG_ON(ib_dma_mapping_error(device->ib_device, dma_addr)); + + regd_buf->reg.lkey = device->mr->lkey; + regd_buf->reg.len = regd_buf->data_size; + regd_buf->reg.va = dma_addr; + regd_buf->reg.is_fmr = 0; + + regd_buf->dma_addr = dma_addr; + regd_buf->direction = direction; +} + /** * iser_start_rdma_unaligned_sg */ @@ -53,10 +109,10 @@ static int 
iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, unsigned long cmd_data_len = data->data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) - mem = (void *)__get_free_pages(GFP_ATOMIC, + mem = (void *)__get_free_pages(GFP_NOIO, ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); else - mem = kmalloc(cmd_data_len, GFP_ATOMIC); + mem = kmalloc(cmd_data_len, GFP_NOIO); if (mem == NULL) { iser_err("Failed to allocate mem size %d %d for copying sglist\n", @@ -418,5 +474,9 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, return err; } } + + /* take a reference on this regd buf such that it will not be released * + * (eg in send dto completion) before we get the scsi response */ + atomic_inc(®d_buf->ref_count); return 0; } diff --git a/trunk/drivers/infiniband/ulp/iser/iser_verbs.c b/trunk/drivers/infiniband/ulp/iser/iser_verbs.c index 308d17bb5146..8579f32ce38e 100644 --- a/trunk/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/trunk/drivers/infiniband/ulp/iser/iser_verbs.c @@ -37,8 +37,9 @@ #include "iscsi_iser.h" #define ISCSI_ISER_MAX_CONN 8 -#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) -#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) +#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ + ISER_QP_MAX_REQ_DTOS) * \ + ISCSI_ISER_MAX_CONN) static void iser_cq_tasklet_fn(unsigned long data); static void iser_cq_callback(struct ib_cq *cq, void *cq_context); @@ -66,23 +67,15 @@ static int iser_create_device_ib_res(struct iser_device *device) if (IS_ERR(device->pd)) goto pd_err; - device->rx_cq = ib_create_cq(device->ib_device, + device->cq = ib_create_cq(device->ib_device, iser_cq_callback, iser_cq_event_callback, (void *)device, - ISER_MAX_RX_CQ_LEN, 0); - if (IS_ERR(device->rx_cq)) - goto rx_cq_err; + ISER_MAX_CQ_LEN, 0); + if (IS_ERR(device->cq)) + goto cq_err; - device->tx_cq = ib_create_cq(device->ib_device, - NULL, iser_cq_event_callback, - (void *)device, - ISER_MAX_TX_CQ_LEN, 0); - - if 
(IS_ERR(device->tx_cq)) - goto tx_cq_err; - - if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP)) + if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP)) goto cq_arm_err; tasklet_init(&device->cq_tasklet, @@ -100,10 +93,8 @@ static int iser_create_device_ib_res(struct iser_device *device) dma_mr_err: tasklet_kill(&device->cq_tasklet); cq_arm_err: - ib_destroy_cq(device->tx_cq); -tx_cq_err: - ib_destroy_cq(device->rx_cq); -rx_cq_err: + ib_destroy_cq(device->cq); +cq_err: ib_dealloc_pd(device->pd); pd_err: iser_err("failed to allocate an IB resource\n"); @@ -121,13 +112,11 @@ static void iser_free_device_ib_res(struct iser_device *device) tasklet_kill(&device->cq_tasklet); (void)ib_dereg_mr(device->mr); - (void)ib_destroy_cq(device->tx_cq); - (void)ib_destroy_cq(device->rx_cq); + (void)ib_destroy_cq(device->cq); (void)ib_dealloc_pd(device->pd); device->mr = NULL; - device->tx_cq = NULL; - device->rx_cq = NULL; + device->cq = NULL; device->pd = NULL; } @@ -140,23 +129,13 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) { struct iser_device *device; struct ib_qp_init_attr init_attr; - int ret = -ENOMEM; + int ret; struct ib_fmr_pool_param params; BUG_ON(ib_conn->device == NULL); device = ib_conn->device; - ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL); - if (!ib_conn->login_buf) { - goto alloc_err; - ret = -ENOMEM; - } - - ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE, - DMA_FROM_DEVICE); - ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), GFP_KERNEL); @@ -190,12 +169,12 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) init_attr.event_handler = iser_qp_event_callback; init_attr.qp_context = (void *)ib_conn; - init_attr.send_cq = device->tx_cq; - init_attr.recv_cq = device->rx_cq; + init_attr.send_cq = device->cq; + init_attr.recv_cq = device->cq; init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; - init_attr.cap.max_send_sge = 2; - init_attr.cap.max_recv_sge = 1; + init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN; + init_attr.cap.max_recv_sge = 2; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; @@ -213,7 +192,6 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) (void)ib_destroy_fmr_pool(ib_conn->fmr_pool); fmr_pool_err: kfree(ib_conn->page_vec); - kfree(ib_conn->login_buf); alloc_err: iser_err("unable to alloc mem or create resource, err %d\n", ret); return ret; @@ -300,6 +278,17 @@ static void iser_device_try_release(struct iser_device *device) mutex_unlock(&ig.device_list_mutex); } +int iser_conn_state_comp(struct iser_conn *ib_conn, + enum iser_ib_conn_state comp) +{ + int ret; + + spin_lock_bh(&ib_conn->lock); + ret = (ib_conn->state == comp); + spin_unlock_bh(&ib_conn->lock); + return ret; +} + static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, enum iser_ib_conn_state comp, enum iser_ib_conn_state exch) @@ -325,7 +314,7 @@ static void iser_conn_release(struct iser_conn *ib_conn) mutex_lock(&ig.connlist_mutex); list_del(&ib_conn->conn_list); mutex_unlock(&ig.connlist_mutex); - iser_free_rx_descriptors(ib_conn); + iser_free_ib_conn_res(ib_conn); ib_conn->device = NULL; /* on EVENT_ADDR_ERROR there's no device yet for this conn */ @@ -453,7 +442,7 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id) ISCSI_ERR_CONN_FAILED); /* Complete the termination process if no posts are pending */ - if (ib_conn->post_recv_buf_count == 0 && + if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) && (atomic_read(&ib_conn->post_send_buf_count) == 0)) { ib_conn->state = ISER_CONN_DOWN; wake_up_interruptible(&ib_conn->wait); @@ -500,8 +489,9 @@ void iser_conn_init(struct iser_conn *ib_conn) { ib_conn->state = ISER_CONN_INIT; init_waitqueue_head(&ib_conn->wait); - ib_conn->post_recv_buf_count = 0; + atomic_set(&ib_conn->post_recv_buf_count, 0); 
atomic_set(&ib_conn->post_send_buf_count, 0); + atomic_set(&ib_conn->unexpected_pdu_count, 0); atomic_set(&ib_conn->refcount, 1); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); @@ -636,97 +626,136 @@ void iser_unreg_mem(struct iser_mem_reg *reg) reg->mem_h = NULL; } -int iser_post_recvl(struct iser_conn *ib_conn) +/** + * iser_dto_to_iov - builds IOV from a dto descriptor + */ +static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len) { - struct ib_recv_wr rx_wr, *rx_wr_failed; - struct ib_sge sge; - int ib_ret; - - sge.addr = ib_conn->login_dma; - sge.length = ISER_RX_LOGIN_SIZE; - sge.lkey = ib_conn->device->mr->lkey; + int i; + struct ib_sge *sge; + struct iser_regd_buf *regd_buf; + + if (dto->regd_vector_len > iov_len) { + iser_err("iov size %d too small for posting dto of len %d\n", + iov_len, dto->regd_vector_len); + BUG(); + } - rx_wr.wr_id = (unsigned long)ib_conn->login_buf; - rx_wr.sg_list = &sge; - rx_wr.num_sge = 1; - rx_wr.next = NULL; + for (i = 0; i < dto->regd_vector_len; i++) { + sge = &iov[i]; + regd_buf = dto->regd[i]; + + sge->addr = regd_buf->reg.va; + sge->length = regd_buf->reg.len; + sge->lkey = regd_buf->reg.lkey; + + if (dto->used_sz[i] > 0) /* Adjust size */ + sge->length = dto->used_sz[i]; + + /* offset and length should not exceed the regd buf length */ + if (sge->length + dto->offset[i] > regd_buf->reg.len) { + iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:" + "%ld in dto:0x%p [%d], va:0x%08lX\n", + (unsigned long)sge->length, dto->offset[i], + (unsigned long)regd_buf->reg.len, dto, i, + (unsigned long)sge->addr); + BUG(); + } - ib_conn->post_recv_buf_count++; - ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); - if (ib_ret) { - iser_err("ib_post_recv failed ret=%d\n", ib_ret); - ib_conn->post_recv_buf_count--; + sge->addr += dto->offset[i]; /* Adjust offset */ } - return ib_ret; } -int iser_post_recvm(struct iser_conn *ib_conn, int count) +/** + * iser_post_recv - Posts 
a receive buffer. + * + * returns 0 on success, -1 on failure + */ +int iser_post_recv(struct iser_desc *rx_desc) { - struct ib_recv_wr *rx_wr, *rx_wr_failed; - int i, ib_ret; - unsigned int my_rx_head = ib_conn->rx_desc_head; - struct iser_rx_desc *rx_desc; - - for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { - rx_desc = &ib_conn->rx_descs[my_rx_head]; - rx_wr->wr_id = (unsigned long)rx_desc; - rx_wr->sg_list = &rx_desc->rx_sg; - rx_wr->num_sge = 1; - rx_wr->next = rx_wr + 1; - my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1); - } + int ib_ret, ret_val = 0; + struct ib_recv_wr recv_wr, *recv_wr_failed; + struct ib_sge iov[2]; + struct iser_conn *ib_conn; + struct iser_dto *recv_dto = &rx_desc->dto; + + /* Retrieve conn */ + ib_conn = recv_dto->ib_conn; + + iser_dto_to_iov(recv_dto, iov, 2); - rx_wr--; - rx_wr->next = NULL; /* mark end of work requests list */ + recv_wr.next = NULL; + recv_wr.sg_list = iov; + recv_wr.num_sge = recv_dto->regd_vector_len; + recv_wr.wr_id = (unsigned long)rx_desc; - ib_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); + atomic_inc(&ib_conn->post_recv_buf_count); + ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); - ib_conn->post_recv_buf_count -= count; - } else - ib_conn->rx_desc_head = my_rx_head; - return ib_ret; -} + atomic_dec(&ib_conn->post_recv_buf_count); + ret_val = -1; + } + return ret_val; +} /** * iser_start_send - Initiate a Send DTO operation * * returns 0 on success, -1 on failure */ -int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc) +int iser_post_send(struct iser_desc *tx_desc) { - int ib_ret; + int ib_ret, ret_val = 0; struct ib_send_wr send_wr, *send_wr_failed; + struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN]; + struct iser_conn *ib_conn; + struct iser_dto *dto = &tx_desc->dto; - ib_dma_sync_single_for_device(ib_conn->device->ib_device, - 
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); + ib_conn = dto->ib_conn; + + iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN); send_wr.next = NULL; send_wr.wr_id = (unsigned long)tx_desc; - send_wr.sg_list = tx_desc->tx_sg; - send_wr.num_sge = tx_desc->num_sge; + send_wr.sg_list = iov; + send_wr.num_sge = dto->regd_vector_len; send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = IB_SEND_SIGNALED; + send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0; atomic_inc(&ib_conn->post_send_buf_count); ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); if (ib_ret) { + iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n", + dto, dto->regd_vector_len); iser_err("ib_post_send failed, ret:%d\n", ib_ret); atomic_dec(&ib_conn->post_send_buf_count); + ret_val = -1; } - return ib_ret; + + return ret_val; } -static void iser_handle_comp_error(struct iser_tx_desc *desc, - struct iser_conn *ib_conn) +static void iser_handle_comp_error(struct iser_desc *desc) { - if (desc && desc->type == ISCSI_TX_DATAOUT) + struct iser_dto *dto = &desc->dto; + struct iser_conn *ib_conn = dto->ib_conn; + + iser_dto_buffs_release(dto); + + if (desc->type == ISCSI_RX) { + kfree(desc->data); kmem_cache_free(ig.desc_cache, desc); + atomic_dec(&ib_conn->post_recv_buf_count); + } else { /* type is TX control/command/dataout */ + if (desc->type == ISCSI_TX_DATAOUT) + kmem_cache_free(ig.desc_cache, desc); + atomic_dec(&ib_conn->post_send_buf_count); + } - if (ib_conn->post_recv_buf_count == 0 && + if (atomic_read(&ib_conn->post_recv_buf_count) == 0 && atomic_read(&ib_conn->post_send_buf_count) == 0) { /* getting here when the state is UP means that the conn is * * being terminated asynchronously from the iSCSI layer's * @@ -745,74 +774,32 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc, } } -static int iser_drain_tx_cq(struct iser_device *device) -{ - struct ib_cq *cq = device->tx_cq; - struct ib_wc wc; - struct iser_tx_desc *tx_desc; - struct iser_conn 
*ib_conn; - int completed_tx = 0; - - while (ib_poll_cq(cq, 1, &wc) == 1) { - tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id; - ib_conn = wc.qp->qp_context; - if (wc.status == IB_WC_SUCCESS) { - if (wc.opcode == IB_WC_SEND) - iser_snd_completion(tx_desc, ib_conn); - else - iser_err("expected opcode %d got %d\n", - IB_WC_SEND, wc.opcode); - } else { - iser_err("tx id %llx status %d vend_err %x\n", - wc.wr_id, wc.status, wc.vendor_err); - atomic_dec(&ib_conn->post_send_buf_count); - iser_handle_comp_error(tx_desc, ib_conn); - } - completed_tx++; - } - return completed_tx; -} - - static void iser_cq_tasklet_fn(unsigned long data) { struct iser_device *device = (struct iser_device *)data; - struct ib_cq *cq = device->rx_cq; + struct ib_cq *cq = device->cq; struct ib_wc wc; - struct iser_rx_desc *desc; + struct iser_desc *desc; unsigned long xfer_len; - struct iser_conn *ib_conn; - int completed_tx, completed_rx; - completed_tx = completed_rx = 0; while (ib_poll_cq(cq, 1, &wc) == 1) { - desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; + desc = (struct iser_desc *) (unsigned long) wc.wr_id; BUG_ON(desc == NULL); - ib_conn = wc.qp->qp_context; + if (wc.status == IB_WC_SUCCESS) { - if (wc.opcode == IB_WC_RECV) { + if (desc->type == ISCSI_RX) { xfer_len = (unsigned long)wc.byte_len; - iser_rcv_completion(desc, xfer_len, ib_conn); - } else - iser_err("expected opcode %d got %d\n", - IB_WC_RECV, wc.opcode); + iser_rcv_completion(desc, xfer_len); + } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */ + iser_snd_completion(desc); } else { - if (wc.status != IB_WC_WR_FLUSH_ERR) - iser_err("rx id %llx status %d vend_err %x\n", - wc.wr_id, wc.status, wc.vendor_err); - ib_conn->post_recv_buf_count--; - iser_handle_comp_error(NULL, ib_conn); + iser_err("comp w. 
error op %d status %d\n",desc->type,wc.status); + iser_handle_comp_error(desc); } - completed_rx++; - if (!(completed_rx & 63)) - completed_tx += iser_drain_tx_cq(device); } /* #warning "it is assumed here that arming CQ only once its empty" * * " would not cause interrupts to be missed" */ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - - completed_tx += iser_drain_tx_cq(device); - iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); } static void iser_cq_callback(struct ib_cq *cq, void *cq_context) diff --git a/trunk/drivers/infiniband/ulp/srp/ib_srp.c b/trunk/drivers/infiniband/ulp/srp/ib_srp.c index 54c8fe25c423..441ea7c2e7c4 100644 --- a/trunk/drivers/infiniband/ulp/srp/ib_srp.c +++ b/trunk/drivers/infiniband/ulp/srp/ib_srp.c @@ -80,7 +80,8 @@ MODULE_PARM_DESC(mellanox_workarounds, static void srp_add_one(struct ib_device *device); static void srp_remove_one(struct ib_device *device); -static void srp_completion(struct ib_cq *cq, void *target_ptr); +static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); +static void srp_send_completion(struct ib_cq *cq, void *target_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); static struct scsi_transport_template *ib_srp_transport_template; @@ -227,14 +228,22 @@ static int srp_create_target_ib(struct srp_target_port *target) if (!init_attr) return -ENOMEM; - target->cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_completion, NULL, target, SRP_CQ_SIZE, 0); - if (IS_ERR(target->cq)) { - ret = PTR_ERR(target->cq); + target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, + srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); + if (IS_ERR(target->recv_cq)) { + ret = PTR_ERR(target->recv_cq); goto out; } - ib_req_notify_cq(target->cq, IB_CQ_NEXT_COMP); + target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev, + srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); + if (IS_ERR(target->send_cq)) { + ret = PTR_ERR(target->send_cq); + 
ib_destroy_cq(target->recv_cq); + goto out; + } + + ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP); init_attr->event_handler = srp_qp_event; init_attr->cap.max_send_wr = SRP_SQ_SIZE; @@ -243,20 +252,22 @@ static int srp_create_target_ib(struct srp_target_port *target) init_attr->cap.max_send_sge = 1; init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; init_attr->qp_type = IB_QPT_RC; - init_attr->send_cq = target->cq; - init_attr->recv_cq = target->cq; + init_attr->send_cq = target->send_cq; + init_attr->recv_cq = target->recv_cq; target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); if (IS_ERR(target->qp)) { ret = PTR_ERR(target->qp); - ib_destroy_cq(target->cq); + ib_destroy_cq(target->send_cq); + ib_destroy_cq(target->recv_cq); goto out; } ret = srp_init_qp(target, target->qp); if (ret) { ib_destroy_qp(target->qp); - ib_destroy_cq(target->cq); + ib_destroy_cq(target->send_cq); + ib_destroy_cq(target->recv_cq); goto out; } @@ -270,7 +281,8 @@ static void srp_free_target_ib(struct srp_target_port *target) int i; ib_destroy_qp(target->qp); - ib_destroy_cq(target->cq); + ib_destroy_cq(target->send_cq); + ib_destroy_cq(target->recv_cq); for (i = 0; i < SRP_RQ_SIZE; ++i) srp_free_iu(target->srp_host, target->rx_ring[i]); @@ -568,7 +580,9 @@ static int srp_reconnect_target(struct srp_target_port *target) if (ret) goto err; - while (ib_poll_cq(target->cq, 1, &wc) > 0) + while (ib_poll_cq(target->recv_cq, 1, &wc) > 0) + ; /* nothing */ + while (ib_poll_cq(target->send_cq, 1, &wc) > 0) ; /* nothing */ spin_lock_irq(target->scsi_host->host_lock); @@ -851,7 +865,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) struct srp_iu *iu; u8 opcode; - iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV]; + iu = target->rx_ring[wc->wr_id]; dev = target->srp_host->srp_dev->dev; ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len, @@ -898,7 +912,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) 
DMA_FROM_DEVICE); } -static void srp_completion(struct ib_cq *cq, void *target_ptr) +static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) { struct srp_target_port *target = target_ptr; struct ib_wc wc; @@ -907,17 +921,31 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr) while (ib_poll_cq(cq, 1, &wc) > 0) { if (wc.status) { shost_printk(KERN_ERR, target->scsi_host, - PFX "failed %s status %d\n", - wc.wr_id & SRP_OP_RECV ? "receive" : "send", + PFX "failed receive status %d\n", wc.status); target->qp_in_error = 1; break; } - if (wc.wr_id & SRP_OP_RECV) - srp_handle_recv(target, &wc); - else - ++target->tx_tail; + srp_handle_recv(target, &wc); + } +} + +static void srp_send_completion(struct ib_cq *cq, void *target_ptr) +{ + struct srp_target_port *target = target_ptr; + struct ib_wc wc; + + while (ib_poll_cq(cq, 1, &wc) > 0) { + if (wc.status) { + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed send status %d\n", + wc.status); + target->qp_in_error = 1; + break; + } + + ++target->tx_tail; } } @@ -930,7 +958,7 @@ static int __srp_post_recv(struct srp_target_port *target) int ret; next = target->rx_head & (SRP_RQ_SIZE - 1); - wr.wr_id = next | SRP_OP_RECV; + wr.wr_id = next; iu = target->rx_ring[next]; list.addr = iu->dma; @@ -970,6 +998,8 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target, { s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 
1 : 2; + srp_send_completion(target->send_cq, target); + if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) return NULL; diff --git a/trunk/drivers/infiniband/ulp/srp/ib_srp.h b/trunk/drivers/infiniband/ulp/srp/ib_srp.h index e185b907fc12..5a80eac6fdaa 100644 --- a/trunk/drivers/infiniband/ulp/srp/ib_srp.h +++ b/trunk/drivers/infiniband/ulp/srp/ib_srp.h @@ -60,7 +60,6 @@ enum { SRP_RQ_SHIFT = 6, SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT, SRP_SQ_SIZE = SRP_RQ_SIZE - 1, - SRP_CQ_SIZE = SRP_SQ_SIZE + SRP_RQ_SIZE, SRP_TAG_TSK_MGMT = 1 << (SRP_RQ_SHIFT + 1), @@ -69,8 +68,6 @@ enum { SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4 }; -#define SRP_OP_RECV (1 << 31) - enum srp_target_state { SRP_TARGET_LIVE, SRP_TARGET_CONNECTING, @@ -133,7 +130,8 @@ struct srp_target_port { int path_query_id; struct ib_cm_id *cm_id; - struct ib_cq *cq; + struct ib_cq *recv_cq; + struct ib_cq *send_cq; struct ib_qp *qp; int max_ti_iu_len; diff --git a/trunk/drivers/net/cxgb3/adapter.h b/trunk/drivers/net/cxgb3/adapter.h index 4cd7f420766a..3e8618b4efbc 100644 --- a/trunk/drivers/net/cxgb3/adapter.h +++ b/trunk/drivers/net/cxgb3/adapter.h @@ -264,10 +264,6 @@ struct adapter { struct work_struct fatal_error_handler_task; struct work_struct link_fault_handler_task; - struct work_struct db_full_task; - struct work_struct db_empty_task; - struct work_struct db_drop_task; - struct dentry *debugfs_root; struct mutex mdio_lock; @@ -339,7 +335,6 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data); irqreturn_t t3_sge_intr_msix(int irq, void *cookie); -extern struct workqueue_struct *cxgb3_wq; int t3_get_edc_fw(struct cphy *phy, int edc_idx, int size); diff --git a/trunk/drivers/net/cxgb3/cxgb3_main.c b/trunk/drivers/net/cxgb3/cxgb3_main.c index 37945fce7fa5..89bec9c3c141 100644 --- a/trunk/drivers/net/cxgb3/cxgb3_main.c +++ b/trunk/drivers/net/cxgb3/cxgb3_main.c @@ -45,7 +45,6 @@ 
#include #include #include -#include #include #include "common.h" @@ -141,7 +140,7 @@ MODULE_PARM_DESC(ofld_disable, "whether to enable offload at init time or not"); * will block keventd as it needs the rtnl lock, and we'll deadlock waiting * for our work to complete. Get our own work queue to solve this. */ -struct workqueue_struct *cxgb3_wq; +static struct workqueue_struct *cxgb3_wq; /** * link_report - show link status and link speed/duplex @@ -591,19 +590,6 @@ static void setup_rss(struct adapter *adap) V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, cpus, rspq_map); } -static void ring_dbs(struct adapter *adap) -{ - int i, j; - - for (i = 0; i < SGE_QSETS; i++) { - struct sge_qset *qs = &adap->sge.qs[i]; - - if (qs->adap) - for (j = 0; j < SGE_TXQ_PER_SET; j++) - t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(qs->txq[j].cntxt_id)); - } -} - static void init_napi(struct adapter *adap) { int i; @@ -2768,42 +2754,6 @@ static void t3_adap_check_task(struct work_struct *work) spin_unlock_irq(&adapter->work_lock); } -static void db_full_task(struct work_struct *work) -{ - struct adapter *adapter = container_of(work, struct adapter, - db_full_task); - - cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_FULL, 0); -} - -static void db_empty_task(struct work_struct *work) -{ - struct adapter *adapter = container_of(work, struct adapter, - db_empty_task); - - cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_EMPTY, 0); -} - -static void db_drop_task(struct work_struct *work) -{ - struct adapter *adapter = container_of(work, struct adapter, - db_drop_task); - unsigned long delay = 1000; - unsigned short r; - - cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_DROP, 0); - - /* - * Sleep a while before ringing the driver qset dbs. - * The delay is between 1000-2023 usecs. 
- */ - get_random_bytes(&r, 2); - delay += r & 1023; - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(delay)); - ring_dbs(adapter); -} - /* * Processes external (PHY) interrupts in process context. */ @@ -3272,11 +3222,6 @@ static int __devinit init_one(struct pci_dev *pdev, INIT_LIST_HEAD(&adapter->adapter_list); INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task); INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task); - - INIT_WORK(&adapter->db_full_task, db_full_task); - INIT_WORK(&adapter->db_empty_task, db_empty_task); - INIT_WORK(&adapter->db_drop_task, db_drop_task); - INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task); for (i = 0; i < ai->nports0 + ai->nports1; ++i) { diff --git a/trunk/drivers/net/cxgb3/cxgb3_offload.h b/trunk/drivers/net/cxgb3/cxgb3_offload.h index 929c298115ca..670aa62042da 100644 --- a/trunk/drivers/net/cxgb3/cxgb3_offload.h +++ b/trunk/drivers/net/cxgb3/cxgb3_offload.h @@ -73,10 +73,7 @@ enum { OFFLOAD_STATUS_UP, OFFLOAD_STATUS_DOWN, OFFLOAD_PORT_DOWN, - OFFLOAD_PORT_UP, - OFFLOAD_DB_FULL, - OFFLOAD_DB_EMPTY, - OFFLOAD_DB_DROP + OFFLOAD_PORT_UP }; struct cxgb3_client { diff --git a/trunk/drivers/net/cxgb3/regs.h b/trunk/drivers/net/cxgb3/regs.h index cb42353c9fdd..1b5327b5a965 100644 --- a/trunk/drivers/net/cxgb3/regs.h +++ b/trunk/drivers/net/cxgb3/regs.h @@ -254,22 +254,6 @@ #define V_LOPIODRBDROPERR(x) ((x) << S_LOPIODRBDROPERR) #define F_LOPIODRBDROPERR V_LOPIODRBDROPERR(1U) -#define S_HIPRIORITYDBFULL 7 -#define V_HIPRIORITYDBFULL(x) ((x) << S_HIPRIORITYDBFULL) -#define F_HIPRIORITYDBFULL V_HIPRIORITYDBFULL(1U) - -#define S_HIPRIORITYDBEMPTY 6 -#define V_HIPRIORITYDBEMPTY(x) ((x) << S_HIPRIORITYDBEMPTY) -#define F_HIPRIORITYDBEMPTY V_HIPRIORITYDBEMPTY(1U) - -#define S_LOPRIORITYDBFULL 5 -#define V_LOPRIORITYDBFULL(x) ((x) << S_LOPRIORITYDBFULL) -#define F_LOPRIORITYDBFULL V_LOPRIORITYDBFULL(1U) - -#define S_LOPRIORITYDBEMPTY 4 -#define V_LOPRIORITYDBEMPTY(x) ((x) << 
S_LOPRIORITYDBEMPTY) -#define F_LOPRIORITYDBEMPTY V_LOPRIORITYDBEMPTY(1U) - #define S_RSPQDISABLED 3 #define V_RSPQDISABLED(x) ((x) << S_RSPQDISABLED) #define F_RSPQDISABLED V_RSPQDISABLED(1U) diff --git a/trunk/drivers/net/cxgb3/sge.c b/trunk/drivers/net/cxgb3/sge.c index 9b434461c4f1..318a018ca7c5 100644 --- a/trunk/drivers/net/cxgb3/sge.c +++ b/trunk/drivers/net/cxgb3/sge.c @@ -42,7 +42,6 @@ #include "sge_defs.h" #include "t3_cpl.h" #include "firmware_exports.h" -#include "cxgb3_offload.h" #define USE_GTS 0 @@ -2834,13 +2833,8 @@ void t3_sge_err_intr_handler(struct adapter *adapter) } if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR)) - queue_work(cxgb3_wq, &adapter->db_drop_task); - - if (status & (F_HIPRIORITYDBFULL | F_LOPRIORITYDBFULL)) - queue_work(cxgb3_wq, &adapter->db_full_task); - - if (status & (F_HIPRIORITYDBEMPTY | F_LOPRIORITYDBEMPTY)) - queue_work(cxgb3_wq, &adapter->db_empty_task); + CH_ALERT(adapter, "SGE dropped %s priority doorbell\n", + status & F_HIPIODRBDROPERR ? 
"high" : "lo"); t3_write_reg(adapter, A_SG_INT_CAUSE, status); if (status & SGE_FATALERR) diff --git a/trunk/drivers/net/cxgb3/t3_hw.c b/trunk/drivers/net/cxgb3/t3_hw.c index c38fc717a0d1..032cfe065570 100644 --- a/trunk/drivers/net/cxgb3/t3_hw.c +++ b/trunk/drivers/net/cxgb3/t3_hw.c @@ -1432,10 +1432,7 @@ static int t3_handle_intr_status(struct adapter *adapter, unsigned int reg, F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ - F_HIRCQPARITYERROR | F_LOPRIORITYDBFULL | \ - F_HIPRIORITYDBFULL | F_LOPRIORITYDBEMPTY | \ - F_HIPRIORITYDBEMPTY | F_HIPIODRBDROPERR | \ - F_LOPIODRBDROPERR) + F_HIRCQPARITYERROR) #define MC5_INTR_MASK (F_PARITYERR | F_ACTRGNFULL | F_UNKNOWNCMD | \ F_REQQPARERR | F_DISPQPARERR | F_DELACTEMPTY | \ F_NFASRCHFAIL) diff --git a/trunk/include/rdma/ib_verbs.h b/trunk/include/rdma/ib_verbs.h index a585e0f92bc3..09509edb1c5f 100644 --- a/trunk/include/rdma/ib_verbs.h +++ b/trunk/include/rdma/ib_verbs.h @@ -984,9 +984,9 @@ struct ib_device { struct list_head event_handler_list; spinlock_t event_handler_lock; - spinlock_t client_data_lock; struct list_head core_list; struct list_head client_data_list; + spinlock_t client_data_lock; struct ib_cache cache; int *pkey_tbl_len; @@ -1144,8 +1144,8 @@ struct ib_device { IB_DEV_UNREGISTERED } reg_state; - int uverbs_abi_ver; u64 uverbs_cmd_mask; + int uverbs_abi_ver; char node_desc[64]; __be64 node_guid; diff --git a/trunk/include/rdma/rdma_cm.h b/trunk/include/rdma/rdma_cm.h index 4fae90304648..c6b2962315b3 100644 --- a/trunk/include/rdma/rdma_cm.h +++ b/trunk/include/rdma/rdma_cm.h @@ -67,6 +67,7 @@ enum rdma_port_space { RDMA_PS_IPOIB = 0x0002, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, + RDMA_PS_SCTP = 0x0183 }; struct rdma_addr {