From 476f7600cd348a9be57f8c0b8969bb6caced9cf6 Mon Sep 17 00:00:00 2001
From: Tatyana Nikolova
Date: Thu, 20 Sep 2012 19:23:14 +0000
Subject: [PATCH]

--- yaml ---
r: 328567
b: refs/heads/master
c: ef3d0c4a5e8c7ad3429b9f9ef66cf5a7563cd513
h: refs/heads/master
i:
  328565: eb3e62d0f4c3a66e39a7ffe82627480013bee107
  328563: dc3e62e4dd3eee78940bbcc98952ae249195a920
  328559: b72c62b4c70c86a1259a13f6e11a5675cf3e9ab6
v: v3
---
 [refs]                                        |    2 +-
 trunk/drivers/infiniband/core/cache.c         |   43 +-
 trunk/drivers/infiniband/core/device.c        |   16 +-
 trunk/drivers/infiniband/hw/mlx4/Makefile     |    2 +-
 trunk/drivers/infiniband/hw/mlx4/alias_GUID.c |  688 -------
 trunk/drivers/infiniband/hw/mlx4/cm.c         |  437 -----
 trunk/drivers/infiniband/hw/mlx4/cq.c         |   31 +-
 trunk/drivers/infiniband/hw/mlx4/mad.c        | 1573 +----------------
 trunk/drivers/infiniband/hw/mlx4/main.c       |  273 +--
 trunk/drivers/infiniband/hw/mlx4/mcg.c        | 1254 -------------
 trunk/drivers/infiniband/hw/mlx4/mlx4_ib.h    |  341 +---
 trunk/drivers/infiniband/hw/mlx4/qp.c         |  656 +------
 trunk/drivers/infiniband/hw/mlx4/sysfs.c      |  794 ---------
 trunk/drivers/infiniband/hw/nes/nes_cm.c      |   32 +-
 .../drivers/net/ethernet/mellanox/mlx4/cmd.c  |  242 +--
 trunk/drivers/net/ethernet/mellanox/mlx4/eq.c |  245 +--
 trunk/drivers/net/ethernet/mellanox/mlx4/fw.c |  244 +--
 trunk/drivers/net/ethernet/mellanox/mlx4/fw.h |   11 +-
 .../drivers/net/ethernet/mellanox/mlx4/main.c |  171 +-
 .../drivers/net/ethernet/mellanox/mlx4/mlx4.h |   59 +-
 .../drivers/net/ethernet/mellanox/mlx4/port.c |   10 -
 trunk/drivers/net/ethernet/mellanox/mlx4/qp.c |  100 +-
 .../ethernet/mellanox/mlx4/resource_tracker.c |  220 +--
 trunk/include/linux/mlx4/device.h             |   69 +-
 trunk/include/linux/mlx4/driver.h             |    2 -
 trunk/include/linux/mlx4/qp.h                 |    3 +-
 trunk/include/rdma/ib_cache.h                 |   16 -
 trunk/include/rdma/ib_verbs.h                 |    3 -
 28 files changed, 349 insertions(+), 7188 deletions(-)
 delete mode 100644 trunk/drivers/infiniband/hw/mlx4/alias_GUID.c
 delete mode 100644 trunk/drivers/infiniband/hw/mlx4/cm.c
 delete mode 100644 trunk/drivers/infiniband/hw/mlx4/mcg.c
 delete mode 100644 trunk/drivers/infiniband/hw/mlx4/sysfs.c

diff --git a/[refs] b/[refs]
index 54ef0409c479..c68f87df1747 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: aadf4f3f66a7b710b05b31ac0839fbbf59e41f7c
+refs/heads/master: ef3d0c4a5e8c7ad3429b9f9ef66cf5a7563cd513
diff --git a/trunk/drivers/infiniband/core/cache.c b/trunk/drivers/infiniband/core/cache.c
index 80f6cf2449fb..9353992f9eea 100644
--- a/trunk/drivers/infiniband/core/cache.c
+++ b/trunk/drivers/infiniband/core/cache.c
@@ -167,7 +167,6 @@ int ib_find_cached_pkey(struct ib_device *device,
 	unsigned long flags;
 	int i;
 	int ret = -ENOENT;
-	int partial_ix = -1;
 
 	if (port_num < start_port(device) || port_num > end_port(device))
 		return -EINVAL;
@@ -180,46 +179,6 @@ int ib_find_cached_pkey(struct ib_device *device,
 
 	for (i = 0; i < cache->table_len; ++i)
 		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
-			if (cache->table[i] & 0x8000) {
-				*index = i;
-				ret = 0;
-				break;
-			} else
-				partial_ix = i;
-		}
-
-	if (ret && partial_ix >= 0) {
-		*index = partial_ix;
-		ret = 0;
-	}
-
-	read_unlock_irqrestore(&device->cache.lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL(ib_find_cached_pkey);
-
-int ib_find_exact_cached_pkey(struct ib_device *device,
-			      u8 port_num,
-			      u16 pkey,
-			      u16 *index)
-{
-	struct ib_pkey_cache *cache;
-	unsigned long flags;
-	int i;
-	int ret = -ENOENT;
-
-	if (port_num < start_port(device) || port_num > end_port(device))
-		return -EINVAL;
-
-	read_lock_irqsave(&device->cache.lock, flags);
-
-
cache = device->cache.pkey_cache[port_num - start_port(device)]; - - *index = -1; - - for (i = 0; i < cache->table_len; ++i) - if (cache->table[i] == pkey) { *index = i; ret = 0; break; @@ -229,7 +188,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, return ret; } -EXPORT_SYMBOL(ib_find_exact_cached_pkey); +EXPORT_SYMBOL(ib_find_cached_pkey); int ib_get_cached_lmc(struct ib_device *device, u8 port_num, diff --git a/trunk/drivers/infiniband/core/device.c b/trunk/drivers/infiniband/core/device.c index 18c1ece765f2..e711de400a01 100644 --- a/trunk/drivers/infiniband/core/device.c +++ b/trunk/drivers/infiniband/core/device.c @@ -707,28 +707,18 @@ int ib_find_pkey(struct ib_device *device, { int ret, i; u16 tmp_pkey; - int partial_ix = -1; for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { ret = ib_query_pkey(device, port_num, i, &tmp_pkey); if (ret) return ret; + if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { - /* if there is full-member pkey take it.*/ - if (tmp_pkey & 0x8000) { - *index = i; - return 0; - } - if (partial_ix < 0) - partial_ix = i; + *index = i; + return 0; } } - /*no full-member, if exists take the limited*/ - if (partial_ix >= 0) { - *index = partial_ix; - return 0; - } return -ENOENT; } EXPORT_SYMBOL(ib_find_pkey); diff --git a/trunk/drivers/infiniband/hw/mlx4/Makefile b/trunk/drivers/infiniband/hw/mlx4/Makefile index f4213b3a8fe1..70f09c7826da 100644 --- a/trunk/drivers/infiniband/hw/mlx4/Makefile +++ b/trunk/drivers/infiniband/hw/mlx4/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o -mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o +mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o diff --git a/trunk/drivers/infiniband/hw/mlx4/alias_GUID.c b/trunk/drivers/infiniband/hw/mlx4/alias_GUID.c deleted file mode 100644 index 0fcd5cd6f3ee..000000000000 --- a/trunk/drivers/infiniband/hw/mlx4/alias_GUID.c +++ /dev/null @@ -1,688 +0,0 @@ -/* - * Copyright (c) 2012 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - /***********************************************************/ -/*This file support the handling of the Alias GUID feature. 
*/
-/***********************************************************/
-#include <rdma/ib_mad.h>
-#include <rdma/ib_smi.h>
-#include <rdma/ib_cache.h>
-#include <rdma/ib_sa.h>
-#include <rdma/ib_pack.h>
-#include <linux/mlx4/cmd.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <rdma/ib_user_verbs.h>
-#include <linux/delay.h>
-#include "mlx4_ib.h"
-
-/*
-The driver keeps the current state of all guids, as they are in the HW.
-Whenever we receive an smp mad GUIDInfo record, the data will be cached.
-*/
-
-struct mlx4_alias_guid_work_context {
-	u8 port;
-	struct mlx4_ib_dev *dev ;
-	struct ib_sa_query *sa_query;
-	struct completion done;
-	int query_id;
-	struct list_head list;
-	int block_num;
-};
-
-struct mlx4_next_alias_guid_work {
-	u8 port;
-	u8 block_num;
-	struct mlx4_sriov_alias_guid_info_rec_det rec_det;
-};
-
-
-void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
-					 u8 port_num, u8 *p_data)
-{
-	int i;
-	u64 guid_indexes;
-	int slave_id;
-	int port_index = port_num - 1;
-
-	if (!mlx4_is_master(dev->dev))
-		return;
-
-	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
-				   ports_guid[port_num - 1].
-				   all_rec_per_port[block_num].guid_indexes);
-	pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
-
-	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
-		/* The location of the specific index starts from bit number 4
-		 * until bit num 11 */
-		if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
-			slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
-			if (slave_id >= dev->dev->num_slaves) {
-				pr_debug("The last slave: %d\n", slave_id);
-				return;
-			}
-
-			/* cache the guid: */
-			memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
-			       &p_data[i * GUID_REC_SIZE],
-			       GUID_REC_SIZE);
-		} else
-			pr_debug("Guid number: %d in block: %d"
-				 " was not updated\n", i, block_num);
-	}
-}
-
-static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
-{
-	if (index >= NUM_ALIAS_GUID_PER_PORT) {
-		pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
-		return (__force __be64) ((u64) 0xFFFFFFFFFFFFFFFFUL);
-	}
-	return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
-}
-
-
-ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
-{
-	return IB_SA_COMP_MASK(4 + index);
-}
-
-/*
- * Whenever new GUID is set/unset (guid table change) create event and
- * notify the relevant slave (master also should be notified).
- * If the GUID value is not as we have in the cache the slave will not be
- * updated; in this case it waits for the smp_snoop or the port management
- * event to call the function and to update the slave.
- * block_number - the index of the block (16 blocks available)
- * port_number - 1 or 2
- */
-void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
-					  int block_num, u8 port_num,
-					  u8 *p_data)
-{
-	int i;
-	u64 guid_indexes;
-	int slave_id;
-	enum slave_port_state new_state;
-	enum slave_port_state prev_state;
-	__be64 tmp_cur_ag, form_cache_ag;
-	enum slave_port_gen_event gen_event;
-
-	if (!mlx4_is_master(dev->dev))
-		return;
-
-	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
-				   ports_guid[port_num - 1].
- all_rec_per_port[block_num].guid_indexes); - pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes); - - /*calculate the slaves and notify them*/ - for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { - /* the location of the specific index runs from bits 4..11 */ - if (!(test_bit(i + 4, (unsigned long *)&guid_indexes))) - continue; - - slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; - if (slave_id >= dev->dev->num_slaves) - return; - tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; - form_cache_ag = get_cached_alias_guid(dev, port_num, - (NUM_ALIAS_GUID_IN_REC * block_num) + i); - /* - * Check if guid is not the same as in the cache, - * If it is different, wait for the snoop_smp or the port mgmt - * change event to update the slave on its port state change - */ - if (tmp_cur_ag != form_cache_ag) - continue; - mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num); - - /*2 cases: Valid GUID, and Invalid Guid*/ - - if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/ - prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num); - new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num, - MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, - &gen_event); - pr_debug("slave: %d, port: %d prev_port_state: %d," - " new_port_state: %d, gen_event: %d\n", - slave_id, port_num, prev_state, new_state, gen_event); - if (gen_event == SLAVE_PORT_GEN_EVENT_UP) { - pr_debug("sending PORT_UP event to slave: %d, port: %d\n", - slave_id, port_num); - mlx4_gen_port_state_change_eqe(dev->dev, slave_id, - port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE); - } - } else { /* request to invalidate GUID */ - set_and_calc_slave_port_state(dev->dev, slave_id, port_num, - MLX4_PORT_STATE_IB_EVENT_GID_INVALID, - &gen_event); - pr_debug("sending PORT DOWN event to slave: %d, port: %d\n", - slave_id, port_num); - mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num, - MLX4_PORT_CHANGE_SUBTYPE_DOWN); - } - } -} - -static void aliasguid_query_handler(int status, - struct ib_sa_guidinfo_rec *guid_rec, - void *context) -{ - struct mlx4_ib_dev *dev; - struct mlx4_alias_guid_work_context *cb_ctx = context; - u8 port_index ; - int i; - struct mlx4_sriov_alias_guid_info_rec_det *rec; - unsigned long flags, flags1; - - if (!context) - return; - - dev = cb_ctx->dev; - port_index = cb_ctx->port - 1; - rec = &dev->sriov.alias_guid.ports_guid[port_index]. - all_rec_per_port[cb_ctx->block_num]; - - if (status) { - rec->status = MLX4_GUID_INFO_STATUS_IDLE; - pr_debug("(port: %d) failed: status = %d\n", - cb_ctx->port, status); - goto out; - } - - if (guid_rec->block_num != cb_ctx->block_num) { - pr_err("block num mismatch: %d != %d\n", - cb_ctx->block_num, guid_rec->block_num); - goto out; - } - - pr_debug("lid/port: %d/%d, block_num: %d\n", - be16_to_cpu(guid_rec->lid), cb_ctx->port, - guid_rec->block_num); - - rec = &dev->sriov.alias_guid.ports_guid[port_index]. - all_rec_per_port[guid_rec->block_num]; - - rec->status = MLX4_GUID_INFO_STATUS_SET; - rec->method = MLX4_GUID_INFO_RECORD_SET; - - for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) { - __be64 tmp_cur_ag; - tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE]; - /* check if the SM didn't assign one of the records. - * if it didn't, if it was not sysadmin request: - * ask the SM to give a new GUID, (instead of the driver request). 
- */ - if (tmp_cur_ag == MLX4_NOT_SET_GUID) { - mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in " - "block_num: %d was declined by SM, " - "ownership by %d (0 = driver, 1=sysAdmin," - " 2=None)\n", __func__, i, - guid_rec->block_num, rec->ownership); - if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) { - /* if it is driver assign, asks for new GUID from SM*/ - *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] = - MLX4_NOT_SET_GUID; - - /* Mark the record as not assigned, and let it - * be sent again in the next work sched.*/ - rec->status = MLX4_GUID_INFO_STATUS_IDLE; - rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i); - } - } else { - /* properly assigned record. */ - /* We save the GUID we just got from the SM in the - * admin_guid in order to be persistent, and in the - * request from the sm the process will ask for the same GUID */ - if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN && - tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) { - /* the sysadmin assignment failed.*/ - mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set" - " admin guid after SysAdmin " - "configuration. " - "Record num %d in block_num:%d " - "was declined by SM, " - "new val(0x%llx) was kept\n", - __func__, i, - guid_rec->block_num, - be64_to_cpu(*(__be64 *) & - rec->all_recs[i * GUID_REC_SIZE])); - } else { - memcpy(&rec->all_recs[i * GUID_REC_SIZE], - &guid_rec->guid_info_list[i * GUID_REC_SIZE], - GUID_REC_SIZE); - } - } - } - /* - The func is call here to close the cases when the - sm doesn't send smp, so in the sa response the driver - notifies the slave. - */ - mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num, - cb_ctx->port, - guid_rec->guid_info_list); -out: - spin_lock_irqsave(&dev->sriov.going_down_lock, flags); - spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); - if (!dev->sriov.is_going_down) - queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq, - &dev->sriov.alias_guid.ports_guid[port_index]. - alias_guid_work, 0); - if (cb_ctx->sa_query) { - list_del(&cb_ctx->list); - kfree(cb_ctx); - } else - complete(&cb_ctx->done); - spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); - spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); -} - -static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index) -{ - int i; - u64 cur_admin_val; - ib_sa_comp_mask comp_mask = 0; - - dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status - = MLX4_GUID_INFO_STATUS_IDLE; - dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method - = MLX4_GUID_INFO_RECORD_SET; - - /* calculate the comp_mask for that record.*/ - for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { - cur_admin_val = - *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1]. - all_rec_per_port[index].all_recs[GUID_REC_SIZE * i]; - /* - check the admin value: if it's for delete (~00LL) or - it is the first guid of the first record (hw guid) or - the records is not in ownership of the sysadmin and the sm doesn't - need to assign GUIDs, then don't put it up for assignment. - */ - if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val || - (!index && !i) || - MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid. - ports_guid[port - 1].all_rec_per_port[index].ownership) - continue; - comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i); - } - dev->sriov.alias_guid.ports_guid[port - 1]. 
- all_rec_per_port[index].guid_indexes = comp_mask; -} - -static int set_guid_rec(struct ib_device *ibdev, - u8 port, int index, - struct mlx4_sriov_alias_guid_info_rec_det *rec_det) -{ - int err; - struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct ib_sa_guidinfo_rec guid_info_rec; - ib_sa_comp_mask comp_mask; - struct ib_port_attr attr; - struct mlx4_alias_guid_work_context *callback_context; - unsigned long resched_delay, flags, flags1; - struct list_head *head = - &dev->sriov.alias_guid.ports_guid[port - 1].cb_list; - - err = __mlx4_ib_query_port(ibdev, port, &attr, 1); - if (err) { - pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n", - err, port); - return err; - } - /*check the port was configured by the sm, otherwise no need to send */ - if (attr.state != IB_PORT_ACTIVE) { - pr_debug("port %d not active...rescheduling\n", port); - resched_delay = 5 * HZ; - err = -EAGAIN; - goto new_schedule; - } - - callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL); - if (!callback_context) { - err = -ENOMEM; - resched_delay = HZ * 5; - goto new_schedule; - } - callback_context->port = port; - callback_context->dev = dev; - callback_context->block_num = index; - - memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); - - guid_info_rec.lid = cpu_to_be16(attr.lid); - guid_info_rec.block_num = index; - - memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, - GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC); - comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM | - rec_det->guid_indexes; - - init_completion(&callback_context->done); - spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); - list_add_tail(&callback_context->list, head); - spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); - - callback_context->query_id = - ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client, - ibdev, port, &guid_info_rec, - comp_mask, rec_det->method, 1000, - GFP_KERNEL, aliasguid_query_handler, - callback_context, - &callback_context->sa_query); - if (callback_context->query_id < 0) { - pr_debug("ib_sa_guid_info_rec_query failed, query_id: " - "%d. 
will reschedule to the next 1 sec.\n", - callback_context->query_id); - spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); - list_del(&callback_context->list); - kfree(callback_context); - spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); - resched_delay = 1 * HZ; - err = -EAGAIN; - goto new_schedule; - } - err = 0; - goto out; - -new_schedule: - spin_lock_irqsave(&dev->sriov.going_down_lock, flags); - spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); - invalidate_guid_record(dev, port, index); - if (!dev->sriov.is_going_down) { - queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, - &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, - resched_delay); - } - spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); - spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); - -out: - return err; -} - -void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port) -{ - int i; - unsigned long flags, flags1; - - pr_debug("port %d\n", port); - - spin_lock_irqsave(&dev->sriov.going_down_lock, flags); - spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); - for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++) - invalidate_guid_record(dev, port, i); - - if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) { - /* - make sure no work waits in the queue, if the work is already - queued(not on the timer) the cancel will fail. That is not a problem - because we just want the work started. - */ - __cancel_delayed_work(&dev->sriov.alias_guid. - ports_guid[port - 1].alias_guid_work); - queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, - &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, - 0); - } - spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); - spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); -} - -/* The function returns the next record that was - * not configured (or failed to be configured) */ -static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port, - struct mlx4_next_alias_guid_work *rec) -{ - int j; - unsigned long flags; - - for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { - spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); - if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status == - MLX4_GUID_INFO_STATUS_IDLE) { - memcpy(&rec->rec_det, - &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j], - sizeof (struct mlx4_sriov_alias_guid_info_rec_det)); - rec->port = port; - rec->block_num = j; - dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status = - MLX4_GUID_INFO_STATUS_PENDING; - spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); - return 0; - } - spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); - } - return -ENOENT; -} - -static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port, - int rec_index, - struct mlx4_sriov_alias_guid_info_rec_det *rec_det) -{ - dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes = - rec_det->guid_indexes; - memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs, - rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); - dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status = - rec_det->status; -} - -static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port) -{ - int j; - struct mlx4_sriov_alias_guid_info_rec_det rec_det ; - - for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) { - 
memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); - rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) | - IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 | - IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 | - IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 | - IB_SA_GUIDINFO_REC_GID7; - rec_det.status = MLX4_GUID_INFO_STATUS_IDLE; - set_administratively_guid_record(dev, port, j, &rec_det); - } -} - -static void alias_guid_work(struct work_struct *work) -{ - struct delayed_work *delay = to_delayed_work(work); - int ret = 0; - struct mlx4_next_alias_guid_work *rec; - struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port = - container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det, - alias_guid_work); - struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent; - struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid, - struct mlx4_ib_sriov, - alias_guid); - struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov); - - rec = kzalloc(sizeof *rec, GFP_KERNEL); - if (!rec) { - pr_err("alias_guid_work: No Memory\n"); - return; - } - - pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1); - ret = get_next_record_to_update(dev, sriov_alias_port->port, rec); - if (ret) { - pr_debug("No more records to update.\n"); - goto out; - } - - set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num, - &rec->rec_det); - -out: - kfree(rec); -} - - -void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port) -{ - unsigned long flags, flags1; - - if (!mlx4_is_master(dev->dev)) - return; - spin_lock_irqsave(&dev->sriov.going_down_lock, flags); - spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); - if (!dev->sriov.is_going_down) { - queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq, - &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0); - } - spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); - spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); -} - -void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) -{ - int i; - struct mlx4_ib_sriov *sriov = &dev->sriov; - struct mlx4_alias_guid_work_context *cb_ctx; - struct mlx4_sriov_alias_guid_port_rec_det *det; - struct ib_sa_query *sa_query; - unsigned long flags; - - for (i = 0 ; i < dev->num_ports; i++) { - cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work); - det = &sriov->alias_guid.ports_guid[i]; - spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); - while (!list_empty(&det->cb_list)) { - cb_ctx = list_entry(det->cb_list.next, - struct mlx4_alias_guid_work_context, - list); - sa_query = cb_ctx->sa_query; - cb_ctx->sa_query = NULL; - list_del(&cb_ctx->list); - spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); - ib_sa_cancel_query(cb_ctx->query_id, sa_query); - wait_for_completion(&cb_ctx->done); - kfree(cb_ctx); - spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); - } - spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); - } - for (i = 0 ; i < dev->num_ports; i++) { - flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); - destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); - } - ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); - kfree(dev->sriov.alias_guid.sa_client); -} - -int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) -{ - char alias_wq_name[15]; - int ret = 0; - int i, j, k; - union ib_gid gid; - - if (!mlx4_is_master(dev->dev)) - return 0; - dev->sriov.alias_guid.sa_client = - 
kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL); - if (!dev->sriov.alias_guid.sa_client) - return -ENOMEM; - - ib_sa_register_client(dev->sriov.alias_guid.sa_client); - - spin_lock_init(&dev->sriov.alias_guid.ag_work_lock); - - for (i = 1; i <= dev->num_ports; ++i) { - if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) { - ret = -EFAULT; - goto err_unregister; - } - } - - for (i = 0 ; i < dev->num_ports; i++) { - memset(&dev->sriov.alias_guid.ports_guid[i], 0, - sizeof (struct mlx4_sriov_alias_guid_port_rec_det)); - /*Check if the SM doesn't need to assign the GUIDs*/ - for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { - if (mlx4_ib_sm_guid_assign) { - dev->sriov.alias_guid.ports_guid[i]. - all_rec_per_port[j]. - ownership = MLX4_GUID_DRIVER_ASSIGN; - continue; - } - dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j]. - ownership = MLX4_GUID_NONE_ASSIGN; - /*mark each val as it was deleted, - till the sysAdmin will give it valid val*/ - for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) { - *(__be64 *)&dev->sriov.alias_guid.ports_guid[i]. - all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] = - cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL); - } - } - INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list); - /*prepare the records, set them to be allocated by sm*/ - for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) - invalidate_guid_record(dev, i + 1, j); - - dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid; - dev->sriov.alias_guid.ports_guid[i].port = i; - if (mlx4_ib_sm_guid_assign) - set_all_slaves_guids(dev, i); - - snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i); - dev->sriov.alias_guid.ports_guid[i].wq = - create_singlethread_workqueue(alias_wq_name); - if (!dev->sriov.alias_guid.ports_guid[i].wq) { - ret = -ENOMEM; - goto err_thread; - } - INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work, - alias_guid_work); - } - return 0; - -err_thread: - for (--i; i >= 0; i--) { - destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); - dev->sriov.alias_guid.ports_guid[i].wq = NULL; - } - -err_unregister: - ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); - kfree(dev->sriov.alias_guid.sa_client); - dev->sriov.alias_guid.sa_client = NULL; - pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret); - return ret; -} diff --git a/trunk/drivers/infiniband/hw/mlx4/cm.c b/trunk/drivers/infiniband/hw/mlx4/cm.c deleted file mode 100644 index e25e4dafb8a8..000000000000 --- a/trunk/drivers/infiniband/hw/mlx4/cm.c +++ /dev/null @@ -1,437 +0,0 @@ -/* - * Copyright (c) 2012 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_mad.h>
-
-#include <linux/mlx4/cmd.h>
-#include <linux/rbtree.h>
-#include <linux/idr.h>
-#include <rdma/ib_cm.h>
-
-#include "mlx4_ib.h"
-
-#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
-
-struct id_map_entry {
-	struct rb_node node;
-
-	u32 sl_cm_id;
-	u32 pv_cm_id;
-	int slave_id;
-	int scheduled_delete;
-	struct mlx4_ib_dev *dev;
-
-	struct list_head list;
-	struct delayed_work timeout;
-};
-
-struct cm_generic_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 remote_comm_id;
-};
-
-struct cm_req_msg {
-	unsigned char unused[0x60];
-	union ib_gid primary_path_sgid;
-};
-
-
-static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
-{
-	struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-	msg->local_comm_id = cpu_to_be32(cm_id);
-}
-
-static u32 get_local_comm_id(struct ib_mad *mad)
-{
-	struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-
-	return be32_to_cpu(msg->local_comm_id);
-}
-
-static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
-{
-	struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-	msg->remote_comm_id = cpu_to_be32(cm_id);
-}
-
-static u32 get_remote_comm_id(struct ib_mad *mad)
-{
-	struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-
-	return be32_to_cpu(msg->remote_comm_id);
-}
-
-static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
-{
-	struct cm_req_msg *msg = (struct cm_req_msg *)mad;
-
-	return msg->primary_path_sgid;
-}
-
-/* Lock should be taken before called */
-static struct id_map_entry *
-id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
-{
-	struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
-	struct rb_node *node = sl_id_map->rb_node;
-
-	while (node) {
-		struct id_map_entry *id_map_entry =
-			rb_entry(node, struct id_map_entry, node);
-
-		if (id_map_entry->sl_cm_id > sl_cm_id)
-			node = node->rb_left;
-		else if (id_map_entry->sl_cm_id < sl_cm_id)
-			node = node->rb_right;
-		else if (id_map_entry->slave_id > slave_id)
-			node = node->rb_left;
-		else if (id_map_entry->slave_id < slave_id)
-			node = node->rb_right;
-		else
-			return id_map_entry;
-	}
-	return NULL;
-}
-
-static void id_map_ent_timeout(struct work_struct *work)
-{
-	struct delayed_work *delay = to_delayed_work(work);
-	struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
-	struct id_map_entry *db_ent, *found_ent;
-	struct mlx4_ib_dev *dev = ent->dev;
-	struct mlx4_ib_sriov *sriov = &dev->sriov;
-	struct rb_root *sl_id_map = &sriov->sl_id_map;
-	int pv_id = (int) ent->pv_cm_id;
-
-	spin_lock(&sriov->id_map_lock);
-	db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
-	if (!db_ent)
-		goto out;
-	found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
-	if (found_ent && found_ent == ent)
-		rb_erase(&found_ent->node, sl_id_map);
-	idr_remove(&sriov->pv_id_table, pv_id);
-
-out:
-	list_del(&ent->list);
-	spin_unlock(&sriov->id_map_lock);
-	kfree(ent);
-}
-
-static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
-{
-	struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
-	struct
rb_root *sl_id_map = &sriov->sl_id_map; - struct id_map_entry *ent, *found_ent; - - spin_lock(&sriov->id_map_lock); - ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id); - if (!ent) - goto out; - found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id); - if (found_ent && found_ent == ent) - rb_erase(&found_ent->node, sl_id_map); - idr_remove(&sriov->pv_id_table, pv_cm_id); -out: - spin_unlock(&sriov->id_map_lock); -} - -static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new) -{ - struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map; - struct rb_node **link = &sl_id_map->rb_node, *parent = NULL; - struct id_map_entry *ent; - int slave_id = new->slave_id; - int sl_cm_id = new->sl_cm_id; - - ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id); - if (ent) { - pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n", - sl_cm_id); - - rb_replace_node(&ent->node, &new->node, sl_id_map); - return; - } - - /* Go to the bottom of the tree */ - while (*link) { - parent = *link; - ent = rb_entry(parent, struct id_map_entry, node); - - if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id)) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, sl_id_map); -} - -static struct id_map_entry * -id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) -{ - int ret, id; - static int next_id; - struct id_map_entry *ent; - struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; - - ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL); - if (!ent) { - mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n"); - return ERR_PTR(-ENOMEM); - } - - ent->sl_cm_id = sl_cm_id; - ent->slave_id = slave_id; - ent->scheduled_delete = 0; - ent->dev = to_mdev(ibdev); - INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout); - - do { - spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); - ret = idr_get_new_above(&sriov->pv_id_table, ent, - next_id, &id); - if (!ret) { - next_id = ((unsigned) id + 1) & MAX_ID_MASK; - ent->pv_cm_id = (u32)id; - sl_id_map_add(ibdev, ent); - } - - spin_unlock(&sriov->id_map_lock); - } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL)); - /*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/ - if (!ret) { - spin_lock(&sriov->id_map_lock); - list_add_tail(&ent->list, &sriov->cm_list); - spin_unlock(&sriov->id_map_lock); - return ent; - } - /*error flow*/ - kfree(ent); - mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret); - return ERR_PTR(-ENOMEM); -} - -static struct id_map_entry * -id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id) -{ - struct id_map_entry *ent; - struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; - - spin_lock(&sriov->id_map_lock); - if (*pv_cm_id == -1) { - ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id); - if (ent) - *pv_cm_id = (int) ent->pv_cm_id; - } else - ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id); - spin_unlock(&sriov->id_map_lock); - - return ent; -} - -static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) -{ - struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; - unsigned long flags; - - spin_lock_irqsave(&sriov->going_down_lock, flags); - spin_lock(&sriov->id_map_lock); - /*make sure that there is no schedule inside the scheduled work.*/ - if (!sriov->is_going_down) { - id->scheduled_delete = 1; - 
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); - } - spin_unlock(&sriov->id_map_lock); - spin_unlock_irqrestore(&sriov->going_down_lock, flags); -} - -int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, - struct ib_mad *mad) -{ - struct id_map_entry *id; - u32 sl_cm_id; - int pv_cm_id = -1; - - sl_cm_id = get_local_comm_id(mad); - - if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || - mad->mad_hdr.attr_id == CM_REP_ATTR_ID) { - id = id_map_alloc(ibdev, slave_id, sl_cm_id); - if (IS_ERR(id)) { - mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n", - __func__, slave_id, sl_cm_id); - return PTR_ERR(id); - } - } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) { - return 0; - } else { - id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); - } - - if (!id) { - pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n", - slave_id, sl_cm_id); - return -EINVAL; - } - - set_local_comm_id(mad, id->pv_cm_id); - - if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) - schedule_delayed(ibdev, id); - else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) - id_map_find_del(ibdev, pv_cm_id); - - return 0; -} - -int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, - struct ib_mad *mad) -{ - u32 pv_cm_id; - struct id_map_entry *id; - - if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) { - union ib_gid gid; - - gid = gid_from_req_msg(ibdev, mad); - *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id); - if (*slave < 0) { - mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n", - gid.global.interface_id); - return -ENOENT; - } - return 0; - } - - pv_cm_id = get_remote_comm_id(mad); - id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1); - - if (!id) { - pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id); - return -ENOENT; - } - - *slave = id->slave_id; - set_remote_comm_id(mad, id->sl_cm_id); - - if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) - schedule_delayed(ibdev, id); - else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || - mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) { - id_map_find_del(ibdev, (int) pv_cm_id); - } - - return 0; -} - -void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev) -{ - spin_lock_init(&dev->sriov.id_map_lock); - INIT_LIST_HEAD(&dev->sriov.cm_list); - dev->sriov.sl_id_map = RB_ROOT; - idr_init(&dev->sriov.pv_id_table); - idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL); -} - -/* slave = -1 ==> all slaves */ -/* TBD -- call paravirt clean for single slave. 
Need for slave RESET event */ -void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) -{ - struct mlx4_ib_sriov *sriov = &dev->sriov; - struct rb_root *sl_id_map = &sriov->sl_id_map; - struct list_head lh; - struct rb_node *nd; - int need_flush = 1; - struct id_map_entry *map, *tmp_map; - /* cancel all delayed work queue entries */ - INIT_LIST_HEAD(&lh); - spin_lock(&sriov->id_map_lock); - list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) { - if (slave < 0 || slave == map->slave_id) { - if (map->scheduled_delete) - need_flush &= !!cancel_delayed_work(&map->timeout); - } - } - - spin_unlock(&sriov->id_map_lock); - - if (!need_flush) - flush_scheduled_work(); /* make sure all timers were flushed */ - - /* now, remove all leftover entries from databases*/ - spin_lock(&sriov->id_map_lock); - if (slave < 0) { - while (rb_first(sl_id_map)) { - struct id_map_entry *ent = - rb_entry(rb_first(sl_id_map), - struct id_map_entry, node); - - rb_erase(&ent->node, sl_id_map); - idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id); - } - list_splice_init(&dev->sriov.cm_list, &lh); - } else { - /* first, move nodes belonging to slave to db remove list */ - nd = rb_first(sl_id_map); - while (nd) { - struct id_map_entry *ent = - rb_entry(nd, struct id_map_entry, node); - nd = rb_next(nd); - if (ent->slave_id == slave) - list_move_tail(&ent->list, &lh); - } - /* remove those nodes from databases */ - list_for_each_entry_safe(map, tmp_map, &lh, list) { - rb_erase(&map->node, sl_id_map); - idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id); - } - - /* add remaining nodes from cm_list */ - list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) { - if (slave == map->slave_id) - list_move_tail(&map->list, &lh); - } - } - - spin_unlock(&sriov->id_map_lock); - - /* free any map entries left behind due to cancel_delayed_work above */ - list_for_each_entry_safe(map, tmp_map, &lh, list) { - list_del(&map->list); - kfree(map); - } -} diff --git a/trunk/drivers/infiniband/hw/mlx4/cq.c b/trunk/drivers/infiniband/hw/mlx4/cq.c index c9eb6a6815ce..6d4ef71cbcdf 100644 --- a/trunk/drivers/infiniband/hw/mlx4/cq.c +++ b/trunk/drivers/infiniband/hw/mlx4/cq.c @@ -547,26 +547,6 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) checksum == cpu_to_be16(0xffff); } -static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe) -{ - struct mlx4_ib_proxy_sqp_hdr *hdr; - - ib_dma_sync_single_for_cpu(qp->ibqp.device, - qp->sqp_proxy_rcv[tail].map, - sizeof (struct mlx4_ib_proxy_sqp_hdr), - DMA_FROM_DEVICE); - hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr); - wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index); - wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); - wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); - wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF; - wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? 
(IB_WC_GRH) : 0;
-	wc->dlid_path_bits = 0;
-
-	return 0;
-}
-
 static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 			    struct mlx4_ib_qp **cur_qp,
 			    struct ib_wc *wc)
@@ -579,7 +559,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 	int is_error;
 	u32 g_mlpath_rqpn;
 	u16 wqe_ctr;
-	unsigned tail = 0;
 
 repoll:
 	cqe = next_cqe_sw(cq);
@@ -655,8 +634,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 			mlx4_ib_free_srq_wqe(srq, wqe_ctr);
 		} else {
 			wq = &(*cur_qp)->rq;
-			tail = wq->tail & (wq->wqe_cnt - 1);
-			wc->wr_id = wq->wrid[tail];
+			wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
 			++wq->tail;
 		}
 
@@ -739,13 +717,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 		break;
 	}
 
-	if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
-		if ((*cur_qp)->mlx4_ib_qp_type &
-		    (MLX4_IB_QPT_PROXY_SMI_OWNER |
-		     MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
-			return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
-	}
-
 	wc->slid	   = be16_to_cpu(cqe->rlid);
 	g_mlpath_rqpn	   = be32_to_cpu(cqe->g_mlpath_rqpn);
 	wc->src_qp	   = g_mlpath_rqpn & 0xffffff;
diff --git a/trunk/drivers/infiniband/hw/mlx4/mad.c b/trunk/drivers/infiniband/hw/mlx4/mad.c
index 21a794152d15..9c2ae7efd00f 100644
--- a/trunk/drivers/infiniband/hw/mlx4/mad.c
+++ b/trunk/drivers/infiniband/hw/mlx4/mad.c
@@ -32,10 +32,7 @@
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
-#include <rdma/ib_sa.h>
-#include <rdma/ib_cache.h>
 
-#include <linux/random.h>
 #include <linux/mlx4/cmd.h>
 #include <linux/gfp.h>
 #include <rdma/ib_pma.h>
@@ -47,62 +44,7 @@ enum {
 	MLX4_IB_VENDOR_CLASS2 = 0xa
 };
 
-#define MLX4_TUN_SEND_WRID_SHIFT 34
-#define MLX4_TUN_QPN_SHIFT 32
-#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
-#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
-
-#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
-#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
-
- /* Port mgmt change event handling */
-
-#define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
-#define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
-#define NUM_IDX_IN_PKEY_TBL_BLK 32
-#define GUID_TBL_ENTRY_SIZE 8	   /* size in bytes */
-#define GUID_TBL_BLK_NUM_ENTRIES 8
-#define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
-
-struct mlx4_mad_rcv_buf {
-	struct ib_grh grh;
-	u8 payload[256];
-} __packed;
-
-struct mlx4_mad_snd_buf {
-	u8 payload[256];
-} __packed;
-
-struct mlx4_tunnel_mad {
-	struct ib_grh grh;
-	struct mlx4_ib_tunnel_header hdr;
-	struct ib_mad mad;
-} __packed;
-
-struct mlx4_rcv_tunnel_mad {
-	struct mlx4_rcv_tunnel_hdr hdr;
-	struct ib_grh grh;
-	struct ib_mad mad;
-} __packed;
-
-static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
-static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
-static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
-				int block, u32 change_bitmap);
-
-__be64 mlx4_ib_gen_node_guid(void)
-{
-#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
-	return cpu_to_be64(NODE_GUID_HI | random32());
-}
-
-__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
-{
-	return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
-		cpu_to_be64(0xff00000000000000LL);
-}
-
-int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
 		 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
 		 void *in_mad, void *response_mad)
 {
@@ -129,13 +71,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
 	 * Key check traps can't be generated unless we have
in_wc to * tell us where to send the trap. */ - if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc) + if (ignore_mkey || !in_wc) op_modifier |= 0x1; - if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc) + if (ignore_bkey || !in_wc) op_modifier |= 0x2; - if (mlx4_is_mfunc(dev->dev) && - (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc)) - op_modifier |= 0x8; if (in_wc) { struct { @@ -168,10 +107,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, in_modifier |= in_wc->slid << 16; } - err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, - mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier, + err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, + in_modifier, op_modifier, MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, - (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (!err) memcpy(response_mad, outmailbox->buf, 256); @@ -217,10 +156,6 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, { struct ib_port_info *pinfo; u16 lid; - __be16 *base; - u32 bn, pkey_change_bitmap; - int i; - struct mlx4_ib_dev *dev = to_mdev(ibdev); if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || @@ -236,46 +171,17 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, pinfo->neighbormtu_mastersmsl & 0xf); if (pinfo->clientrereg_resv_subnetto & 0x80) - handle_client_rereg_event(dev, port_num); + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_CLIENT_REREGISTER); if (prev_lid != lid) - handle_lid_change_event(dev, port_num); + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_LID_CHANGE); break; case IB_SMP_ATTR_PKEY_TABLE: - if (!mlx4_is_mfunc(dev->dev)) { - mlx4_ib_dispatch_event(dev, port_num, - IB_EVENT_PKEY_CHANGE); - break; - } - - /* at this point, we are running in the master. - * Slaves do not receive SMPs. 
- */ - bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF; - base = (__be16 *) &(((struct ib_smp *)mad)->data[0]); - pkey_change_bitmap = 0; - for (i = 0; i < 32; i++) { - pr_debug("PKEY[%d] = x%x\n", - i + bn*32, be16_to_cpu(base[i])); - if (be16_to_cpu(base[i]) != - dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) { - pkey_change_bitmap |= (1 << i); - dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] = - be16_to_cpu(base[i]); - } - } - pr_debug("PKEY Change event: port=%d, " - "block=0x%x, change_bitmap=0x%x\n", - port_num, bn, pkey_change_bitmap); - - if (pkey_change_bitmap) { - mlx4_ib_dispatch_event(dev, port_num, - IB_EVENT_PKEY_CHANGE); - if (!dev->sriov.is_going_down) - __propagate_pkey_ev(dev, port_num, bn, - pkey_change_bitmap); - } + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); break; case IB_SMP_ATTR_GUID_INFO: @@ -283,56 +189,12 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, if (!mlx4_is_master(dev->dev)) mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_GID_CHANGE); - /*if master, notify relevant slaves*/ - if (mlx4_is_master(dev->dev) && - !dev->sriov.is_going_down) { - bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod); - mlx4_ib_update_cache_on_guid_change(dev, bn, port_num, - (u8 *)(&((struct ib_smp *)mad)->data)); - mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num, - (u8 *)(&((struct ib_smp *)mad)->data)); - } break; - default: break; } } -static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, - int block, u32 change_bitmap) -{ - int i, ix, slave, err; - int have_event = 0; - - for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) { - if (slave == mlx4_master_func_num(dev->dev)) - continue; - if (!mlx4_is_slave_active(dev->dev, slave)) - continue; - - have_event = 0; - for (i = 0; i < 32; i++) { - if (!(change_bitmap & (1 << i))) - continue; - for (ix = 0; - ix < dev->dev->caps.pkey_table_len[port_num]; ix++) { - if (dev->pkeys.virt2phys_pkey[slave][port_num - 1] - [ix] == i + 32 * block) { - err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num); - pr_debug("propagate_pkey_ev: slave %d," - " port %d, ix %d (%d)\n", - slave, port_num, ix, err); - have_event = 1; - break; - } - } - if (have_event) - break; - } - } -} - static void node_desc_override(struct ib_device *dev, struct ib_mad *mad) { @@ -380,268 +242,6 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma } } -static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave, - struct ib_sa_mad *sa_mad) -{ - int ret = 0; - - /* dispatch to different sa handlers */ - switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) { - case IB_SA_ATTR_MC_MEMBER_REC: - ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad); - break; - default: - break; - } - return ret; -} - -int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid) -{ - struct mlx4_ib_dev *dev = to_mdev(ibdev); - int i; - - for (i = 0; i < dev->dev->caps.sqp_demux; i++) { - if (dev->sriov.demux[port - 1].guid_cache[i] == guid) - return i; - } - return -1; -} - - -static int get_pkey_phys_indices(struct mlx4_ib_dev *ibdev, u8 port, u8 ph_pkey_ix, - u8 *full_pk_ix, u8 *partial_pk_ix, - int *is_full_member) -{ - u16 search_pkey; - int fm; - int err = 0; - u16 pk; - - err = ib_get_cached_pkey(&ibdev->ib_dev, port, ph_pkey_ix, &search_pkey); - if (err) - return err; - - fm = (search_pkey & 0x8000) ? 
1 : 0; - if (fm) { - *full_pk_ix = ph_pkey_ix; - search_pkey &= 0x7FFF; - } else { - *partial_pk_ix = ph_pkey_ix; - search_pkey |= 0x8000; - } - - if (ib_find_exact_cached_pkey(&ibdev->ib_dev, port, search_pkey, &pk)) - pk = 0xFFFF; - - if (fm) - *partial_pk_ix = (pk & 0xFF); - else - *full_pk_ix = (pk & 0xFF); - - *is_full_member = fm; - return err; -} - -int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, - enum ib_qp_type dest_qpt, struct ib_wc *wc, - struct ib_grh *grh, struct ib_mad *mad) -{ - struct ib_sge list; - struct ib_send_wr wr, *bad_wr; - struct mlx4_ib_demux_pv_ctx *tun_ctx; - struct mlx4_ib_demux_pv_qp *tun_qp; - struct mlx4_rcv_tunnel_mad *tun_mad; - struct ib_ah_attr attr; - struct ib_ah *ah; - struct ib_qp *src_qp = NULL; - unsigned tun_tx_ix = 0; - int dqpn; - int ret = 0; - int i; - int is_full_member = 0; - u16 tun_pkey_ix; - u8 ph_pkey_ix, full_pk_ix = 0, partial_pk_ix = 0; - - if (dest_qpt > IB_QPT_GSI) - return -EINVAL; - - tun_ctx = dev->sriov.demux[port-1].tun[slave]; - - /* check if proxy qp created */ - if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE) - return -EAGAIN; - - /* QP0 forwarding only for Dom0 */ - if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - - if (!dest_qpt) - tun_qp = &tun_ctx->qp[0]; - else - tun_qp = &tun_ctx->qp[1]; - - /* compute pkey index for slave */ - /* get physical pkey -- virtualized Dom0 pkey to phys*/ - if (dest_qpt) { - ph_pkey_ix = - dev->pkeys.virt2phys_pkey[mlx4_master_func_num(dev->dev)][port - 1][wc->pkey_index]; - - /* now, translate this to the slave pkey index */ - ret = get_pkey_phys_indices(dev, port, ph_pkey_ix, &full_pk_ix, - &partial_pk_ix, &is_full_member); - if (ret) - return -EINVAL; - - for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) { - if ((dev->pkeys.virt2phys_pkey[slave][port - 1][i] == full_pk_ix) || - (is_full_member && - (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == partial_pk_ix))) - break; - } - if (i == dev->dev->caps.pkey_table_len[port]) - return -EINVAL; - tun_pkey_ix = i; - } else - tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; - - dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1; - - /* get tunnel tx data buf for slave */ - src_qp = tun_qp->qp; - - /* create ah. Just need an empty one with the port num for the post send. 
- * The driver will set the force loopback bit in post_send */ - memset(&attr, 0, sizeof attr); - attr.port_num = port; - ah = ib_create_ah(tun_ctx->pd, &attr); - if (IS_ERR(ah)) - return -ENOMEM; - - /* allocate tunnel tx buf after pass failure returns */ - spin_lock(&tun_qp->tx_lock); - if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >= - (MLX4_NUM_TUNNEL_BUFS - 1)) - ret = -EAGAIN; - else - tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); - spin_unlock(&tun_qp->tx_lock); - if (ret) - goto out; - - tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); - if (tun_qp->tx_ring[tun_tx_ix].ah) - ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah); - tun_qp->tx_ring[tun_tx_ix].ah = ah; - ib_dma_sync_single_for_cpu(&dev->ib_dev, - tun_qp->tx_ring[tun_tx_ix].buf.map, - sizeof (struct mlx4_rcv_tunnel_mad), - DMA_TO_DEVICE); - - /* copy over to tunnel buffer */ - if (grh) - memcpy(&tun_mad->grh, grh, sizeof *grh); - memcpy(&tun_mad->mad, mad, sizeof *mad); - - /* adjust tunnel data */ - tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix); - tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); - tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF); - tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0; - - ib_dma_sync_single_for_device(&dev->ib_dev, - tun_qp->tx_ring[tun_tx_ix].buf.map, - sizeof (struct mlx4_rcv_tunnel_mad), - DMA_TO_DEVICE); - - list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map; - list.length = sizeof (struct mlx4_rcv_tunnel_mad); - list.lkey = tun_ctx->mr->lkey; - - wr.wr.ud.ah = ah; - wr.wr.ud.port_num = port; - wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - wr.wr.ud.remote_qpn = dqpn; - wr.next = NULL; - wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; - - ret = ib_post_send(src_qp, &wr, &bad_wr); -out: - if (ret) - ib_destroy_ah(ah); - return ret; -} - -static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, - struct ib_wc *wc, struct ib_grh *grh, - struct ib_mad *mad) -{ - struct mlx4_ib_dev *dev = to_mdev(ibdev); - int err; - int slave; - u8 *slave_id; - - /* Initially assume that this mad is for us */ - slave = mlx4_master_func_num(dev->dev); - - /* See if the slave id is encoded in a response mad */ - if (mad->mad_hdr.method & 0x80) { - slave_id = (u8 *) &mad->mad_hdr.tid; - slave = *slave_id; - if (slave != 255) /*255 indicates the dom0*/ - *slave_id = 0; /* remap tid */ - } - - /* If a grh is present, we demux according to it */ - if (wc->wc_flags & IB_WC_GRH) { - slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id); - if (slave < 0) { - mlx4_ib_warn(ibdev, "failed matching grh\n"); - return -ENOENT; - } - } - /* Class-specific handling */ - switch (mad->mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_SUBN_ADM: - if (mlx4_ib_demux_sa_handler(ibdev, port, slave, - (struct ib_sa_mad *) mad)) - return 0; - break; - case IB_MGMT_CLASS_CM: - if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad)) - return 0; - break; - case IB_MGMT_CLASS_DEVICE_MGMT: - if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP) - return 0; - break; - default: - /* Drop unsupported classes for slaves in tunnel mode */ - if (slave != mlx4_master_func_num(dev->dev)) { - pr_debug("dropping unsupported ingress mad from class:%d " - "for slave:%d\n", mad->mad_hdr.mgmt_class, slave); - return 0; - } - } - /*make sure that no slave==255 was not handled yet.*/ - 
if (slave >= dev->dev->caps.sqp_demux) { - mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n", - slave, dev->dev->caps.sqp_demux); - return -ENOENT; - } - - err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); - if (err) - pr_debug("failed sending to slave %d via tunnel qp (%d)\n", - slave, err); - return 0; -} - static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) @@ -706,9 +306,8 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, prev_lid = pattr.lid; err = mlx4_MAD_IFC(to_mdev(ibdev), - (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) | - (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) | - MLX4_MAD_IFC_NET_VIEW, + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, in_grh, in_mad, out_mad); if (err) return IB_MAD_RESULT_FAILURE; @@ -716,9 +315,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, if (!out_mad->mad_hdr.status) { if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) smp_snoop(ibdev, port_num, in_mad, prev_lid); - /* slaves get node desc from FW */ - if (!mlx4_is_slave(to_mdev(ibdev)->dev)) - node_desc_override(ibdev, out_mad); + node_desc_override(ibdev, out_mad); } /* set return bit in status of directed route responses */ @@ -801,8 +398,6 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { - if (mad_send_wc->send_buf->context[0]) - ib_destroy_ah(mad_send_wc->send_buf->context[0]); ib_free_send_mad(mad_send_wc->send_buf); } @@ -861,90 +456,6 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) } } -static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num) -{ - mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE); - - if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) - mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, - MLX4_EQ_PORT_INFO_LID_CHANGE_MASK); -} - -static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num) -{ - /* re-configure the alias-guid and mcg's */ - if (mlx4_is_master(dev->dev)) { - mlx4_ib_invalidate_all_guid_record(dev, port_num); - - if (!dev->sriov.is_going_down) { - mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0); - mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, - MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK); - } - } - mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER); -} - -static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, - struct mlx4_eqe *eqe) -{ - __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe), - GET_MASK_FROM_EQE(eqe)); -} - -static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num, - u32 guid_tbl_blk_num, u32 change_bitmap) -{ - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - u16 i; - - if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev)) - return; - - in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); - if (!in_mad || !out_mad) { - mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n"); - goto out; - } - - guid_tbl_blk_num *= 4; - - for (i = 0; i < 4; i++) { - if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff))) - continue; - memset(in_mad, 0, sizeof *in_mad); - memset(out_mad, 0, sizeof *out_mad); - - 
in_mad->base_version = 1; - in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - in_mad->class_version = 1; - in_mad->method = IB_MGMT_METHOD_GET; - in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; - in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i); - - if (mlx4_MAD_IFC(dev, - MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW, - port_num, NULL, NULL, in_mad, out_mad)) { - mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n"); - goto out; - } - - mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i, - port_num, - (u8 *)(&((struct ib_smp *)out_mad)->data)); - mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i, - port_num, - (u8 *)(&((struct ib_smp *)out_mad)->data)); - } - -out: - kfree(in_mad); - kfree(out_mad); - return; -} - void handle_port_mgmt_change_event(struct work_struct *work) { struct ib_event_work *ew = container_of(work, struct ib_event_work, work); @@ -952,8 +463,6 @@ void handle_port_mgmt_change_event(struct work_struct *work) struct mlx4_eqe *eqe = &(ew->ib_eqe); u8 port = eqe->event.port_mgmt_change.port; u32 changed_attr; - u32 tbl_block; - u32 change_bitmap; switch (eqe->subtype) { case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: @@ -969,36 +478,24 @@ void handle_port_mgmt_change_event(struct work_struct *work) /* Check if it is a lid change event */ if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) - handle_lid_change_event(dev, port); + mlx4_ib_dispatch_event(dev, port, IB_EVENT_LID_CHANGE); /* Generate GUID changed event */ - if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); - /*if master, notify all slaves*/ - if (mlx4_is_master(dev->dev)) - mlx4_gen_slaves_port_mgt_ev(dev->dev, port, - MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK); - } if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) - handle_client_rereg_event(dev, port); + mlx4_ib_dispatch_event(dev, port, + IB_EVENT_CLIENT_REREGISTER); break; case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); - if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) - propagate_pkey_ev(dev, port, eqe); break; case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: /* paravirtualized master's guid is guid 0 -- does not change */ if (!mlx4_is_master(dev->dev)) mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); - /*if master, notify relevant slaves*/ - else if (!dev->sriov.is_going_down) { - tbl_block = GET_BLK_PTR_FROM_EQE(eqe); - change_bitmap = GET_MASK_FROM_EQE(eqe); - handle_slaves_guid_change(dev, port, tbl_block, change_bitmap); - } break; default: pr_warn("Unsupported subtype 0x%x for " @@ -1019,1035 +516,3 @@ void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, ib_dispatch_event(&event); } - -static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg) -{ - unsigned long flags; - struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context; - struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); - spin_lock_irqsave(&dev->sriov.going_down_lock, flags); - if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE) - queue_work(ctx->wq, &ctx->work); - spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); -} - -static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, - struct mlx4_ib_demux_pv_qp *tun_qp, - int index) -{ - struct ib_sge sg_list; - struct ib_recv_wr recv_wr, *bad_recv_wr; - int size; - - size = (tun_qp->qp->qp_type == IB_QPT_UD) ? 
- sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf); - - sg_list.addr = tun_qp->ring[index].map; - sg_list.length = size; - sg_list.lkey = ctx->mr->lkey; - - recv_wr.next = NULL; - recv_wr.sg_list = &sg_list; - recv_wr.num_sge = 1; - recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV | - MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt); - ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map, - size, DMA_FROM_DEVICE); - return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr); -} - -static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port, - int slave, struct ib_sa_mad *sa_mad) -{ - int ret = 0; - - /* dispatch to different sa handlers */ - switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) { - case IB_SA_ATTR_MC_MEMBER_REC: - ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad); - break; - default: - break; - } - return ret; -} - -static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) -{ - int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; - - return (qpn >= proxy_start && qpn <= proxy_start + 1); -} - - -int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, - enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad) -{ - struct ib_sge list; - struct ib_send_wr wr, *bad_wr; - struct mlx4_ib_demux_pv_ctx *sqp_ctx; - struct mlx4_ib_demux_pv_qp *sqp; - struct mlx4_mad_snd_buf *sqp_mad; - struct ib_ah *ah; - struct ib_qp *send_qp = NULL; - unsigned wire_tx_ix = 0; - int ret = 0; - u16 wire_pkey_ix; - int src_qpnum; - u8 sgid_index; - - - sqp_ctx = dev->sriov.sqps[port-1]; - - /* check if proxy qp created */ - if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE) - return -EAGAIN; - - /* QP0 forwarding only for Dom0 */ - if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - - if (dest_qpt == IB_QPT_SMI) { - src_qpnum = 0; - sqp = &sqp_ctx->qp[0]; - wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; - } else { - src_qpnum = 1; - sqp = &sqp_ctx->qp[1]; - wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index]; - } - - send_qp = sqp->qp; - - /* create ah */ - sgid_index = attr->grh.sgid_index; - attr->grh.sgid_index = 0; - ah = ib_create_ah(sqp_ctx->pd, attr); - if (IS_ERR(ah)) - return -ENOMEM; - attr->grh.sgid_index = sgid_index; - to_mah(ah)->av.ib.gid_index = sgid_index; - /* get rid of force-loopback bit */ - to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); - spin_lock(&sqp->tx_lock); - if (sqp->tx_ix_head - sqp->tx_ix_tail >= - (MLX4_NUM_TUNNEL_BUFS - 1)) - ret = -EAGAIN; - else - wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); - spin_unlock(&sqp->tx_lock); - if (ret) - goto out; - - sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); - if (sqp->tx_ring[wire_tx_ix].ah) - ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah); - sqp->tx_ring[wire_tx_ix].ah = ah; - ib_dma_sync_single_for_cpu(&dev->ib_dev, - sqp->tx_ring[wire_tx_ix].buf.map, - sizeof (struct mlx4_mad_snd_buf), - DMA_TO_DEVICE); - - memcpy(&sqp_mad->payload, mad, sizeof *mad); - - ib_dma_sync_single_for_device(&dev->ib_dev, - sqp->tx_ring[wire_tx_ix].buf.map, - sizeof (struct mlx4_mad_snd_buf), - DMA_TO_DEVICE); - - list.addr = sqp->tx_ring[wire_tx_ix].buf.map; - list.length = sizeof (struct mlx4_mad_snd_buf); - list.lkey = sqp_ctx->mr->lkey; - - wr.wr.ud.ah = ah; - wr.wr.ud.port_num = port; - wr.wr.ud.pkey_index = wire_pkey_ix; - wr.wr.ud.remote_qkey = qkey; - wr.wr.ud.remote_qpn = 
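/*
 * Reviewer note -- a sketch, not from the patch, of the tx-ring flow
 * control used by both the tunnel and the wire send paths above. head
 * and tail are free-running counters: their difference is the number
 * of in-flight sends, and the low bits index a power-of-two ring. The
 * type and size below are illustrative; MLX4_NUM_TUNNEL_BUFS plays the
 * role of EX_RING_SIZE in the driver.
 */
#include <linux/spinlock.h>

#define EX_RING_SIZE 256		/* must be a power of two */

struct ex_tx_ring {
	spinlock_t lock;		/* initialize with spin_lock_init() */
	unsigned int head;		/* bumped when a slot is claimed */
	unsigned int tail;		/* bumped on send completion */
};

static int ex_tx_ring_claim(struct ex_tx_ring *r, unsigned int *ix)
{
	int ret = 0;

	spin_lock(&r->lock);
	if (r->head - r->tail >= EX_RING_SIZE - 1)
		ret = -EAGAIN;		/* ring full: back-pressure the caller */
	else
		*ix = (++r->head) & (EX_RING_SIZE - 1);
	spin_unlock(&r->lock);
	return ret;
}

static void ex_tx_ring_complete(struct ex_tx_ring *r)
{
	spin_lock(&r->lock);
	r->tail++;			/* frees one slot for future claims */
	spin_unlock(&r->lock);
}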
remote_qpn; - wr.next = NULL; - wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; - - ret = ib_post_send(send_qp, &wr, &bad_wr); -out: - if (ret) - ib_destroy_ah(ah); - return ret; -} - -static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc) -{ - struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); - struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)]; - int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1); - struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr; - struct mlx4_ib_ah ah; - struct ib_ah_attr ah_attr; - u8 *slave_id; - int slave; - - /* Get slave that sent this packet */ - if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || - wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX || - (wc->src_qp & 0x1) != ctx->port - 1 || - wc->src_qp & 0x4) { - mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp); - return; - } - slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8; - if (slave != ctx->slave) { - mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " - "belongs to another slave\n", wc->src_qp); - return; - } - if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) { - mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " - "non-master trying to send QP0 packets\n", wc->src_qp); - return; - } - - /* Map transaction ID */ - ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map, - sizeof (struct mlx4_tunnel_mad), - DMA_FROM_DEVICE); - switch (tunnel->mad.mad_hdr.method) { - case IB_MGMT_METHOD_SET: - case IB_MGMT_METHOD_GET: - case IB_MGMT_METHOD_REPORT: - case IB_SA_METHOD_GET_TABLE: - case IB_SA_METHOD_DELETE: - case IB_SA_METHOD_GET_MULTI: - case IB_SA_METHOD_GET_TRACE_TBL: - slave_id = (u8 *) &tunnel->mad.mad_hdr.tid; - if (*slave_id) { - mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d " - "class:%d slave:%d\n", *slave_id, - tunnel->mad.mad_hdr.mgmt_class, slave); - return; - } else - *slave_id = slave; - default: - /* nothing */; - } - - /* Class-specific handling */ - switch (tunnel->mad.mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_SUBN_ADM: - if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave, - (struct ib_sa_mad *) &tunnel->mad)) - return; - break; - case IB_MGMT_CLASS_CM: - if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave, - (struct ib_mad *) &tunnel->mad)) - return; - break; - case IB_MGMT_CLASS_DEVICE_MGMT: - if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET && - tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET) - return; - break; - default: - /* Drop unsupported classes for slaves in tunnel mode */ - if (slave != mlx4_master_func_num(dev->dev)) { - mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d " - "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave); - return; - } - } - - /* We are using standard ib_core services to send the mad, so generate a - * stadard address handle by decoding the tunnelled mlx4_ah fields */ - memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av)); - ah.ibah.device = ctx->ib_dev; - mlx4_ib_query_ah(&ah.ibah, &ah_attr); - if ((ah_attr.ah_flags & IB_AH_GRH) && - (ah_attr.grh.sgid_index != slave)) { - mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n", - slave, ah_attr.grh.sgid_index); - return; - } - - mlx4_ib_send_to_wire(dev, slave, ctx->port, - is_proxy_qp0(dev, wc->src_qp, slave) ? 
- IB_QPT_SMI : IB_QPT_GSI, - be16_to_cpu(tunnel->hdr.pkey_index), - be32_to_cpu(tunnel->hdr.remote_qpn), - be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, &tunnel->mad); -} - -static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, - enum ib_qp_type qp_type, int is_tun) -{ - int i; - struct mlx4_ib_demux_pv_qp *tun_qp; - int rx_buf_size, tx_buf_size; - - if (qp_type > IB_QPT_GSI) - return -EINVAL; - - tun_qp = &ctx->qp[qp_type]; - - tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS, - GFP_KERNEL); - if (!tun_qp->ring) - return -ENOMEM; - - tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, - sizeof (struct mlx4_ib_tun_tx_buf), - GFP_KERNEL); - if (!tun_qp->tx_ring) { - kfree(tun_qp->ring); - tun_qp->ring = NULL; - return -ENOMEM; - } - - if (is_tun) { - rx_buf_size = sizeof (struct mlx4_tunnel_mad); - tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); - } else { - rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); - tx_buf_size = sizeof (struct mlx4_mad_snd_buf); - } - - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { - tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL); - if (!tun_qp->ring[i].addr) - goto err; - tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev, - tun_qp->ring[i].addr, - rx_buf_size, - DMA_FROM_DEVICE); - } - - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { - tun_qp->tx_ring[i].buf.addr = - kmalloc(tx_buf_size, GFP_KERNEL); - if (!tun_qp->tx_ring[i].buf.addr) - goto tx_err; - tun_qp->tx_ring[i].buf.map = - ib_dma_map_single(ctx->ib_dev, - tun_qp->tx_ring[i].buf.addr, - tx_buf_size, - DMA_TO_DEVICE); - tun_qp->tx_ring[i].ah = NULL; - } - spin_lock_init(&tun_qp->tx_lock); - tun_qp->tx_ix_head = 0; - tun_qp->tx_ix_tail = 0; - tun_qp->proxy_qpt = qp_type; - - return 0; - -tx_err: - while (i > 0) { - --i; - ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, - tx_buf_size, DMA_TO_DEVICE); - kfree(tun_qp->tx_ring[i].buf.addr); - } - kfree(tun_qp->tx_ring); - tun_qp->tx_ring = NULL; - i = MLX4_NUM_TUNNEL_BUFS; -err: - while (i > 0) { - --i; - ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, - rx_buf_size, DMA_FROM_DEVICE); - kfree(tun_qp->ring[i].addr); - } - kfree(tun_qp->ring); - tun_qp->ring = NULL; - return -ENOMEM; -} - -static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, - enum ib_qp_type qp_type, int is_tun) -{ - int i; - struct mlx4_ib_demux_pv_qp *tun_qp; - int rx_buf_size, tx_buf_size; - - if (qp_type > IB_QPT_GSI) - return; - - tun_qp = &ctx->qp[qp_type]; - if (is_tun) { - rx_buf_size = sizeof (struct mlx4_tunnel_mad); - tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); - } else { - rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); - tx_buf_size = sizeof (struct mlx4_mad_snd_buf); - } - - - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { - ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, - rx_buf_size, DMA_FROM_DEVICE); - kfree(tun_qp->ring[i].addr); - } - - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { - ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, - tx_buf_size, DMA_TO_DEVICE); - kfree(tun_qp->tx_ring[i].buf.addr); - if (tun_qp->tx_ring[i].ah) - ib_destroy_ah(tun_qp->tx_ring[i].ah); - } - kfree(tun_qp->tx_ring); - kfree(tun_qp->ring); -} - -static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) -{ - struct mlx4_ib_demux_pv_ctx *ctx; - struct mlx4_ib_demux_pv_qp *tun_qp; - struct ib_wc wc; - int ret; - ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work); - ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); - - while (ib_poll_cq(ctx->cq, 1, &wc) == 1) { - tun_qp = 
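/*
 * Reviewer note -- the 64-bit work-request id used by these rings packs
 * three fields: a receive flag (tested with MLX4_TUN_IS_RECV), the
 * proxy QP slot (MLX4_TUN_WRID_QPN selects ctx->qp[0] or ctx->qp[1])
 * and the buffer index in the low bits. The exact bit positions live
 * in mlx4_ib.h; the layout below is illustrative only, not the
 * driver's.
 */
#include <stdint.h>

#define EX_WRID_RECV		(UINT64_C(1) << 63)	/* illustrative position */
#define EX_WRID_QPN_SHIFT	16			/* illustrative position */

static inline uint64_t ex_make_wrid(int is_recv, unsigned int qpt, unsigned int ix)
{
	return (is_recv ? EX_WRID_RECV : 0) |
	       ((uint64_t)qpt << EX_WRID_QPN_SHIFT) | ix;
}

static inline unsigned int ex_wrid_index(uint64_t wrid, unsigned int ring_size)
{
	/* ring_size is a power of two, like MLX4_NUM_TUNNEL_BUFS */
	return (unsigned int)(wrid & (ring_size - 1));
}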
&ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)]; - if (wc.status == IB_WC_SUCCESS) { - switch (wc.opcode) { - case IB_WC_RECV: - mlx4_ib_multiplex_mad(ctx, &wc); - ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, - wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)); - if (ret) - pr_err("Failed reposting tunnel " - "buf:%lld\n", wc.wr_id); - break; - case IB_WC_SEND: - pr_debug("received tunnel send completion:" - "wrid=0x%llx, status=0x%x\n", - wc.wr_id, wc.status); - ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah - = NULL; - spin_lock(&tun_qp->tx_lock); - tun_qp->tx_ix_tail++; - spin_unlock(&tun_qp->tx_lock); - - break; - default: - break; - } - } else { - pr_debug("mlx4_ib: completion error in tunnel: %d." - " status = %d, wrid = 0x%llx\n", - ctx->slave, wc.status, wc.wr_id); - if (!MLX4_TUN_IS_RECV(wc.wr_id)) { - ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah - = NULL; - spin_lock(&tun_qp->tx_lock); - tun_qp->tx_ix_tail++; - spin_unlock(&tun_qp->tx_lock); - } - } - } -} - -static void pv_qp_event_handler(struct ib_event *event, void *qp_context) -{ - struct mlx4_ib_demux_pv_ctx *sqp = qp_context; - - /* It's worse than that! He's dead, Jim! */ - pr_err("Fatal error (%d) on a MAD QP on port %d\n", - event->event, sqp->port); -} - -static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, - enum ib_qp_type qp_type, int create_tun) -{ - int i, ret; - struct mlx4_ib_demux_pv_qp *tun_qp; - struct mlx4_ib_qp_tunnel_init_attr qp_init_attr; - struct ib_qp_attr attr; - int qp_attr_mask_INIT; - - if (qp_type > IB_QPT_GSI) - return -EINVAL; - - tun_qp = &ctx->qp[qp_type]; - - memset(&qp_init_attr, 0, sizeof qp_init_attr); - qp_init_attr.init_attr.send_cq = ctx->cq; - qp_init_attr.init_attr.recv_cq = ctx->cq; - qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS; - qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS; - qp_init_attr.init_attr.cap.max_send_sge = 1; - qp_init_attr.init_attr.cap.max_recv_sge = 1; - if (create_tun) { - qp_init_attr.init_attr.qp_type = IB_QPT_UD; - qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP; - qp_init_attr.port = ctx->port; - qp_init_attr.slave = ctx->slave; - qp_init_attr.proxy_qp_type = qp_type; - qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | - IB_QP_QKEY | IB_QP_PORT; - } else { - qp_init_attr.init_attr.qp_type = qp_type; - qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP; - qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY; - } - qp_init_attr.init_attr.port_num = ctx->port; - qp_init_attr.init_attr.qp_context = ctx; - qp_init_attr.init_attr.event_handler = pv_qp_event_handler; - tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr); - if (IS_ERR(tun_qp->qp)) { - ret = PTR_ERR(tun_qp->qp); - tun_qp->qp = NULL; - pr_err("Couldn't create %s QP (%d)\n", - create_tun ? "tunnel" : "special", ret); - return ret; - } - - memset(&attr, 0, sizeof attr); - attr.qp_state = IB_QPS_INIT; - attr.pkey_index = - to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0]; - attr.qkey = IB_QP1_QKEY; - attr.port_num = ctx->port; - ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT); - if (ret) { - pr_err("Couldn't change %s qp state to INIT (%d)\n", - create_tun ? 
"tunnel" : "special", ret); - goto err_qp; - } - attr.qp_state = IB_QPS_RTR; - ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE); - if (ret) { - pr_err("Couldn't change %s qp state to RTR (%d)\n", - create_tun ? "tunnel" : "special", ret); - goto err_qp; - } - attr.qp_state = IB_QPS_RTS; - attr.sq_psn = 0; - ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); - if (ret) { - pr_err("Couldn't change %s qp state to RTS (%d)\n", - create_tun ? "tunnel" : "special", ret); - goto err_qp; - } - - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { - ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i); - if (ret) { - pr_err(" mlx4_ib_post_pv_buf error" - " (err = %d, i = %d)\n", ret, i); - goto err_qp; - } - } - return 0; - -err_qp: - ib_destroy_qp(tun_qp->qp); - tun_qp->qp = NULL; - return ret; -} - -/* - * IB MAD completion callback for real SQPs - */ -static void mlx4_ib_sqp_comp_worker(struct work_struct *work) -{ - struct mlx4_ib_demux_pv_ctx *ctx; - struct mlx4_ib_demux_pv_qp *sqp; - struct ib_wc wc; - struct ib_grh *grh; - struct ib_mad *mad; - - ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work); - ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); - - while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) { - sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)]; - if (wc.status == IB_WC_SUCCESS) { - switch (wc.opcode) { - case IB_WC_SEND: - ib_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah - = NULL; - spin_lock(&sqp->tx_lock); - sqp->tx_ix_tail++; - spin_unlock(&sqp->tx_lock); - break; - case IB_WC_RECV: - mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *) - (sqp->ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload); - grh = &(((struct mlx4_mad_rcv_buf *) - (sqp->ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh); - mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad); - if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1))) - pr_err("Failed reposting SQP " - "buf:%lld\n", wc.wr_id); - break; - default: - BUG_ON(1); - break; - } - } else { - pr_debug("mlx4_ib: completion error in tunnel: %d." 
- " status = %d, wrid = 0x%llx\n", - ctx->slave, wc.status, wc.wr_id); - if (!MLX4_TUN_IS_RECV(wc.wr_id)) { - ib_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah - = NULL; - spin_lock(&sqp->tx_lock); - sqp->tx_ix_tail++; - spin_unlock(&sqp->tx_lock); - } - } - } -} - -static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port, - struct mlx4_ib_demux_pv_ctx **ret_ctx) -{ - struct mlx4_ib_demux_pv_ctx *ctx; - - *ret_ctx = NULL; - ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL); - if (!ctx) { - pr_err("failed allocating pv resource context " - "for port %d, slave %d\n", port, slave); - return -ENOMEM; - } - - ctx->ib_dev = &dev->ib_dev; - ctx->port = port; - ctx->slave = slave; - *ret_ctx = ctx; - return 0; -} - -static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port) -{ - if (dev->sriov.demux[port - 1].tun[slave]) { - kfree(dev->sriov.demux[port - 1].tun[slave]); - dev->sriov.demux[port - 1].tun[slave] = NULL; - } -} - -static int create_pv_resources(struct ib_device *ibdev, int slave, int port, - int create_tun, struct mlx4_ib_demux_pv_ctx *ctx) -{ - int ret, cq_size; - - if (ctx->state != DEMUX_PV_STATE_DOWN) - return -EEXIST; - - ctx->state = DEMUX_PV_STATE_STARTING; - /* have QP0 only on port owner, and only if link layer is IB */ - if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) && - rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND) - ctx->has_smi = 1; - - if (ctx->has_smi) { - ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun); - if (ret) { - pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret); - goto err_out; - } - } - - ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun); - if (ret) { - pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret); - goto err_out_qp0; - } - - cq_size = 2 * MLX4_NUM_TUNNEL_BUFS; - if (ctx->has_smi) - cq_size *= 2; - - ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, - NULL, ctx, cq_size, 0); - if (IS_ERR(ctx->cq)) { - ret = PTR_ERR(ctx->cq); - pr_err("Couldn't create tunnel CQ (%d)\n", ret); - goto err_buf; - } - - ctx->pd = ib_alloc_pd(ctx->ib_dev); - if (IS_ERR(ctx->pd)) { - ret = PTR_ERR(ctx->pd); - pr_err("Couldn't create tunnel PD (%d)\n", ret); - goto err_cq; - } - - ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(ctx->mr)) { - ret = PTR_ERR(ctx->mr); - pr_err("Couldn't get tunnel DMA MR (%d)\n", ret); - goto err_pd; - } - - if (ctx->has_smi) { - ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun); - if (ret) { - pr_err("Couldn't create %s QP0 (%d)\n", - create_tun ? "tunnel for" : "", ret); - goto err_mr; - } - } - - ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun); - if (ret) { - pr_err("Couldn't create %s QP1 (%d)\n", - create_tun ? 
"tunnel for" : "", ret); - goto err_qp0; - } - - if (create_tun) - INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker); - else - INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker); - - ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq; - - ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); - if (ret) { - pr_err("Couldn't arm tunnel cq (%d)\n", ret); - goto err_wq; - } - ctx->state = DEMUX_PV_STATE_ACTIVE; - return 0; - -err_wq: - ctx->wq = NULL; - ib_destroy_qp(ctx->qp[1].qp); - ctx->qp[1].qp = NULL; - - -err_qp0: - if (ctx->has_smi) - ib_destroy_qp(ctx->qp[0].qp); - ctx->qp[0].qp = NULL; - -err_mr: - ib_dereg_mr(ctx->mr); - ctx->mr = NULL; - -err_pd: - ib_dealloc_pd(ctx->pd); - ctx->pd = NULL; - -err_cq: - ib_destroy_cq(ctx->cq); - ctx->cq = NULL; - -err_buf: - mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun); - -err_out_qp0: - if (ctx->has_smi) - mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun); -err_out: - ctx->state = DEMUX_PV_STATE_DOWN; - return ret; -} - -static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port, - struct mlx4_ib_demux_pv_ctx *ctx, int flush) -{ - if (!ctx) - return; - if (ctx->state > DEMUX_PV_STATE_DOWN) { - ctx->state = DEMUX_PV_STATE_DOWNING; - if (flush) - flush_workqueue(ctx->wq); - if (ctx->has_smi) { - ib_destroy_qp(ctx->qp[0].qp); - ctx->qp[0].qp = NULL; - mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1); - } - ib_destroy_qp(ctx->qp[1].qp); - ctx->qp[1].qp = NULL; - mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1); - ib_dereg_mr(ctx->mr); - ctx->mr = NULL; - ib_dealloc_pd(ctx->pd); - ctx->pd = NULL; - ib_destroy_cq(ctx->cq); - ctx->cq = NULL; - ctx->state = DEMUX_PV_STATE_DOWN; - } -} - -static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave, - int port, int do_init) -{ - int ret = 0; - - if (!do_init) { - clean_vf_mcast(&dev->sriov.demux[port - 1], slave); - /* for master, destroy real sqp resources */ - if (slave == mlx4_master_func_num(dev->dev)) - destroy_pv_resources(dev, slave, port, - dev->sriov.sqps[port - 1], 1); - /* destroy the tunnel qp resources */ - destroy_pv_resources(dev, slave, port, - dev->sriov.demux[port - 1].tun[slave], 1); - return 0; - } - - /* create the tunnel qp resources */ - ret = create_pv_resources(&dev->ib_dev, slave, port, 1, - dev->sriov.demux[port - 1].tun[slave]); - - /* for master, create the real sqp resources */ - if (!ret && slave == mlx4_master_func_num(dev->dev)) - ret = create_pv_resources(&dev->ib_dev, slave, port, 0, - dev->sriov.sqps[port - 1]); - return ret; -} - -void mlx4_ib_tunnels_update_work(struct work_struct *work) -{ - struct mlx4_ib_demux_work *dmxw; - - dmxw = container_of(work, struct mlx4_ib_demux_work, work); - mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port, - dmxw->do_init); - kfree(dmxw); - return; -} - -static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, - struct mlx4_ib_demux_ctx *ctx, - int port) -{ - char name[12]; - int ret = 0; - int i; - - ctx->tun = kcalloc(dev->dev->caps.sqp_demux, - sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL); - if (!ctx->tun) - return -ENOMEM; - - ctx->dev = dev; - ctx->port = port; - ctx->ib_dev = &dev->ib_dev; - - for (i = 0; i < dev->dev->caps.sqp_demux; i++) { - ret = alloc_pv_object(dev, i, port, &ctx->tun[i]); - if (ret) { - ret = -ENOMEM; - goto err_mcg; - } - } - - ret = mlx4_ib_mcg_port_init(ctx); - if (ret) { - pr_err("Failed initializing mcg para-virt (%d)\n", ret); - goto err_mcg; - } - - snprintf(name, sizeof name, "mlx4_ibt%d", port); - ctx->wq = create_singlethread_workqueue(name); - if 
(!ctx->wq) { - pr_err("Failed to create tunnelling WQ for port %d\n", port); - ret = -ENOMEM; - goto err_wq; - } - - snprintf(name, sizeof name, "mlx4_ibud%d", port); - ctx->ud_wq = create_singlethread_workqueue(name); - if (!ctx->ud_wq) { - pr_err("Failed to create up/down WQ for port %d\n", port); - ret = -ENOMEM; - goto err_udwq; - } - - return 0; - -err_udwq: - destroy_workqueue(ctx->wq); - ctx->wq = NULL; - -err_wq: - mlx4_ib_mcg_port_cleanup(ctx, 1); -err_mcg: - for (i = 0; i < dev->dev->caps.sqp_demux; i++) - free_pv_object(dev, i, port); - kfree(ctx->tun); - ctx->tun = NULL; - return ret; -} - -static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx) -{ - if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) { - sqp_ctx->state = DEMUX_PV_STATE_DOWNING; - flush_workqueue(sqp_ctx->wq); - if (sqp_ctx->has_smi) { - ib_destroy_qp(sqp_ctx->qp[0].qp); - sqp_ctx->qp[0].qp = NULL; - mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0); - } - ib_destroy_qp(sqp_ctx->qp[1].qp); - sqp_ctx->qp[1].qp = NULL; - mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0); - ib_dereg_mr(sqp_ctx->mr); - sqp_ctx->mr = NULL; - ib_dealloc_pd(sqp_ctx->pd); - sqp_ctx->pd = NULL; - ib_destroy_cq(sqp_ctx->cq); - sqp_ctx->cq = NULL; - sqp_ctx->state = DEMUX_PV_STATE_DOWN; - } -} - -static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx) -{ - int i; - if (ctx) { - struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); - mlx4_ib_mcg_port_cleanup(ctx, 1); - for (i = 0; i < dev->dev->caps.sqp_demux; i++) { - if (!ctx->tun[i]) - continue; - if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN) - ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING; - } - flush_workqueue(ctx->wq); - for (i = 0; i < dev->dev->caps.sqp_demux; i++) { - destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0); - free_pv_object(dev, i, ctx->port); - } - kfree(ctx->tun); - destroy_workqueue(ctx->ud_wq); - destroy_workqueue(ctx->wq); - } -} - -static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init) -{ - int i; - - if (!mlx4_is_master(dev->dev)) - return; - /* initialize or tear down tunnel QPs for the master */ - for (i = 0; i < dev->dev->caps.num_ports; i++) - mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init); - return; -} - -int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) -{ - int i = 0; - int err; - - if (!mlx4_is_mfunc(dev->dev)) - return 0; - - dev->sriov.is_going_down = 0; - spin_lock_init(&dev->sriov.going_down_lock); - mlx4_ib_cm_paravirt_init(dev); - - mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n"); - - if (mlx4_is_slave(dev->dev)) { - mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n"); - return 0; - } - - for (i = 0; i < dev->dev->caps.sqp_demux; i++) { - if (i == mlx4_master_func_num(dev->dev)) - mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid); - else - mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid()); - } - - err = mlx4_ib_init_alias_guid_service(dev); - if (err) { - mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n"); - goto paravirt_err; - } - err = mlx4_ib_device_register_sysfs(dev); - if (err) { - mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n"); - goto sysfs_err; - } - - mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n", - dev->dev->caps.sqp_demux); - for (i = 0; i < dev->num_ports; i++) { - union ib_gid gid; - err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1); - if (err) - goto demux_err; - dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; - err = alloc_pv_object(dev, 
mlx4_master_func_num(dev->dev), i + 1, - &dev->sriov.sqps[i]); - if (err) - goto demux_err; - err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1); - if (err) - goto demux_err; - } - mlx4_ib_master_tunnels(dev, 1); - return 0; - -demux_err: - while (i > 0) { - free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); - mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); - --i; - } - mlx4_ib_device_unregister_sysfs(dev); - -sysfs_err: - mlx4_ib_destroy_alias_guid_service(dev); - -paravirt_err: - mlx4_ib_cm_paravirt_clean(dev, -1); - - return err; -} - -void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev) -{ - int i; - unsigned long flags; - - if (!mlx4_is_mfunc(dev->dev)) - return; - - spin_lock_irqsave(&dev->sriov.going_down_lock, flags); - dev->sriov.is_going_down = 1; - spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); - if (mlx4_is_master(dev->dev)) { - for (i = 0; i < dev->num_ports; i++) { - flush_workqueue(dev->sriov.demux[i].ud_wq); - mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]); - kfree(dev->sriov.sqps[i]); - dev->sriov.sqps[i] = NULL; - mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); - } - - mlx4_ib_cm_paravirt_clean(dev, -1); - mlx4_ib_destroy_alias_guid_service(dev); - mlx4_ib_device_unregister_sysfs(dev); - } -} diff --git a/trunk/drivers/infiniband/hw/mlx4/main.c b/trunk/drivers/infiniband/hw/mlx4/main.c index 718ec6b2bad2..cc05579ebce7 100644 --- a/trunk/drivers/infiniband/hw/mlx4/main.c +++ b/trunk/drivers/infiniband/hw/mlx4/main.c @@ -59,10 +59,6 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); -int mlx4_ib_sm_guid_assign = 1; -module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); -MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)"); - static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -74,8 +70,6 @@ struct update_gid_work { int port; }; -static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); - static struct workqueue_struct *wq; static void init_query_mad(struct ib_smp *mad) @@ -104,8 +98,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; - err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, - 1, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -140,7 +133,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = dev->dev->pdev->device; + props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); @@ -189,12 +182,11 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) } static int ib_link_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props, int netw_view) + struct ib_port_attr *props) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int ext_active_speed; - int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -206,10 +198,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port, in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); - if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) - 
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; - - err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -222,10 +211,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port, props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); - if (netw_view) - props->gid_tbl_len = out_mad->data[50]; - else - props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); @@ -258,7 +244,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port, in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); - err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -284,7 +270,7 @@ static u8 state_to_phys_state(enum ib_port_state state) } static int eth_link_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props, int netw_view) + struct ib_port_attr *props) { struct mlx4_ib_dev *mdev = to_mdev(ibdev); @@ -334,36 +320,26 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, return err; } -int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props, int netw_view) +static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) { int err; memset(props, 0, sizeof *props); err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? 
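/*
 * Reviewer note on this hunk: the netw_view flag being removed here
 * selected between the paravirtualized host view of a port and the
 * real network view fetched over MAD_IFC with MLX4_MAD_IFC_NET_VIEW
 * (e.g. gid_tbl_len read from the PortInfo response, out_mad->data[50],
 * rather than from dev->caps); with SR-IOV support reverted, only the
 * host view remains.
 */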
- ib_link_query_port(ibdev, port, props, netw_view) : - eth_link_query_port(ibdev, port, props, netw_view); + ib_link_query_port(ibdev, port, props) : + eth_link_query_port(ibdev, port, props); return err; } -static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) -{ - /* returns host view */ - return __mlx4_ib_query_port(ibdev, port, props, 0); -} - -int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid, int netw_view) +static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; - struct mlx4_ib_dev *dev = to_mdev(ibdev); - int clear = 0; - int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); @@ -374,38 +350,23 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); - if (mlx4_is_mfunc(dev->dev) && netw_view) - mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; - - err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw, out_mad->data + 8, 8); - if (mlx4_is_mfunc(dev->dev) && !netw_view) { - if (index) { - /* For any index > 0, return the null guid */ - err = 0; - clear = 1; - goto out; - } - } - init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); - err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, - NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); out: - if (clear) - memset(gid->raw + 8, 0, 8); kfree(in_mad); kfree(out_mad); return err; @@ -425,17 +386,16 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) - return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); + return __mlx4_ib_query_gid(ibdev, port, index, gid); else return iboe_query_gid(ibdev, port, index, gid); } -int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey, int netw_view) +static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; - int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -447,11 +407,7 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); - if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) - mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; - - err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, - in_mad, out_mad); + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -463,11 +419,6 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, return err; } -static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) -{ - return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0); -} - static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { @@ -480,9 +431,6 @@ static int mlx4_ib_modify_device(struct 
ib_device *ibdev, int mask, if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) return 0; - if (mlx4_is_slave(to_mdev(ibdev)->dev)) - return -EOPNOTSUPP; - spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); memcpy(ibdev->node_desc, props->node_desc, 64); spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); @@ -498,7 +446,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, memset(mailbox->buf, 0, 256); memcpy(mailbox->buf, props->node_desc, 64); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, - MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); @@ -901,7 +849,6 @@ static int init_node_data(struct mlx4_ib_dev *dev) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; - int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -911,10 +858,8 @@ static int init_node_data(struct mlx4_ib_dev *dev) init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; - if (mlx4_is_master(dev->dev)) - mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; - err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -922,11 +867,10 @@ static int init_node_data(struct mlx4_ib_dev *dev) in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; - err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; - dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: @@ -1015,7 +959,7 @@ static void update_gids_task(struct work_struct *work) err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (err) pr_warn("set port command failed\n"); else { @@ -1177,38 +1121,6 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event return NOTIFY_DONE; } -static void init_pkeys(struct mlx4_ib_dev *ibdev) -{ - int port; - int slave; - int i; - - if (mlx4_is_master(ibdev->dev)) { - for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { - for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { - for (i = 0; - i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; - ++i) { - ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] = - /* master has the identity virt2phys pkey mapping */ - (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i : - ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1; - mlx4_sync_pkey_table(ibdev->dev, slave, port, i, - ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]); - } - } - } - /* initialize pkey cache */ - for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { - for (i = 0; - i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; - ++i) - ibdev->pkeys.phys_pkey_cache[port-1][i] = - (i) ? 
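/* Reviewer note: index 0 caches the default full-member pkey 0xFFFF; every other cached slot starts out invalid, i.e. */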
0 : 0xFFFF; - } - } -} - static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { char name[32]; @@ -1295,15 +1207,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) pr_info_once("%s", mlx4_ib_version); - mlx4_foreach_non_ib_transport_port(i, dev) - num_ports++; - - if (mlx4_is_mfunc(dev) && num_ports) { - dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n"); + if (mlx4_is_mfunc(dev)) { + pr_warn("IB not yet supported in SRIOV\n"); return NULL; } - num_ports = 0; mlx4_foreach_ib_transport_port(i, dev) num_ports++; @@ -1410,12 +1318,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; - if (!mlx4_is_slave(ibdev->dev)) { - ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; - ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; - ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; - ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; - } + ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; + ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; + ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; + ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; @@ -1451,14 +1357,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_mad_init(ibdev)) goto err_reg; - if (mlx4_ib_init_sriov(ibdev)) - goto err_mad; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(&iboe->nb); if (err) - goto err_sriov; + goto err_reg; } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { @@ -1469,18 +1372,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_active = true; - if (mlx4_is_mfunc(ibdev->dev)) - init_pkeys(ibdev); - - /* create paravirt contexts for any VFs which are active */ - if (mlx4_is_master(ibdev->dev)) { - for (j = 0; j < MLX4_MFUNC_MAX; j++) { - if (j == mlx4_master_func_num(ibdev->dev)) - continue; - if (mlx4_is_slave_active(ibdev->dev, j)) - do_slave_init(ibdev, j, 1); - } - } return ibdev; err_notif: @@ -1488,12 +1379,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) pr_warn("failure unregistering notifier\n"); flush_workqueue(wq); -err_sriov: - mlx4_ib_close_sriov(ibdev); - -err_mad: - mlx4_ib_mad_cleanup(ibdev); - err_reg: ib_unregister_device(&ibdev->ib_dev); @@ -1522,7 +1407,6 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) struct mlx4_ib_dev *ibdev = ibdev_ptr; int p; - mlx4_ib_close_sriov(ibdev); mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); if (ibdev->iboe.nb.notifier_call) { @@ -1544,51 +1428,6 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ib_dealloc_device(&ibdev->ib_dev); } -static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) -{ - struct mlx4_ib_demux_work **dm = NULL; - struct mlx4_dev *dev = ibdev->dev; - int i; - unsigned long flags; - - if (!mlx4_is_master(dev)) - return; - - dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC); - if (!dm) { - pr_err("failed to allocate memory for tunneling qp update\n"); - goto out; - } - - for (i = 0; i < dev->caps.num_ports; i++) { - dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); - if (!dm[i]) { - pr_err("failed to allocate memory for tunneling qp update work struct\n"); - for (i = 0; i < dev->caps.num_ports; i++) { - if (dm[i]) - kfree(dm[i]); - } - goto out; - } - } - /* initialize or tear down tunnel QPs for the 
slave */ - for (i = 0; i < dev->caps.num_ports; i++) { - INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); - dm[i]->port = i + 1; - dm[i]->slave = slave; - dm[i]->do_init = do_init; - dm[i]->dev = ibdev; - spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); - if (!ibdev->sriov.is_going_down) - queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); - spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); - } -out: - if (dm) - kfree(dm); - return; -} - static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { @@ -1596,28 +1435,22 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); struct mlx4_eqe *eqe = NULL; struct ib_event_work *ew; - int p = 0; + int port = 0; if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) eqe = (struct mlx4_eqe *)param; else - p = (int) param; + port = (u8)param; + + if (port > ibdev->num_ports) + return; switch (event) { case MLX4_DEV_EVENT_PORT_UP: - if (p > ibdev->num_ports) - return; - if (mlx4_is_master(dev) && - rdma_port_get_link_layer(&ibdev->ib_dev, p) == - IB_LINK_LAYER_INFINIBAND) { - mlx4_ib_invalidate_all_guid_record(ibdev, p); - } ibev.event = IB_EVENT_PORT_ACTIVE; break; case MLX4_DEV_EVENT_PORT_DOWN: - if (p > ibdev->num_ports) - return; ibev.event = IB_EVENT_PORT_ERR; break; @@ -1636,21 +1469,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); ew->ib_dev = ibdev; - /* need to queue only for port owner, which uses GEN_EQE */ - if (mlx4_is_master(dev)) - queue_work(wq, &ew->work); - else - handle_port_mgmt_change_event(&ew->work); - return; - - case MLX4_DEV_EVENT_SLAVE_INIT: - /* here, p is the slave id */ - do_slave_init(ibdev, p, 1); - return; - - case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: - /* here, p is the slave id */ - do_slave_init(ibdev, p, 0); + handle_port_mgmt_change_event(&ew->work); return; default: @@ -1658,7 +1477,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, } ibev.device = ibdev_ptr; - ibev.element.port_num = (u8) p; + ibev.element.port_num = port; ib_dispatch_event(&ibev); } @@ -1678,28 +1497,18 @@ static int __init mlx4_ib_init(void) if (!wq) return -ENOMEM; - err = mlx4_ib_mcg_init(); - if (err) - goto clean_wq; - err = mlx4_register_interface(&mlx4_ib_interface); - if (err) - goto clean_mcg; + if (err) { + destroy_workqueue(wq); + return err; + } return 0; - -clean_mcg: - mlx4_ib_mcg_destroy(); - -clean_wq: - destroy_workqueue(wq); - return err; } static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); - mlx4_ib_mcg_destroy(); destroy_workqueue(wq); } diff --git a/trunk/drivers/infiniband/hw/mlx4/mcg.c b/trunk/drivers/infiniband/hw/mlx4/mcg.c deleted file mode 100644 index 3c3b54c3fdd9..000000000000 --- a/trunk/drivers/infiniband/hw/mlx4/mcg.c +++ /dev/null @@ -1,1254 +0,0 @@ -/* - * Copyright (c) 2012 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include "mlx4_ib.h" - -#define MAX_VFS 80 -#define MAX_PEND_REQS_PER_FUNC 4 -#define MAD_TIMEOUT_MS 2000 - -#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg) -#define mcg_error(fmt, arg...) pr_err(fmt, ##arg) -#define mcg_warn_group(group, format, arg...) \ - pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\ - (group)->name, group->demux->port, ## arg) - -#define mcg_error_group(group, format, arg...) \ - pr_err(" %16s: " format, (group)->name, ## arg) - - -static union ib_gid mgid0; - -static struct workqueue_struct *clean_wq; - -enum mcast_state { - MCAST_NOT_MEMBER = 0, - MCAST_MEMBER, -}; - -enum mcast_group_state { - MCAST_IDLE, - MCAST_JOIN_SENT, - MCAST_LEAVE_SENT, - MCAST_RESP_READY -}; - -struct mcast_member { - enum mcast_state state; - uint8_t join_state; - int num_pend_reqs; - struct list_head pending; -}; - -struct ib_sa_mcmember_data { - union ib_gid mgid; - union ib_gid port_gid; - __be32 qkey; - __be16 mlid; - u8 mtusel_mtu; - u8 tclass; - __be16 pkey; - u8 ratesel_rate; - u8 lifetmsel_lifetm; - __be32 sl_flowlabel_hoplimit; - u8 scope_join_state; - u8 proxy_join; - u8 reserved[2]; -}; - -struct mcast_group { - struct ib_sa_mcmember_data rec; - struct rb_node node; - struct list_head mgid0_list; - struct mlx4_ib_demux_ctx *demux; - struct mcast_member func[MAX_VFS]; - struct mutex lock; - struct work_struct work; - struct list_head pending_list; - int members[3]; - enum mcast_group_state state; - enum mcast_group_state prev_state; - struct ib_sa_mad response_sa_mad; - __be64 last_req_tid; - - char name[33]; /* MGID string */ - struct device_attribute dentry; - - /* refcount is the reference count for the following: - 1. Each queued request - 2. Each invocation of the worker thread - 3. 
Membership of the port at the SA - */ - atomic_t refcount; - - /* delayed work to clean pending SM request */ - struct delayed_work timeout_work; - struct list_head cleanup_list; -}; - -struct mcast_req { - int func; - struct ib_sa_mad sa_mad; - struct list_head group_list; - struct list_head func_list; - struct mcast_group *group; - int clean; -}; - - -#define safe_atomic_dec(ref) \ - do {\ - if (atomic_dec_and_test(ref)) \ - mcg_warn_group(group, "did not expect to reach zero\n"); \ - } while (0) - -static const char *get_state_string(enum mcast_group_state state) -{ - switch (state) { - case MCAST_IDLE: - return "MCAST_IDLE"; - case MCAST_JOIN_SENT: - return "MCAST_JOIN_SENT"; - case MCAST_LEAVE_SENT: - return "MCAST_LEAVE_SENT"; - case MCAST_RESP_READY: - return "MCAST_RESP_READY"; - } - return "Invalid State"; -} - -static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx, - union ib_gid *mgid) -{ - struct rb_node *node = ctx->mcg_table.rb_node; - struct mcast_group *group; - int ret; - - while (node) { - group = rb_entry(node, struct mcast_group, node); - ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid); - if (!ret) - return group; - - if (ret < 0) - node = node->rb_left; - else - node = node->rb_right; - } - return NULL; -} - -static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx, - struct mcast_group *group) -{ - struct rb_node **link = &ctx->mcg_table.rb_node; - struct rb_node *parent = NULL; - struct mcast_group *cur_group; - int ret; - - while (*link) { - parent = *link; - cur_group = rb_entry(parent, struct mcast_group, node); - - ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw, - sizeof group->rec.mgid); - if (ret < 0) - link = &(*link)->rb_left; - else if (ret > 0) - link = &(*link)->rb_right; - else - return cur_group; - } - rb_link_node(&group->node, parent, link); - rb_insert_color(&group->node, &ctx->mcg_table); - return NULL; -} - -static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) -{ - struct mlx4_ib_dev *dev = ctx->dev; - struct ib_ah_attr ah_attr; - - spin_lock(&dev->sm_lock); - if (!dev->sm_ah[ctx->port - 1]) { - /* port is not yet Active, sm_ah not ready */ - spin_unlock(&dev->sm_lock); - return -EAGAIN; - } - mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); - spin_unlock(&dev->sm_lock); - return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, - IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad); -} - -static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, - struct ib_mad *mad) -{ - struct mlx4_ib_dev *dev = ctx->dev; - struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1]; - struct ib_wc wc; - struct ib_ah_attr ah_attr; - - /* Our agent might not yet be registered when mads start to arrive */ - if (!agent) - return -EAGAIN; - - ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); - - wc.pkey_index = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = ctx->port; - wc.slid = ah_attr.dlid; /* opensm lid */ - wc.src_qp = 1; - return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad); -} - -static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad) -{ - struct ib_sa_mad mad; - struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data; - int ret; - - /* we rely on a mad request as arrived from a VF */ - memcpy(&mad, sa_mad, sizeof mad); - - /* fix port GID to be the real one (slave 0) */ - sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0]; - - /* assign our 
own TID */ - mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux); - group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */ - - ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad); - /* set timeout handler */ - if (!ret) { - /* calls mlx4_ib_mcg_timeout_handler */ - queue_delayed_work(group->demux->mcg_wq, &group->timeout_work, - msecs_to_jiffies(MAD_TIMEOUT_MS)); - } - - return ret; -} - -static int send_leave_to_wire(struct mcast_group *group, u8 join_state) -{ - struct ib_sa_mad mad; - struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data; - int ret; - - memset(&mad, 0, sizeof mad); - mad.mad_hdr.base_version = 1; - mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; - mad.mad_hdr.class_version = 2; - mad.mad_hdr.method = IB_SA_METHOD_DELETE; - mad.mad_hdr.status = cpu_to_be16(0); - mad.mad_hdr.class_specific = cpu_to_be16(0); - mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux); - group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */ - mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); - mad.mad_hdr.attr_mod = cpu_to_be32(0); - mad.sa_hdr.sm_key = 0x0; - mad.sa_hdr.attr_offset = cpu_to_be16(7); - mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID | - IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE; - - *sa_data = group->rec; - sa_data->scope_join_state = join_state; - - ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad); - if (ret) - group->state = MCAST_IDLE; - - /* set timeout handler */ - if (!ret) { - /* calls mlx4_ib_mcg_timeout_handler */ - queue_delayed_work(group->demux->mcg_wq, &group->timeout_work, - msecs_to_jiffies(MAD_TIMEOUT_MS)); - } - - return ret; -} - -static int send_reply_to_slave(int slave, struct mcast_group *group, - struct ib_sa_mad *req_sa_mad, u16 status) -{ - struct ib_sa_mad mad; - struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data; - struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data; - int ret; - - memset(&mad, 0, sizeof mad); - mad.mad_hdr.base_version = 1; - mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; - mad.mad_hdr.class_version = 2; - mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; - mad.mad_hdr.status = cpu_to_be16(status); - mad.mad_hdr.class_specific = cpu_to_be16(0); - mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid; - *(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */ - mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); - mad.mad_hdr.attr_mod = cpu_to_be32(0); - mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key; - mad.sa_hdr.attr_offset = cpu_to_be16(7); - mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */ - - *sa_data = group->rec; - - /* reconstruct VF's requested join_state and port_gid */ - sa_data->scope_join_state &= 0xf0; - sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f); - memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid); - - ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad); - return ret; -} - -static int check_selector(ib_sa_comp_mask comp_mask, - ib_sa_comp_mask selector_mask, - ib_sa_comp_mask value_mask, - u8 src_value, u8 dst_value) -{ - int err; - u8 selector = dst_value >> 6; - dst_value &= 0x3f; - src_value &= 0x3f; - - if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) - return 0; - - switch (selector) { - case IB_SA_GT: - err = (src_value <= dst_value); - break; - case IB_SA_LT: - err = (src_value >= dst_value); - break; - case IB_SA_EQ: - err = (src_value 
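/* Reviewer note: err flags a violated selector constraint, so for IB_SA_EQ it is set when the two values differ: */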
!= dst_value);
- break;
- default:
- err = 0;
- break;
- }
-
- return err;
-}
-
-static u16 cmp_rec(struct ib_sa_mcmember_data *src,
- struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
-{
- /* src is group record, dst is request record */
- /* MGID must already match */
- /* Port_GID we always replace to our Port_GID, so it is a match */
-
-#define MAD_STATUS_REQ_INVALID 0x0200
- if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
- return MAD_STATUS_REQ_INVALID;
- if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
- return MAD_STATUS_REQ_INVALID;
- if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
- IB_SA_MCMEMBER_REC_MTU,
- src->mtusel_mtu, dst->mtusel_mtu))
- return MAD_STATUS_REQ_INVALID;
- if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
- src->tclass != dst->tclass)
- return MAD_STATUS_REQ_INVALID;
- if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
- return MAD_STATUS_REQ_INVALID;
- if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
- IB_SA_MCMEMBER_REC_RATE,
- src->ratesel_rate, dst->ratesel_rate))
- return MAD_STATUS_REQ_INVALID;
- if (check_selector(comp_mask,
- IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
- IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
- src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
- return MAD_STATUS_REQ_INVALID;
- if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
- (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
- (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
- return MAD_STATUS_REQ_INVALID;
- if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
- (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
- (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
- return MAD_STATUS_REQ_INVALID;
- if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
- (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
- (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
- return MAD_STATUS_REQ_INVALID;
- if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
- (src->scope_join_state & 0xf0) !=
- (dst->scope_join_state & 0xf0))
- return MAD_STATUS_REQ_INVALID;
-
- /* join_state checked separately, proxy_join ignored */
-
- return 0;
-}
-
-/* release group, return 1 if this was the last release and the group is destroyed;
- * timeout work is canceled synchronously */
-static int release_group(struct mcast_group *group, int from_timeout_handler)
-{
- struct mlx4_ib_demux_ctx *ctx = group->demux;
- int nzgroup;
-
- mutex_lock(&ctx->mcg_table_lock);
- mutex_lock(&group->lock);
- if (atomic_dec_and_test(&group->refcount)) {
- if (!from_timeout_handler) {
- if (group->state != MCAST_IDLE &&
- !cancel_delayed_work(&group->timeout_work)) {
- atomic_inc(&group->refcount);
- mutex_unlock(&group->lock);
- mutex_unlock(&ctx->mcg_table_lock);
- return 0;
- }
- }
-
- nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
- if (nzgroup)
- del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
- if (!list_empty(&group->pending_list))
- mcg_warn_group(group, "releasing a group with non empty pending list\n");
- if (nzgroup)
- rb_erase(&group->node, &ctx->mcg_table);
- list_del_init(&group->mgid0_list);
- mutex_unlock(&group->lock);
- mutex_unlock(&ctx->mcg_table_lock);
- kfree(group);
- return 1;
- } else {
- mutex_unlock(&group->lock);
- mutex_unlock(&ctx->mcg_table_lock);
- }
- return 0;
-}
-
-static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
-{
- int i;
-
- for (i = 0; i < 3; i++, join_state >>= 1)
- if (join_state & 0x1)
- group->members[i] += inc;
-}
-
-static u8 
get_leave_state(struct mcast_group *group) -{ - u8 leave_state = 0; - int i; - - for (i = 0; i < 3; i++) - if (!group->members[i]) - leave_state |= (1 << i); - - return leave_state & (group->rec.scope_join_state & 7); -} - -static int join_group(struct mcast_group *group, int slave, u8 join_mask) -{ - int ret = 0; - u8 join_state; - - /* remove bits that slave is already member of, and adjust */ - join_state = join_mask & (~group->func[slave].join_state); - adjust_membership(group, join_state, 1); - group->func[slave].join_state |= join_state; - if (group->func[slave].state != MCAST_MEMBER && join_state) { - group->func[slave].state = MCAST_MEMBER; - ret = 1; - } - return ret; -} - -static int leave_group(struct mcast_group *group, int slave, u8 leave_state) -{ - int ret = 0; - - adjust_membership(group, leave_state, -1); - group->func[slave].join_state &= ~leave_state; - if (!group->func[slave].join_state) { - group->func[slave].state = MCAST_NOT_MEMBER; - ret = 1; - } - return ret; -} - -static int check_leave(struct mcast_group *group, int slave, u8 leave_mask) -{ - if (group->func[slave].state != MCAST_MEMBER) - return MAD_STATUS_REQ_INVALID; - - /* make sure we're not deleting unset bits */ - if (~group->func[slave].join_state & leave_mask) - return MAD_STATUS_REQ_INVALID; - - if (!leave_mask) - return MAD_STATUS_REQ_INVALID; - - return 0; -} - -static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) -{ - struct delayed_work *delay = to_delayed_work(work); - struct mcast_group *group; - struct mcast_req *req = NULL; - - group = container_of(delay, typeof(*group), timeout_work); - - mutex_lock(&group->lock); - if (group->state == MCAST_JOIN_SENT) { - if (!list_empty(&group->pending_list)) { - req = list_first_entry(&group->pending_list, struct mcast_req, group_list); - list_del(&req->group_list); - list_del(&req->func_list); - --group->func[req->func].num_pend_reqs; - mutex_unlock(&group->lock); - kfree(req); - if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) { - if (release_group(group, 1)) - return; - } else { - kfree(group); - return; - } - mutex_lock(&group->lock); - } else - mcg_warn_group(group, "DRIVER BUG\n"); - } else if (group->state == MCAST_LEAVE_SENT) { - if (group->rec.scope_join_state & 7) - group->rec.scope_join_state &= 0xf8; - group->state = MCAST_IDLE; - mutex_unlock(&group->lock); - if (release_group(group, 1)) - return; - mutex_lock(&group->lock); - } else - mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state)); - group->state = MCAST_IDLE; - atomic_inc(&group->refcount); - if (!queue_work(group->demux->mcg_wq, &group->work)) - safe_atomic_dec(&group->refcount); - - mutex_unlock(&group->lock); -} - -static int handle_leave_req(struct mcast_group *group, u8 leave_mask, - struct mcast_req *req) -{ - u16 status; - - if (req->clean) - leave_mask = group->func[req->func].join_state; - - status = check_leave(group, req->func, leave_mask); - if (!status) - leave_group(group, req->func, leave_mask); - - if (!req->clean) - send_reply_to_slave(req->func, group, &req->sa_mad, status); - --group->func[req->func].num_pend_reqs; - list_del(&req->group_list); - list_del(&req->func_list); - kfree(req); - return 1; -} - -static int handle_join_req(struct mcast_group *group, u8 join_mask, - struct mcast_req *req) -{ - u8 group_join_state = group->rec.scope_join_state & 7; - int ref = 0; - u16 status; - struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; - - if (join_mask == (group_join_state & join_mask)) { - /* 
port's membership need not change */
- status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
- if (!status)
- join_group(group, req->func, join_mask);
-
- --group->func[req->func].num_pend_reqs;
- send_reply_to_slave(req->func, group, &req->sa_mad, status);
- list_del(&req->group_list);
- list_del(&req->func_list);
- kfree(req);
- ++ref;
- } else {
- /* port's membership needs to be updated */
- group->prev_state = group->state;
- if (send_join_to_wire(group, &req->sa_mad)) {
- --group->func[req->func].num_pend_reqs;
- list_del(&req->group_list);
- list_del(&req->func_list);
- kfree(req);
- ref = 1;
- group->state = group->prev_state;
- } else
- group->state = MCAST_JOIN_SENT;
- }
-
- return ref;
-}
-
-static void mlx4_ib_mcg_work_handler(struct work_struct *work)
-{
- struct mcast_group *group;
- struct mcast_req *req = NULL;
- struct ib_sa_mcmember_data *sa_data;
- u8 req_join_state;
- int rc = 1; /* release_count - this is for the scheduled work */
- u16 status;
- u8 method;
-
- group = container_of(work, typeof(*group), work);
-
- mutex_lock(&group->lock);
-
- /* First, let's see if a response from SM is waiting regarding this group.
- * If so, we need to update the group's REC. If this is a bad response, we
- * may need to send a bad response to a VF waiting for it. If VF is waiting
- * and this is a good response, the VF will be answered later in this func. */
- if (group->state == MCAST_RESP_READY) {
- /* cancels mlx4_ib_mcg_timeout_handler */
- cancel_delayed_work(&group->timeout_work);
- status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
- method = group->response_sa_mad.mad_hdr.method;
- if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
- mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
- be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
- be64_to_cpu(group->last_req_tid));
- group->state = group->prev_state;
- goto process_requests;
- }
- if (status) {
- if (!list_empty(&group->pending_list))
- req = list_first_entry(&group->pending_list,
- struct mcast_req, group_list);
- if ((method == IB_MGMT_METHOD_GET_RESP)) {
- if (req) {
- send_reply_to_slave(req->func, group, &req->sa_mad, status);
- --group->func[req->func].num_pend_reqs;
- list_del(&req->group_list);
- list_del(&req->func_list);
- kfree(req);
- ++rc;
- } else
- mcg_warn_group(group, "no request for failed join\n");
- } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
- ++rc;
- } else {
- u8 resp_join_state;
- u8 cur_join_state;
-
- resp_join_state = ((struct ib_sa_mcmember_data *)
- group->response_sa_mad.data)->scope_join_state & 7;
- cur_join_state = group->rec.scope_join_state & 7;
-
- if (method == IB_MGMT_METHOD_GET_RESP) {
- /* successful join */
- if (!cur_join_state && resp_join_state)
- --rc;
- } else if (!resp_join_state)
- ++rc;
- memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
- }
- group->state = MCAST_IDLE;
- }
-
-process_requests:
- /* We should now go over pending join/leave requests, as long as we are idle. */
- while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
- req = list_first_entry(&group->pending_list, struct mcast_req,
- group_list);
- sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
- req_join_state = sa_data->scope_join_state & 0x7;
-
- /* For a leave request, we will immediately answer the VF, and
- * update our internal counters. The actual leave will be sent
- * to SM later, if at all needed. 
We dequeue the request now. */ - if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE) - rc += handle_leave_req(group, req_join_state, req); - else - rc += handle_join_req(group, req_join_state, req); - } - - /* Handle leaves */ - if (group->state == MCAST_IDLE) { - req_join_state = get_leave_state(group); - if (req_join_state) { - group->rec.scope_join_state &= ~req_join_state; - group->prev_state = group->state; - if (send_leave_to_wire(group, req_join_state)) { - group->state = group->prev_state; - ++rc; - } else - group->state = MCAST_LEAVE_SENT; - } - } - - if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) - goto process_requests; - mutex_unlock(&group->lock); - - while (rc--) - release_group(group, 0); -} - -static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx, - __be64 tid, - union ib_gid *new_mgid) -{ - struct mcast_group *group = NULL, *cur_group; - struct mcast_req *req; - struct list_head *pos; - struct list_head *n; - - mutex_lock(&ctx->mcg_table_lock); - list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) { - group = list_entry(pos, struct mcast_group, mgid0_list); - mutex_lock(&group->lock); - if (group->last_req_tid == tid) { - if (memcmp(new_mgid, &mgid0, sizeof mgid0)) { - group->rec.mgid = *new_mgid; - sprintf(group->name, "%016llx%016llx", - be64_to_cpu(group->rec.mgid.global.subnet_prefix), - be64_to_cpu(group->rec.mgid.global.interface_id)); - list_del_init(&group->mgid0_list); - cur_group = mcast_insert(ctx, group); - if (cur_group) { - /* A race between our code and SM. Silently cleaning the new one */ - req = list_first_entry(&group->pending_list, - struct mcast_req, group_list); - --group->func[req->func].num_pend_reqs; - list_del(&req->group_list); - list_del(&req->func_list); - kfree(req); - mutex_unlock(&group->lock); - mutex_unlock(&ctx->mcg_table_lock); - release_group(group, 0); - return NULL; - } - - atomic_inc(&group->refcount); - add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr); - mutex_unlock(&group->lock); - mutex_unlock(&ctx->mcg_table_lock); - return group; - } else { - struct mcast_req *tmp1, *tmp2; - - list_del(&group->mgid0_list); - if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE) - cancel_delayed_work_sync(&group->timeout_work); - - list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) { - list_del(&tmp1->group_list); - kfree(tmp1); - } - mutex_unlock(&group->lock); - mutex_unlock(&ctx->mcg_table_lock); - kfree(group); - return NULL; - } - } - mutex_unlock(&group->lock); - } - mutex_unlock(&ctx->mcg_table_lock); - - return NULL; -} - -static ssize_t sysfs_show_group(struct device *dev, - struct device_attribute *attr, char *buf); - -static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx, - union ib_gid *mgid, int create, - gfp_t gfp_mask) -{ - struct mcast_group *group, *cur_group; - int is_mgid0; - int i; - - is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); - if (!is_mgid0) { - group = mcast_find(ctx, mgid); - if (group) - goto found; - } - - if (!create) - return ERR_PTR(-ENOENT); - - group = kzalloc(sizeof *group, gfp_mask); - if (!group) - return ERR_PTR(-ENOMEM); - - group->demux = ctx; - group->rec.mgid = *mgid; - INIT_LIST_HEAD(&group->pending_list); - INIT_LIST_HEAD(&group->mgid0_list); - for (i = 0; i < MAX_VFS; ++i) - INIT_LIST_HEAD(&group->func[i].pending); - INIT_WORK(&group->work, mlx4_ib_mcg_work_handler); - INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler); - mutex_init(&group->lock); - 
sprintf(group->name, "%016llx%016llx", - be64_to_cpu(group->rec.mgid.global.subnet_prefix), - be64_to_cpu(group->rec.mgid.global.interface_id)); - sysfs_attr_init(&group->dentry.attr); - group->dentry.show = sysfs_show_group; - group->dentry.store = NULL; - group->dentry.attr.name = group->name; - group->dentry.attr.mode = 0400; - group->state = MCAST_IDLE; - - if (is_mgid0) { - list_add(&group->mgid0_list, &ctx->mcg_mgid0_list); - goto found; - } - - cur_group = mcast_insert(ctx, group); - if (cur_group) { - mcg_warn("group just showed up %s - confused\n", cur_group->name); - kfree(group); - return ERR_PTR(-EINVAL); - } - - add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr); - -found: - atomic_inc(&group->refcount); - return group; -} - -static void queue_req(struct mcast_req *req) -{ - struct mcast_group *group = req->group; - - atomic_inc(&group->refcount); /* for the request */ - atomic_inc(&group->refcount); /* for scheduling the work */ - list_add_tail(&req->group_list, &group->pending_list); - list_add_tail(&req->func_list, &group->func[req->func].pending); - /* calls mlx4_ib_mcg_work_handler */ - if (!queue_work(group->demux->mcg_wq, &group->work)) - safe_atomic_dec(&group->refcount); -} - -int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave, - struct ib_sa_mad *mad) -{ - struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data; - struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1]; - struct mcast_group *group; - - switch (mad->mad_hdr.method) { - case IB_MGMT_METHOD_GET_RESP: - case IB_SA_METHOD_DELETE_RESP: - mutex_lock(&ctx->mcg_table_lock); - group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL); - mutex_unlock(&ctx->mcg_table_lock); - if (IS_ERR(group)) { - if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) { - __be64 tid = mad->mad_hdr.tid; - *(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */ - group = search_relocate_mgid0_group(ctx, tid, &rec->mgid); - } else - group = NULL; - } - - if (!group) - return 1; - - mutex_lock(&group->lock); - group->response_sa_mad = *mad; - group->prev_state = group->state; - group->state = MCAST_RESP_READY; - /* calls mlx4_ib_mcg_work_handler */ - atomic_inc(&group->refcount); - if (!queue_work(ctx->mcg_wq, &group->work)) - safe_atomic_dec(&group->refcount); - mutex_unlock(&group->lock); - release_group(group, 0); - return 1; /* consumed */ - case IB_MGMT_METHOD_SET: - case IB_SA_METHOD_GET_TABLE: - case IB_SA_METHOD_GET_TABLE_RESP: - case IB_SA_METHOD_DELETE: - return 0; /* not consumed, pass-through to guest over tunnel */ - default: - mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n", - port, mad->mad_hdr.method); - return 1; /* consumed */ - } -} - -int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, - int slave, struct ib_sa_mad *sa_mad) -{ - struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data; - struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1]; - struct mcast_group *group; - struct mcast_req *req; - int may_create = 0; - - if (ctx->flushing) - return -EAGAIN; - - switch (sa_mad->mad_hdr.method) { - case IB_MGMT_METHOD_SET: - may_create = 1; - case IB_SA_METHOD_DELETE: - req = kzalloc(sizeof *req, GFP_KERNEL); - if (!req) - return -ENOMEM; - - req->func = slave; - req->sa_mad = *sa_mad; - - mutex_lock(&ctx->mcg_table_lock); - group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL); - 
mutex_unlock(&ctx->mcg_table_lock); - if (IS_ERR(group)) { - kfree(req); - return PTR_ERR(group); - } - mutex_lock(&group->lock); - if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) { - mutex_unlock(&group->lock); - mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n", - port, slave, MAX_PEND_REQS_PER_FUNC); - release_group(group, 0); - kfree(req); - return -ENOMEM; - } - ++group->func[slave].num_pend_reqs; - req->group = group; - queue_req(req); - mutex_unlock(&group->lock); - release_group(group, 0); - return 1; /* consumed */ - case IB_SA_METHOD_GET_TABLE: - case IB_MGMT_METHOD_GET_RESP: - case IB_SA_METHOD_GET_TABLE_RESP: - case IB_SA_METHOD_DELETE_RESP: - return 0; /* not consumed, pass-through */ - default: - mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n", - port, slave, sa_mad->mad_hdr.method); - return 1; /* consumed */ - } -} - -static ssize_t sysfs_show_group(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct mcast_group *group = - container_of(attr, struct mcast_group, dentry); - struct mcast_req *req = NULL; - char pending_str[40]; - char state_str[40]; - ssize_t len = 0; - int f; - - if (group->state == MCAST_IDLE) - sprintf(state_str, "%s", get_state_string(group->state)); - else - sprintf(state_str, "%s(TID=0x%llx)", - get_state_string(group->state), - be64_to_cpu(group->last_req_tid)); - if (list_empty(&group->pending_list)) { - sprintf(pending_str, "No"); - } else { - req = list_first_entry(&group->pending_list, struct mcast_req, group_list); - sprintf(pending_str, "Yes(TID=0x%llx)", - be64_to_cpu(req->sa_mad.mad_hdr.tid)); - } - len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", - group->rec.scope_join_state & 0xf, - group->members[2], group->members[1], group->members[0], - atomic_read(&group->refcount), - pending_str, - state_str); - for (f = 0; f < MAX_VFS; ++f) - if (group->func[f].state == MCAST_MEMBER) - len += sprintf(buf + len, "%d[%1x] ", - f, group->func[f].join_state); - - len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x " - "%4x %4x %2x %2x)\n", - be16_to_cpu(group->rec.pkey), - be32_to_cpu(group->rec.qkey), - (group->rec.mtusel_mtu & 0xc0) >> 6, - group->rec.mtusel_mtu & 0x3f, - group->rec.tclass, - (group->rec.ratesel_rate & 0xc0) >> 6, - group->rec.ratesel_rate & 0x3f, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8, - be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff, - group->rec.proxy_join); - - return len; -} - -int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx) -{ - char name[20]; - - atomic_set(&ctx->tid, 0); - sprintf(name, "mlx4_ib_mcg%d", ctx->port); - ctx->mcg_wq = create_singlethread_workqueue(name); - if (!ctx->mcg_wq) - return -ENOMEM; - - mutex_init(&ctx->mcg_table_lock); - ctx->mcg_table = RB_ROOT; - INIT_LIST_HEAD(&ctx->mcg_mgid0_list); - ctx->flushing = 0; - - return 0; -} - -static void force_clean_group(struct mcast_group *group) -{ - struct mcast_req *req, *tmp - ; - list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) { - list_del(&req->group_list); - kfree(req); - } - del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr); - rb_erase(&group->node, &group->demux->mcg_table); - kfree(group); -} - -static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq) -{ - int i; - struct rb_node *p; - struct mcast_group *group; - unsigned 
long end; - int count; - - if (ctx->flushing) - return; - - ctx->flushing = 1; - for (i = 0; i < MAX_VFS; ++i) - clean_vf_mcast(ctx, i); - - end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000); - do { - count = 0; - mutex_lock(&ctx->mcg_table_lock); - for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) - ++count; - mutex_unlock(&ctx->mcg_table_lock); - if (!count) - break; - - msleep(1); - } while (time_after(end, jiffies)); - - flush_workqueue(ctx->mcg_wq); - if (destroy_wq) - destroy_workqueue(ctx->mcg_wq); - - mutex_lock(&ctx->mcg_table_lock); - while ((p = rb_first(&ctx->mcg_table)) != NULL) { - group = rb_entry(p, struct mcast_group, node); - if (atomic_read(&group->refcount)) - mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group); - - force_clean_group(group); - } - mutex_unlock(&ctx->mcg_table_lock); - - if (!destroy_wq) - ctx->flushing = 0; -} - -struct clean_work { - struct work_struct work; - struct mlx4_ib_demux_ctx *ctx; - int destroy_wq; -}; - -static void mcg_clean_task(struct work_struct *work) -{ - struct clean_work *cw = container_of(work, struct clean_work, work); - - _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq); - kfree(cw); -} - -void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq) -{ - struct clean_work *work; - - if (destroy_wq) { - _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq); - return; - } - - work = kmalloc(sizeof *work, GFP_KERNEL); - if (!work) { - mcg_warn("failed allocating work for cleanup\n"); - return; - } - - work->ctx = ctx; - work->destroy_wq = destroy_wq; - INIT_WORK(&work->work, mcg_clean_task); - queue_work(clean_wq, &work->work); -} - -static void build_leave_mad(struct mcast_req *req) -{ - struct ib_sa_mad *mad = &req->sa_mad; - - mad->mad_hdr.method = IB_SA_METHOD_DELETE; -} - - -static void clear_pending_reqs(struct mcast_group *group, int vf) -{ - struct mcast_req *req, *tmp, *group_first = NULL; - int clear; - int pend = 0; - - if (!list_empty(&group->pending_list)) - group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list); - - list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) { - clear = 1; - if (group_first == req && - (group->state == MCAST_JOIN_SENT || - group->state == MCAST_LEAVE_SENT)) { - clear = cancel_delayed_work(&group->timeout_work); - pend = !clear; - group->state = MCAST_IDLE; - } - if (clear) { - --group->func[vf].num_pend_reqs; - list_del(&req->group_list); - list_del(&req->func_list); - kfree(req); - atomic_dec(&group->refcount); - } - } - - if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) { - mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n", - list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs); - } -} - -static int push_deleteing_req(struct mcast_group *group, int slave) -{ - struct mcast_req *req; - struct mcast_req *pend_req; - - if (!group->func[slave].join_state) - return 0; - - req = kzalloc(sizeof *req, GFP_KERNEL); - if (!req) { - mcg_warn_group(group, "failed allocation - may leave stall groups\n"); - return -ENOMEM; - } - - if (!list_empty(&group->func[slave].pending)) { - pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list); - if (pend_req->clean) { - kfree(req); - return 0; - } - } - - req->clean = 1; - req->func = slave; - req->group = group; - ++group->func[slave].num_pend_reqs; - build_leave_mad(req); - queue_req(req); - return 0; -} - -void clean_vf_mcast(struct 
mlx4_ib_demux_ctx *ctx, int slave) -{ - struct mcast_group *group; - struct rb_node *p; - - mutex_lock(&ctx->mcg_table_lock); - for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) { - group = rb_entry(p, struct mcast_group, node); - mutex_lock(&group->lock); - if (atomic_read(&group->refcount)) { - /* clear pending requests of this VF */ - clear_pending_reqs(group, slave); - push_deleteing_req(group, slave); - } - mutex_unlock(&group->lock); - } - mutex_unlock(&ctx->mcg_table_lock); -} - - -int mlx4_ib_mcg_init(void) -{ - clean_wq = create_singlethread_workqueue("mlx4_ib_mcg"); - if (!clean_wq) - return -ENOMEM; - - return 0; -} - -void mlx4_ib_mcg_destroy(void) -{ - destroy_workqueue(clean_wq); -} diff --git a/trunk/drivers/infiniband/hw/mlx4/mlx4_ib.h b/trunk/drivers/infiniband/hw/mlx4/mlx4_ib.h index e04cbc9a54a5..c136bb618e29 100644 --- a/trunk/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/trunk/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -37,12 +37,9 @@ #include #include #include -#include #include #include -#include -#include #include #include @@ -65,9 +62,6 @@ enum { #define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1) #define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) -/*module param to indicate if SM assigns the alias_GUID*/ -extern int mlx4_ib_sm_guid_assign; - struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; @@ -139,10 +133,8 @@ struct mlx4_ib_wq { }; enum mlx4_ib_qp_flags { - MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, - MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, - MLX4_IB_SRIOV_SQP = 1 << 31, + MLX4_IB_QP_LSO = 1 << 0, + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, }; struct mlx4_ib_gid_entry { @@ -152,80 +144,6 @@ struct mlx4_ib_gid_entry { u8 port; }; -enum mlx4_ib_qp_type { - /* - * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries - * here (and in that order) since the MAD layer uses them as - * indices into a 2-entry table. 
- */
- MLX4_IB_QPT_SMI = IB_QPT_SMI,
- MLX4_IB_QPT_GSI = IB_QPT_GSI,
-
- MLX4_IB_QPT_RC = IB_QPT_RC,
- MLX4_IB_QPT_UC = IB_QPT_UC,
- MLX4_IB_QPT_UD = IB_QPT_UD,
- MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6,
- MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE,
- MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET,
- MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI,
- MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT,
-
- MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16,
- MLX4_IB_QPT_PROXY_SMI = 1 << 17,
- MLX4_IB_QPT_PROXY_GSI = 1 << 18,
- MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19,
- MLX4_IB_QPT_TUN_SMI = 1 << 20,
- MLX4_IB_QPT_TUN_GSI = 1 << 21,
-};
-
-#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \
- MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
- MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
-
-enum mlx4_ib_mad_ifc_flags {
- MLX4_MAD_IFC_IGNORE_MKEY = 1,
- MLX4_MAD_IFC_IGNORE_BKEY = 2,
- MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
- MLX4_MAD_IFC_IGNORE_BKEY),
- MLX4_MAD_IFC_NET_VIEW = 4,
-};
-
-enum {
- MLX4_NUM_TUNNEL_BUFS = 256,
-};
-
-struct mlx4_ib_tunnel_header {
- struct mlx4_av av;
- __be32 remote_qpn;
- __be32 qkey;
- __be16 vlan;
- u8 mac[6];
- __be16 pkey_index;
- u8 reserved[6];
-};
-
-struct mlx4_ib_buf {
- void *addr;
- dma_addr_t map;
-};
-
-struct mlx4_rcv_tunnel_hdr {
- __be32 flags_src_qp; /* flags[6:5] is defined for VLANs:
- * 0x0 - no vlan was in the packet
- * 0x01 - C-VLAN was in the packet */
- u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */
- u8 reserved;
- __be16 pkey_index;
- __be16 sl_vid;
- __be16 slid_mac_47_32;
- __be32 mac_31_0;
-};
-
-struct mlx4_ib_proxy_sqp_hdr {
- struct ib_grh grh;
- struct mlx4_rcv_tunnel_hdr tun;
-} __packed;
-
 struct mlx4_ib_qp {
 struct ib_qp ibqp;
 struct mlx4_qp mqp;
@@ -241,7 +159,6 @@ struct mlx4_ib_qp {
 int sq_spare_wqes;
 struct mlx4_ib_wq sq;
- enum mlx4_ib_qp_type mlx4_ib_qp_type;
 struct ib_umem *umem;
 struct mlx4_mtt mtt;
 int buf_size;
@@ -257,8 +174,6 @@ struct mlx4_ib_qp {
 int mlx_type;
 struct list_head gid_list;
 struct list_head steering_rules;
- struct mlx4_ib_buf *sqp_proxy_rcv;
-
 };
 
 struct mlx4_ib_srq {
@@ -281,138 +196,6 @@ struct mlx4_ib_ah {
 union mlx4_ext_av av;
 };
 
-/****************************************/
-/* alias guid support */
-/****************************************/
-#define NUM_PORT_ALIAS_GUID 2
-#define NUM_ALIAS_GUID_IN_REC 8
-#define NUM_ALIAS_GUID_REC_IN_PORT 16
-#define GUID_REC_SIZE 8
-#define NUM_ALIAS_GUID_PER_PORT 128
-#define MLX4_NOT_SET_GUID (0x00LL)
-#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
-
-enum mlx4_guid_alias_rec_status {
- MLX4_GUID_INFO_STATUS_IDLE,
- MLX4_GUID_INFO_STATUS_SET,
- MLX4_GUID_INFO_STATUS_PENDING,
-};
-
-enum mlx4_guid_alias_rec_ownership {
- MLX4_GUID_DRIVER_ASSIGN,
- MLX4_GUID_SYSADMIN_ASSIGN,
- MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
-};
-
-enum mlx4_guid_alias_rec_method {
- MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
- MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE,
-};
-
-struct mlx4_sriov_alias_guid_info_rec_det {
- u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
- ib_sa_comp_mask guid_indexes; /*indicates which of the 8 records are valid*/
- enum mlx4_guid_alias_rec_status status; /*indicates the administrative status of the record.*/
- u8 method; /*set or delete*/
- enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assigned this alias_guid record*/
-};
-
-struct mlx4_sriov_alias_guid_port_rec_det {
- struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
- struct 
workqueue_struct *wq; - struct delayed_work alias_guid_work; - u8 port; - struct mlx4_sriov_alias_guid *parent; - struct list_head cb_list; -}; - -struct mlx4_sriov_alias_guid { - struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS]; - spinlock_t ag_work_lock; - struct ib_sa_client *sa_client; -}; - -struct mlx4_ib_demux_work { - struct work_struct work; - struct mlx4_ib_dev *dev; - int slave; - int do_init; - u8 port; - -}; - -struct mlx4_ib_tun_tx_buf { - struct mlx4_ib_buf buf; - struct ib_ah *ah; -}; - -struct mlx4_ib_demux_pv_qp { - struct ib_qp *qp; - enum ib_qp_type proxy_qpt; - struct mlx4_ib_buf *ring; - struct mlx4_ib_tun_tx_buf *tx_ring; - spinlock_t tx_lock; - unsigned tx_ix_head; - unsigned tx_ix_tail; -}; - -enum mlx4_ib_demux_pv_state { - DEMUX_PV_STATE_DOWN, - DEMUX_PV_STATE_STARTING, - DEMUX_PV_STATE_ACTIVE, - DEMUX_PV_STATE_DOWNING, -}; - -struct mlx4_ib_demux_pv_ctx { - int port; - int slave; - enum mlx4_ib_demux_pv_state state; - int has_smi; - struct ib_device *ib_dev; - struct ib_cq *cq; - struct ib_pd *pd; - struct ib_mr *mr; - struct work_struct work; - struct workqueue_struct *wq; - struct mlx4_ib_demux_pv_qp qp[2]; -}; - -struct mlx4_ib_demux_ctx { - struct ib_device *ib_dev; - int port; - struct workqueue_struct *wq; - struct workqueue_struct *ud_wq; - spinlock_t ud_lock; - __be64 subnet_prefix; - __be64 guid_cache[128]; - struct mlx4_ib_dev *dev; - /* the following lock protects both mcg_table and mcg_mgid0_list */ - struct mutex mcg_table_lock; - struct rb_root mcg_table; - struct list_head mcg_mgid0_list; - struct workqueue_struct *mcg_wq; - struct mlx4_ib_demux_pv_ctx **tun; - atomic_t tid; - int flushing; /* flushing the work queue */ -}; - -struct mlx4_ib_sriov { - struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS]; - struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS]; - /* when using this spinlock you should use "irq" because - * it may be called from interrupt context.*/ - spinlock_t going_down_lock; - int is_going_down; - - struct mlx4_sriov_alias_guid alias_guid; - - /* CM paravirtualization fields */ - struct list_head cm_list; - spinlock_t id_map_lock; - struct rb_root sl_id_map; - struct idr pv_id_table; -}; - struct mlx4_ib_iboe { spinlock_t lock; struct net_device *netdevs[MLX4_MAX_PORTS]; @@ -420,42 +203,6 @@ struct mlx4_ib_iboe { union ib_gid gid_table[MLX4_MAX_PORTS][128]; }; -struct pkey_mgt { - u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; - u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; - struct list_head pkey_port_list[MLX4_MFUNC_MAX]; - struct kobject *device_parent[MLX4_MFUNC_MAX]; -}; - -struct mlx4_ib_iov_sysfs_attr { - void *ctx; - struct kobject *kobj; - unsigned long data; - u32 entry_num; - char name[15]; - struct device_attribute dentry; - struct device *dev; -}; - -struct mlx4_ib_iov_sysfs_attr_ar { - struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1]; -}; - -struct mlx4_ib_iov_port { - char name[100]; - u8 num; - struct mlx4_ib_dev *dev; - struct list_head list; - struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar; - struct ib_port_attr attr; - struct kobject *cur_port; - struct kobject *admin_alias_parent; - struct kobject *gids_parent; - struct kobject *pkeys_parent; - struct kobject *mcgs_parent; - struct mlx4_ib_iov_sysfs_attr mcg_dentry; -}; - struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; @@ -469,7 +216,6 @@ struct mlx4_ib_dev { struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; struct ib_ah *sm_ah[MLX4_MAX_PORTS]; spinlock_t sm_lock; - struct 
mlx4_ib_sriov sriov; struct mutex cap_mask_mutex; bool ib_active; @@ -477,11 +223,6 @@ struct mlx4_ib_dev { int counters[MLX4_MAX_PORTS]; int *eq_table; int eq_added; - struct kobject *iov_parent; - struct kobject *ports_parent; - struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; - struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS]; - struct pkey_mgt pkeys; }; struct ib_event_work { @@ -490,13 +231,6 @@ struct ib_event_work { struct mlx4_eqe ib_eqe; }; -struct mlx4_ib_qp_tunnel_init_attr { - struct ib_qp_init_attr init_attr; - int slave; - enum ib_qp_type proxy_qp_type; - u8 port; -}; - static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -566,9 +300,6 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) return container_of(ibah, struct mlx4_ib_ah, ibah); } -int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); -void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); - int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db); void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); @@ -625,7 +356,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); -int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, +int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad); int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -640,13 +371,6 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, u64 iova); int mlx4_ib_unmap_fmr(struct list_head *fmr_list); int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); -int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props, int netw_view); -int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey, int netw_view); - -int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid, int netw_view); int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, u8 *mac, int *is_mcast, u8 port); @@ -661,69 +385,10 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) return !!(ah->av.ib.g_slid & 0x80); } -int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx); -void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq); -void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave); -int mlx4_ib_mcg_init(void); -void mlx4_ib_mcg_destroy(void); - -int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid); - -int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave, - struct ib_sa_mad *sa_mad); -int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave, - struct ib_sa_mad *mad); - int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid); void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, enum ib_event_type type); -void mlx4_ib_tunnels_update_work(struct work_struct *work); - -int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, - enum ib_qp_type qpt, struct ib_wc *wc, - struct ib_grh *grh, struct ib_mad *mad); -int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, - enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad); -__be64 
mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); - -int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, - struct ib_mad *mad); - -int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, - struct ib_mad *mad); - -void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev); -void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id); - -/* alias guid support */ -void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port); -int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev); -void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev); -void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port); - -void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, - int block_num, - u8 port_num, u8 *p_data); - -void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, - int block_num, u8 port_num, - u8 *p_data); - -int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, - struct attribute *attr); -void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, - struct attribute *attr); -ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index); - -int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ; - -void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device); - -__be64 mlx4_ib_gen_node_guid(void); - - #endif /* MLX4_IB_H */ diff --git a/trunk/drivers/infiniband/hw/mlx4/qp.c b/trunk/drivers/infiniband/hw/mlx4/qp.c index 96fe103f5973..f585eddef4b7 100644 --- a/trunk/drivers/infiniband/hw/mlx4/qp.c +++ b/trunk/drivers/infiniband/hw/mlx4/qp.c @@ -38,7 +38,6 @@ #include #include #include -#include #include @@ -111,62 +110,16 @@ static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) return container_of(mqp, struct mlx4_ib_sqp, qp); } -static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) -{ - if (!mlx4_is_master(dev->dev)) - return 0; - - return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn && - qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn + - 8 * MLX4_MFUNC_MAX; -} - static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - int proxy_sqp = 0; - int real_sqp = 0; - int i; - /* PPF or Native -- real SQP */ - real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && - qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && - qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3); - if (real_sqp) - return 1; - /* VF or PF -- proxy SQP */ - if (mlx4_is_mfunc(dev->dev)) { - for (i = 0; i < dev->dev->caps.num_ports; i++) { - if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] || - qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) { - proxy_sqp = 1; - break; - } - } - } - return proxy_sqp; + return qp->mqp.qpn >= dev->dev->caps.sqp_start && + qp->mqp.qpn <= dev->dev->caps.sqp_start + 3; } -/* used for INIT/CLOSE port logic */ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - int proxy_qp0 = 0; - int real_qp0 = 0; - int i; - /* PPF or Native -- real QP0 */ - real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && - qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && - qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1); - if (real_qp0) - return 1; - /* VF or PF -- proxy QP0 */ - if (mlx4_is_mfunc(dev->dev)) { - for (i = 0; i < dev->dev->caps.num_ports; i++) { - if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) { - proxy_qp0 = 1; - break; - } - } - } - return proxy_qp0; + return qp->mqp.qpn >= dev->dev->caps.sqp_start && + qp->mqp.qpn <= dev->dev->caps.sqp_start + 1; } static void 
*get_wqe(struct mlx4_ib_qp *qp, int offset) @@ -317,7 +270,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) } } -static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) +static int send_wqe_overhead(enum ib_qp_type type, u32 flags) { /* * UD WQEs must have a datagram segment. @@ -326,29 +279,19 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) * header and space for the ICRC). */ switch (type) { - case MLX4_IB_QPT_UD: + case IB_QPT_UD: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg) + ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); - case MLX4_IB_QPT_PROXY_SMI_OWNER: - case MLX4_IB_QPT_PROXY_SMI: - case MLX4_IB_QPT_PROXY_GSI: - return sizeof (struct mlx4_wqe_ctrl_seg) + - sizeof (struct mlx4_wqe_datagram_seg) + 64; - case MLX4_IB_QPT_TUN_SMI_OWNER: - case MLX4_IB_QPT_TUN_GSI: - return sizeof (struct mlx4_wqe_ctrl_seg) + - sizeof (struct mlx4_wqe_datagram_seg); - - case MLX4_IB_QPT_UC: + case IB_QPT_UC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg); - case MLX4_IB_QPT_RC: + case IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); - case MLX4_IB_QPT_SMI: - case MLX4_IB_QPT_GSI: + case IB_QPT_SMI: + case IB_QPT_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + ALIGN(MLX4_IB_UD_HEADER_SIZE + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, @@ -402,7 +345,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, } static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp) + enum ib_qp_type type, struct mlx4_ib_qp *qp) { int s; @@ -417,8 +360,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * For MLX transport we need 2 extra S/G entries: * one for the header and one for the checksum at the end */ - if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI || - type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) && + if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) return -EINVAL; @@ -462,9 +404,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, */ if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && qp->sq_signal_bits && BITS_PER_LONG == 64 && - type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && - !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | - MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) + type != IB_QPT_SMI && type != IB_QPT_GSI) qp->sq.wqe_shift = ilog2(64); else qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); @@ -536,54 +476,6 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev, return 0; } -static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) -{ - int i; - - qp->sqp_proxy_rcv = - kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt, - GFP_KERNEL); - if (!qp->sqp_proxy_rcv) - return -ENOMEM; - for (i = 0; i < qp->rq.wqe_cnt; i++) { - qp->sqp_proxy_rcv[i].addr = - kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr), - GFP_KERNEL); - if (!qp->sqp_proxy_rcv[i].addr) - goto err; - qp->sqp_proxy_rcv[i].map = - ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr, - sizeof (struct mlx4_ib_proxy_sqp_hdr), - DMA_FROM_DEVICE); - } - return 0; - -err: - while (i > 0) { - --i; - ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, - sizeof (struct mlx4_ib_proxy_sqp_hdr), - DMA_FROM_DEVICE); - kfree(qp->sqp_proxy_rcv[i].addr); - } - 
kfree(qp->sqp_proxy_rcv); - qp->sqp_proxy_rcv = NULL; - return -ENOMEM; -} - -static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) -{ - int i; - - for (i = 0; i < qp->rq.wqe_cnt; i++) { - ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, - sizeof (struct mlx4_ib_proxy_sqp_hdr), - DMA_FROM_DEVICE); - kfree(qp->sqp_proxy_rcv[i].addr); - } - kfree(qp->sqp_proxy_rcv); -} - static int qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) @@ -594,67 +486,10 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) + struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) { int qpn; int err; - struct mlx4_ib_sqp *sqp; - struct mlx4_ib_qp *qp; - enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; - - /* When tunneling special qps, we use a plain UD qp */ - if (sqpn) { - if (mlx4_is_mfunc(dev->dev) && - (!mlx4_is_master(dev->dev) || - !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) { - if (init_attr->qp_type == IB_QPT_GSI) - qp_type = MLX4_IB_QPT_PROXY_GSI; - else if (mlx4_is_master(dev->dev)) - qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; - else - qp_type = MLX4_IB_QPT_PROXY_SMI; - } - qpn = sqpn; - /* add extra sg entry for tunneling */ - init_attr->cap.max_recv_sge++; - } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) { - struct mlx4_ib_qp_tunnel_init_attr *tnl_init = - container_of(init_attr, - struct mlx4_ib_qp_tunnel_init_attr, init_attr); - if ((tnl_init->proxy_qp_type != IB_QPT_SMI && - tnl_init->proxy_qp_type != IB_QPT_GSI) || - !mlx4_is_master(dev->dev)) - return -EINVAL; - if (tnl_init->proxy_qp_type == IB_QPT_GSI) - qp_type = MLX4_IB_QPT_TUN_GSI; - else if (tnl_init->slave == mlx4_master_func_num(dev->dev)) - qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; - else - qp_type = MLX4_IB_QPT_TUN_SMI; - /* we are definitely in the PPF here, since we are creating - * tunnel QPs. base_tunnel_sqpn is therefore valid. */ - qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave - + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1; - sqpn = qpn; - } - - if (!*caller_qp) { - if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || - (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL); - if (!sqp) - return -ENOMEM; - qp = &sqp->qp; - } else { - qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL); - if (!qp) - return -ENOMEM; - } - } else - qp = *caller_qp; - - qp->mlx4_ib_qp_type = qp_type; mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); @@ -715,7 +550,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; - err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); + err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); if (err) goto err; @@ -751,13 +586,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } if (sqpn) { - if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { - if (alloc_proxy_bufs(pd->device, qp)) { - err = -ENOMEM; - goto err_wrid; - } - } + qpn = sqpn; } else { /* Raw packet QPNs must be aligned to 8 bits. 
If not, the WQE * BlueFlame setup flow wrongly causes VLAN insertion. */ @@ -766,7 +595,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, else err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); if (err) - goto err_proxy; + goto err_wrid; } err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); @@ -784,16 +613,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); qp->mqp.event = mlx4_ib_qp_event; - if (!*caller_qp) - *caller_qp = qp; + return 0; err_qpn: if (!sqpn) mlx4_qp_release_range(dev->dev, qpn, 1); -err_proxy: - if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) - free_proxy_bufs(pd->device, qp); + err_wrid: if (pd->uobject) { if (qp_has_rq(init_attr)) @@ -817,8 +643,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, mlx4_db_free(dev->dev, &qp->db); err: - if (!*caller_qp) - kfree(qp); return err; } @@ -931,7 +755,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_qp_free(dev->dev, &qp->mqp); - if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) + if (!is_sqp(dev, qp)) mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); mlx4_mtt_cleanup(dev->dev, &qp->mtt); @@ -944,9 +768,6 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, } else { kfree(qp->sq.wrid); kfree(qp->rq.wrid); - if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - free_proxy_bufs(&dev->ib_dev, qp); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); if (qp->rq.wqe_cnt) mlx4_db_free(dev->dev, &qp->db); @@ -955,46 +776,25 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, del_gid_entries(qp); } -static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) -{ - /* Native or PPF */ - if (!mlx4_is_mfunc(dev->dev) || - (mlx4_is_master(dev->dev) && - attr->create_flags & MLX4_IB_SRIOV_SQP)) { - return dev->dev->phys_caps.base_sqpn + - (attr->qp_type == IB_QPT_SMI ? 0 : 2) + - attr->port_num - 1; - } - /* PF or VF -- creating proxies */ - if (attr->qp_type == IB_QPT_SMI) - return dev->dev->caps.qp0_proxy[attr->port_num - 1]; - else - return dev->dev->caps.qp1_proxy[attr->port_num - 1]; -} - struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct mlx4_ib_qp *qp = NULL; + struct mlx4_ib_sqp *sqp; + struct mlx4_ib_qp *qp; int err; u16 xrcdn = 0; /* - * We only support LSO, vendor flag1, and multicast loopback blocking, - * and only for kernel UD QPs. + * We only support LSO and multicast loopback blocking, and + * only for kernel UD QPs. 
*/ - if (init_attr->create_flags & ~(MLX4_IB_QP_LSO | - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | - MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP)) + if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) return ERR_PTR(-EINVAL); if (init_attr->create_flags && - (udata || - ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) && - init_attr->qp_type != IB_QPT_UD) || - ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && - init_attr->qp_type > IB_QPT_GSI))) + (udata || init_attr->qp_type != IB_QPT_UD)) return ERR_PTR(-EINVAL); switch (init_attr->qp_type) { @@ -1010,17 +810,18 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, /* fall through */ case IB_QPT_RC: case IB_QPT_UC: + case IB_QPT_UD: case IB_QPT_RAW_PACKET: + { qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - /* fall through */ - case IB_QPT_UD: - { - err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp); - if (err) + + err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp); + if (err) { + kfree(qp); return ERR_PTR(err); + } qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; @@ -1034,11 +835,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (udata) return ERR_PTR(-EINVAL); + sqp = kzalloc(sizeof *sqp, GFP_KERNEL); + if (!sqp) + return ERR_PTR(-ENOMEM); + + qp = &sqp->qp; + err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - get_sqp_num(to_mdev(pd->device), init_attr), - &qp); - if (err) + to_mdev(pd->device)->dev->caps.sqp_start + + (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + + init_attr->port_num - 1, + qp); + if (err) { + kfree(sqp); return ERR_PTR(err); + } qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; @@ -1073,27 +884,18 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) return 0; } -static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) +static int to_mlx4_st(enum ib_qp_type type) { switch (type) { - case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC; - case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC; - case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD; - case MLX4_IB_QPT_XRC_INI: - case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; - case MLX4_IB_QPT_SMI: - case MLX4_IB_QPT_GSI: - case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; - - case MLX4_IB_QPT_PROXY_SMI_OWNER: - case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ? - MLX4_QP_ST_MLX : -1); - case MLX4_IB_QPT_PROXY_SMI: - case MLX4_IB_QPT_TUN_SMI: - case MLX4_IB_QPT_PROXY_GSI: - case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ? 
- MLX4_QP_ST_UD : -1); - default: return -1; + case IB_QPT_RC: return MLX4_QP_ST_RC; + case IB_QPT_UC: return MLX4_QP_ST_UC; + case IB_QPT_UD: return MLX4_QP_ST_UD; + case IB_QPT_XRC_INI: + case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; + default: return -1; } } @@ -1241,7 +1043,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, return -ENOMEM; context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | - (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); + (to_mlx4_st(ibqp->qp_type) << 16)); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); @@ -1319,16 +1121,13 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_PKEY_INDEX) { - if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) - context->pri_path.disable_pkey_check = 0x40; context->pri_path.pkey_index = attr->pkey_index; optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; } if (attr_mask & IB_QP_AV) { if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, - attr_mask & IB_QP_PORT ? - attr->port_num : qp->port)) + attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) goto out; optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | @@ -1411,24 +1210,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_RQ_PSN) context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); - /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */ if (attr_mask & IB_QP_QKEY) { - if (qp->mlx4_ib_qp_type & - (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) - context->qkey = cpu_to_be32(IB_QP_SET_QKEY); - else { - if (mlx4_is_mfunc(dev->dev) && - !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) && - (attr->qkey & MLX4_RESERVED_QKEY_MASK) == - MLX4_RESERVED_QKEY_BASE) { - pr_err("Cannot use reserved QKEY" - " 0x%x (range 0xffff0000..0xffffffff" - " is reserved)\n", attr->qkey); - err = -EINVAL; - goto out; - } - context->qkey = cpu_to_be32(attr->qkey); - } + context->qkey = cpu_to_be32(attr->qkey); optpar |= MLX4_QP_OPTPAR_Q_KEY; } @@ -1444,17 +1227,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_RAW_PACKET)) { context->pri_path.sched_queue = (qp->port - 1) << 6; - if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || - qp->mlx4_ib_qp_type & - (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) { + if (is_qp0(dev, qp)) context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; - if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI) - context->pri_path.fl = 0x80; - } else { - if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) - context->pri_path.fl = 0x80; + else context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; - } } if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && @@ -1570,7 +1346,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > dev->num_ports)) { + (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { pr_debug("qpn 0x%x: invalid port number (%d) specified " "for transition %d to %d. 
qp_type %d\n", ibqp->qp_num, attr->port_num, cur_state, @@ -1624,114 +1400,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return err; } -static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, - struct ib_send_wr *wr, - void *wqe, unsigned *mlx_seg_len) -{ - struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); - struct ib_device *ib_dev = &mdev->ib_dev; - struct mlx4_wqe_mlx_seg *mlx = wqe; - struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); - u16 pkey; - u32 qkey; - int send_size; - int header_size; - int spc; - int i; - - if (wr->opcode != IB_WR_SEND) - return -EINVAL; - - send_size = 0; - - for (i = 0; i < wr->num_sge; ++i) - send_size += wr->sg_list[i].length; - - /* for proxy-qp0 sends, need to add in size of tunnel header */ - /* for tunnel-qp0 sends, tunnel header is already in s/g list */ - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) - send_size += sizeof (struct mlx4_ib_tunnel_header); - - ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header); - - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { - sqp->ud_header.lrh.service_level = - be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; - sqp->ud_header.lrh.destination_lid = - cpu_to_be16(ah->av.ib.g_slid & 0x7f); - sqp->ud_header.lrh.source_lid = - cpu_to_be16(ah->av.ib.g_slid & 0x7f); - } - - mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - - /* force loopback */ - mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR); - mlx->rlid = sqp->ud_header.lrh.destination_lid; - - sqp->ud_header.lrh.virtual_lane = 0; - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); - ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); - sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); - else - sqp->ud_header.bth.destination_qpn = - cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); - - sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) - return -EINVAL; - sqp->ud_header.deth.qkey = cpu_to_be32(qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); - - sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; - sqp->ud_header.immediate_present = 0; - - header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); - - /* - * Inline data segments may not cross a 64 byte boundary. If - * our UD header is bigger than the space available up to the - * next 64 byte boundary in the WQE, use two inline data - * segments to hold the UD header. - */ - spc = MLX4_INLINE_ALIGN - - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); - if (header_size <= spc) { - inl->byte_count = cpu_to_be32(1 << 31 | header_size); - memcpy(inl + 1, sqp->header_buf, header_size); - i = 1; - } else { - inl->byte_count = cpu_to_be32(1 << 31 | spc); - memcpy(inl + 1, sqp->header_buf, spc); - - inl = (void *) (inl + 1) + spc; - memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); - /* - * Need a barrier here to make sure all the data is - * visible before the byte_count field is set. - * Otherwise the HCA prefetcher could grab the 64-byte - * chunk with this inline segment and get a valid (!= - * 0xffffffff) byte count but stale data, and end up - * generating a packet with bad headers. 
- * - * The first inline segment's byte_count field doesn't - * need a barrier, because it comes after a - * control/MLX segment and therefore is at an offset - * of 16 mod 64. - */ - wmb(); - inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); - i = 2; - } - - *mlx_seg_len = - ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); - return 0; -} - static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) { @@ -1750,7 +1418,6 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, int is_vlan = 0; int is_grh; u16 vlan; - int err = 0; send_size = 0; for (i = 0; i < wr->num_sge; ++i) @@ -1759,24 +1426,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - /* When multi-function is enabled, the ib_core gid - * indexes don't necessarily match the hw ones, so - * we must use our own cache */ - sgid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sgid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; - } else { - err = ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, &sgid); - if (err) - return err; - } - + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid); vlan = rdma_get_vlan_id(&sgid); is_vlan = vlan < 0x1000; } @@ -1795,21 +1446,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - /* When multi-function is enabled, the ib_core gid - * indexes don't necessarily match the hw ones, so - * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; - } else - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid); + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); } @@ -1821,8 +1459,6 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? 
MLX4_WQE_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); - if (ah->av.ib.port_pd & cpu_to_be32(0x80000000)) - mlx->flags |= cpu_to_be32(0x1); /* force loopback */ mlx->rlid = sqp->ud_header.lrh.destination_lid; } @@ -2031,63 +1667,6 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); } -static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, - struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, enum ib_qp_type qpt) -{ - union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; - struct mlx4_av sqp_av = {0}; - int port = *((u8 *) &av->ib.port_pd) & 0x3; - - /* force loopback */ - sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000); - sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */ - sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel & - cpu_to_be32(0xf0000000); - - memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); - /* This function used only for sending on QP1 proxies */ - dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); - /* Use QKEY from the QP context, which is set by master */ - dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); -} - -static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) -{ - struct mlx4_wqe_inline_seg *inl = wqe; - struct mlx4_ib_tunnel_header hdr; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); - int spc; - int i; - - memcpy(&hdr.av, &ah->av, sizeof hdr.av); - hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); - hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); - hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); - - spc = MLX4_INLINE_ALIGN - - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); - if (sizeof (hdr) <= spc) { - memcpy(inl + 1, &hdr, sizeof (hdr)); - wmb(); - inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr)); - i = 1; - } else { - memcpy(inl + 1, &hdr, spc); - wmb(); - inl->byte_count = cpu_to_be32(1 << 31 | spc); - - inl = (void *) (inl + 1) + spc; - memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc); - wmb(); - inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc)); - i = 2; - } - - *mlx_seg_len = - ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16); -} - static void set_mlx_icrc_seg(void *dseg) { u32 *t = dseg; @@ -2169,13 +1748,6 @@ static __be32 send_ieth(struct ib_send_wr *wr) } } -static void add_zero_len_inline(void *wqe) -{ - struct mlx4_wqe_inline_seg *inl = wqe; - memset(wqe, 0, 16); - inl->byte_count = cpu_to_be32(1 << 31); -} - int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -2234,9 +1806,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wqe += sizeof *ctrl; size = sizeof *ctrl / 16; - switch (qp->mlx4_ib_qp_type) { - case MLX4_IB_QPT_RC: - case MLX4_IB_QPT_UC: + switch (ibqp->qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: @@ -2297,25 +1869,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } break; - case MLX4_IB_QPT_TUN_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); - if (unlikely(err)) { - *bad_wr = wr; - goto out; - } - wqe += seglen; - size += seglen / 16; - break; - case MLX4_IB_QPT_TUN_SMI: - case MLX4_IB_QPT_TUN_GSI: - /* this is a UD qp used in MAD responses to slaves. 
*/ - set_datagram_seg(wqe, wr); - /* set the forced-loopback bit in the data seg av */ - *(__be32 *) wqe |= cpu_to_be32(0x80000000); - wqe += sizeof (struct mlx4_wqe_datagram_seg); - size += sizeof (struct mlx4_wqe_datagram_seg) / 16; - break; - case MLX4_IB_QPT_UD: + case IB_QPT_UD: set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; @@ -2332,47 +1886,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } break; - case MLX4_IB_QPT_PROXY_SMI_OWNER: - if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) { - err = -ENOSYS; - *bad_wr = wr; - goto out; - } - err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); - if (unlikely(err)) { - *bad_wr = wr; - goto out; - } - wqe += seglen; - size += seglen / 16; - /* to start tunnel header on a cache-line boundary */ - add_zero_len_inline(wqe); - wqe += 16; - size++; - build_tunnel_header(wr, wqe, &seglen); - wqe += seglen; - size += seglen / 16; - break; - case MLX4_IB_QPT_PROXY_SMI: - /* don't allow QP0 sends on guests */ - err = -ENOSYS; - *bad_wr = wr; - goto out; - case MLX4_IB_QPT_PROXY_GSI: - /* If we are tunneling special qps, this is a UD qp. - * In this case we first add a UD segment targeting - * the tunnel qp, and then add a header with address - * information */ - set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type); - wqe += sizeof (struct mlx4_wqe_datagram_seg); - size += sizeof (struct mlx4_wqe_datagram_seg) / 16; - build_tunnel_header(wr, wqe, &seglen); - wqe += seglen; - size += seglen / 16; - break; - - case MLX4_IB_QPT_SMI: - case MLX4_IB_QPT_GSI: + case IB_QPT_SMI: + case IB_QPT_GSI: err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; @@ -2398,10 +1913,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); /* Add one more inline data segment for ICRC for MLX sends */ - if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || - qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI || - qp->mlx4_ib_qp_type & - (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) { + if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI)) { set_mlx_icrc_seg(dseg + 1); size += sizeof (struct mlx4_wqe_data_seg) / 16; } @@ -2493,10 +2006,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int err = 0; int nreq; int ind; - int max_gs; int i; - max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); ind = qp->rq.head & (qp->rq.wqe_cnt - 1); @@ -2516,25 +2027,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, scat = get_recv_wqe(qp, ind); - if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { - ib_dma_sync_single_for_device(ibqp->device, - qp->sqp_proxy_rcv[ind].map, - sizeof (struct mlx4_ib_proxy_sqp_hdr), - DMA_FROM_DEVICE); - scat->byte_count = - cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr)); - /* use dma lkey from upper layer entry */ - scat->lkey = cpu_to_be32(wr->sg_list->lkey); - scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map); - scat++; - max_gs--; - } - for (i = 0; i < wr->num_sge; ++i) __set_data_seg(scat + i, wr->sg_list + i); - if (i < max_gs) { + if (i < qp->rq.max_gs) { scat[i].byte_count = 0; scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); scat[i].addr = 0; diff --git a/trunk/drivers/infiniband/hw/mlx4/sysfs.c b/trunk/drivers/infiniband/hw/mlx4/sysfs.c deleted file mode 100644 
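[Editor's aside on the mlx4_ib_post_recv() hunk above, not part of the patch] When a receive WR carries fewer SGEs than rq.max_gs, the driver closes the scatter list with a zero-length sentinel whose lkey is MLX4_INVALID_LKEY, which tells the HCA to stop scanning. A minimal sketch of that termination pattern, with a hypothetical helper name, reusing struct mlx4_wqe_data_seg from this driver:

	static void close_scatter_list(struct mlx4_wqe_data_seg *scat,
				       int used, int max_gs)
	{
		if (used < max_gs) {
			/* zero length + invalid lkey marks end of list */
			scat[used].byte_count = 0;
			scat[used].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
			scat[used].addr = 0;
		}
	}
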
index 5b2a01dfb907..000000000000 --- a/trunk/drivers/infiniband/hw/mlx4/sysfs.c +++ /dev/null @@ -1,794 +0,0 @@ -/* - * Copyright (c) 2012 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/*#include "core_priv.h"*/ -#include "mlx4_ib.h" -#include -#include -#include - -#include -/*show_admin_alias_guid returns the administratively assigned value of that GUID. - * Values returned in buf parameter string: - * 0 - requests opensm to assign a value. - * ffffffffffffffff - delete this entry. - * other - value assigned by administrator. - */ -static ssize_t show_admin_alias_guid(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int record_num;/*0-15*/ - int guid_index_in_rec; /*0 - 7*/ - struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = - container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); - struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; - struct mlx4_ib_dev *mdev = port->dev; - - record_num = mlx4_ib_iov_dentry->entry_num / 8 ; - guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ; - - return sprintf(buf, "%llx\n", - be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid. - ports_guid[port->num - 1]. - all_rec_per_port[record_num]. - all_recs[8 * guid_index_in_rec])); -} - -/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID. - * Values in buf parameter string: - * 0 - requests opensm to assign a value. - * 0xffffffffffffffff - delete this entry. - * other - guid value assigned by the administrator. 
- */ -static ssize_t store_admin_alias_guid(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - int record_num;/*0-15*/ - int guid_index_in_rec; /*0 - 7*/ - struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = - container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); - struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; - struct mlx4_ib_dev *mdev = port->dev; - u64 sysadmin_ag_val; - - record_num = mlx4_ib_iov_dentry->entry_num / 8; - guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8; - if (0 == record_num && 0 == guid_index_in_rec) { - pr_err("GUID 0 block 0 is RO\n"); - return count; - } - sscanf(buf, "%llx", &sysadmin_ag_val); - *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1]. - all_rec_per_port[record_num]. - all_recs[GUID_REC_SIZE * guid_index_in_rec] = - cpu_to_be64(sysadmin_ag_val); - - /* Change the state to be pending for update */ - mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status - = MLX4_GUID_INFO_STATUS_IDLE ; - - mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method - = MLX4_GUID_INFO_RECORD_SET; - - switch (sysadmin_ag_val) { - case MLX4_GUID_FOR_DELETE_VAL: - mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method - = MLX4_GUID_INFO_RECORD_DELETE; - mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership - = MLX4_GUID_SYSADMIN_ASSIGN; - break; - /* The sysadmin requests the SM to re-assign */ - case MLX4_NOT_SET_GUID: - mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership - = MLX4_GUID_DRIVER_ASSIGN; - break; - /* The sysadmin requests a specific value.*/ - default: - mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership - = MLX4_GUID_SYSADMIN_ASSIGN; - break; - } - - /* set the record index */ - mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes - = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec); - - mlx4_ib_init_alias_guid_work(mdev, port->num - 1); - - return count; -} - -static ssize_t show_port_gid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = - container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); - struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; - struct mlx4_ib_dev *mdev = port->dev; - union ib_gid gid; - ssize_t ret; - - ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num, - mlx4_ib_iov_dentry->entry_num, &gid, 1); - if (ret) - return ret; - ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) gid.raw)[0]), - be16_to_cpu(((__be16 *) gid.raw)[1]), - be16_to_cpu(((__be16 *) gid.raw)[2]), - be16_to_cpu(((__be16 *) gid.raw)[3]), - be16_to_cpu(((__be16 *) gid.raw)[4]), - be16_to_cpu(((__be16 *) gid.raw)[5]), - be16_to_cpu(((__be16 *) gid.raw)[6]), - be16_to_cpu(((__be16 *) gid.raw)[7])); - return ret; -} - -static ssize_t show_phys_port_pkey(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = - container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); - struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; - struct mlx4_ib_dev *mdev = port->dev; - u16 pkey; - ssize_t ret; - - ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num, - mlx4_ib_iov_dentry->entry_num, &pkey, 1); - if (ret) - return ret; - - return sprintf(buf, "0x%04x\n", pkey); -} - -#define 
DENTRY_REMOVE(_dentry) \ -do { \ - sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \ -} while (0); - -static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry, - char *_name, struct kobject *_kobj, - ssize_t (*show)(struct device *dev, - struct device_attribute *attr, - char *buf), - ssize_t (*store)(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - ) -{ - int ret = 0; - struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry; - - vdentry->ctx = _ctx; - vdentry->dentry.show = show; - vdentry->dentry.store = store; - sysfs_attr_init(&vdentry->dentry.attr); - vdentry->dentry.attr.name = vdentry->name; - vdentry->dentry.attr.mode = 0; - vdentry->kobj = _kobj; - snprintf(vdentry->name, 15, "%s", _name); - - if (vdentry->dentry.store) - vdentry->dentry.attr.mode |= S_IWUSR; - - if (vdentry->dentry.show) - vdentry->dentry.attr.mode |= S_IRUGO; - - ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr); - if (ret) { - pr_err("failed to create %s\n", vdentry->dentry.attr.name); - vdentry->ctx = NULL; - return ret; - } - - return ret; -} - -int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, - struct attribute *attr) -{ - struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1]; - int ret; - - ret = sysfs_create_file(port->mcgs_parent, attr); - if (ret) - pr_err("failed to create %s\n", attr->name); - - return ret; -} - -void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, - struct attribute *attr) -{ - struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1]; - - sysfs_remove_file(port->mcgs_parent, attr); -} - -static int add_port_entries(struct mlx4_ib_dev *device, int port_num) -{ - int i; - char buff[10]; - struct mlx4_ib_iov_port *port = NULL; - int ret = 0 ; - struct ib_port_attr attr; - - /* get the physical gid and pkey table sizes.*/ - ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1); - if (ret) - goto err; - - port = &device->iov_ports[port_num - 1]; - port->dev = device; - port->num = port_num; - /* Directory structure: - * iov - - * port num - - * admin_guids - * gids (operational) - * mcg_table - */ - port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar), - GFP_KERNEL); - if (!port->dentr_ar) { - ret = -ENOMEM; - goto err; - } - sprintf(buff, "%d", port_num); - port->cur_port = kobject_create_and_add(buff, - kobject_get(device->ports_parent)); - if (!port->cur_port) { - ret = -ENOMEM; - goto kobj_create_err; - } - /* admin GUIDs */ - port->admin_alias_parent = kobject_create_and_add("admin_guids", - kobject_get(port->cur_port)); - if (!port->admin_alias_parent) { - ret = -ENOMEM; - goto err_admin_guids; - } - for (i = 0 ; i < attr.gid_tbl_len; i++) { - sprintf(buff, "%d", i); - port->dentr_ar->dentries[i].entry_num = i; - ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i], - buff, port->admin_alias_parent, - show_admin_alias_guid, store_admin_alias_guid); - if (ret) - goto err_admin_alias_parent; - } - - /* gids subdirectory (operational gids) */ - port->gids_parent = kobject_create_and_add("gids", - kobject_get(port->cur_port)); - if (!port->gids_parent) { - ret = -ENOMEM; - goto err_gids; - } - - for (i = 0 ; i < attr.gid_tbl_len; i++) { - sprintf(buff, "%d", i); - port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i; - ret = create_sysfs_entry(port, - &port->dentr_ar->dentries[attr.gid_tbl_len + i], - buff, - port->gids_parent, show_port_gid, NULL); - if (ret) - goto err_gids_parent; - } - - /* physical port pkey 
table */ - port->pkeys_parent = - kobject_create_and_add("pkeys", kobject_get(port->cur_port)); - if (!port->pkeys_parent) { - ret = -ENOMEM; - goto err_pkeys; - } - - for (i = 0 ; i < attr.pkey_tbl_len; i++) { - sprintf(buff, "%d", i); - port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i; - ret = create_sysfs_entry(port, - &port->dentr_ar->dentries[2 * attr.gid_tbl_len + i], - buff, port->pkeys_parent, - show_phys_port_pkey, NULL); - if (ret) - goto err_pkeys_parent; - } - - /* MCGs table */ - port->mcgs_parent = - kobject_create_and_add("mcgs", kobject_get(port->cur_port)); - if (!port->mcgs_parent) { - ret = -ENOMEM; - goto err_mcgs; - } - return 0; - -err_mcgs: - kobject_put(port->cur_port); - -err_pkeys_parent: - kobject_put(port->pkeys_parent); - -err_pkeys: - kobject_put(port->cur_port); - -err_gids_parent: - kobject_put(port->gids_parent); - -err_gids: - kobject_put(port->cur_port); - -err_admin_alias_parent: - kobject_put(port->admin_alias_parent); - -err_admin_guids: - kobject_put(port->cur_port); - kobject_put(port->cur_port); /* once more for create_and_add buff */ - -kobj_create_err: - kobject_put(device->ports_parent); - kfree(port->dentr_ar); - -err: - pr_err("add_port_entries FAILED: for port:%d, error: %d\n", - port_num, ret); - return ret; -} - -static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) -{ - char base_name[9]; - - /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->pdev), max); - strncpy(base_name, name, 8); /*till xxxx:yy:*/ - base_name[8] = '\0'; - /* with no ARI only 3 last bits are used so when the fn is higher than 8 - * need to add it to the dev num, so count in the last number will be - * modulo 8 */ - sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8)); -} - -struct mlx4_port { - struct kobject kobj; - struct mlx4_ib_dev *dev; - struct attribute_group pkey_group; - struct attribute_group gid_group; - u8 port_num; - int slave; -}; - - -static void mlx4_port_release(struct kobject *kobj) -{ - struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj); - struct attribute *a; - int i; - - for (i = 0; (a = p->pkey_group.attrs[i]); ++i) - kfree(a); - kfree(p->pkey_group.attrs); - for (i = 0; (a = p->gid_group.attrs[i]); ++i) - kfree(a); - kfree(p->gid_group.attrs); - kfree(p); -} - -struct port_attribute { - struct attribute attr; - ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf); - ssize_t (*store)(struct mlx4_port *, struct port_attribute *, - const char *buf, size_t count); -}; - -static ssize_t port_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct port_attribute *port_attr = - container_of(attr, struct port_attribute, attr); - struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj); - - if (!port_attr->show) - return -EIO; - return port_attr->show(p, port_attr, buf); -} - -static ssize_t port_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, size_t size) -{ - struct port_attribute *port_attr = - container_of(attr, struct port_attribute, attr); - struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj); - - if (!port_attr->store) - return -EIO; - return port_attr->store(p, port_attr, buf, size); -} - -static const struct sysfs_ops port_sysfs_ops = { - .show = port_attr_show, - .store = port_attr_store, -}; - -static struct kobj_type port_type = { - .release = mlx4_port_release, - .sysfs_ops = &port_sysfs_ops, -}; - -struct port_table_attribute { - struct port_attribute attr; - 
char name[8]; - int index; -}; - -static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr, - char *buf) -{ - struct port_table_attribute *tab_attr = - container_of(attr, struct port_table_attribute, attr); - ssize_t ret = -ENODEV; - - if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >= - (p->dev->dev->caps.pkey_table_len[p->port_num])) - ret = sprintf(buf, "none\n"); - else - ret = sprintf(buf, "%d\n", - p->dev->pkeys.virt2phys_pkey[p->slave] - [p->port_num - 1][tab_attr->index]); - return ret; -} - -static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, - const char *buf, size_t count) -{ - struct port_table_attribute *tab_attr = - container_of(attr, struct port_table_attribute, attr); - int idx; - int err; - - /* do not allow remapping Dom0 virtual pkey table */ - if (p->slave == mlx4_master_func_num(p->dev->dev)) - return -EINVAL; - - if (!strncasecmp(buf, "no", 2)) - idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1; - else if (sscanf(buf, "%i", &idx) != 1 || - idx >= p->dev->dev->caps.pkey_table_len[p->port_num] || - idx < 0) - return -EINVAL; - - p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1] - [tab_attr->index] = idx; - mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num, - tab_attr->index, idx); - err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num); - if (err) { - pr_err("mlx4_gen_pkey_eqe failed for slave %d," - " port %d, index %d\n", p->slave, p->port_num, idx); - return err; - } - return count; -} - -static ssize_t show_port_gid_idx(struct mlx4_port *p, - struct port_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", p->slave); -} - -static struct attribute ** -alloc_group_attrs(ssize_t (*show)(struct mlx4_port *, - struct port_attribute *, char *buf), - ssize_t (*store)(struct mlx4_port *, struct port_attribute *, - const char *buf, size_t count), - int len) -{ - struct attribute **tab_attr; - struct port_table_attribute *element; - int i; - - tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL); - if (!tab_attr) - return NULL; - - for (i = 0; i < len; i++) { - element = kzalloc(sizeof (struct port_table_attribute), - GFP_KERNEL); - if (!element) - goto err; - if (snprintf(element->name, sizeof (element->name), - "%d", i) >= sizeof (element->name)) { - kfree(element); - goto err; - } - sysfs_attr_init(&element->attr.attr); - element->attr.attr.name = element->name; - if (store) { - element->attr.attr.mode = S_IWUSR | S_IRUGO; - element->attr.store = store; - } else - element->attr.attr.mode = S_IRUGO; - - element->attr.show = show; - element->index = i; - tab_attr[i] = &element->attr.attr; - } - return tab_attr; - -err: - while (--i >= 0) - kfree(tab_attr[i]); - kfree(tab_attr); - return NULL; -} - -static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) -{ - struct mlx4_port *p; - int i; - int ret; - - p = kzalloc(sizeof *p, GFP_KERNEL); - if (!p) - return -ENOMEM; - - p->dev = dev; - p->port_num = port_num; - p->slave = slave; - - ret = kobject_init_and_add(&p->kobj, &port_type, - kobject_get(dev->dev_ports_parent[slave]), - "%d", port_num); - if (ret) - goto err_alloc; - - p->pkey_group.name = "pkey_idx"; - p->pkey_group.attrs = - alloc_group_attrs(show_port_pkey, store_port_pkey, - dev->dev->caps.pkey_table_len[port_num]); - if (!p->pkey_group.attrs) - goto err_alloc; - - ret = sysfs_create_group(&p->kobj, &p->pkey_group); - if (ret) - goto err_free_pkey; - - p->gid_group.name = "gid_idx"; - p->gid_group.attrs = 
alloc_group_attrs(show_port_gid_idx, NULL, 1); - if (!p->gid_group.attrs) - goto err_free_pkey; - - ret = sysfs_create_group(&p->kobj, &p->gid_group); - if (ret) - goto err_free_gid; - - list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]); - return 0; - -err_free_gid: - kfree(p->gid_group.attrs[0]); - kfree(p->gid_group.attrs); - -err_free_pkey: - for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i) - kfree(p->pkey_group.attrs[i]); - kfree(p->pkey_group.attrs); - -err_alloc: - kobject_put(dev->dev_ports_parent[slave]); - kfree(p); - return ret; -} - -static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave) -{ - char name[32]; - int err; - int port; - struct kobject *p, *t; - struct mlx4_port *mport; - - get_name(dev, name, slave, sizeof name); - - dev->pkeys.device_parent[slave] = - kobject_create_and_add(name, kobject_get(dev->iov_parent)); - - if (!dev->pkeys.device_parent[slave]) { - err = -ENOMEM; - goto fail_dev; - } - - INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]); - - dev->dev_ports_parent[slave] = - kobject_create_and_add("ports", - kobject_get(dev->pkeys.device_parent[slave])); - - if (!dev->dev_ports_parent[slave]) { - err = -ENOMEM; - goto err_ports; - } - - for (port = 1; port <= dev->dev->caps.num_ports; ++port) { - err = add_port(dev, port, slave); - if (err) - goto err_add; - } - return 0; - -err_add: - list_for_each_entry_safe(p, t, - &dev->pkeys.pkey_port_list[slave], - entry) { - list_del(&p->entry); - mport = container_of(p, struct mlx4_port, kobj); - sysfs_remove_group(p, &mport->pkey_group); - sysfs_remove_group(p, &mport->gid_group); - kobject_put(p); - } - kobject_put(dev->dev_ports_parent[slave]); - -err_ports: - kobject_put(dev->pkeys.device_parent[slave]); - /* extra put for the device_parent create_and_add */ - kobject_put(dev->pkeys.device_parent[slave]); - -fail_dev: - kobject_put(dev->iov_parent); - return err; -} - -static int register_pkey_tree(struct mlx4_ib_dev *device) -{ - int i; - - if (!mlx4_is_master(device->dev)) - return 0; - - for (i = 0; i <= device->dev->num_vfs; ++i) - register_one_pkey_tree(device, i); - - return 0; -} - -static void unregister_pkey_tree(struct mlx4_ib_dev *device) -{ - int slave; - struct kobject *p, *t; - struct mlx4_port *port; - - if (!mlx4_is_master(device->dev)) - return; - - for (slave = device->dev->num_vfs; slave >= 0; --slave) { - list_for_each_entry_safe(p, t, - &device->pkeys.pkey_port_list[slave], - entry) { - list_del(&p->entry); - port = container_of(p, struct mlx4_port, kobj); - sysfs_remove_group(p, &port->pkey_group); - sysfs_remove_group(p, &port->gid_group); - kobject_put(p); - kobject_put(device->dev_ports_parent[slave]); - } - kobject_put(device->dev_ports_parent[slave]); - kobject_put(device->pkeys.device_parent[slave]); - kobject_put(device->pkeys.device_parent[slave]); - kobject_put(device->iov_parent); - } -} - -int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) -{ - int i; - int ret = 0; - - if (!mlx4_is_master(dev->dev)) - return 0; - - dev->iov_parent = - kobject_create_and_add("iov", - kobject_get(dev->ib_dev.ports_parent->parent)); - if (!dev->iov_parent) { - ret = -ENOMEM; - goto err; - } - dev->ports_parent = - kobject_create_and_add("ports", - kobject_get(dev->iov_parent)); - if (!dev->iov_parent) { - ret = -ENOMEM; - goto err_ports; - } - - for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) { - ret = add_port_entries(dev, i); - if (ret) - goto err_add_entries; - } - - ret = register_pkey_tree(dev); - if (ret) - goto err_add_entries; - return 0; - 
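[Editor's aside, not part of the patch] The sysfs code being removed here pairs every kobject_create_and_add() with an extra kobject_get() on the parent, so each error path (and the "once more" puts in the teardown functions below) must release both the child and that extra parent reference. A condensed sketch of the create/unwind pairing, with hypothetical helper names:

	static struct kobject *create_pinned(struct kobject *parent,
					     const char *name)
	{
		/* take an extra parent reference for the child's lifetime */
		struct kobject *child =
			kobject_create_and_add(name, kobject_get(parent));

		if (!child)
			kobject_put(parent);	/* creation failed: unpin */
		return child;
	}

	static void destroy_pinned(struct kobject *child,
				   struct kobject *parent)
	{
		kobject_put(child);	/* drop the child itself */
		kobject_put(parent);	/* drop the extra pin taken above */
	}
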
-err_add_entries: - kobject_put(dev->ports_parent); - -err_ports: - kobject_put(dev->iov_parent); -err: - kobject_put(dev->ib_dev.ports_parent->parent); - pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret); - return ret; -} - -static void unregister_alias_guid_tree(struct mlx4_ib_dev *device) -{ - struct mlx4_ib_iov_port *p; - int i; - - if (!mlx4_is_master(device->dev)) - return; - - for (i = 0; i < device->dev->caps.num_ports; i++) { - p = &device->iov_ports[i]; - kobject_put(p->admin_alias_parent); - kobject_put(p->gids_parent); - kobject_put(p->pkeys_parent); - kobject_put(p->mcgs_parent); - kobject_put(p->cur_port); - kobject_put(p->cur_port); - kobject_put(p->cur_port); - kobject_put(p->cur_port); - kobject_put(p->cur_port); - kobject_put(p->dev->ports_parent); - kfree(p->dentr_ar); - } -} - -void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device) -{ - unregister_alias_guid_tree(device); - unregister_pkey_tree(device); - kobject_put(device->ports_parent); - kobject_put(device->iov_parent); - kobject_put(device->iov_parent); - kobject_put(device->ib_dev.ports_parent->parent); -} diff --git a/trunk/drivers/infiniband/hw/nes/nes_cm.c b/trunk/drivers/infiniband/hw/nes/nes_cm.c index 020e95c4c4b9..49a9383137db 100644 --- a/trunk/drivers/infiniband/hw/nes/nes_cm.c +++ b/trunk/drivers/infiniband/hw/nes/nes_cm.c @@ -1356,7 +1356,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi else netdev = nesvnic->netdev; - neigh = dst_neigh_lookup(&rt->dst, &dst_ip); + neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev); rcu_read_lock(); if (neigh) { @@ -1465,12 +1465,8 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loopbackpartner = NULL; /* get the mac addr for the remote node */ - if (ipv4_is_loopback(htonl(cm_node->rem_addr))) { - arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE); - } else { - oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); - arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex); - } + oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); + arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex); if (arpindex < 0) { kfree(cm_node); return NULL; @@ -3153,11 +3149,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) - nesqp->nesqp_context->ip0 = - cpu_to_le32(ntohl(nesvnic->local_ipaddr)); - else - nesqp->nesqp_context->ip0 = + nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); nesqp->nesqp_context->misc2 |= cpu_to_le32( @@ -3182,10 +3174,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) memset(&nes_quad, 0, sizeof(nes_quad)); nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) - nes_quad.SrcIpadr = nesvnic->local_ipaddr; - else - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; @@ -3538,11 +3527,7 @@ static void cm_event_connected(struct nes_cm_event *event) cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); nesqp->nesqp_context->tcpPorts[1] = 
cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) - nesqp->nesqp_context->ip0 = - cpu_to_le32(ntohl(nesvnic->local_ipaddr)); - else - nesqp->nesqp_context->ip0 = + nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); nesqp->nesqp_context->misc2 |= cpu_to_le32( @@ -3571,10 +3556,7 @@ static void cm_event_connected(struct nes_cm_event *event) nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) - nes_quad.SrcIpadr = nesvnic->local_ipaddr; - else - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/cmd.c b/trunk/drivers/net/ethernet/mellanox/mlx4/cmd.c index 3d1899ff1076..c8fef4353021 100644 --- a/trunk/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/trunk/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -40,7 +40,6 @@ #include #include -#include #include @@ -395,8 +394,7 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr; int ret; - mutex_lock(&priv->cmd.slave_cmd_mutex); - + down(&priv->cmd.slave_sem); vhcr->in_param = cpu_to_be64(in_param); vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0; vhcr->in_modifier = cpu_to_be32(in_modifier); @@ -404,7 +402,6 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, vhcr->token = cpu_to_be16(CMD_POLL_TOKEN); vhcr->status = 0; vhcr->flags = !!(priv->cmd.use_events) << 6; - if (mlx4_is_master(dev)) { ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr); if (!ret) { @@ -441,8 +438,7 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, mlx4_err(dev, "failed execution of VHCR_POST command" "opcode 0x%x\n", op); } - - mutex_unlock(&priv->cmd.slave_cmd_mutex); + up(&priv->cmd.slave_sem); return ret; } @@ -631,162 +627,6 @@ static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } -static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox) -{ - struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf); - struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf); - int err; - int i; - - if (index & 0x1f) - return -EINVAL; - - in_mad->attr_mod = cpu_to_be32(index / 32); - - err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, - MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, - MLX4_CMD_NATIVE); - if (err) - return err; - - for (i = 0; i < 32; ++i) - pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]); - - return err; -} - -static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox) -{ - int i; - int err; - - for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) { - err = query_pkey_block(dev, port, i, table + i, inbox, outbox); - if (err) - return err; - } - - return 0; -} -#define PORT_CAPABILITY_LOCATION_IN_SMP 20 -#define PORT_STATE_OFFSET 32 - -static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf) -{ - if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP) - return IB_PORT_ACTIVE; - else - return IB_PORT_DOWN; -} - -static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int 
slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd) -{ - struct ib_smp *smp = inbox->buf; - u32 index; - u8 port; - u16 *table; - int err; - int vidx, pidx; - struct mlx4_priv *priv = mlx4_priv(dev); - struct ib_smp *outsmp = outbox->buf; - __be16 *outtab = (__be16 *)(outsmp->data); - __be32 slave_cap_mask; - __be64 slave_node_guid; - port = vhcr->in_modifier; - - if (smp->base_version == 1 && - smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && - smp->class_version == 1) { - if (smp->method == IB_MGMT_METHOD_GET) { - if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) { - index = be32_to_cpu(smp->attr_mod); - if (port < 1 || port > dev->caps.num_ports) - return -EINVAL; - table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL); - if (!table) - return -ENOMEM; - /* need to get the full pkey table because the paravirtualized - * pkeys may be scattered among several pkey blocks. - */ - err = get_full_pkey_table(dev, port, table, inbox, outbox); - if (!err) { - for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) { - pidx = priv->virt2phys_pkey[slave][port - 1][vidx]; - outtab[vidx % 32] = cpu_to_be16(table[pidx]); - } - } - kfree(table); - return err; - } - if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) { - /*get the slave specific caps:*/ - /*do the command */ - err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, - vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); - /* modify the response for slaves */ - if (!err && slave != mlx4_master_func_num(dev)) { - u8 *state = outsmp->data + PORT_STATE_OFFSET; - - *state = (*state & 0xf0) | vf_port_state(dev, port, slave); - slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; - memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4); - } - return err; - } - if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) { - /* compute slave's gid block */ - smp->attr_mod = cpu_to_be32(slave / 8); - /* execute cmd */ - err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, - vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); - if (!err) { - /* if needed, move slave gid to index 0 */ - if (slave % 8) - memcpy(outsmp->data, - outsmp->data + (slave % 8) * 8, 8); - /* delete all other gids */ - memset(outsmp->data + 8, 0, 56); - } - return err; - } - if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { - err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, - vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); - if (!err) { - slave_node_guid = mlx4_get_slave_node_guid(dev, slave); - memcpy(outsmp->data + 12, &slave_node_guid, 8); - } - return err; - } - } - } - if (slave != mlx4_master_func_num(dev) && - ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) || - (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && - smp->method == IB_MGMT_METHOD_SET))) { - mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, " - "class 0x%x, method 0x%x for attr 0x%x. 
Rejecting\n", - slave, smp->method, smp->mgmt_class, - be16_to_cpu(smp->attr_id)); - return -EPERM; - } - /*default:*/ - return mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, - vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); -} - int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1110,7 +950,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_INIT2INIT_QP_wrapper + .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_INIT2RTR_QP, @@ -1128,7 +968,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_RTR2RTS_QP_wrapper + .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_RTS2RTS_QP, @@ -1137,7 +977,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_RTS2RTS_QP_wrapper + .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_SQERR2RTS_QP, @@ -1146,7 +986,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_SQERR2RTS_QP_wrapper + .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_2ERR_QP, @@ -1173,7 +1013,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_SQD2SQD_QP_wrapper + .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_SQD2RTS_QP, @@ -1182,7 +1022,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_SQD2RTS_QP_wrapper + .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_2RST_QP, @@ -1220,24 +1060,6 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, - { - .opcode = MLX4_CMD_CONF_SPECIAL_QP, - .has_inbox = false, - .has_outbox = false, - .out_is_imm = false, - .encode_slave_id = false, - .verify = NULL, /* XXX verify: only demux can do this */ - .wrapper = NULL - }, - { - .opcode = MLX4_CMD_MAD_IFC, - .has_inbox = true, - .has_outbox = true, - .out_is_imm = false, - .encode_slave_id = false, - .verify = NULL, - .wrapper = mlx4_MAD_IFC_wrapper - }, { .opcode = MLX4_CMD_QUERY_IF_STAT, .has_inbox = false, @@ -1518,8 +1340,6 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) goto inform_slave_state; - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave); - /* write the version in the event field */ reply |= mlx4_comm_get_version(); @@ -1556,21 +1376,19 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, goto reset_slave; slave_state[slave].vhcr_dma |= param; slave_state[slave].active = true; - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave); break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) goto reset_slave; - - mutex_lock(&priv->cmd.slave_cmd_mutex); + down(&priv->cmd.slave_sem); if (mlx4_master_process_vhcr(dev, slave, NULL)) { mlx4_err(dev, "Failed processing vhcr for slave:%d," " resetting slave.\n", slave); - mutex_unlock(&priv->cmd.slave_cmd_mutex); + up(&priv->cmd.slave_sem); goto reset_slave; } - mutex_unlock(&priv->cmd.slave_cmd_mutex); + up(&priv->cmd.slave_sem); break; default: mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); @@ 
-1711,6 +1529,14 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) struct mlx4_slave_state *s_state; int i, j, err, port; + priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, + &priv->mfunc.vhcr_dma, + GFP_KERNEL); + if (!priv->mfunc.vhcr) { + mlx4_err(dev, "Couldn't allocate vhcr.\n"); + return -ENOMEM; + } + if (mlx4_is_master(dev)) priv->mfunc.comm = ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + @@ -1764,7 +1590,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) INIT_WORK(&priv->mfunc.master.slave_flr_event_work, mlx4_master_handle_slave_flr); spin_lock_init(&priv->mfunc.master.slave_state_lock); - spin_lock_init(&priv->mfunc.master.slave_eq.event_lock); priv->mfunc.master.comm_wq = create_singlethread_workqueue("mlx4_comm"); if (!priv->mfunc.master.comm_wq) @@ -1773,6 +1598,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_init_resource_tracker(dev)) goto err_thread; + sema_init(&priv->cmd.slave_sem, 1); err = mlx4_ARM_COMM_CHANNEL(dev); if (err) { mlx4_err(dev, " Failed to arm comm channel eq: %x\n", @@ -1786,6 +1612,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) mlx4_err(dev, "Couldn't sync toggles\n"); goto err_comm; } + + sema_init(&priv->cmd.slave_sem, 1); } return 0; @@ -1815,7 +1643,6 @@ int mlx4_cmd_init(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); mutex_init(&priv->cmd.hcr_mutex); - mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; priv->cmd.toggle = 1; @@ -1832,30 +1659,14 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } } - if (mlx4_is_mfunc(dev)) { - priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, - &priv->mfunc.vhcr_dma, - GFP_KERNEL); - if (!priv->mfunc.vhcr) { - mlx4_err(dev, "Couldn't allocate VHCR.\n"); - goto err_hcr; - } - } - priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) - goto err_vhcr; + goto err_hcr; return 0; -err_vhcr: - if (mlx4_is_mfunc(dev)) - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, priv->mfunc.vhcr_dma); - priv->mfunc.vhcr = NULL; - err_hcr: if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); @@ -1878,6 +1689,9 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) } iounmap(priv->mfunc.comm); + dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr, priv->mfunc.vhcr_dma); + priv->mfunc.vhcr = NULL; } void mlx4_cmd_cleanup(struct mlx4_dev *dev) @@ -1888,10 +1702,6 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev) if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); - if (mlx4_is_mfunc(dev)) - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, priv->mfunc.vhcr_dma); - priv->mfunc.vhcr = NULL; } /* diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/eq.c b/trunk/drivers/net/ethernet/mellanox/mlx4/eq.c index 51c764901ad2..99a04648fab0 100644 --- a/trunk/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/trunk/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -164,16 +164,13 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq; - struct mlx4_eqe *s_eqe; - unsigned long flags; + struct mlx4_eqe *s_eqe = + &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)]; - spin_lock_irqsave(&slave_eq->event_lock, flags); - s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)]; if ((!!(s_eqe->owner & 0x80)) ^ (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) { mlx4_warn(dev, 
"Master failed to generate an EQE for slave: %d. " "No free EQE on slave events queue\n", slave); - spin_unlock_irqrestore(&slave_eq->event_lock, flags); return; } @@ -186,7 +183,6 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.slave_event_work); - spin_unlock_irqrestore(&slave_eq->event_lock, flags); } static void mlx4_slave_event(struct mlx4_dev *dev, int slave, @@ -204,196 +200,6 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave, slave_event(dev, slave, eqe); } -int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) -{ - struct mlx4_eqe eqe; - - struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave]; - - if (!s_slave->active) - return 0; - - memset(&eqe, 0, sizeof eqe); - - eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; - eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; - eqe.event.port_mgmt_change.port = port; - - return mlx4_GEN_EQE(dev, slave, &eqe); -} -EXPORT_SYMBOL(mlx4_gen_pkey_eqe); - -int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) -{ - struct mlx4_eqe eqe; - - /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) - return 0; - memset(&eqe, 0, sizeof eqe); - - eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; - eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; - eqe.event.port_mgmt_change.port = port; - - return mlx4_GEN_EQE(dev, slave, &eqe); -} -EXPORT_SYMBOL(mlx4_gen_guid_change_eqe); - -int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, - u8 port_subtype_change) -{ - struct mlx4_eqe eqe; - - /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) - return 0; - memset(&eqe, 0, sizeof eqe); - - eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; - eqe.subtype = port_subtype_change; - eqe.event.port_change.port = cpu_to_be32(port << 28); - - mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__, - port_subtype_change, slave, port); - return mlx4_GEN_EQE(dev, slave, &eqe); -} -EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe); - -enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; - if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) { - pr_err("%s: Error: asking for slave:%d, port:%d\n", - __func__, slave, port); - return SLAVE_PORT_DOWN; - } - return s_state[slave].port_state[port]; -} -EXPORT_SYMBOL(mlx4_get_slave_port_state); - -static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, - enum slave_port_state state) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; - - if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { - pr_err("%s: Error: asking for slave:%d, port:%d\n", - __func__, slave, port); - return -1; - } - s_state[slave].port_state[port] = state; - - return 0; -} - -static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) -{ - int i; - enum slave_port_gen_event gen_event; - - for (i = 0; i < dev->num_slaves; i++) - set_and_calc_slave_port_state(dev, i, port, event, &gen_event); -} -/************************************************************************** - The function get as input the new event to that port, - and according to the prev state change the slave's port state. 
- The events are: - MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, - MLX4_PORT_STATE_DEV_EVENT_PORT_UP - MLX4_PORT_STATE_IB_EVENT_GID_VALID - MLX4_PORT_STATE_IB_EVENT_GID_INVALID -***************************************************************************/ -int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, - u8 port, int event, - enum slave_port_gen_event *gen_event) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_slave_state *ctx = NULL; - unsigned long flags; - int ret = -1; - enum slave_port_state cur_state = - mlx4_get_slave_port_state(dev, slave, port); - - *gen_event = SLAVE_PORT_GEN_EVENT_NONE; - - if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { - pr_err("%s: Error: asking for slave:%d, port:%d\n", - __func__, slave, port); - return ret; - } - - ctx = &priv->mfunc.master.slave_state[slave]; - spin_lock_irqsave(&ctx->lock, flags); - - mlx4_dbg(dev, "%s: slave: %d, current state: %d new event :%d\n", - __func__, slave, cur_state, event); - - switch (cur_state) { - case SLAVE_PORT_DOWN: - if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event) - mlx4_set_slave_port_state(dev, slave, port, - SLAVE_PENDING_UP); - break; - case SLAVE_PENDING_UP: - if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) - mlx4_set_slave_port_state(dev, slave, port, - SLAVE_PORT_DOWN); - else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) { - mlx4_set_slave_port_state(dev, slave, port, - SLAVE_PORT_UP); - *gen_event = SLAVE_PORT_GEN_EVENT_UP; - } - break; - case SLAVE_PORT_UP: - if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) { - mlx4_set_slave_port_state(dev, slave, port, - SLAVE_PORT_DOWN); - *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; - } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID == - event) { - mlx4_set_slave_port_state(dev, slave, port, - SLAVE_PENDING_UP); - *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; - } - break; - default: - pr_err("%s: BUG!!! 
UNKNOWN state: " - "slave:%d, port:%d\n", __func__, slave, port); - goto out; - } - ret = mlx4_get_slave_port_state(dev, slave, port); - mlx4_dbg(dev, "%s: slave: %d, current state: %d new event" - " :%d gen_event: %d\n", - __func__, slave, cur_state, event, *gen_event); - -out: - spin_unlock_irqrestore(&ctx->lock, flags); - return ret; -} - -EXPORT_SYMBOL(set_and_calc_slave_port_state); - -int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) -{ - struct mlx4_eqe eqe; - - memset(&eqe, 0, sizeof eqe); - - eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; - eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO; - eqe.event.port_mgmt_change.port = port; - eqe.event.port_mgmt_change.params.port_info.changed_attr = - cpu_to_be32((u32) attr); - - slave_event(dev, ALL_SLAVES, &eqe); - return 0; -} -EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev); - void mlx4_master_handle_slave_flr(struct work_struct *work) { struct mlx4_mfunc_master_ctx *master = @@ -445,7 +251,6 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) u32 flr_slave; u8 update_slave_state; int i; - enum slave_port_gen_event gen_event; while ((eqe = next_eqe_sw(eq))) { /* @@ -542,49 +347,35 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) case MLX4_EVENT_TYPE_PORT_CHANGE: port = be32_to_cpu(eqe->event.port_change.port) >> 28; if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, + mlx4_dispatch_event(dev, + MLX4_DEV_EVENT_PORT_DOWN, port); mlx4_priv(dev)->sense.do_sense_port[port] = 1; - if (!mlx4_is_master(dev)) - break; - for (i = 0; i < dev->num_slaves; i++) { - if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { - if (i == mlx4_master_func_num(dev)) - continue; - mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" + if (mlx4_is_master(dev)) + /*change the state of all slave's port + * to down:*/ + for (i = 0; i < dev->num_slaves; i++) { + mlx4_dbg(dev, "%s: Sending " + "MLX4_PORT_CHANGE_SUBTYPE_DOWN" " to slave: %d, port:%d\n", __func__, i, port); + if (i == dev->caps.function) + continue; mlx4_slave_event(dev, i, eqe); - } else { /* IB port */ - set_and_calc_slave_port_state(dev, i, port, - MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, - &gen_event); - /*we can be in pending state, then do not send port_down event*/ - if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) { - if (i == mlx4_master_func_num(dev)) - continue; - mlx4_slave_event(dev, i, eqe); - } } - } } else { - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port); - + mlx4_dispatch_event(dev, + MLX4_DEV_EVENT_PORT_UP, + port); mlx4_priv(dev)->sense.do_sense_port[port] = 0; - if (!mlx4_is_master(dev)) - break; - if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + if (mlx4_is_master(dev)) { for (i = 0; i < dev->num_slaves; i++) { - if (i == mlx4_master_func_num(dev)) + if (i == dev->caps.function) continue; mlx4_slave_event(dev, i, eqe); } - else /* IB port */ - /* port-up event will be sent to a slave when the - * slave's alias-guid is set. 
This is done in alias_GUID.c - */ - set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP); + } } break; diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/fw.c b/trunk/drivers/net/ethernet/mellanox/mlx4/fw.c index 419914bf4834..c69648487321 100644 --- a/trunk/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/trunk/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -183,7 +183,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c -#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 +#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30 #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 @@ -194,39 +194,21 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8 #define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc -#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10 -#define QUERY_FUNC_CAP_QP0_PROXY 0x14 -#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18 -#define QUERY_FUNC_CAP_QP1_PROXY 0x1c - #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 if (vhcr->op_modifier == 1) { + field = vhcr->in_modifier; + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + field = 0; /* ensure force vlan and force mac bits are not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); /* ensure that phy_wqe_gid bit is not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); - field = vhcr->in_modifier; /* phys-port = logical-port */ - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - - /* size is now the QP number */ - size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); - - size += 2; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); - - size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); - - size += 2; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); - } else if (vhcr->op_modifier == 0) { /* enable rdma and ethernet interfaces */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA); @@ -271,118 +253,99 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, return err; } -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, - struct mlx4_func_cap *func_cap) +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; - u8 field, op_modifier; + u8 field; u32 size; + int i; int err = 0; - op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier, - MLX4_CMD_QUERY_FUNC_CAP, + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; - if (!op_modifier) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); - if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { - mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); - err = -EPROTONOSUPPORT; - goto out; - } - func_cap->flags = field; - - MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); - func_cap->num_ports = field; + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); + if (!(field & 
(QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { + mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); + err = -EPROTONOSUPPORT; + goto out; + } + func_cap->flags = field; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); - func_cap->pf_context_behaviour = size; + MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); + func_cap->num_ports = field; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); - func_cap->qp_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); + func_cap->pf_context_behaviour = size; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); - func_cap->srq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + func_cap->qp_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); - func_cap->cq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + func_cap->srq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); - func_cap->max_eq = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + func_cap->cq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); - func_cap->reserved_eq = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + func_cap->max_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); - func_cap->mpt_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + func_cap->reserved_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); - func_cap->mtt_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + func_cap->mpt_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); - func_cap->mcg_quota = size & 0xFFFFFF; - goto out; - } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + func_cap->mtt_quota = size & 0xFFFFFF; - /* logical port query */ - if (gen_or_port > dev->caps.num_ports) { - err = -EINVAL; - goto out; - } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + func_cap->mcg_quota = size & 0xFFFFFF; - if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); - if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { - mlx4_err(dev, "VLAN is enforced on this port\n"); - err = -EPROTONOSUPPORT; + for (i = 1; i <= func_cap->num_ports; ++i) { + err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 1, + MLX4_CMD_QUERY_FUNC_CAP, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (err) goto out; - } - if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { - mlx4_err(dev, "Force mac is enabled on this port\n"); - err = -EPROTONOSUPPORT; - goto out; - } - } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); - if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { - mlx4_err(dev, "phy_wqe_gid is " - "enforced on this ib port\n"); - err = -EPROTONOSUPPORT; - goto out; + if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { + mlx4_err(dev, "VLAN is enforced on this port\n"); + err = -EPROTONOSUPPORT; + goto out; + } + + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { + mlx4_err(dev, "Force mac is enabled on this port\n"); + err = -EPROTONOSUPPORT; + goto out; + } + } else if (dev->caps.port_type[i] == 
MLX4_PORT_TYPE_IB) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { + mlx4_err(dev, "phy_wqe_gid is " + "enforced on this ib port\n"); + err = -EPROTONOSUPPORT; + goto out; + } } - } - MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - func_cap->physical_port = field; - if (func_cap->physical_port != gen_or_port) { - err = -ENOSYS; - goto out; + MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + func_cap->physical_port[i] = field; } - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); - func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; - - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); - func_cap->qp0_proxy_qpn = size & 0xFFFFFF; - - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); - func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; - - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); - func_cap->qp1_proxy_qpn = size & 0xFFFFFF; - /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: * - num remains the maximum resource index @@ -752,7 +715,6 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { - u64 flags; int err = 0; u8 field; @@ -761,11 +723,6 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; - /* add port mng change event capability unconditionally to slaves */ - MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); - flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; - MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); - /* For guests, report Blueflame disabled */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET); field &= 0x7f; @@ -1388,19 +1345,6 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, return err; } -/* for IB-type ports only in SRIOV mode. 
Checks that both proxy QP0 - * and real QP0 are active, so that the paravirtualized QP0 is ready - * to operate */ -static int check_qp0_state(struct mlx4_dev *dev, int function, int port) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - /* irrelevant if not infiniband */ - if (priv->mfunc.master.qp0_state[port].proxy_qp0_active && - priv->mfunc.master.qp0_state[port].qp0_active) - return 1; - return 0; -} - int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1414,29 +1358,17 @@ int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) return 0; - if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { - /* Enable port only if it was previously disabled */ - if (!priv->mfunc.master.init_port_ref[port]) { - err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - if (err) - return err; - } - priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); - } else { - if (slave == mlx4_master_func_num(dev)) { - if (check_qp0_state(dev, slave, port) && - !priv->mfunc.master.qp0_state[port].port_active) { - err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - if (err) - return err; - priv->mfunc.master.qp0_state[port].port_active = 1; - priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); - } - } else - priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); + if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) + return -ENODEV; + + /* Enable port only if it was previously disabled */ + if (!priv->mfunc.master.init_port_ref[port]) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; } + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); ++priv->mfunc.master.init_port_ref[port]; return 0; } @@ -1509,29 +1441,15 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, (1 << port))) return 0; - if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { - if (priv->mfunc.master.init_port_ref[port] == 1) { - err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, - 1000, MLX4_CMD_NATIVE); - if (err) - return err; - } - priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); - } else { - /* infiniband port */ - if (slave == mlx4_master_func_num(dev)) { - if (!priv->mfunc.master.qp0_state[port].qp0_active && - priv->mfunc.master.qp0_state[port].port_active) { - err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, - 1000, MLX4_CMD_NATIVE); - if (err) - return err; - priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); - priv->mfunc.master.qp0_state[port].port_active = 0; - } - } else - priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); + if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) + return -ENODEV; + if (priv->mfunc.master.init_port_ref[port] == 1) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, + MLX4_CMD_NATIVE); + if (err) + return err; } + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); --priv->mfunc.master.init_port_ref[port]; return 0; } diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/fw.h b/trunk/drivers/net/ethernet/mellanox/mlx4/fw.h index 85abe9c11a22..83fcbbf1b169 100644 --- a/trunk/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/trunk/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -134,12 +134,8 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int 
mcg_quota; - u32 qp0_tunnel_qpn; - u32 qp0_proxy_qpn; - u32 qp1_tunnel_qpn; - u32 qp1_proxy_qpn; - u8 physical_port; - u8 port_flags; + u8 physical_port[MLX4_MAX_PORTS + 1]; + u8 port_flags[MLX4_MAX_PORTS + 1]; }; struct mlx4_adapter { @@ -196,8 +192,7 @@ struct mlx4_set_ib_param { }; int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, - struct mlx4_func_cap *func_cap); +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap); int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/main.c b/trunk/drivers/net/ethernet/mellanox/mlx4/main.c index bc1e5d41c292..2f816c6aed72 100644 --- a/trunk/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/trunk/drivers/net/ethernet/mellanox/mlx4/main.c @@ -95,6 +95,8 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " Not in use with device managed" " flow steering"); +#define MLX4_VF (1 << 0) + #define HCA_GLOBAL_CAP_MASK 0 #define PF_CONTEXT_BEHAVIOUR_MASK 0 @@ -297,12 +299,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) mlx4_dbg(dev, "Steering mode is: %s\n", mlx4_steering_mode_str(dev->caps.steering_mode)); - /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ - if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) + /* Sense port always allowed on supported devices for ConnectX1 and 2 */ + if (dev->pdev->device != 0x1003) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; - /* Don't do sense port on multifunction devices (for now at least) */ - if (mlx4_is_mfunc(dev)) - dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; @@ -385,7 +384,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; - dev->caps.sqp_demux = (mlx4_is_master(dev)) ? 
MLX4_MAX_NUM_SLAVES : 0; return 0; } /*The function checks if there are live vf, return the num of them*/ @@ -411,54 +409,20 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) { u32 qk = MLX4_RESERVED_QKEY_BASE; - - if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || - qpn < dev->phys_caps.base_proxy_sqpn) + if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || + qpn < dev->caps.sqp_start) return -EINVAL; - if (qpn >= dev->phys_caps.base_tunnel_sqpn) + if (qpn >= dev->caps.base_tunnel_sqpn) /* tunnel qp */ - qk += qpn - dev->phys_caps.base_tunnel_sqpn; + qk += qpn - dev->caps.base_tunnel_sqpn; else - qk += qpn - dev->phys_caps.base_proxy_sqpn; + qk += qpn - dev->caps.sqp_start; *qkey = qk; return 0; } EXPORT_SYMBOL(mlx4_get_parav_qkey); -void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) -{ - struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); - - if (!mlx4_is_master(dev)) - return; - - priv->virt2phys_pkey[slave][port - 1][i] = val; -} -EXPORT_SYMBOL(mlx4_sync_pkey_table); - -void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) -{ - struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); - - if (!mlx4_is_master(dev)) - return; - - priv->slave_node_guids[slave] = guid; -} -EXPORT_SYMBOL(mlx4_put_slave_node_guid); - -__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) -{ - struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); - - if (!mlx4_is_master(dev)) - return 0; - - return priv->slave_node_guids[slave]; -} -EXPORT_SYMBOL(mlx4_get_slave_node_guid); - int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -529,10 +493,9 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) } memset(&func_cap, 0, sizeof(func_cap)); - err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); + err = mlx4_QUERY_FUNC_CAP(dev, &func_cap); if (err) { - mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", - err); + mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n"); return err; } @@ -560,33 +523,12 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } - dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - - if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { - err = -ENOMEM; - goto err_mem; - } - for (i = 1; i <= dev->caps.num_ports; ++i) { - err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); - if (err) { - mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" - " port %d, aborting (%d).\n", i, err); - goto err_mem; - } - dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; - dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; - dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; - dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.port_mask[i] = dev->caps.port_type[i]; if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, &dev->caps.gid_table_len[i], &dev->caps.pkey_table_len[i])) - goto err_mem; + return -ENODEV; } if (dev->caps.uar_page_size * (dev->caps.num_uars - @@ -596,20 +538,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) "PCI resource 2 size of 0x%llx, aborting.\n", dev->caps.uar_page_size * 
dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->pdev, 2)); - goto err_mem; + return -ENODEV; } return 0; - -err_mem: - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - dev->caps.qp0_tunnel = dev->caps.qp0_proxy = - dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; - - return err; } /* @@ -1160,10 +1092,10 @@ static void mlx4_slave_exit(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - mutex_lock(&priv->cmd.slave_cmd_mutex); + down(&priv->cmd.slave_sem); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function.\n"); - mutex_unlock(&priv->cmd.slave_cmd_mutex); + up(&priv->cmd.slave_sem); } static int map_bf_area(struct mlx4_dev *dev) @@ -1215,7 +1147,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev) u32 slave_read; u32 cmd_channel_ver; - mutex_lock(&priv->cmd.slave_cmd_mutex); + down(&priv->cmd.slave_sem); priv->cmd.max_cmds = 1; mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, @@ -1264,13 +1196,12 @@ static int mlx4_init_slave(struct mlx4_dev *dev) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) goto err; - - mutex_unlock(&priv->cmd.slave_cmd_mutex); + up(&priv->cmd.slave_sem); return 0; err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); - mutex_unlock(&priv->cmd.slave_cmd_mutex); + up(&priv->cmd.slave_sem); return -EIO; } @@ -1917,7 +1848,7 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) iounmap(owner); } -static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) +static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx4_priv *priv; struct mlx4_dev *dev; @@ -1940,11 +1871,12 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) /* * Check for BARs. */ - if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && + if (((id == NULL) || !(id->driver_data & MLX4_VF)) && !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing DCS, aborting." - "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n", - pci_dev_data, pci_resource_flags(pdev, 0)); + "(id == 0X%p, id->driver_data: 0x%lx," + " pci_resource_flags(pdev, 0):0x%lx)\n", id, + id ? id->driver_data : 0, pci_resource_flags(pdev, 0)); err = -ENODEV; goto err_disable_pdev; } @@ -2009,7 +1941,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) dev->rev_id = pdev->revision; /* Detect if this device is a virtual function */ - if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { + if (id && id->driver_data & MLX4_VF) { /* When acting as pf, we normally skip vfs unless explicitly * requested to probe them. 
*/ if (num_vfs && extended_func_num(pdev) > probe_vf) { @@ -2037,11 +1969,12 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) } if (num_vfs) { - mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); + mlx4_warn(dev, "Enabling sriov with:%d vfs\n", num_vfs); err = pci_enable_sriov(pdev, num_vfs); if (err) { - mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", - err); + mlx4_err(dev, "Failed to enable sriov," + "continuing without sriov enabled" + " (err = %d).\n", err); err = 0; } else { mlx4_warn(dev, "Running in master mode\n"); @@ -2156,7 +2089,6 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) mlx4_sense_init(dev); mlx4_start_sense(dev); - priv->pci_dev_data = pci_dev_data; pci_set_drvdata(pdev, dev); return 0; @@ -2226,7 +2158,7 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev, { printk_once(KERN_INFO "%s", mlx4_version); - return __mlx4_init_one(pdev, id->driver_data); + return __mlx4_init_one(pdev, id); } static void mlx4_remove_one(struct pci_dev *pdev) @@ -2285,18 +2217,12 @@ static void mlx4_remove_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); if (dev->flags & MLX4_FLAG_SRIOV) { - mlx4_warn(dev, "Disabling SR-IOV\n"); + mlx4_warn(dev, "Disabling sriov\n"); pci_disable_sriov(pdev); } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); - - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - kfree(priv); pci_release_regions(pdev); pci_disable_device(pdev); @@ -2306,46 +2232,41 @@ static void mlx4_remove_one(struct pci_dev *pdev) int mlx4_restart_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); - struct mlx4_priv *priv = mlx4_priv(dev); - int pci_dev_data; - - pci_dev_data = priv->pci_dev_data; mlx4_remove_one(pdev); - return __mlx4_init_one(pdev, pci_dev_data); + return __mlx4_init_one(pdev, NULL); } static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { /* MT25408 "Hermon" SDR */ - { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x6340), 0 }, /* MT25408 "Hermon" DDR */ - { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x634a), 0 }, /* MT25408 "Hermon" QDR */ - { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x6354), 0 }, /* MT25408 "Hermon" DDR PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x6732), 0 }, /* MT25408 "Hermon" QDR PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x673c), 0 }, /* MT25408 "Hermon" EN 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x6368), 0 }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x6750), 0 }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x6372), 0 }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x675a), 0 }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/ - { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x6764), 0 }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ - { PCI_VDEVICE(MELLANOX, 0x6746), 
MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x6746), 0 }, /* MT26478 ConnectX2 40GigE PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT }, + { PCI_VDEVICE(MELLANOX, 0x676e), 0 }, /* MT25400 Family [ConnectX-2 Virtual Function] */ - { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, + { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_VF }, /* MT27500 Family [ConnectX-3] */ { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, /* MT27500 Family [ConnectX-3 Virtual Function] */ - { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF }, + { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_VF }, { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ @@ -2374,7 +2295,7 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - int ret = __mlx4_init_one(pdev, 0); + int ret = __mlx4_init_one(pdev, NULL); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/trunk/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1cf42036d7bb..dba69d98734a 100644 --- a/trunk/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/trunk/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -452,7 +452,6 @@ struct mlx4_slave_state { /*initialized via the kzalloc*/ u8 is_slave_going_down; u32 cookie; - enum slave_port_state port_state[MLX4_MAX_PORTS + 1]; }; struct slave_list { @@ -473,7 +472,6 @@ struct mlx4_slave_event_eq { u32 eqn; u32 cons; u32 prod; - spinlock_t event_lock; struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE]; }; @@ -513,9 +511,9 @@ struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; struct mutex hcr_mutex; - struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; + struct semaphore slave_sem; int max_cmds; spinlock_t context_lock; int free_head; @@ -768,11 +766,6 @@ struct _rule_hw { }; }; -enum { - MLX4_PCI_DEV_IS_VF = 1 << 0, - MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1, -}; - struct mlx4_priv { struct mlx4_dev dev; @@ -780,8 +773,6 @@ struct mlx4_priv { struct list_head ctx_list; spinlock_t ctx_lock; - int pci_dev_data; - struct list_head pgdir_list; struct mutex pgdir_mutex; @@ -816,9 +807,6 @@ struct mlx4_priv { struct io_mapping *bf_mapping; int reserved_mtts; int fs_hash_mode; - u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; - __be64 slave_node_guids[MLX4_MFUNC_MAX]; - }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) @@ -1023,61 +1011,16 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); -int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd); int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); -int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd); -int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd); -int mlx4_SQERR2RTS_QP_wrapper(struct 
mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd); -int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd); -int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd); -int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd); -int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd); int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); -int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd); int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe); diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/port.c b/trunk/drivers/net/ethernet/mellanox/mlx4/port.c index 4c51b05efa28..e36dd0f2fa73 100644 --- a/trunk/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/trunk/drivers/net/ethernet/mellanox/mlx4/port.c @@ -732,16 +732,6 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, new_cap_mask = ((__be32 *) inbox->buf)[1]; } - /* slave may not set the IS_SM capability for the port */ - if (slave != mlx4_master_func_num(dev) && - (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM)) - return -EINVAL; - - /* No DEV_MGMT in multifunc mode */ - if (mlx4_is_mfunc(dev) && - (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP)) - return -EINVAL; - agg_cap_mask = 0; slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/qp.c b/trunk/drivers/net/ethernet/mellanox/mlx4/qp.c index 81e2abe07bbb..fb2b36759cbf 100644 --- a/trunk/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/trunk/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -67,18 +67,10 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) complete(&qp->free); } -/* used for INIT/CLOSE port logic */ -static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0) +static int is_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp) { - /* this procedure is called after we already know we are on the master */ - /* qp0 is either the proxy qp0, or the real qp0 */ - u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev); - *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1; - - *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn && - qp->qpn <= dev->phys_caps.base_sqpn + 1; - - return *real_qp0 || *proxy_qp0; + return qp->qpn >= dev->caps.sqp_start && + qp->qpn <= dev->caps.sqp_start + 1; } static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, @@ -130,8 +122,6 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; int ret = 0; - int real_qp0 = 0; - int proxy_qp0 = 0; u8 port; if (cur_state >= MLX4_QP_NUM_STATE 
|| new_state >= MLX4_QP_NUM_STATE || @@ -143,12 +133,9 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native); if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR && cur_state != MLX4_QP_STATE_RST && - is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { + is_qp0(dev, qp)) { port = (qp->qpn & 1) + 1; - if (proxy_qp0) - priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; - else - priv->mfunc.master.qp0_state[port].qp0_active = 0; + priv->mfunc.master.qp0_state[port].qp0_active = 0; } return ret; } @@ -175,23 +162,6 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, new_state == MLX4_QP_STATE_RST ? 2 : 0, op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native); - if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { - port = (qp->qpn & 1) + 1; - if (cur_state != MLX4_QP_STATE_ERR && - cur_state != MLX4_QP_STATE_RST && - new_state == MLX4_QP_STATE_ERR) { - if (proxy_qp0) - priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; - else - priv->mfunc.master.qp0_state[port].qp0_active = 0; - } else if (new_state == MLX4_QP_STATE_RTR) { - if (proxy_qp0) - priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1; - else - priv->mfunc.master.qp0_state[port].qp0_active = 1; - } - } - mlx4_free_cmd_mailbox(dev, mailbox); return ret; } @@ -422,7 +392,6 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; int err; int reserved_from_top = 0; - int k; spin_lock_init(&qp_table->lock); INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); @@ -437,7 +406,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * We also reserve the MSB of the 24-bit QP number to indicate * that a QP is an XRC QP. */ - dev->phys_caps.base_sqpn = + dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); { @@ -468,66 +437,13 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) } - /* Reserve 8 real SQPs in both native and SRIOV modes. - * In addition, in SRIOV mode, reserve 8 proxy SQPs per function - * (for all PFs and VFs), and 8 corresponding tunnel QPs. - * Each proxy SQP works opposite its own tunnel QP. - * - * The QPs are arranged as follows: - * a. 8 real SQPs - * b. All the proxy SQPs (8 per function) - * c. 
All the tunnel QPs (8 per function) - */ - err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 + - 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev), + (1 << 23) - 1, dev->caps.sqp_start + 8, reserved_from_top); if (err) return err; - if (mlx4_is_mfunc(dev)) { - /* for PPF use */ - dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8; - dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX; - - /* In mfunc, calculate proxy and tunnel qp offsets for the PF here, - * since the PF does not call mlx4_slave_caps */ - dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - - if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { - err = -ENOMEM; - goto err_mem; - } - - for (k = 0; k < dev->caps.num_ports; k++) { - dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn + - 8 * mlx4_master_func_num(dev) + k; - dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX; - dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn + - 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k; - dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX; - } - } - - - err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn); - if (err) - goto err_mem; - return 0; - -err_mem: - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - dev->caps.qp0_tunnel = dev->caps.qp0_proxy = - dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; - return err; + return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start); } void mlx4_cleanup_qp_table(struct mlx4_dev *dev) diff --git a/trunk/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/trunk/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 49e9de725d0a..293c9e820c49 100644 --- a/trunk/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/trunk/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -242,15 +242,6 @@ static int res_tracker_insert(struct rb_root *root, struct res_common *res) return 0; } -enum qp_transition { - QP_TRANS_INIT2RTR, - QP_TRANS_RTR2RTS, - QP_TRANS_RTS2RTS, - QP_TRANS_SQERR2RTS, - QP_TRANS_SQD2SQD, - QP_TRANS_SQD2RTS -}; - /* For Debug uses */ static const char *ResourceType(enum mlx4_resource rt) { @@ -317,41 +308,14 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev, } } -static void update_pkey_index(struct mlx4_dev *dev, int slave, - struct mlx4_cmd_mailbox *inbox) +static void update_ud_gid(struct mlx4_dev *dev, + struct mlx4_qp_context *qp_ctx, u8 slave) { - u8 sched = *(u8 *)(inbox->buf + 64); - u8 orig_index = *(u8 *)(inbox->buf + 35); - u8 new_index; - struct mlx4_priv *priv = mlx4_priv(dev); - int port; - - port = (sched >> 6 & 1) + 1; - - new_index = priv->virt2phys_pkey[slave][port - 1][orig_index]; - *(u8 *)(inbox->buf + 35) = new_index; - - mlx4_dbg(dev, "port = %d, orig pkey index = %d, " - "new pkey index = %d\n", port, orig_index, new_index); -} - -static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, - u8 slave) -{ - struct mlx4_qp_context *qp_ctx = inbox->buf + 8; - enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); - u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + u32 ts = 
(be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; if (MLX4_QP_ST_UD == ts) qp_ctx->pri_path.mgid_index = 0x80 | slave; - if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) { - if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) - qp_ctx->pri_path.mgid_index = slave & 0x7F; - if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) - qp_ctx->alt_path.mgid_index = slave & 0x7F; - } - mlx4_dbg(dev, "slave %d, new gid index: 0x%x ", slave, qp_ctx->pri_path.mgid_index); } @@ -1141,13 +1105,7 @@ static void res_end_move(struct mlx4_dev *dev, int slave, static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn) { - return mlx4_is_qp_reserved(dev, qpn) && - (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn)); -} - -static int fw_reserved(struct mlx4_dev *dev, int qpn) -{ - return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; + return mlx4_is_qp_reserved(dev, qpn); } static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, @@ -1187,7 +1145,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - if (!fw_reserved(dev, qpn)) { + if (!valid_reserved(dev, slave, qpn)) { err = __mlx4_qp_alloc_icm(dev, qpn); if (err) { res_abort_move(dev, slave, RES_QP, qpn); @@ -1540,7 +1498,7 @@ static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - if (!fw_reserved(dev, qpn)) + if (!valid_reserved(dev, slave, qpn)) __mlx4_qp_free_icm(dev, qpn); res_end_move(dev, slave, RES_QP, qpn); @@ -1980,19 +1938,6 @@ static u32 qp_get_srqn(struct mlx4_qp_context *qpc) return be32_to_cpu(qpc->srqn) & 0x1ffffff; } -static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr, - struct mlx4_qp_context *context) -{ - u32 qpn = vhcr->in_modifier & 0xffffff; - u32 qkey = 0; - - if (mlx4_get_parav_qkey(dev, qpn, &qkey)) - return; - - /* adjust qkey in qp context */ - context->qkey = cpu_to_be32(qkey); -} - int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -2045,8 +1990,6 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, goto ex_put_scq; } - adjust_proxy_tun_qkey(dev, vhcr, qpc); - update_pkey_index(dev, slave, inbox); err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put_srq; @@ -2192,48 +2135,6 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, return err; } -static int verify_qp_parameters(struct mlx4_dev *dev, - struct mlx4_cmd_mailbox *inbox, - enum qp_transition transition, u8 slave) -{ - u32 qp_type; - struct mlx4_qp_context *qp_ctx; - enum mlx4_qp_optpar optpar; - - qp_ctx = inbox->buf + 8; - qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; - optpar = be32_to_cpu(*(__be32 *) inbox->buf); - - switch (qp_type) { - case MLX4_QP_ST_RC: - case MLX4_QP_ST_UC: - switch (transition) { - case QP_TRANS_INIT2RTR: - case QP_TRANS_RTR2RTS: - case QP_TRANS_RTS2RTS: - case QP_TRANS_SQD2SQD: - case QP_TRANS_SQD2RTS: - if (slave != mlx4_master_func_num(dev)) - /* slaves have only gid index 0 */ - if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) - if (qp_ctx->pri_path.mgid_index) - return -EINVAL; - if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) - if (qp_ctx->alt_path.mgid_index) - return -EINVAL; - break; - default: - break; - } - - break; - default: - break; - } - - return 0; -} - int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -2721,123 +2622,16 @@ int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave, return err; } -int 
mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd) -{ - struct mlx4_qp_context *context = inbox->buf + 8; - adjust_proxy_tun_qkey(dev, vhcr, context); - update_pkey_index(dev, slave, inbox); - return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); -} - int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { - int err; struct mlx4_qp_context *qpc = inbox->buf + 8; - err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); - if (err) - return err; - - update_pkey_index(dev, slave, inbox); - update_gid(dev, inbox, (u8)slave); - adjust_proxy_tun_qkey(dev, vhcr, qpc); - - return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); -} - -int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd) -{ - int err; - struct mlx4_qp_context *context = inbox->buf + 8; - - err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); - if (err) - return err; - - update_pkey_index(dev, slave, inbox); - update_gid(dev, inbox, (u8)slave); - adjust_proxy_tun_qkey(dev, vhcr, context); - return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); -} - -int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd) -{ - int err; - struct mlx4_qp_context *context = inbox->buf + 8; - - err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); - if (err) - return err; - - update_pkey_index(dev, slave, inbox); - update_gid(dev, inbox, (u8)slave); - adjust_proxy_tun_qkey(dev, vhcr, context); - return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); -} - - -int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd) -{ - struct mlx4_qp_context *context = inbox->buf + 8; - adjust_proxy_tun_qkey(dev, vhcr, context); - return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); -} - -int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd) -{ - int err; - struct mlx4_qp_context *context = inbox->buf + 8; - - err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); - if (err) - return err; - - adjust_proxy_tun_qkey(dev, vhcr, context); - update_gid(dev, inbox, (u8)slave); - update_pkey_index(dev, slave, inbox); - return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); -} - -int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, - struct mlx4_vhcr *vhcr, - struct mlx4_cmd_mailbox *inbox, - struct mlx4_cmd_mailbox *outbox, - struct mlx4_cmd_info *cmd) -{ - int err; - struct mlx4_qp_context *context = inbox->buf + 8; - - err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); - if (err) - return err; + update_ud_gid(dev, qpc, (u8)slave); - adjust_proxy_tun_qkey(dev, vhcr, context); - update_gid(dev, inbox, (u8)slave); - update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } diff --git a/trunk/include/linux/mlx4/device.h 
b/trunk/include/linux/mlx4/device.h index 6d1acb04cd17..6e1b0f973a03 100644 --- a/trunk/include/linux/mlx4/device.h +++ b/trunk/include/linux/mlx4/device.h @@ -54,13 +54,7 @@ enum { }; enum { - MLX4_PORT_CAP_IS_SM = 1 << 1, - MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19, -}; - -enum { - MLX4_MAX_PORTS = 2, - MLX4_MAX_PORT_PKEYS = 128 + MLX4_MAX_PORTS = 2 }; /* base qkey for use in sriov tunnel-qp/proxy-qp communication. @@ -197,25 +191,6 @@ enum { MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, }; -enum slave_port_state { - SLAVE_PORT_DOWN = 0, - SLAVE_PENDING_UP, - SLAVE_PORT_UP, -}; - -enum slave_port_gen_event { - SLAVE_PORT_GEN_EVENT_DOWN = 0, - SLAVE_PORT_GEN_EVENT_UP, - SLAVE_PORT_GEN_EVENT_NONE, -}; - -enum slave_port_state_event { - MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, - MLX4_PORT_STATE_DEV_EVENT_PORT_UP, - MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, - MLX4_PORT_STATE_IB_EVENT_GID_INVALID, -}; - enum { MLX4_PERM_LOCAL_READ = 1 << 10, MLX4_PERM_LOCAL_WRITE = 1 << 11, @@ -328,9 +303,6 @@ struct mlx4_phys_caps { u32 gid_phys_table_len[MLX4_MAX_PORTS + 1]; u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1]; u32 num_phys_eqs; - u32 base_sqpn; - u32 base_proxy_sqpn; - u32 base_tunnel_sqpn; }; struct mlx4_caps { @@ -361,10 +333,9 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; - u32 *qp0_proxy; - u32 *qp1_proxy; - u32 *qp0_tunnel; - u32 *qp1_tunnel; + int sqp_start; + u32 base_sqpn; + u32 base_tunnel_sqpn; int num_srqs; int max_srq_wqes; int max_srq_sge; @@ -418,7 +389,6 @@ struct mlx4_caps { enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; u32 max_counters; u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; - u16 sqp_demux; }; struct mlx4_buf_list { @@ -701,10 +671,6 @@ struct mlx4_init_port_param { for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) -#define mlx4_foreach_non_ib_transport_port(port, dev) \ - for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ - if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) - #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ @@ -726,18 +692,7 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { - return (qpn < dev->phys_caps.base_sqpn + 8 + - 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); -} - -static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) -{ - int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8; - - if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8) - return 1; - - return 0; + return (qpn < dev->caps.sqp_start + 8); } static inline int mlx4_is_mfunc(struct mlx4_dev *dev) @@ -972,20 +927,6 @@ int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); -void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, - int i, int val); - int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); -int mlx4_is_slave_active(struct mlx4_dev *dev, int slave); -int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port); -int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port); -int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr); -int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change); -enum slave_port_state mlx4_get_slave_port_state(struct 
mlx4_dev *dev, int slave, u8 port); -int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event); - -void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); -__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); - #endif /* MLX4_DEVICE_H */ diff --git a/trunk/include/linux/mlx4/driver.h b/trunk/include/linux/mlx4/driver.h index c257e1b211be..d813704b963b 100644 --- a/trunk/include/linux/mlx4/driver.h +++ b/trunk/include/linux/mlx4/driver.h @@ -45,8 +45,6 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_PORT_DOWN, MLX4_DEV_EVENT_PORT_REINIT, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, - MLX4_DEV_EVENT_SLAVE_INIT, - MLX4_DEV_EVENT_SLAVE_SHUTDOWN, }; struct mlx4_interface { diff --git a/trunk/include/linux/mlx4/qp.h b/trunk/include/linux/mlx4/qp.h index 4b4ad6ffef92..338388ba260a 100644 --- a/trunk/include/linux/mlx4/qp.h +++ b/trunk/include/linux/mlx4/qp.h @@ -126,8 +126,7 @@ struct mlx4_rss_context { struct mlx4_qp_path { u8 fl; - u8 reserved1[1]; - u8 disable_pkey_check; + u8 reserved1[2]; u8 pkey_index; u8 counter_index; u8 grh_mylmc; diff --git a/trunk/include/rdma/ib_cache.h b/trunk/include/rdma/ib_cache.h index ad9a3c280944..00a2b8ec327f 100644 --- a/trunk/include/rdma/ib_cache.h +++ b/trunk/include/rdma/ib_cache.h @@ -100,22 +100,6 @@ int ib_find_cached_pkey(struct ib_device *device, u16 pkey, u16 *index); -/** - * ib_find_exact_cached_pkey - Returns the PKey table index where a specified - * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit) - * @device: The device to query. - * @port_num: The port number of the device to search for the PKey. - * @pkey: The PKey value to search for. - * @index: The index into the cached PKey table where the PKey was found. - * - * ib_find_exact_cached_pkey() searches the specified PKey table in - * the local software cache. - */ -int ib_find_exact_cached_pkey(struct ib_device *device, - u8 port_num, - u16 pkey, - u16 *index); - /** * ib_get_cached_lmc - Returns a cached lmc table entry * @device: The device to query. diff --git a/trunk/include/rdma/ib_verbs.h b/trunk/include/rdma/ib_verbs.h index 46bc045bbe15..07996af8265a 100644 --- a/trunk/include/rdma/ib_verbs.h +++ b/trunk/include/rdma/ib_verbs.h @@ -614,9 +614,6 @@ enum ib_qp_type { enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, - /* reserve bits 26-31 for low level drivers' internal use */ - IB_QP_CREATE_RESERVED_START = 1 << 26, - IB_QP_CREATE_RESERVED_END = 1 << 31, }; struct ib_qp_init_attr {
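
A note on the QUERY_FUNC_CAP read-out restored above: every quota (QP, SRQ, CQ, MPT, MTT, MCG) arrives in the low 24 bits of a big-endian dword in the command mailbox, hence the recurring "size & 0xFFFFFF". Below is a standalone sketch of that extraction, assuming a plain byte buffer in place of the driver's mailbox and a hypothetical get_quota() helper standing in for MLX4_GET:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>

    /* Hypothetical stand-in for MLX4_GET on a u32 field: mailbox words
     * are big-endian, so byte-swap on read and keep the low 24 bits,
     * which is all the firmware uses for a quota. */
    static uint32_t get_quota(const uint8_t *outbox, size_t offset)
    {
        uint32_t be;
        memcpy(&be, outbox + offset, sizeof(be));
        return ntohl(be) & 0xFFFFFF;
    }

    int main(void)
    {
        uint8_t outbox[64] = {0};

        /* Fake an MTT quota of 0x020000 at offset 0x24, mirroring
         * QUERY_FUNC_CAP_MTT_QUOTA_OFFSET in the hunk above; the top
         * byte is deliberately junk to show it is masked away. */
        outbox[0x24] = 0xff;
        outbox[0x25] = 0x02;
        printf("mtt_quota = 0x%x\n", get_quota(outbox, 0x24));
        return 0;
    }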
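
QUERY_FUNC_CAP is effectively two queries behind one opcode: op_modifier 0 returns the function-global capabilities, while op_modifier 1 returns the per-port set for the port passed in in_modifier, which is why the restored caller loops i = 1..num_ports and issues one mailbox command per port. A toy dispatcher with the same shape (names and the port count are illustrative, not the real firmware interface):

    #include <stdio.h>

    /* Illustrative only: models the op_modifier split in the wrapper
     * above, not the actual command handling. */
    static void query_func_cap(int op_modifier, int in_modifier)
    {
        if (op_modifier == 1)
            printf("per-port caps for port %d\n", in_modifier);
        else
            printf("function-global caps\n");
    }

    int main(void)
    {
        int num_ports = 2, i;

        query_func_cap(0, 0);              /* general query first    */
        for (i = 1; i <= num_ports; ++i)   /* then one call per port */
            query_func_cap(1, i);
        return 0;
    }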
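
The INIT_PORT/CLOSE_PORT wrappers restored above share one idea: the firmware command touches the physical port only on the first open and the last close, while a per-slave bitmask makes repeated calls from the same slave idempotent. A minimal userspace model of that bookkeeping (the printf stands in for the real mlx4_cmd() call):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_PORTS  2
    #define MAX_SLAVES 4

    static int init_port_ref[MAX_PORTS + 1];     /* per-port refcount */
    static uint32_t init_port_mask[MAX_SLAVES];  /* bit per port      */

    static void init_port(int slave, int port)
    {
        if (init_port_mask[slave] & (1u << port))
            return;                    /* already open for this slave */
        if (!init_port_ref[port])
            printf("INIT_PORT %d\n", port);   /* first opener only   */
        init_port_mask[slave] |= 1u << port;
        ++init_port_ref[port];
    }

    static void close_port(int slave, int port)
    {
        if (!(init_port_mask[slave] & (1u << port)))
            return;
        if (init_port_ref[port] == 1)
            printf("CLOSE_PORT %d\n", port);  /* last closer only    */
        init_port_mask[slave] &= ~(1u << port);
        --init_port_ref[port];
    }

    int main(void)
    {
        init_port(0, 1);
        init_port(1, 1);    /* no second INIT_PORT is issued          */
        close_port(0, 1);
        close_port(1, 1);   /* CLOSE_PORT fires here, on the last ref */
        return 0;
    }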
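
mlx4_get_parav_qkey(), as restored, derives a deterministic qkey for each special QP from its offset above the proxy base (caps.sqp_start) or the tunnel base. The sketch below models that arithmetic in userspace; the reserved-qkey base and MFUNC_MAX values are assumptions for illustration, not quoted from the headers:

    #include <stdint.h>
    #include <stdio.h>

    #define RESERVED_QKEY_BASE 0xFFFF0000u /* assumed base value     */
    #define MFUNC_MAX          64          /* assumed max functions  */

    static int get_parav_qkey(uint32_t sqp_start, uint32_t tunnel_base,
                              uint32_t qpn, uint32_t *qkey)
    {
        if (qpn >= tunnel_base + 8 * MFUNC_MAX || qpn < sqp_start)
            return -1;                         /* not a special QP   */
        if (qpn >= tunnel_base)                /* tunnel QP          */
            *qkey = RESERVED_QKEY_BASE + (qpn - tunnel_base);
        else                                   /* proxy QP           */
            *qkey = RESERVED_QKEY_BASE + (qpn - sqp_start);
        return 0;
    }

    int main(void)
    {
        /* proxy range starts at sqp_start, tunnels 8 * MFUNC_MAX
         * above it, matching the bounds check in the hunk */
        uint32_t sqp_start = 64, tunnel_base = 64 + 8 * MFUNC_MAX;
        uint32_t qkey;

        if (!get_parav_qkey(sqp_start, tunnel_base, sqp_start + 6, &qkey))
            printf("qkey = 0x%x\n", qkey);
        return 0;
    }

Note that a proxy QP and the tunnel QP at the same index map to the same qkey, which is what lets each proxy SQP work opposite its own tunnel QP, as the deleted layout comment in qp.c puts it.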
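
The probe path above goes back to passing the whole pci_device_id into __mlx4_init_one() and testing a single MLX4_VF bit in driver_data to tell virtual functions from physical ones. A self-contained model of that table-driven check (the struct and its two entries are simplified stand-ins mirroring the ConnectX-3 rows of the table above):

    #include <stdio.h>

    #define MLX4_VF (1 << 0)

    /* Simplified stand-in for struct pci_device_id: only the fields
     * the check needs. */
    struct pci_id {
        unsigned device;
        unsigned long driver_data;
    };

    static const struct pci_id table[] = {
        { 0x1003, 0 },        /* MT27500 ConnectX-3, physical function */
        { 0x1004, MLX4_VF },  /* MT27500 ConnectX-3, virtual function  */
    };

    int main(void)
    {
        size_t i;

        for (i = 0; i < sizeof(table) / sizeof(table[0]); ++i)
            printf("0x%04x probes as %s\n", table[i].device,
                   (table[i].driver_data & MLX4_VF) ? "VF" : "PF");
        return 0;
    }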
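
The restored is_qp0() depends on the classic special-QP layout: eight QPs starting at caps.sqp_start, with QP0 of the two ports in the first two slots, so the owning port falls out of the low bit of the QP number ((qpn & 1) + 1, exactly as __mlx4_qp_modify() computes it above). A standalone illustration (the sqp_start value is made up):

    #include <stdio.h>

    static const unsigned sqp_start = 64;  /* illustrative base */

    static int is_qp0(unsigned qpn)
    {
        return qpn >= sqp_start && qpn <= sqp_start + 1;
    }

    int main(void)
    {
        unsigned qpn;

        for (qpn = sqp_start; qpn < sqp_start + 4; ++qpn)
            printf("qpn %u: %s on port %u\n", qpn,
                   is_qp0(qpn) ? "QP0" : "QP1", (qpn & 1) + 1);
        return 0;
    }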
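
For contrast, the comment deleted from mlx4_init_qp_table() spells out the SR-IOV arrangement this patch removes: 8 real SQPs, then 8 proxy SQPs per function, then 8 tunnel QPs per function. The short program below reproduces those offset computations for one sample function (MFUNC_MAX and base_sqpn are illustrative values, and this models the deleted code, not what remains after the revert):

    #include <stdio.h>

    #define MFUNC_MAX 64   /* assumed max functions */

    int main(void)
    {
        unsigned base_sqpn   = 64;                  /* illustrative   */
        unsigned base_proxy  = base_sqpn + 8;
        unsigned base_tunnel = base_sqpn + 8 + 8 * MFUNC_MAX;
        unsigned func = 3, k;                       /* sample function */

        for (k = 0; k < 2; ++k)    /* one QP0 slot per port */
            printf("port %u: qp0_proxy %u  qp0_tunnel %u\n", k + 1,
                   base_proxy + 8 * func + k,
                   base_tunnel + 8 * func + k);
        return 0;
    }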
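
Finally, update_ud_gid() in resource_tracker.c, which replaces the richer update_gid()/update_pkey_index() rewriting deleted above, keeps only the UD case: when the QP context's transport type is UD, the master points the slave's source GID at the per-slave entry by writing 0x80 | slave into mgid_index. A minimal model follows; the UD transport-type value is a placeholder, since the real MLX4_QP_ST_UD constant is not shown in this patch, and the flags word is in host order here rather than the driver's big-endian context:

    #include <stdint.h>
    #include <stdio.h>

    #define QP_ST_UD 0x3u  /* placeholder transport-type value */

    struct qp_ctx {
        uint32_t flags;       /* transport type in bits 16..23 */
        uint8_t  mgid_index;
    };

    static void update_ud_gid(struct qp_ctx *ctx, uint8_t slave)
    {
        uint32_t ts = (ctx->flags >> 16) & 0xff;

        if (ts == QP_ST_UD)
            ctx->mgid_index = 0x80 | slave;  /* per-slave GID slot */
    }

    int main(void)
    {
        struct qp_ctx ctx = { QP_ST_UD << 16, 0 };

        update_ud_gid(&ctx, 5);
        printf("mgid_index = 0x%02x\n", ctx.mgid_index);
        return 0;
    }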