Skip to content

Commit

Permalink
RDMA/nes: Improve use of PBLs
Browse files Browse the repository at this point in the history
Two-level 256-byte PBLs were not implemented, so the driver could report
out of memory when in fact there were PBLs still available.

This solution prefers to use 4KB PBLs over two-level 256B PBLs until
the number of 4KB PBLs falls below a threshold.  At that point, the 4KB
PBL structure is converted to use 256B PBLs, which prevents the driver
from running out of 4KB PBLs too quickly.

Signed-off-by: Don Wood <donald.e.wood@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Don Wood authored and Roland Dreier committed Mar 6, 2009
1 parent 2869975 commit 0145f34
Showing 1 changed file with 134 additions and 57 deletions.
191 changes: 134 additions & 57 deletions drivers/infiniband/hw/nes/nes_verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1887,21 +1887,75 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
return ret;
}

/**
 * root_256 - describe a 4KB-PBL layout as a root table of 256-byte leaves
 * @nesdev: device whose pcidev is used for the coherent allocation
 * @root_vpbl: existing root/leaf description built for 4KB PBLs
 * @new_root: receives a freshly allocated root table when @pbl_count_4k == 1
 * @pbl_count_4k: number of 4KB PBLs in the original layout
 * @pbl_count_256: not referenced by this function
 *
 * When there is exactly one 4KB PBL, a 512-byte coherent buffer is allocated
 * into @new_root and its first 16 entries are filled with addresses starting
 * at @root_vpbl->pbl_pbase and advancing 256 bytes per entry.
 *
 * Otherwise the first four entries of @root_vpbl->pbl_vbase are expanded in
 * place: entry i moves to slot i * 16 and the following 15 slots receive that
 * address plus successive 256-byte steps.  @new_root is left untouched.
 *
 * Returns 1 on success, 0 if the coherent allocation failed.
 */
static u32 root_256(struct nes_device *nesdev,
		    struct nes_root_vpbl *root_vpbl,
		    struct nes_root_vpbl *new_root,
		    u16 pbl_count_4k,
		    u16 pbl_count_256)
{
	u64 leaf_addr;
	int src, base, slot;

	if (pbl_count_4k != 1) {
		/*
		 * Walk the source entries from highest to lowest so that
		 * entries 1..3 are relocated before the expansion of entry 0
		 * overwrites slots 1..15.
		 */
		for (src = 3; src >= 0; src--) {
			base = src * 16;
			root_vpbl->pbl_vbase[base] = root_vpbl->pbl_vbase[src];

			leaf_addr = ((u64)le32_to_cpu(root_vpbl->pbl_vbase[base].pa_high)) << 32;
			leaf_addr += le32_to_cpu(root_vpbl->pbl_vbase[base].pa_low);

			for (slot = base + 1; slot <= base + 15; slot++) {
				leaf_addr += 256;
				root_vpbl->pbl_vbase[slot].pa_low =
					cpu_to_le32((u32)leaf_addr);
				root_vpbl->pbl_vbase[slot].pa_high =
					cpu_to_le32((u32)(leaf_addr >> 32));
			}
		}

		return 1;
	}

	/* Single 4KB PBL: build a separate root that points into it. */
	new_root->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 512,
						   &new_root->pbl_pbase);
	if (!new_root->pbl_vbase)
		return 0;

	leaf_addr = (u64)root_vpbl->pbl_pbase;
	for (slot = 0; slot < 16; slot++, leaf_addr += 256) {
		new_root->pbl_vbase[slot].pa_low = cpu_to_le32((u32)leaf_addr);
		new_root->pbl_vbase[slot].pa_high =
			cpu_to_le32((u32)(leaf_addr >> 32));
	}

	return 1;
}


/**
* nes_reg_mr
*/
static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl,
dma_addr_t single_buffer, u16 pbl_count, u16 residual_page_count,
int acc, u64 *iova_start)
dma_addr_t single_buffer, u16 pbl_count_4k,
u16 residual_page_count_4k, int acc, u64 *iova_start,
u16 *actual_pbl_cnt, u8 *used_4k_pbls)
{
struct nes_hw_cqp_wqe *cqp_wqe;
struct nes_cqp_request *cqp_request;
unsigned long flags;
int ret;
struct nes_adapter *nesadapter = nesdev->nesadapter;
/* int count; */
uint pg_cnt = 0;
u16 pbl_count_256;
u16 pbl_count = 0;
u8 use_256_pbls = 0;
u8 use_4k_pbls = 0;
u16 use_two_level = (pbl_count_4k > 1) ? 1 : 0;
struct nes_root_vpbl new_root = {0, 0, 0};
u32 opcode = 0;
u16 major_code;

Expand All @@ -1914,41 +1968,70 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
cqp_request->waiting = 1;
cqp_wqe = &cqp_request->cqp_wqe;

spin_lock_irqsave(&nesadapter->pbl_lock, flags);
/* track PBL resources */
if (pbl_count != 0) {
if (pbl_count > 1) {
/* Two level PBL */
if ((pbl_count+1) > nesadapter->free_4kpbl) {
nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n");
spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
nes_free_cqp_request(nesdev, cqp_request);
return -ENOMEM;
} else {
nesadapter->free_4kpbl -= pbl_count+1;
}
} else if (residual_page_count > 32) {
if (pbl_count > nesadapter->free_4kpbl) {
nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n");
spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
nes_free_cqp_request(nesdev, cqp_request);
return -ENOMEM;
} else {
nesadapter->free_4kpbl -= pbl_count;
if (pbl_count_4k) {
spin_lock_irqsave(&nesadapter->pbl_lock, flags);

pg_cnt = ((pbl_count_4k - 1) * 512) + residual_page_count_4k;
pbl_count_256 = (pg_cnt + 31) / 32;
if (pg_cnt <= 32) {
if (pbl_count_256 <= nesadapter->free_256pbl)
use_256_pbls = 1;
else if (pbl_count_4k <= nesadapter->free_4kpbl)
use_4k_pbls = 1;
} else if (pg_cnt <= 2048) {
if (((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) &&
(nesadapter->free_4kpbl > (nesadapter->max_4kpbl >> 1))) {
use_4k_pbls = 1;
} else if ((pbl_count_256 + 1) <= nesadapter->free_256pbl) {
use_256_pbls = 1;
use_two_level = 1;
} else if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) {
use_4k_pbls = 1;
}
} else {
if (pbl_count > nesadapter->free_256pbl) {
nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n");
spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
nes_free_cqp_request(nesdev, cqp_request);
return -ENOMEM;
} else {
nesadapter->free_256pbl -= pbl_count;
}
if ((pbl_count_4k + 1) <= nesadapter->free_4kpbl)
use_4k_pbls = 1;
}

if (use_256_pbls) {
pbl_count = pbl_count_256;
nesadapter->free_256pbl -= pbl_count + use_two_level;
} else if (use_4k_pbls) {
pbl_count = pbl_count_4k;
nesadapter->free_4kpbl -= pbl_count + use_two_level;
} else {
spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
nes_debug(NES_DBG_MR, "Out of Pbls\n");
nes_free_cqp_request(nesdev, cqp_request);
return -ENOMEM;
}

spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
}

spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
if (use_256_pbls && use_two_level) {
if (root_256(nesdev, root_vpbl, &new_root, pbl_count_4k, pbl_count_256) == 1) {
if (new_root.pbl_pbase != 0)
root_vpbl = &new_root;
} else {
spin_lock_irqsave(&nesadapter->pbl_lock, flags);
nesadapter->free_256pbl += pbl_count_256 + use_two_level;
use_256_pbls = 0;

if (pbl_count_4k == 1)
use_two_level = 0;
pbl_count = pbl_count_4k;

if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) {
nesadapter->free_4kpbl -= pbl_count + use_two_level;
use_4k_pbls = 1;
}
spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);

if (use_4k_pbls == 0)
return -ENOMEM;
}
}

opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ |
NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
Expand Down Expand Up @@ -1977,10 +2060,9 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
} else {
set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase);
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count);
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX,
(((pbl_count - 1) * 4096) + (residual_page_count*8)));
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (pg_cnt * 8));

if ((pbl_count > 1) || (residual_page_count > 32))
if (use_4k_pbls)
cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
}
barrier();
Expand All @@ -1996,23 +2078,26 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
stag, ret, cqp_request->major_code, cqp_request->minor_code);
major_code = cqp_request->major_code;
nes_put_cqp_request(nesdev, cqp_request);

if ((!ret || major_code) && pbl_count != 0) {
spin_lock_irqsave(&nesadapter->pbl_lock, flags);
if (pbl_count > 1)
nesadapter->free_4kpbl += pbl_count+1;
else if (residual_page_count > 32)
nesadapter->free_4kpbl += pbl_count;
else
nesadapter->free_256pbl += pbl_count;
if (use_256_pbls)
nesadapter->free_256pbl += pbl_count + use_two_level;
else if (use_4k_pbls)
nesadapter->free_4kpbl += pbl_count + use_two_level;
spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
}
if (new_root.pbl_pbase)
pci_free_consistent(nesdev->pcidev, 512, new_root.pbl_vbase,
new_root.pbl_pbase);

if (!ret)
return -ETIME;
else if (major_code)
return -EIO;
else
return 0;

*actual_pbl_cnt = pbl_count + use_two_level;
*used_4k_pbls = use_4k_pbls;
return 0;
}

Expand Down Expand Up @@ -2177,18 +2262,14 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
pbl_count = root_pbl_index;
}
ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start);
buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start,
&nesmr->pbls_used, &nesmr->pbl_4k);

if (ret == 0) {
nesmr->ibmr.rkey = stag;
nesmr->ibmr.lkey = stag;
nesmr->mode = IWNES_MEMREG_TYPE_MEM;
ibmr = &nesmr->ibmr;
nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0;
nesmr->pbls_used = pbl_count;
if (pbl_count > 1) {
nesmr->pbls_used++;
}
} else {
kfree(nesmr);
ibmr = ERR_PTR(-ENOMEM);
Expand Down Expand Up @@ -2466,8 +2547,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
stag, (unsigned int)iova_start,
(unsigned int)region_length, stag_index,
(unsigned long long)region->length, pbl_count);
ret = nes_reg_mr( nesdev, nespd, stag, region->length, &root_vpbl,
first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, &iova_start);
ret = nes_reg_mr(nesdev, nespd, stag, region->length, &root_vpbl,
first_dma_addr, pbl_count, (u16)cur_pbl_index, acc,
&iova_start, &nesmr->pbls_used, &nesmr->pbl_4k);

nes_debug(NES_DBG_MR, "ret=%d\n", ret);

Expand All @@ -2476,11 +2558,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
nesmr->ibmr.lkey = stag;
nesmr->mode = IWNES_MEMREG_TYPE_MEM;
ibmr = &nesmr->ibmr;
nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0;
nesmr->pbls_used = pbl_count;
if (pbl_count > 1) {
nesmr->pbls_used++;
}
} else {
ib_umem_release(region);
kfree(nesmr);
Expand Down

0 comments on commit 0145f34

Please sign in to comment.