Skip to content

Commit

Permalink
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
Browse files Browse the repository at this point in the history
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.

Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool.  The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a radomizing effect that minimizes the
number of cpu's that will contend with each other when the steal XRI's from
another cpu's global pool.

On io completion, a cpu will push the XRI back on to its private pool.  A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.

On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.

Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
  • Loading branch information
James Smart authored and Martin K. Petersen committed Feb 6, 2019
1 parent ace44e4 commit c490850
Show file tree
Hide file tree
Showing 13 changed files with 1,527 additions and 327 deletions.
26 changes: 18 additions & 8 deletions drivers/scsi/lpfc/lpfc.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,6 @@ typedef struct lpfc_vpd {
} sli3Feat;
} lpfc_vpd_t;

struct lpfc_scsi_buf;


/*
* lpfc stat counters
Expand Down Expand Up @@ -597,6 +595,13 @@ struct lpfc_mbox_ext_buf_ctx {
struct list_head ext_dmabuf_list;
};

struct lpfc_epd_pool {
/* Expedite pool */
struct list_head list;
u32 count;
spinlock_t lock; /* lock for expedite pool */
};

struct lpfc_ras_fwlog {
uint8_t *fwlog_buff;
uint32_t fw_buffcount; /* Buffer size posted to FW */
Expand All @@ -618,19 +623,19 @@ struct lpfc_ras_fwlog {

struct lpfc_hba {
/* SCSI interface function jump table entries */
struct lpfc_scsi_buf * (*lpfc_get_scsi_buf)
struct lpfc_io_buf * (*lpfc_get_scsi_buf)
(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
struct scsi_cmnd *cmnd);
int (*lpfc_scsi_prep_dma_buf)
(struct lpfc_hba *, struct lpfc_scsi_buf *);
(struct lpfc_hba *, struct lpfc_io_buf *);
void (*lpfc_scsi_unprep_dma_buf)
(struct lpfc_hba *, struct lpfc_scsi_buf *);
(struct lpfc_hba *, struct lpfc_io_buf *);
void (*lpfc_release_scsi_buf)
(struct lpfc_hba *, struct lpfc_scsi_buf *);
(struct lpfc_hba *, struct lpfc_io_buf *);
void (*lpfc_rampdown_queue_depth)
(struct lpfc_hba *);
void (*lpfc_scsi_prep_cmnd)
(struct lpfc_vport *, struct lpfc_scsi_buf *,
(struct lpfc_vport *, struct lpfc_io_buf *,
struct lpfc_nodelist *);

/* IOCB interface function jump table entries */
Expand Down Expand Up @@ -673,9 +678,12 @@ struct lpfc_hba {
(struct lpfc_hba *);

int (*lpfc_bg_scsi_prep_dma_buf)
(struct lpfc_hba *, struct lpfc_scsi_buf *);
(struct lpfc_hba *, struct lpfc_io_buf *);
/* Add new entries here */

/* expedite pool */
struct lpfc_epd_pool epd_pool;

/* SLI4 specific HBA data structure */
struct lpfc_sli4_hba sli4_hba;

Expand Down Expand Up @@ -789,6 +797,7 @@ struct lpfc_hba {

/* HBA Config Parameters */
uint32_t cfg_ack0;
uint32_t cfg_xri_rebalancing;
uint32_t cfg_enable_npiv;
uint32_t cfg_enable_rrq;
uint32_t cfg_topology;
Expand Down Expand Up @@ -1014,6 +1023,7 @@ struct lpfc_hba {
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
struct dentry *hba_debugfs_root;
atomic_t debugfs_vport_count;
struct dentry *debug_multixri_pools;
struct dentry *debug_hbqinfo;
struct dentry *debug_dumpHostSlim;
struct dentry *debug_dumpHBASlim;
Expand Down
9 changes: 9 additions & 0 deletions drivers/scsi/lpfc/lpfc_attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -5266,6 +5266,12 @@ static DEVICE_ATTR_RW(lpfc_max_scsicmpl_time);
*/
LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support");

/*
# lpfc_xri_rebalancing: enable or disable XRI rebalancing feature
# range is [0,1]. Default value is 1.
*/
LPFC_ATTR_R(xri_rebalancing, 1, 0, 1, "Enable/Disable XRI rebalancing");

/*
* lpfc_io_sched: Determine scheduling algrithmn for issuing FCP cmds
* range is [0,1]. Default value is 0.
Expand Down Expand Up @@ -5723,6 +5729,7 @@ struct device_attribute *lpfc_hba_attrs[] = {
&dev_attr_lpfc_use_adisc,
&dev_attr_lpfc_first_burst_size,
&dev_attr_lpfc_ack0,
&dev_attr_lpfc_xri_rebalancing,
&dev_attr_lpfc_topology,
&dev_attr_lpfc_scan_down,
&dev_attr_lpfc_link_speed,
Expand Down Expand Up @@ -6788,6 +6795,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
lpfc_multi_ring_rctl_init(phba, lpfc_multi_ring_rctl);
lpfc_multi_ring_type_init(phba, lpfc_multi_ring_type);
lpfc_ack0_init(phba, lpfc_ack0);
lpfc_xri_rebalancing_init(phba, lpfc_xri_rebalancing);
lpfc_topology_init(phba, lpfc_topology);
lpfc_link_speed_init(phba, lpfc_link_speed);
lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
Expand Down Expand Up @@ -6846,6 +6854,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
phba->nvmet_support = 0;
phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP;
phba->cfg_enable_bbcr = 0;
phba->cfg_xri_rebalancing = 0;
} else {
/* We MUST have FCP support */
if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
Expand Down
16 changes: 16 additions & 0 deletions drivers/scsi/lpfc/lpfc_crtn.h
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ int lpfc_sli4_io_sgl_update(struct lpfc_hba *phba);
int lpfc_sli4_post_io_sgl_list(struct lpfc_hba *phba,
struct list_head *blist, int xricnt);
int lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc);
void lpfc_io_free(struct lpfc_hba *phba);
void lpfc_free_sgl_list(struct lpfc_hba *, struct list_head *);
uint32_t lpfc_sli_port_speed_get(struct lpfc_hba *);
int lpfc_sli4_request_firmware_update(struct lpfc_hba *, uint8_t);
Expand Down Expand Up @@ -573,6 +574,21 @@ void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba);
void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdiocb,
struct lpfc_wcqe_complete *abts_cmpl);
void lpfc_create_multixri_pools(struct lpfc_hba *phba);
void lpfc_create_destroy_pools(struct lpfc_hba *phba);
void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid);
void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 cnt);
void lpfc_adjust_high_watermark(struct lpfc_hba *phba, u32 hwqid);
void lpfc_keep_pvt_pool_above_lowwm(struct lpfc_hba *phba, u32 hwqid);
void lpfc_adjust_pvt_pool_count(struct lpfc_hba *phba, u32 hwqid);
#ifdef LPFC_MXP_STAT
void lpfc_snapshot_mxp(struct lpfc_hba *, u32);
#endif
struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
struct lpfc_nodelist *ndlp, u32 hwqid,
int);
void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd,
struct lpfc_sli4_hdw_queue *qp);
void lpfc_nvme_cmd_template(void);
void lpfc_nvmet_cmd_template(void);
extern int lpfc_enable_nvmet_cnt;
Expand Down
Loading

0 comments on commit c490850

Please sign in to comment.