Skip to content

Commit

Permalink
scsi: pm80xx: Increase number of supported queues
Browse files Browse the repository at this point in the history
The current driver uses a fixed number of Inbound and Outbound queues, and
all of the I/O, TMF and internal requests are submitted through them. A
global spin lock is used to control the shared access. This can create lock
contention, and it is a real bottleneck in the I/O path.

To avoid this, the number of supported Inbound and Outbound queues is
increased to 64, and the number of queues actually used is decided based on
the number of online CPU cores and the number of MSI-X vectors allocated.
Also add per-queue locks instead of using the global lock.

Link: https://lore.kernel.org/r/20201005145011.23674-2-Viswas.G@microchip.com.com
Acked-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Signed-off-by: Viswas G <Viswas.G@microchip.com>
Signed-off-by: Ruksar Devadi <Ruksar.devadi@microchip.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
  • Loading branch information
Viswas G authored and Martin K. Petersen committed Oct 8, 2020
1 parent 1725ba8 commit 05c6c02
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 104 deletions.
6 changes: 4 additions & 2 deletions drivers/scsi/pm8001/pm8001_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -408,9 +408,10 @@ static ssize_t pm8001_ctl_ib_queue_log_show(struct device *cdev,
int offset;
char *str = buf;
int start = 0;
u32 ib_offset = pm8001_ha->ib_offset;
#define IB_MEMMAP(c) \
(*(u32 *)((u8 *)pm8001_ha-> \
memoryMap.region[IB].virt_ptr + \
memoryMap.region[ib_offset].virt_ptr + \
pm8001_ha->evtlog_ib_offset + (c)))

for (offset = 0; offset < IB_OB_READ_TIMES; offset++) {
Expand Down Expand Up @@ -442,9 +443,10 @@ static ssize_t pm8001_ctl_ob_queue_log_show(struct device *cdev,
int offset;
char *str = buf;
int start = 0;
u32 ob_offset = pm8001_ha->ob_offset;
#define OB_MEMMAP(c) \
(*(u32 *)((u8 *)pm8001_ha-> \
memoryMap.region[OB].virt_ptr + \
memoryMap.region[ob_offset].virt_ptr + \
pm8001_ha->evtlog_ob_offset + (c)))

for (offset = 0; offset < IB_OB_READ_TIMES; offset++) {
Expand Down
17 changes: 8 additions & 9 deletions drivers/scsi/pm8001/pm8001_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,8 @@ enum port_type {
/* driver compile-time configuration */
#define PM8001_MAX_CCB 256 /* max ccbs supported */
#define PM8001_MPI_QUEUE 1024 /* maximum mpi queue entries */
#define PM8001_MAX_INB_NUM 1
#define PM8001_MAX_OUTB_NUM 1
#define PM8001_MAX_SPCV_INB_NUM 1
#define PM8001_MAX_SPCV_OUTB_NUM 4
#define PM8001_MAX_INB_NUM 64
#define PM8001_MAX_OUTB_NUM 64
#define PM8001_CAN_QUEUE 508 /* SCSI Queue depth */

/* Inbound/Outbound queue size */
Expand All @@ -94,11 +92,6 @@ enum port_type {
#define PM8001_MAX_MSIX_VEC 64 /* max msi-x int for spcv/ve */

#define USI_MAX_MEMCNT_BASE 5
#define IB (USI_MAX_MEMCNT_BASE + 1)
#define CI (IB + PM8001_MAX_SPCV_INB_NUM)
#define OB (CI + PM8001_MAX_SPCV_INB_NUM)
#define PI (OB + PM8001_MAX_SPCV_OUTB_NUM)
#define USI_MAX_MEMCNT (PI + PM8001_MAX_SPCV_OUTB_NUM)
#define CONFIG_SCSI_PM8001_MAX_DMA_SG 528
#define PM8001_MAX_DMA_SG CONFIG_SCSI_PM8001_MAX_DMA_SG
enum memory_region_num {
Expand All @@ -112,6 +105,12 @@ enum memory_region_num {
};
#define PM8001_EVENT_LOG_SIZE (128 * 1024)

/*
 * Upper bound on the number of DMA memory regions.
 *
 * Queue-related regions start at index USI_MAX_MEMCNT_BASE + 1 and are
 * followed by one region per inbound queue (IBQ), one per inbound
 * consumer index (IBQ CI), one per outbound queue (OBQ) and one per
 * outbound producer index (OBQ PI), each sized for the maximum number
 * of supported queues.
 */
#define USI_MAX_MEMCNT (USI_MAX_MEMCNT_BASE + 1 + \
2 * (PM8001_MAX_INB_NUM + PM8001_MAX_OUTB_NUM))
/*error code*/
enum mpi_err {
MPI_IO_STATUS_SUCCESS = 0x0,
Expand Down
32 changes: 18 additions & 14 deletions drivers/scsi/pm8001/pm8001_hwi.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,10 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha)
u32 offsetib, offsetob;
void __iomem *addressib = pm8001_ha->inbnd_q_tbl_addr;
void __iomem *addressob = pm8001_ha->outbnd_q_tbl_addr;
u32 ib_offset = pm8001_ha->ib_offset;
u32 ob_offset = pm8001_ha->ob_offset;
u32 ci_offset = pm8001_ha->ci_offset;
u32 pi_offset = pm8001_ha->pi_offset;

pm8001_ha->main_cfg_tbl.pm8001_tbl.inbound_q_nppd_hppd = 0;
pm8001_ha->main_cfg_tbl.pm8001_tbl.outbound_hw_event_pid0_3 = 0;
Expand Down Expand Up @@ -223,19 +227,19 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha)
pm8001_ha->inbnd_q_tbl[i].element_pri_size_cnt =
PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x00<<30);
pm8001_ha->inbnd_q_tbl[i].upper_base_addr =
pm8001_ha->memoryMap.region[IB + i].phys_addr_hi;
pm8001_ha->memoryMap.region[ib_offset + i].phys_addr_hi;
pm8001_ha->inbnd_q_tbl[i].lower_base_addr =
pm8001_ha->memoryMap.region[IB + i].phys_addr_lo;
pm8001_ha->memoryMap.region[ib_offset + i].phys_addr_lo;
pm8001_ha->inbnd_q_tbl[i].base_virt =
(u8 *)pm8001_ha->memoryMap.region[IB + i].virt_ptr;
(u8 *)pm8001_ha->memoryMap.region[ib_offset + i].virt_ptr;
pm8001_ha->inbnd_q_tbl[i].total_length =
pm8001_ha->memoryMap.region[IB + i].total_len;
pm8001_ha->memoryMap.region[ib_offset + i].total_len;
pm8001_ha->inbnd_q_tbl[i].ci_upper_base_addr =
pm8001_ha->memoryMap.region[CI + i].phys_addr_hi;
pm8001_ha->memoryMap.region[ci_offset + i].phys_addr_hi;
pm8001_ha->inbnd_q_tbl[i].ci_lower_base_addr =
pm8001_ha->memoryMap.region[CI + i].phys_addr_lo;
pm8001_ha->memoryMap.region[ci_offset + i].phys_addr_lo;
pm8001_ha->inbnd_q_tbl[i].ci_virt =
pm8001_ha->memoryMap.region[CI + i].virt_ptr;
pm8001_ha->memoryMap.region[ci_offset + i].virt_ptr;
offsetib = i * 0x20;
pm8001_ha->inbnd_q_tbl[i].pi_pci_bar =
get_pci_bar_index(pm8001_mr32(addressib,
Expand All @@ -249,21 +253,21 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha)
pm8001_ha->outbnd_q_tbl[i].element_size_cnt =
PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x01<<30);
pm8001_ha->outbnd_q_tbl[i].upper_base_addr =
pm8001_ha->memoryMap.region[OB + i].phys_addr_hi;
pm8001_ha->memoryMap.region[ob_offset + i].phys_addr_hi;
pm8001_ha->outbnd_q_tbl[i].lower_base_addr =
pm8001_ha->memoryMap.region[OB + i].phys_addr_lo;
pm8001_ha->memoryMap.region[ob_offset + i].phys_addr_lo;
pm8001_ha->outbnd_q_tbl[i].base_virt =
(u8 *)pm8001_ha->memoryMap.region[OB + i].virt_ptr;
(u8 *)pm8001_ha->memoryMap.region[ob_offset + i].virt_ptr;
pm8001_ha->outbnd_q_tbl[i].total_length =
pm8001_ha->memoryMap.region[OB + i].total_len;
pm8001_ha->memoryMap.region[ob_offset + i].total_len;
pm8001_ha->outbnd_q_tbl[i].pi_upper_base_addr =
pm8001_ha->memoryMap.region[PI + i].phys_addr_hi;
pm8001_ha->memoryMap.region[pi_offset + i].phys_addr_hi;
pm8001_ha->outbnd_q_tbl[i].pi_lower_base_addr =
pm8001_ha->memoryMap.region[PI + i].phys_addr_lo;
pm8001_ha->memoryMap.region[pi_offset + i].phys_addr_lo;
pm8001_ha->outbnd_q_tbl[i].interrup_vec_cnt_delay =
0 | (10 << 16) | (i << 24);
pm8001_ha->outbnd_q_tbl[i].pi_virt =
pm8001_ha->memoryMap.region[PI + i].virt_ptr;
pm8001_ha->memoryMap.region[pi_offset + i].virt_ptr;
offsetob = i * 0x24;
pm8001_ha->outbnd_q_tbl[i].ci_pci_bar =
get_pci_bar_index(pm8001_mr32(addressob,
Expand Down
117 changes: 73 additions & 44 deletions drivers/scsi/pm8001/pm8001_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,36 @@ static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha);
static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
const struct pci_device_id *ent)
{
int i;
int i, count = 0, rc = 0;
u32 ci_offset, ib_offset, ob_offset, pi_offset;
struct inbound_queue_table *circularQ;

spin_lock_init(&pm8001_ha->lock);
spin_lock_init(&pm8001_ha->bitmap_lock);
PM8001_INIT_DBG(pm8001_ha,
pm8001_printk("pm8001_alloc: PHY:%x\n",
pm8001_ha->chip->n_phy));

/* Setup Interrupt */
rc = pm8001_setup_irq(pm8001_ha);
if (rc) {
PM8001_FAIL_DBG(pm8001_ha, pm8001_printk(
"pm8001_setup_irq failed [ret: %d]\n", rc));
goto err_out_shost;
}
/* Request Interrupt */
rc = pm8001_request_irq(pm8001_ha);
if (rc)
goto err_out_shost;

count = pm8001_ha->max_q_num;
/* Queues are chosen based on the number of cores/msix availability */
ib_offset = pm8001_ha->ib_offset = USI_MAX_MEMCNT_BASE + 1;
ci_offset = pm8001_ha->ci_offset = ib_offset + count;
ob_offset = pm8001_ha->ob_offset = ci_offset + count;
pi_offset = pm8001_ha->pi_offset = ob_offset + count;
pm8001_ha->max_memcnt = pi_offset + count;

for (i = 0; i < pm8001_ha->chip->n_phy; i++) {
pm8001_phy_init(pm8001_ha, i);
pm8001_ha->port[i].wide_port_phymap = 0;
Expand All @@ -293,54 +317,62 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
pm8001_ha->memoryMap.region[IOP].total_len = PM8001_EVENT_LOG_SIZE;
pm8001_ha->memoryMap.region[IOP].alignment = 32;

for (i = 0; i < PM8001_MAX_SPCV_INB_NUM; i++) {
for (i = 0; i < count; i++) {
circularQ = &pm8001_ha->inbnd_q_tbl[i];
spin_lock_init(&circularQ->iq_lock);
/* MPI Memory region 3 for consumer Index of inbound queues */
pm8001_ha->memoryMap.region[CI+i].num_elements = 1;
pm8001_ha->memoryMap.region[CI+i].element_size = 4;
pm8001_ha->memoryMap.region[CI+i].total_len = 4;
pm8001_ha->memoryMap.region[CI+i].alignment = 4;
pm8001_ha->memoryMap.region[ci_offset+i].num_elements = 1;
pm8001_ha->memoryMap.region[ci_offset+i].element_size = 4;
pm8001_ha->memoryMap.region[ci_offset+i].total_len = 4;
pm8001_ha->memoryMap.region[ci_offset+i].alignment = 4;

if ((ent->driver_data) != chip_8001) {
/* MPI Memory region 5 inbound queues */
pm8001_ha->memoryMap.region[IB+i].num_elements =
pm8001_ha->memoryMap.region[ib_offset+i].num_elements =
PM8001_MPI_QUEUE;
pm8001_ha->memoryMap.region[IB+i].element_size = 128;
pm8001_ha->memoryMap.region[IB+i].total_len =
pm8001_ha->memoryMap.region[ib_offset+i].element_size
= 128;
pm8001_ha->memoryMap.region[ib_offset+i].total_len =
PM8001_MPI_QUEUE * 128;
pm8001_ha->memoryMap.region[IB+i].alignment = 128;
pm8001_ha->memoryMap.region[ib_offset+i].alignment
= 128;
} else {
pm8001_ha->memoryMap.region[IB+i].num_elements =
pm8001_ha->memoryMap.region[ib_offset+i].num_elements =
PM8001_MPI_QUEUE;
pm8001_ha->memoryMap.region[IB+i].element_size = 64;
pm8001_ha->memoryMap.region[IB+i].total_len =
pm8001_ha->memoryMap.region[ib_offset+i].element_size
= 64;
pm8001_ha->memoryMap.region[ib_offset+i].total_len =
PM8001_MPI_QUEUE * 64;
pm8001_ha->memoryMap.region[IB+i].alignment = 64;
pm8001_ha->memoryMap.region[ib_offset+i].alignment = 64;
}
}

for (i = 0; i < PM8001_MAX_SPCV_OUTB_NUM; i++) {
for (i = 0; i < count; i++) {
/* MPI Memory region 4 for producer Index of outbound queues */
pm8001_ha->memoryMap.region[PI+i].num_elements = 1;
pm8001_ha->memoryMap.region[PI+i].element_size = 4;
pm8001_ha->memoryMap.region[PI+i].total_len = 4;
pm8001_ha->memoryMap.region[PI+i].alignment = 4;
pm8001_ha->memoryMap.region[pi_offset+i].num_elements = 1;
pm8001_ha->memoryMap.region[pi_offset+i].element_size = 4;
pm8001_ha->memoryMap.region[pi_offset+i].total_len = 4;
pm8001_ha->memoryMap.region[pi_offset+i].alignment = 4;

if (ent->driver_data != chip_8001) {
/* MPI Memory region 6 Outbound queues */
pm8001_ha->memoryMap.region[OB+i].num_elements =
pm8001_ha->memoryMap.region[ob_offset+i].num_elements =
PM8001_MPI_QUEUE;
pm8001_ha->memoryMap.region[OB+i].element_size = 128;
pm8001_ha->memoryMap.region[OB+i].total_len =
pm8001_ha->memoryMap.region[ob_offset+i].element_size
= 128;
pm8001_ha->memoryMap.region[ob_offset+i].total_len =
PM8001_MPI_QUEUE * 128;
pm8001_ha->memoryMap.region[OB+i].alignment = 128;
pm8001_ha->memoryMap.region[ob_offset+i].alignment
= 128;
} else {
/* MPI Memory region 6 Outbound queues */
pm8001_ha->memoryMap.region[OB+i].num_elements =
pm8001_ha->memoryMap.region[ob_offset+i].num_elements =
PM8001_MPI_QUEUE;
pm8001_ha->memoryMap.region[OB+i].element_size = 64;
pm8001_ha->memoryMap.region[OB+i].total_len =
pm8001_ha->memoryMap.region[ob_offset+i].element_size
= 64;
pm8001_ha->memoryMap.region[ob_offset+i].total_len =
PM8001_MPI_QUEUE * 64;
pm8001_ha->memoryMap.region[OB+i].alignment = 64;
pm8001_ha->memoryMap.region[ob_offset+i].alignment = 64;
}

}
Expand Down Expand Up @@ -369,7 +401,7 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
pm8001_ha->memoryMap.region[FORENSIC_MEM].total_len = 0x10000;
pm8001_ha->memoryMap.region[FORENSIC_MEM].element_size = 0x10000;
pm8001_ha->memoryMap.region[FORENSIC_MEM].alignment = 0x10000;
for (i = 0; i < USI_MAX_MEMCNT; i++) {
for (i = 0; i < pm8001_ha->max_memcnt; i++) {
if (pm8001_mem_alloc(pm8001_ha->pdev,
&pm8001_ha->memoryMap.region[i].virt_ptr,
&pm8001_ha->memoryMap.region[i].phys_addr,
Expand Down Expand Up @@ -405,6 +437,8 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
/* Initialize tags */
pm8001_tag_init(pm8001_ha);
return 0;
err_out_shost:
scsi_remove_host(pm8001_ha->shost);
err_out:
return 1;
}
Expand Down Expand Up @@ -899,7 +933,8 @@ static int pm8001_configure_phy_settings(struct pm8001_hba_info *pm8001_ha)
static u32 pm8001_setup_msix(struct pm8001_hba_info *pm8001_ha)
{
u32 number_of_intr;
int rc;
int rc, cpu_online_count;
unsigned int allocated_irq_vectors;

/* SPCv controllers supports 64 msi-x */
if (pm8001_ha->chip_id == chip_8001) {
Expand All @@ -908,13 +943,21 @@ static u32 pm8001_setup_msix(struct pm8001_hba_info *pm8001_ha)
number_of_intr = PM8001_MAX_MSIX_VEC;
}

cpu_online_count = num_online_cpus();
number_of_intr = min_t(int, cpu_online_count, number_of_intr);
rc = pci_alloc_irq_vectors(pm8001_ha->pdev, number_of_intr,
number_of_intr, PCI_IRQ_MSIX);
number_of_intr = rc;
allocated_irq_vectors = rc;
if (rc < 0)
return rc;

/* Assigns the number of interrupts */
number_of_intr = min_t(int, allocated_irq_vectors, number_of_intr);
pm8001_ha->number_of_intr = number_of_intr;

/* Maximum queue number updating in HBA structure */
pm8001_ha->max_q_num = number_of_intr;

PM8001_INIT_DBG(pm8001_ha, pm8001_printk(
"pci_alloc_irq_vectors request ret:%d no of intr %d\n",
rc, pm8001_ha->number_of_intr));
Expand Down Expand Up @@ -1069,13 +1112,6 @@ static int pm8001_pci_probe(struct pci_dev *pdev,
rc = -ENOMEM;
goto err_out_free;
}
/* Setup Interrupt */
rc = pm8001_setup_irq(pm8001_ha);
if (rc) {
PM8001_FAIL_DBG(pm8001_ha, pm8001_printk(
"pm8001_setup_irq failed [ret: %d]\n", rc));
goto err_out_shost;
}

PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha);
rc = PM8001_CHIP_DISP->chip_init(pm8001_ha);
Expand All @@ -1088,13 +1124,6 @@ static int pm8001_pci_probe(struct pci_dev *pdev,
rc = scsi_add_host(shost, &pdev->dev);
if (rc)
goto err_out_ha_free;
/* Request Interrupt */
rc = pm8001_request_irq(pm8001_ha);
if (rc) {
PM8001_FAIL_DBG(pm8001_ha, pm8001_printk(
"pm8001_request_irq failed [ret: %d]\n", rc));
goto err_out_shost;
}

PM8001_CHIP_DISP->interrupt_enable(pm8001_ha, 0);
if (pm8001_ha->chip_id != chip_8001) {
Expand Down
11 changes: 9 additions & 2 deletions drivers/scsi/pm8001/pm8001_sas.h
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,7 @@ struct inbound_queue_table {
u32 reserved;
__le32 consumer_index;
u32 producer_idx;
spinlock_t iq_lock;
};
struct outbound_queue_table {
u32 element_size_cnt;
Expand Down Expand Up @@ -524,8 +525,8 @@ struct pm8001_hba_info {
void __iomem *fatal_tbl_addr; /*MPI IVT Table Addr */
union main_cfg_table main_cfg_tbl;
union general_status_table gs_tbl;
struct inbound_queue_table inbnd_q_tbl[PM8001_MAX_SPCV_INB_NUM];
struct outbound_queue_table outbnd_q_tbl[PM8001_MAX_SPCV_OUTB_NUM];
struct inbound_queue_table inbnd_q_tbl[PM8001_MAX_INB_NUM];
struct outbound_queue_table outbnd_q_tbl[PM8001_MAX_OUTB_NUM];
struct sas_phy_attribute_table phy_attr_table;
/* MPI SAS PHY attributes */
u8 sas_addr[SAS_ADDR_SIZE];
Expand Down Expand Up @@ -561,6 +562,12 @@ struct pm8001_hba_info {
u32 reset_in_progress;
u32 non_fatal_count;
u32 non_fatal_read_length;
u32 max_q_num;
u32 ib_offset;
u32 ob_offset;
u32 ci_offset;
u32 pi_offset;
u32 max_memcnt;
};

struct pm8001_work {
Expand Down
Loading

0 comments on commit 05c6c02

Please sign in to comment.