From da08e4259fbfd769d1e825a685d44132c8576450 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 1 Mar 2016 17:19:32 +0530 Subject: [PATCH 1/5] cxgb4/cxgb4vf: Use fl capacity to check if fl needs to be replenished Use freelist capacity instead of freelist size while checking, if freelist needs to be refilled Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index b4eb4680a27cc..22d9720309275 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2226,7 +2226,7 @@ static int process_responses(struct sge_rspq *q, int budget) budget_left--; } - if (q->offset >= 0 && rxq->fl.size - rxq->fl.avail >= 16) + if (q->offset >= 0 && fl_cap(&rxq->fl) - rxq->fl.avail >= 16) __refill_fl(q->adap, &rxq->fl); return budget - budget_left; } diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 6528231d8a59d..9772aad22bca2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1864,7 +1864,7 @@ static int process_responses(struct sge_rspq *rspq, int budget) * for new buffer pointers, refill the Free List. */ if (rspq->offset >= 0 && - rxq->fl.size - rxq->fl.avail >= 2*FL_PER_EQ_UNIT) + fl_cap(&rxq->fl) - rxq->fl.avail >= 2*FL_PER_EQ_UNIT) __refill_fl(rspq->adapter, &rxq->fl); return budget - budget_left; } From edadad80d65bf9c7aa9f2605dbd2eef94ccd47c0 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 1 Mar 2016 17:19:33 +0530 Subject: [PATCH 2/5] cxgb4/cxgb4vf: For T6 adapter, set FBMIN to 64 bytes T4 and T5 hardware will not coalesce Free List PCI-E Fetch Requests if the Host Driver provides more Free List Pointers than the Fetch Burst Minimum value. So if we set FBMIN to 64 bytes and the Host Driver supplies 128 bytes of Free List Pointer data, the hardware will issue two 64-byte PCI-E Fetch Requests rather than a single coallesced 128-byte Fetch Request. T6 fixes this. So, for T4/T5 we set the FBMIN value to 128 Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 12 +++++++++++- drivers/net/ethernet/chelsio/cxgb4/t4_values.h | 1 + drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 13 ++++++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 22d9720309275..deca4a2956cc0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2611,8 +2611,18 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) | FW_IQ_CMD_FL0CONGCIF_F | FW_IQ_CMD_FL0CONGEN_F); + /* In T6, for egress queue type FL there is internal overhead + * of 16B for header going into FLM module. Hence the maximum + * allowed burst size is 448 bytes. For T4/T5, the hardware + * doesn't coalesce fetch requests if more than 64 bytes of + * Free List pointers are provided, so we use a 128-byte Fetch + * Burst Minimum there (T6 implements coalescing so we can use + * the smaller 64-byte value there). + */ c.fl0dcaen_to_fl0cidxfthresh = - htons(FW_IQ_CMD_FL0FBMIN_V(FETCHBURSTMIN_64B_X) | + htons(FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ? + FETCHBURSTMIN_128B_X : + FETCHBURSTMIN_64B_X) | FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ? FETCHBURSTMAX_512B_X : FETCHBURSTMAX_256B_X)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h index a5231fa771db9..36cf3073ca37d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h @@ -65,6 +65,7 @@ #define TIMERREG_COUNTER0_X 0 #define FETCHBURSTMIN_64B_X 2 +#define FETCHBURSTMIN_128B_X 3 #define FETCHBURSTMAX_256B_X 2 #define FETCHBURSTMAX_512B_X 3 diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 9772aad22bca2..ba6a4e3471f06 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2300,9 +2300,20 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, FW_IQ_CMD_FL0HOSTFCMODE_V(SGE_HOSTFCMODE_NONE) | FW_IQ_CMD_FL0PACKEN_F | FW_IQ_CMD_FL0PADEN_F); + + /* In T6, for egress queue type FL there is internal overhead + * of 16B for header going into FLM module. Hence the maximum + * allowed burst size is 448 bytes. For T4/T5, the hardware + * doesn't coalesce fetch requests if more than 64 bytes of + * Free List pointers are provided, so we use a 128-byte Fetch + * Burst Minimum there (T6 implements coalescing so we can use + * the smaller 64-byte value there). + */ cmd.fl0dcaen_to_fl0cidxfthresh = cpu_to_be16( - FW_IQ_CMD_FL0FBMIN_V(SGE_FETCHBURSTMIN_64B) | + FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ? + FETCHBURSTMIN_128B_X : + FETCHBURSTMIN_64B_X) | FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ? FETCHBURSTMAX_512B_X : FETCHBURSTMAX_256B_X)); From cb440364c72cbbf3dde084cf65b997b40ecb8efd Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 1 Mar 2016 17:19:34 +0530 Subject: [PATCH 3/5] cxgb4vf: Make sge init code more readable Adds a new function t4vf_fl_pkt_align() and use the same in SGE initialization code to find out freelist packet alignment Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 40 +------------- .../ethernet/chelsio/cxgb4vf/t4vf_common.h | 1 + .../net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 55 +++++++++++++++++++ 3 files changed, 59 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index ba6a4e3471f06..1ccd282949a54 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2618,7 +2618,6 @@ int t4vf_sge_init(struct adapter *adapter) u32 fl0 = sge_params->sge_fl_buffer_size[0]; u32 fl1 = sge_params->sge_fl_buffer_size[1]; struct sge *s = &adapter->sge; - unsigned int ingpadboundary, ingpackboundary, ingpad_shift; /* * Start by vetting the basic SGE parameters which have been set up by @@ -2630,7 +2629,8 @@ int t4vf_sge_init(struct adapter *adapter) fl0, fl1); return -EINVAL; } - if ((sge_params->sge_control & RXPKTCPLMODE_F) == 0) { + if ((sge_params->sge_control & RXPKTCPLMODE_F) != + RXPKTCPLMODE_V(RXPKTCPLMODE_SPLIT_X)) { dev_err(adapter->pdev_dev, "bad SGE CPL MODE\n"); return -EINVAL; } @@ -2643,41 +2643,7 @@ int t4vf_sge_init(struct adapter *adapter) s->stat_len = ((sge_params->sge_control & EGRSTATUSPAGESIZE_F) ? 128 : 64); s->pktshift = PKTSHIFT_G(sge_params->sge_control); - - /* T4 uses a single control field to specify both the PCIe Padding and - * Packing Boundary. T5 introduced the ability to specify these - * separately. The actual Ingress Packet Data alignment boundary - * within Packed Buffer Mode is the maximum of these two - * specifications. (Note that it makes no real practical sense to - * have the Pading Boudary be larger than the Packing Boundary but you - * could set the chip up that way and, in fact, legacy T4 code would - * end doing this because it would initialize the Padding Boundary and - * leave the Packing Boundary initialized to 0 (16 bytes).) - * Padding Boundary values in T6 starts from 8B, - * where as it is 32B for T4 and T5. - */ - if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) - ingpad_shift = INGPADBOUNDARY_SHIFT_X; - else - ingpad_shift = T6_INGPADBOUNDARY_SHIFT_X; - - ingpadboundary = 1 << (INGPADBOUNDARY_G(sge_params->sge_control) + - ingpad_shift); - if (is_t4(adapter->params.chip)) { - s->fl_align = ingpadboundary; - } else { - /* T5 has a different interpretation of one of the PCIe Packing - * Boundary values. - */ - ingpackboundary = INGPACKBOUNDARY_G(sge_params->sge_control2); - if (ingpackboundary == INGPACKBOUNDARY_16B_X) - ingpackboundary = 16; - else - ingpackboundary = 1 << (ingpackboundary + - INGPACKBOUNDARY_SHIFT_X); - - s->fl_align = max(ingpadboundary, ingpackboundary); - } + s->fl_align = t4vf_fl_pkt_align(adapter); /* A FL with <= fl_starve_thres buffers is starving and a periodic * timer will attempt to refill it. This needs to be larger than the diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 6ce302fe1a614..9b40a85cc1e4f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -309,6 +309,7 @@ int t4vf_port_init(struct adapter *, int); int t4vf_fw_reset(struct adapter *); int t4vf_set_params(struct adapter *, unsigned int, const u32 *, const u32 *); +int t4vf_fl_pkt_align(struct adapter *adapter); enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS }; int t4vf_bar2_sge_qregs(struct adapter *adapter, unsigned int qid, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 54220117dcba1..fed83d88fc4ef 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -417,6 +417,61 @@ int t4vf_set_params(struct adapter *adapter, unsigned int nparams, return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL); } +/** + * t4vf_fl_pkt_align - return the fl packet alignment + * @adapter: the adapter + * + * T4 has a single field to specify the packing and padding boundary. + * T5 onwards has separate fields for this and hence the alignment for + * next packet offset is maximum of these two. And T6 changes the + * Ingress Padding Boundary Shift, so it's all a mess and it's best + * if we put this in low-level Common Code ... + * + */ +int t4vf_fl_pkt_align(struct adapter *adapter) +{ + u32 sge_control, sge_control2; + unsigned int ingpadboundary, ingpackboundary, fl_align, ingpad_shift; + + sge_control = adapter->params.sge.sge_control; + + /* T4 uses a single control field to specify both the PCIe Padding and + * Packing Boundary. T5 introduced the ability to specify these + * separately. The actual Ingress Packet Data alignment boundary + * within Packed Buffer Mode is the maximum of these two + * specifications. (Note that it makes no real practical sense to + * have the Pading Boudary be larger than the Packing Boundary but you + * could set the chip up that way and, in fact, legacy T4 code would + * end doing this because it would initialize the Padding Boundary and + * leave the Packing Boundary initialized to 0 (16 bytes).) + * Padding Boundary values in T6 starts from 8B, + * where as it is 32B for T4 and T5. + */ + if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) + ingpad_shift = INGPADBOUNDARY_SHIFT_X; + else + ingpad_shift = T6_INGPADBOUNDARY_SHIFT_X; + + ingpadboundary = 1 << (INGPADBOUNDARY_G(sge_control) + ingpad_shift); + + fl_align = ingpadboundary; + if (!is_t4(adapter->params.chip)) { + /* T5 has a different interpretation of one of the PCIe Packing + * Boundary values. + */ + sge_control2 = adapter->params.sge.sge_control2; + ingpackboundary = INGPACKBOUNDARY_G(sge_control2); + if (ingpackboundary == INGPACKBOUNDARY_16B_X) + ingpackboundary = 16; + else + ingpackboundary = 1 << (ingpackboundary + + INGPACKBOUNDARY_SHIFT_X); + + fl_align = max(ingpadboundary, ingpackboundary); + } + return fl_align; +} + /** * t4vf_bar2_sge_qregs - return BAR2 SGE Queue register information * @adapter: the adapter From 5d7b80522b3f1c60147b8c0405ca57e015be26e5 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 1 Mar 2016 17:19:35 +0530 Subject: [PATCH 4/5] cxgb4vf: Remove redundant adapter ready check during probe Function t4vf_wait_dev_ready() is already called in t4vf_prep_adapter(), no need to call it again in adap_init0(). Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 8337514ababb1..5d989e4c42dca 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2236,16 +2236,6 @@ static int adap_init0(struct adapter *adapter) int err; u32 param, val = 0; - /* - * Wait for the device to become ready before proceeding ... - */ - err = t4vf_wait_dev_ready(adapter); - if (err) { - dev_err(adapter->pdev_dev, "device didn't become ready:" - " err=%d\n", err); - return err; - } - /* * Some environments do not properly handle PCIE FLRs -- e.g. in Linux * 2.6.31 and later we can't call pci_reset_function() in order to From f1ea2fe05dcc0104b445c127bf4bcf2b1b498b76 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 1 Mar 2016 17:19:36 +0530 Subject: [PATCH 5/5] cxgb4vf: Remove dead functions collect_netdev_[um]c_list_addrs Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- .../ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 46 ------------------- 1 file changed, 46 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 5d989e4c42dca..91857b81009ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -862,52 +862,6 @@ static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev) return ns; } -/* - * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting - * at a specified offset within the list, into an array of addrss pointers and - * return the number collected. - */ -static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev, - const u8 **addr, - unsigned int offset, - unsigned int maxaddrs) -{ - unsigned int index = 0; - unsigned int naddr = 0; - const struct netdev_hw_addr *ha; - - for_each_dev_addr(dev, ha) - if (index++ >= offset) { - addr[naddr++] = ha->addr; - if (naddr >= maxaddrs) - break; - } - return naddr; -} - -/* - * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting - * at a specified offset within the list, into an array of addrss pointers and - * return the number collected. - */ -static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev, - const u8 **addr, - unsigned int offset, - unsigned int maxaddrs) -{ - unsigned int index = 0; - unsigned int naddr = 0; - const struct netdev_hw_addr *ha; - - netdev_for_each_mc_addr(ha, dev) - if (index++ >= offset) { - addr[naddr++] = ha->addr; - if (naddr >= maxaddrs) - break; - } - return naddr; -} - static inline int cxgb4vf_set_addr_hash(struct port_info *pi) { struct adapter *adapter = pi->adapter;