From 0ec13aff058a82426c8d44b688c804cc4a5a0a3d Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 23 Jun 2021 21:13:10 -0700 Subject: [PATCH 1/7] Revert "ibmvnic: simplify reset_long_term_buff function" This reverts commit 1c7d45e7b2c29080bf6c8cd0e213cc3cbb62a054. We tried to optimize the number of hcalls we send and skipped sending the REQUEST_MAP calls for some maps. However during resets, we need to resend all the maps to the VIOS since the VIOS does not remember the old values. In fact we may have failed over to a new VIOS which will not have any of the mappings. When we send packets with map ids the VIOS does not know about, it triggers a FATAL reset. While the client does recover from the FATAL error reset, we are seeing a large number of such resets. Handling FATAL resets is lot more unnecessary work than issuing a few more hcalls so revert the commit and resend the maps to the VIOS. Fixes: 1c7d45e7b2c ("ibmvnic: simplify reset_long_term_buff function") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 46 ++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5788bb956d733..4b4eccc496a88 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -257,12 +257,40 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); } -static int reset_long_term_buff(struct ibmvnic_long_term_buff *ltb) +static int reset_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb) { - if (!ltb->buff) - return -EINVAL; + struct device *dev = &adapter->vdev->dev; + int rc; memset(ltb->buff, 0, ltb->size); + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + + reinit_completion(&adapter->fw_done); + rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); + if (rc) { + mutex_unlock(&adapter->fw_lock); + return rc; + } + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + if (rc) { + dev_info(dev, + "Reset failed, long term map request timed out or aborted\n"); + mutex_unlock(&adapter->fw_lock); + return rc; + } + + if (adapter->fw_done_rc) { + dev_info(dev, + "Reset failed, attempting to free and reallocate buffer\n"); + free_long_term_buff(adapter, ltb); + mutex_unlock(&adapter->fw_lock); + return alloc_long_term_buff(adapter, ltb, ltb->size); + } + mutex_unlock(&adapter->fw_lock); return 0; } @@ -484,7 +512,8 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) rx_pool->size * rx_pool->buff_size); } else { - rc = reset_long_term_buff(&rx_pool->long_term_buff); + rc = reset_long_term_buff(adapter, + &rx_pool->long_term_buff); } if (rc) @@ -607,11 +636,12 @@ static int init_rx_pools(struct net_device *netdev) return 0; } -static int reset_one_tx_pool(struct ibmvnic_tx_pool *tx_pool) +static int reset_one_tx_pool(struct ibmvnic_adapter *adapter, + struct ibmvnic_tx_pool *tx_pool) { int rc, i; - rc = reset_long_term_buff(&tx_pool->long_term_buff); + rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff); if (rc) return rc; @@ -638,10 +668,10 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) tx_scrqs = adapter->num_active_tx_pools; for (i = 0; i < tx_scrqs; i++) { - rc = reset_one_tx_pool(&adapter->tso_pool[i]); + rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]); if (rc) return rc; - rc = reset_one_tx_pool(&adapter->tx_pool[i]); + rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]); if (rc) return rc; } From 2ca220f92878470c6ba03f9946e412323093cc94 Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Wed, 23 Jun 2021 21:13:11 -0700 Subject: [PATCH 2/7] Revert "ibmvnic: remove duplicate napi_schedule call in open function" This reverts commit 7c451f3ef676c805a4b77a743a01a5c21a250a73. When a vnic interface is taken down and then up, connectivity is not restored. We bisected it to this commit. Reverting this commit until we can fully investigate the issue/benefit of the change. Fixes: 7c451f3ef676 ("ibmvnic: remove duplicate napi_schedule call in open function") Reported-by: Cristobal Forno Reported-by: Abdul Haleem Signed-off-by: Dany Madden Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4b4eccc496a88..8b2f6eb8eb213 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1210,6 +1210,11 @@ static int __ibmvnic_open(struct net_device *netdev) netif_tx_start_all_queues(netdev); + if (prev_state == VNIC_CLOSED) { + for (i = 0; i < adapter->req_rx_queues; i++) + napi_schedule(&adapter->napi[i]); + } + adapter->state = VNIC_OPEN; return rc; } From 65d6470d139a6c1655fccb5cbacbeaba8e8ad2f8 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 23 Jun 2021 21:13:12 -0700 Subject: [PATCH 3/7] ibmvnic: clean pending indirect buffs during reset We batch subordinate command response queue (scrq) descriptors that we need to send to the VIOS using an "indirect" buffer. If after we queue one or more scrqs in the indirect buffer encounter an error (say fail to allocate an skb), we leave the queued scrq descriptors in the indirect buffer until the next call to ibmvnic_xmit(). On the next call to ibmvnic_xmit(), it is possible that the adapter is going through a reset and it is possible that the long term buffers have been unmapped on the VIOS side. If we proceed to flush (send) the packets that are in the indirect buffer, we will end up using the old map ids and this can cause the VIOS to trigger an unnecessary FATAL error reset. Instead of flushing packets remaining on the indirect_buff, discard (clean) them instead. Fixes: 0d973388185d4 ("ibmvnic: Introduce xmit_more support using batched subCRQ hcalls") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 8b2f6eb8eb213..2d15b446ceb35 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -106,6 +106,8 @@ static void release_crq_queue(struct ibmvnic_adapter *); static int __ibmvnic_set_mac(struct net_device *, u8 *); static int init_crq_queue(struct ibmvnic_adapter *adapter); static int send_query_phys_parms(struct ibmvnic_adapter *adapter); +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -668,6 +670,7 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) tx_scrqs = adapter->num_active_tx_pools; for (i = 0; i < tx_scrqs; i++) { + ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]); rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]); if (rc) return rc; @@ -1618,7 +1621,8 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, ind_bufp->index = 0; if (atomic_sub_return(entries, &tx_scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && - __netif_subqueue_stopped(adapter->netdev, queue_num)) { + __netif_subqueue_stopped(adapter->netdev, queue_num) && + !test_bit(0, &adapter->resetting)) { netif_wake_subqueue(adapter->netdev, queue_num); netdev_dbg(adapter->netdev, "Started queue %d\n", queue_num); @@ -1711,7 +1715,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; - ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } @@ -3175,6 +3178,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free) netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n", i); + ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]); if (adapter->tx_scrq[i]->irq) { free_irq(adapter->tx_scrq[i]->irq, adapter->tx_scrq[i]); From 72368f8b2b9e4106072a2728bed3367d54641c22 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 23 Jun 2021 21:13:13 -0700 Subject: [PATCH 4/7] ibmvnic: account for bufs already saved in indir_buf This fixes a crash in replenish_rx_pool() when called from ibmvnic_poll() after a previous call to replenish_rx_pool() encountered an error when allocating a socket buffer. Thanks to Rick Lindsley and Dany Madden for helping debug the crash. Fixes: 4f0b6812e9b9 ("ibmvnic: Introduce batched RX buffer descriptor transmission") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2d15b446ceb35..779de81a54a61 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -328,7 +328,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, rx_scrq = adapter->rx_scrq[pool->index]; ind_bufp = &rx_scrq->ind_buf; - for (i = 0; i < count; ++i) { + + /* netdev_skb_alloc() could have failed after we saved a few skbs + * in the indir_buf and we would not have sent them to VIOS yet. + * To account for them, start the loop at ind_bufp->index rather + * than 0. If we pushed all the skbs to VIOS, ind_bufp->index will + * be 0. + */ + for (i = ind_bufp->index; i < count; ++i) { skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); if (!skb) { dev_err(dev, "Couldn't replenish rx buff\n"); From 552a33729f1a7cc5115d0752064fe9abd6e3e336 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 23 Jun 2021 21:13:14 -0700 Subject: [PATCH 5/7] ibmvnic: set ltb->buff to NULL after freeing free_long_term_buff() checks ltb->buff to decide whether we have a long term buffer to free. So set ltb->buff to NULL afer freeing. While here, also clear ->map_id, fix up some coding style and log an error. Fixes: 9c4eaabd1bb39 ("Check CRQ command return codes") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 779de81a54a61..b8bdab0b27010 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -211,12 +211,11 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, mutex_lock(&adapter->fw_lock); adapter->fw_done_rc = 0; reinit_completion(&adapter->fw_done); - rc = send_request_map(adapter, ltb->addr, - ltb->size, ltb->map_id); + + rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); if (rc) { - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return rc; + dev_err(dev, "send_request_map failed, rc = %d\n", rc); + goto out; } rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); @@ -224,20 +223,23 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, dev_err(dev, "Long term map request aborted or timed out,rc = %d\n", rc); - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return rc; + goto out; } if (adapter->fw_done_rc) { dev_err(dev, "Couldn't map long term buffer,rc = %d\n", adapter->fw_done_rc); + rc = -1; + goto out; + } + rc = 0; +out: + if (rc) { dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return -1; + ltb->buff = NULL; } mutex_unlock(&adapter->fw_lock); - return 0; + return rc; } static void free_long_term_buff(struct ibmvnic_adapter *adapter, @@ -257,6 +259,8 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, adapter->reset_reason != VNIC_RESET_TIMEOUT) send_request_unmap(adapter, ltb->map_id); dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); + ltb->buff = NULL; + ltb->map_id = 0; } static int reset_long_term_buff(struct ibmvnic_adapter *adapter, From f6ebca8efa52e4ae770f0325d618e7bcf08ada0c Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 23 Jun 2021 21:13:15 -0700 Subject: [PATCH 6/7] ibmvnic: free tx_pool if tso_pool alloc fails Free tx_pool and clear it, if allocation of tso_pool fails. release_tx_pools() assumes we have both tx and tso_pools if ->tx_pool is non-NULL. If allocation of tso_pool fails in init_tx_pools(), the assumption will not be true and we would end up dereferencing ->tx_buff, ->free_map fields from a NULL pointer. Fixes: 3205306c6b8d ("ibmvnic: Update TX pool initialization routine") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b8bdab0b27010..ede65b32f8212 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -778,8 +778,11 @@ static int init_tx_pools(struct net_device *netdev) adapter->tso_pool = kcalloc(tx_subcrqs, sizeof(struct ibmvnic_tx_pool), GFP_KERNEL); - if (!adapter->tso_pool) + if (!adapter->tso_pool) { + kfree(adapter->tx_pool); + adapter->tx_pool = NULL; return -1; + } adapter->num_active_tx_pools = tx_subcrqs; From 154b3b2a6ffca445379063ef49f71895104d5a5e Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 23 Jun 2021 21:13:16 -0700 Subject: [PATCH 7/7] ibmvnic: parenthesize a check Parenthesize a check to be more explicit and to fix a sparse warning seen on some distros. Fixes: 91dc5d2553fbf ("ibmvnic: fix miscellaneous checks") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index ede65b32f8212..1c572491441c6 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3266,7 +3266,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, /* H_EOI would fail with rc = H_FUNCTION when running * in XIVE mode which is expected, but not an error. */ - if (rc && rc != H_FUNCTION) + if (rc && (rc != H_FUNCTION)) dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc); }