From 44fbc1b6e0e291df1d31f5080777ce66a72ef37a Mon Sep 17 00:00:00 2001
From: Nick Child
Date: Thu, 10 Nov 2022 15:32:16 -0600
Subject: [PATCH 1/3] ibmvnic: Assign IRQ affinity hints to device queues

Assign affinity hints to ibmvnic device queue interrupts. Affinity
hints are assigned and removed during sub-crq init and teardown,
respectively. This update should improve latency, as interrupt lines
and processing are distributed more evenly among CPUs. This
implementation is based on the virtio_net driver.

Signed-off-by: Thomas Falcon
Signed-off-by: Dany Madden
Signed-off-by: Nick Child
Reviewed-by: Rick Lindsley
Reviewed-by: Haren Myneni
Signed-off-by: David S. Miller
---
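
Note (below the "---" cut, so git am drops it): the CPU spread done by
ibmvnic_set_affinity() below is easier to follow with concrete numbers.
Here is a minimal standalone sketch of the stride/stragglers arithmetic,
with assumed counts of 16 online CPUs and 6 queues; the program and its
values are illustrative only, not driver code:

#include <stdio.h>

int main(void)
{
    int num_cpu = 16, total_queues = 6; /* assumed example counts */
    int stride = num_cpu / total_queues; /* CPUs per IRQ: here 2 */
    int stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
    int cpu = 0, q;

    if (stride < 1) /* mirrors max_t(int, ..., 1) in the driver */
        stride = 1;

    for (q = 0; q < total_queues; q++) {
        /* the first 'stragglers' queues take one extra CPU each */
        int width = stride + (q < stragglers ? 1 : 0);

        printf("queue %d -> cpus %d..%d\n", q, cpu, cpu + width - 1);
        cpu = (cpu + width) % num_cpu; /* wrap, like cpumask_next_wrap() */
    }
    return 0;
}

With these counts, stride is 2 with 4 stragglers: the first four queues
take three CPUs each, the last two take two, and all 16 CPUs are covered
exactly once.
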
 drivers/net/ethernet/ibm/ibmvnic.c | 140 +++++++++++++++++++++++++++++
 drivers/net/ethernet/ibm/ibmvnic.h |   1 +
 2 files changed, 141 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 9282381a438fe..0c969bdaf94d0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -68,6 +68,7 @@
 #include <linux/workqueue.h>
 #include <linux/if_vlan.h>
 #include <linux/utsname.h>
+#include <linux/cpu.h>

 #include "ibmvnic.h"

@@ -171,6 +172,132 @@ static int send_version_xchg(struct ibmvnic_adapter *adapter)
 	return ibmvnic_send_crq(adapter, &crq);
 }

+static void ibmvnic_clean_queue_affinity(struct ibmvnic_adapter *adapter,
+					 struct ibmvnic_sub_crq_queue *queue)
+{
+	if (!(queue && queue->irq))
+		return;
+
+	cpumask_clear(queue->affinity_mask);
+
+	if (irq_set_affinity_and_hint(queue->irq, NULL))
+		netdev_warn(adapter->netdev,
+			    "%s: Clear affinity failed, queue addr = %p, IRQ = %d\n",
+			    __func__, queue, queue->irq);
+}
+
+static void ibmvnic_clean_affinity(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_sub_crq_queue **rxqs;
+	struct ibmvnic_sub_crq_queue **txqs;
+	int num_rxqs, num_txqs;
+	int rc, i;
+
+	rc = 0;
+	rxqs = adapter->rx_scrq;
+	txqs = adapter->tx_scrq;
+	num_txqs = adapter->num_active_tx_scrqs;
+	num_rxqs = adapter->num_active_rx_scrqs;
+
+	netdev_dbg(adapter->netdev, "%s: Cleaning irq affinity hints", __func__);
+	if (txqs) {
+		for (i = 0; i < num_txqs; i++)
+			ibmvnic_clean_queue_affinity(adapter, txqs[i]);
+	}
+	if (rxqs) {
+		for (i = 0; i < num_rxqs; i++)
+			ibmvnic_clean_queue_affinity(adapter, rxqs[i]);
+	}
+}
+
+static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
+				      unsigned int *cpu, int *stragglers,
+				      int stride)
+{
+	cpumask_var_t mask;
+	int i;
+	int rc = 0;
+
+	if (!(queue && queue->irq))
+		return rc;
+
+	/* cpumask_var_t is either a pointer or array, allocation works here */
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	/* while we have extra cpu give one extra to this irq */
+	if (*stragglers) {
+		stride++;
+		(*stragglers)--;
+	}
+	/* atomic write is safer than writing bit by bit directly */
+	for (i = 0; i < stride; i++) {
+		cpumask_set_cpu(*cpu, mask);
+		*cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
+					 nr_cpu_ids, false);
+	}
+	/* set queue affinity mask */
+	cpumask_copy(queue->affinity_mask, mask);
+	rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask);
+	free_cpumask_var(mask);
+
+	return rc;
+}
+
+/* assumes cpu read lock is held */
+static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_sub_crq_queue **rxqs = adapter->rx_scrq;
+	struct ibmvnic_sub_crq_queue **txqs = adapter->tx_scrq;
+	struct ibmvnic_sub_crq_queue *queue;
+	int num_rxqs = adapter->num_active_rx_scrqs;
+	int num_txqs = adapter->num_active_tx_scrqs;
+	int total_queues, stride, stragglers, i;
+	unsigned int num_cpu, cpu;
+	int rc = 0;
+
+	netdev_dbg(adapter->netdev, "%s: Setting irq affinity hints", __func__);
+	if (!(adapter->rx_scrq && adapter->tx_scrq)) {
+		netdev_warn(adapter->netdev,
+			    "%s: Set affinity failed, queues not allocated\n",
+			    __func__);
+		return;
+	}
+
+	total_queues = num_rxqs + num_txqs;
+	num_cpu = num_online_cpus();
+	/* number of cpu's assigned per irq */
+	stride = max_t(int, num_cpu / total_queues, 1);
+	/* number of leftover cpu's */
+	stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
+	/* next available cpu to assign irq to */
+	cpu = cpumask_next(-1, cpu_online_mask);
+
+	for (i = 0; i < num_txqs; i++) {
+		queue = txqs[i];
+		rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers,
+						stride);
+		if (rc)
+			goto out;
+	}
+
+	for (i = 0; i < num_rxqs; i++) {
+		queue = rxqs[i];
+		rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers,
+						stride);
+		if (rc)
+			goto out;
+	}
+
+out:
+	if (rc) {
+		netdev_warn(adapter->netdev,
+			    "%s: Set affinity failed, queue addr = %p, IRQ = %d, rc = %d.\n",
+			    __func__, queue, queue->irq, rc);
+		ibmvnic_clean_affinity(adapter);
+	}
+}
+
 static long h_reg_sub_crq(unsigned long unit_address, unsigned long token,
 			  unsigned long length, unsigned long *number,
 			  unsigned long *irq)
@@ -3626,6 +3753,8 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
 	if (!adapter->tx_scrq || !adapter->rx_scrq)
 		return -EINVAL;

+	ibmvnic_clean_affinity(adapter);
+
 	for (i = 0; i < adapter->req_tx_queues; i++) {
 		netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i);
 		rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]);
@@ -3675,6 +3804,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
 	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 			 DMA_BIDIRECTIONAL);
 	free_pages((unsigned long)scrq->msgs, 2);
+	free_cpumask_var(scrq->affinity_mask);
 	kfree(scrq);
 }

@@ -3695,6 +3825,8 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 		dev_warn(dev, "Couldn't allocate crq queue messages page\n");
 		goto zero_page_failed;
 	}
+	if (!zalloc_cpumask_var(&scrq->affinity_mask, GFP_KERNEL))
+		goto cpumask_alloc_failed;

 	scrq->msg_token = dma_map_single(dev, scrq->msgs, 4 * PAGE_SIZE,
 					 DMA_BIDIRECTIONAL);
@@ -3747,6 +3879,8 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 			 DMA_BIDIRECTIONAL);
 map_failed:
+	free_cpumask_var(scrq->affinity_mask);
+cpumask_alloc_failed:
 	free_pages((unsigned long)scrq->msgs, 2);
 zero_page_failed:
 	kfree(scrq);
@@ -3758,6 +3892,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
 {
 	int i;

+	ibmvnic_clean_affinity(adapter);
 	if (adapter->tx_scrq) {
 		for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
 			if (!adapter->tx_scrq[i])
@@ -4035,6 +4170,11 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
 			goto req_rx_irq_failed;
 		}
 	}
+
+	cpus_read_lock();
+	ibmvnic_set_affinity(adapter);
+	cpus_read_unlock();
+
 	return rc;

 req_rx_irq_failed:
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index e5c6ff3d0c472..6720fec1ae673 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -825,6 +825,7 @@ struct ibmvnic_sub_crq_queue {
 	atomic_t used;
 	char name[32];
 	u64 handle;
+	cpumask_var_t affinity_mask;
 } ____cacheline_aligned;

 struct ibmvnic_long_term_buff {

From 92125c3a602454824d70edb1b2abb382811cab4f Mon Sep 17 00:00:00 2001
From: Nick Child
Date: Thu, 10 Nov 2022 15:32:17 -0600
Subject: [PATCH 2/3] ibmvnic: Add hotpluggable CPU callbacks to reassign
 affinity hints

When CPUs are added or removed, ibmvnic devices must reassign their
affinity hint values. Introduce a new CPU hotplug state,
CPUHP_IBMVNIC_DEAD, to signal to ibmvnic devices that a CPU has been
removed and it is time to reset affinity hint assignments. Conversely,
when CPUs are being added, add a state instance to CPUHP_AP_ONLINE_DYN,
which triggers a reassignment of affinity hints once the new CPUs are
online. This implementation is based on the virtio_net driver.

Signed-off-by: Thomas Falcon
Signed-off-by: Dany Madden
Signed-off-by: Nick Child
Reviewed-by: Rick Lindsley
Reviewed-by: Haren Myneni
Signed-off-by: David S. Miller
---
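
Note (again below the cut): the two cpuhp_setup_state_multi() calls in
ibmvnic_module_init() below check for errors differently because dynamic
and fixed hotplug states report success differently. A minimal sketch of
the same registration pattern, with hypothetical foo_* names standing in
for the driver's callbacks:

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/init.h>

static enum cpuhp_state foo_online;

static int foo_cpu_online(unsigned int cpu, struct hlist_node *node)
{
    return 0; /* re-spread affinity hints over the grown CPU set */
}

static int foo_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
{
    return 0; /* drop affinity hints before the CPU goes away */
}

static int foo_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
    return 0; /* re-spread across the remaining CPUs */
}

static int __init foo_cpuhp_register(void)
{
    int ret;

    /* dynamic state: success returns the allocated state number (> 0) */
    ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/foo:online",
                                  foo_cpu_online, foo_cpu_down_prep);
    if (ret < 0)
        return ret;
    foo_online = ret; /* remember it for add/remove_instance calls */

    /* fixed state: success returns 0, hence the different check */
    ret = cpuhp_setup_state_multi(CPUHP_IBMVNIC_DEAD, "net/foo:dead",
                                  NULL, foo_cpu_dead);
    if (ret)
        cpuhp_remove_multi_state(foo_online);
    return ret;
}

This asymmetry is why the patch uses "if (ret < 0)" for the online state
but "if (ret)" for the dead state.
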
 drivers/net/ethernet/ibm/ibmvnic.c | 89 +++++++++++++++++++++++++++++-
 drivers/net/ethernet/ibm/ibmvnic.h |  4 ++
 include/linux/cpuhotplug.h         |  1 +
 3 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 0c969bdaf94d0..2fc0d50dbb86d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -298,6 +298,57 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
 	}
 }

+static int ibmvnic_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct ibmvnic_adapter *adapter;
+
+	adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node);
+	ibmvnic_set_affinity(adapter);
+	return 0;
+}
+
+static int ibmvnic_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+	struct ibmvnic_adapter *adapter;
+
+	adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node_dead);
+	ibmvnic_set_affinity(adapter);
+	return 0;
+}
+
+static int ibmvnic_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
+{
+	struct ibmvnic_adapter *adapter;
+
+	adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node);
+	ibmvnic_clean_affinity(adapter);
+	return 0;
+}
+
+static enum cpuhp_state ibmvnic_online;
+
+static int ibmvnic_cpu_notif_add(struct ibmvnic_adapter *adapter)
+{
+	int ret;
+
+	ret = cpuhp_state_add_instance_nocalls(ibmvnic_online, &adapter->node);
+	if (ret)
+		return ret;
+	ret = cpuhp_state_add_instance_nocalls(CPUHP_IBMVNIC_DEAD,
+					       &adapter->node_dead);
+	if (!ret)
+		return ret;
+	cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node);
+	return ret;
+}
+
+static void ibmvnic_cpu_notif_remove(struct ibmvnic_adapter *adapter)
+{
+	cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node);
+	cpuhp_state_remove_instance_nocalls(CPUHP_IBMVNIC_DEAD,
+					    &adapter->node_dead);
+}
+
 static long h_reg_sub_crq(unsigned long unit_address, unsigned long token,
 			  unsigned long length, unsigned long *number,
 			  unsigned long *irq)
@@ -6292,10 +6343,19 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	}
 	dev_info(&dev->dev, "ibmvnic registered\n");

+	rc = ibmvnic_cpu_notif_add(adapter);
+	if (rc) {
+		netdev_err(netdev, "Registering cpu notifier failed\n");
+		goto cpu_notif_add_failed;
+	}
+
 	complete(&adapter->probe_done);

 	return 0;

+cpu_notif_add_failed:
+	unregister_netdev(netdev);
+
 ibmvnic_register_fail:
 	device_remove_file(&dev->dev, &dev_attr_failover);

@@ -6346,6 +6406,8 @@ static void ibmvnic_remove(struct vio_dev *dev)

 	spin_unlock_irqrestore(&adapter->state_lock, flags);

+	ibmvnic_cpu_notif_remove(adapter);
+
 	flush_work(&adapter->ibmvnic_reset);
 	flush_delayed_work(&adapter->ibmvnic_delayed_reset);

@@ -6476,15 +6538,40 @@ static struct vio_driver ibmvnic_driver = {
 /* module functions */
 static int __init ibmvnic_module_init(void)
 {
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/ibmvnic:online",
+				      ibmvnic_cpu_online,
+				      ibmvnic_cpu_down_prep);
+	if (ret < 0)
+		goto out;
+	ibmvnic_online = ret;
+	ret = cpuhp_setup_state_multi(CPUHP_IBMVNIC_DEAD, "net/ibmvnic:dead",
+				      NULL, ibmvnic_cpu_dead);
+	if (ret)
+		goto err_dead;
+
+	ret = vio_register_driver(&ibmvnic_driver);
+	if (ret)
+		goto err_vio_register;
+
 	pr_info("%s: %s %s\n", ibmvnic_driver_name, ibmvnic_driver_string,
 		IBMVNIC_DRIVER_VERSION);

-	return vio_register_driver(&ibmvnic_driver);
+	return 0;
+err_vio_register:
+	cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD);
+err_dead:
+	cpuhp_remove_multi_state(ibmvnic_online);
+out:
+	return ret;
 }

 static void __exit ibmvnic_module_exit(void)
 {
 	vio_unregister_driver(&ibmvnic_driver);
+	cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD);
+	cpuhp_remove_multi_state(ibmvnic_online);
 }

 module_init(ibmvnic_module_init);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 6720fec1ae673..b35c9b6f913b6 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -984,6 +984,10 @@ struct ibmvnic_adapter {
 	int reset_done_rc;
 	bool wait_for_reset;

+	/* CPU hotplug instances for online & dead */
+	struct hlist_node node;
+	struct hlist_node node_dead;
+
 	/* partner capabilities */
 	u64 min_tx_queues;
 	u64 min_rx_queues;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index f61447913db97..c8bc85a87b1ed 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -69,6 +69,7 @@ enum cpuhp_state {
 	CPUHP_X86_APB_DEAD,
 	CPUHP_X86_MCE_DEAD,
 	CPUHP_VIRT_NET_DEAD,
+	CPUHP_IBMVNIC_DEAD,
 	CPUHP_SLUB_DEAD,
 	CPUHP_DEBUG_OBJ_DEAD,
 	CPUHP_MM_WRITEBACK_DEAD,

From df8f66d02df7b44516635edbc8c17b1311cfa0d8 Mon Sep 17 00:00:00 2001
From: Nick Child
Date: Thu, 10 Nov 2022 15:32:18 -0600
Subject: [PATCH 3/3] ibmvnic: Update XPS assignments during affinity binding

Transmit Packet Steering (XPS) maps CPU numbers to transmit queues. By
running the same connection on the same set of CPUs, contention for the
queue and the cache miss rate can be minimized. When assigning a CPU
mask for a transmit queue's IRQ, assign the same CPU mask as the set of
CPUs that XPS should use for that queue.

Signed-off-by: Thomas Falcon
Signed-off-by: Dany Madden
Signed-off-by: Nick Child
Reviewed-by: Rick Lindsley
Reviewed-by: Haren Myneni
Signed-off-by: David S. Miller
---
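
Note (again below the cut): __netif_set_xps_queue() installs the same
per-queue CPU map that userspace can read back from
/sys/class/net/<dev>/queues/tx-<n>/xps_cpus. A standalone sketch of the
bitmask a queue owning CPUs 0-2 would report; the queue/CPU values are
assumed for illustration and this is not driver code:

#include <stdio.h>

int main(void)
{
    unsigned long mask = 0;
    int cpu;

    for (cpu = 0; cpu <= 2; cpu++) /* CPUs bound to this tx queue */
        mask |= 1UL << cpu;

    printf("xps_cpus = %lx\n", mask); /* prints "xps_cpus = 7" */
    return 0;
}

Keeping the XPS map identical to the IRQ affinity hint means the CPU
that transmits on a queue is also the one that services that queue's
completion interrupt, which is the contention and cache-locality win
the commit message describes.
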
 drivers/net/ethernet/ibm/ibmvnic.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2fc0d50dbb86d..e19a6bb3f4445 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -279,6 +279,16 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
 						stride);
 		if (rc)
 			goto out;
+
+		if (!queue)
+			continue;
+
+		rc = __netif_set_xps_queue(adapter->netdev,
+					   cpumask_bits(queue->affinity_mask),
+					   i, XPS_CPUS);
+		if (rc)
+			netdev_warn(adapter->netdev, "%s: Set XPS on queue %d failed, rc = %d.\n",
+				    __func__, i, rc);
 	}

 	for (i = 0; i < num_rxqs; i++) {