From d395381909a32060927e3f90116f938379be0636 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:14 +0300 Subject: [PATCH 01/18] netdevsim: Add max_vfs to bus_dev Currently there is no limit to the number of VFs netdevsim can enable. In a real systems this value exist and used by the driver. Fore example, some features might need to consider this value when allocating memory. Expose max_vfs variable to debugfs as configurable resource. If are VFs configured (num_vfs != 0) then changing of max_vfs not allowed. Co-developed-by: Yuval Avnery Signed-off-by: Yuval Avnery Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/bus.c | 97 ++++++++++++++++++++++++++++--- drivers/net/netdevsim/dev.c | 13 +++++ drivers/net/netdevsim/netdevsim.h | 10 ++++ 3 files changed, 112 insertions(+), 8 deletions(-) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 0e9511661601a..4bd7ef3c04bea 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -27,9 +27,9 @@ static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev) static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev, unsigned int num_vfs) { - nsim_bus_dev->vfconfigs = kcalloc(num_vfs, - sizeof(struct nsim_vf_config), - GFP_KERNEL | __GFP_NOWARN); + if (nsim_bus_dev->max_vfs < num_vfs) + return -ENOMEM; + if (!nsim_bus_dev->vfconfigs) return -ENOMEM; nsim_bus_dev->num_vfs = num_vfs; @@ -39,8 +39,6 @@ static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev, static void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev) { - kfree(nsim_bus_dev->vfconfigs); - nsim_bus_dev->vfconfigs = NULL; nsim_bus_dev->num_vfs = 0; } @@ -56,7 +54,7 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - rtnl_lock(); + mutex_lock(&nsim_bus_dev->vfs_lock); if (nsim_bus_dev->num_vfs == num_vfs) goto exit_good; if (nsim_bus_dev->num_vfs && num_vfs) { @@ -74,7 +72,7 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr, exit_good: ret = count; exit_unlock: - rtnl_unlock(); + mutex_unlock(&nsim_bus_dev->vfs_lock); return ret; } @@ -92,6 +90,73 @@ static struct device_attribute nsim_bus_dev_numvfs_attr = __ATTR(sriov_numvfs, 0664, nsim_bus_dev_numvfs_show, nsim_bus_dev_numvfs_store); +ssize_t nsim_bus_dev_max_vfs_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_bus_dev *nsim_bus_dev = file->private_data; + char buf[11]; + size_t len; + + len = snprintf(buf, sizeof(buf), "%u\n", nsim_bus_dev->max_vfs); + if (len < 0) + return len; + + return simple_read_from_buffer(data, count, ppos, buf, len); +} + +ssize_t nsim_bus_dev_max_vfs_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_bus_dev *nsim_bus_dev = file->private_data; + struct nsim_vf_config *vfconfigs; + ssize_t ret; + char buf[10]; + u32 val; + + if (*ppos != 0) + return 0; + + if (count >= sizeof(buf)) + return -ENOSPC; + + mutex_lock(&nsim_bus_dev->vfs_lock); + /* Reject if VFs are configured */ + if (nsim_bus_dev->num_vfs) { + ret = -EBUSY; + goto unlock; + } + + ret = copy_from_user(buf, data, count); + if (ret) { + ret = -EFAULT; + goto unlock; + } + + buf[count] = '\0'; + ret = kstrtouint(buf, 10, &val); + if (ret) { + ret = -EIO; + goto unlock; + } + + vfconfigs = kcalloc(val, sizeof(struct nsim_vf_config), GFP_KERNEL | __GFP_NOWARN); + if (!vfconfigs) { + ret = -ENOMEM; + goto unlock; + } + + kfree(nsim_bus_dev->vfconfigs); + nsim_bus_dev->vfconfigs = vfconfigs; + nsim_bus_dev->max_vfs = val; + *ppos += count; + ret = count; +unlock: + mutex_unlock(&nsim_bus_dev->vfs_lock); + return ret; +} + static ssize_t new_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -311,6 +376,8 @@ static struct bus_type nsim_bus = { .num_vf = nsim_num_vf, }; +#define NSIM_BUS_DEV_MAX_VFS 4 + static struct nsim_bus_dev * nsim_bus_dev_new(unsigned int id, unsigned int port_count) { @@ -329,15 +396,28 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; nsim_bus_dev->initial_net = current->nsproxy->net_ns; + nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS; mutex_init(&nsim_bus_dev->nsim_bus_reload_lock); + mutex_init(&nsim_bus_dev->vfs_lock); /* Disallow using nsim_bus_dev */ smp_store_release(&nsim_bus_dev->init, false); + nsim_bus_dev->vfconfigs = kcalloc(nsim_bus_dev->max_vfs, + sizeof(struct nsim_vf_config), + GFP_KERNEL | __GFP_NOWARN); + if (!nsim_bus_dev->vfconfigs) { + err = -ENOMEM; + goto err_nsim_bus_dev_id_free; + } + err = device_register(&nsim_bus_dev->dev); if (err) - goto err_nsim_bus_dev_id_free; + goto err_nsim_vfs_free; + return nsim_bus_dev; +err_nsim_vfs_free: + kfree(nsim_bus_dev->vfconfigs); err_nsim_bus_dev_id_free: ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); err_nsim_bus_dev_free: @@ -351,6 +431,7 @@ static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev) smp_store_release(&nsim_bus_dev->init, false); device_unregister(&nsim_bus_dev->dev); ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); + kfree(nsim_bus_dev->vfconfigs); kfree(nsim_bus_dev); } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 6189a4c0d39e9..12df93a34bfd6 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -192,6 +192,14 @@ static const struct file_operations nsim_dev_trap_fa_cookie_fops = { .owner = THIS_MODULE, }; +static const struct file_operations nsim_dev_max_vfs_fops = { + .open = simple_open, + .read = nsim_bus_dev_max_vfs_read, + .write = nsim_bus_dev_max_vfs_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) { char dev_ddir_name[sizeof(DRV_NAME) + 10]; @@ -231,6 +239,11 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) debugfs_create_bool("fail_trap_policer_counter_get", 0600, nsim_dev->ddir, &nsim_dev->fail_trap_policer_counter_get); + nsim_dev->max_vfs = debugfs_create_file("max_vfs", + 0600, + nsim_dev->ddir, + nsim_dev->nsim_bus_dev, + &nsim_dev_max_vfs_fops); nsim_udp_tunnels_debugfs_create(nsim_dev); return 0; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 7ff24e03577b5..12f56f2f85e87 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -212,6 +212,7 @@ struct nsim_dev { struct dentry *ddir; struct dentry *ports_ddir; struct dentry *take_snapshot; + struct dentry *max_vfs; struct bpf_offload_dev *bpf_dev; bool bpf_bind_accept; bool bpf_bind_verifier_accept; @@ -269,6 +270,13 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *fib_data); u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, enum nsim_resource_id res_id, bool max); +ssize_t nsim_bus_dev_max_vfs_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos); +ssize_t nsim_bus_dev_max_vfs_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos); + #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) void nsim_ipsec_init(struct netdevsim *ns); void nsim_ipsec_teardown(struct netdevsim *ns); @@ -308,7 +316,9 @@ struct nsim_bus_dev { struct net *initial_net; /* Purpose of this is to carry net pointer * during the probe time only. */ + unsigned int max_vfs; unsigned int num_vfs; + struct mutex vfs_lock; /* Protects vfconfigs */ struct nsim_vf_config *vfconfigs; /* Lock for devlink->reload_enabled in netdevsim module */ struct mutex nsim_bus_reload_lock; From 32ac15d8fd804615e79e365d6825da2a371f91f9 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:15 +0300 Subject: [PATCH 02/18] netdevsim: Disable VFs on nsim_dev_reload_destroy() call Move VFs disabling from device release() to nsim_dev_reload_destroy() to make VFs disabling and ports removal simultaneous. This is a requirement for VFs ports implemented in next patches. Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/bus.c | 5 +---- drivers/net/netdevsim/dev.c | 6 ++++++ drivers/net/netdevsim/netdevsim.h | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 4bd7ef3c04bea..d5c547c35e2f8 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -37,7 +37,7 @@ static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev, return 0; } -static void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev) +void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev) { nsim_bus_dev->num_vfs = 0; } @@ -233,9 +233,6 @@ static const struct attribute_group *nsim_bus_dev_attr_groups[] = { static void nsim_bus_dev_release(struct device *dev) { - struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - - nsim_bus_dev_vfs_disable(nsim_bus_dev); } static struct device_type nsim_bus_dev_type = { diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 12df93a34bfd6..cd50c05b1e6e1 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1182,6 +1182,12 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) if (devlink_is_reload_failed(devlink)) return; debugfs_remove(nsim_dev->take_snapshot); + + mutex_lock(&nsim_dev->nsim_bus_dev->vfs_lock); + if (nsim_dev->nsim_bus_dev->num_vfs) + nsim_bus_dev_vfs_disable(nsim_dev->nsim_bus_dev); + mutex_unlock(&nsim_dev->nsim_bus_dev->vfs_lock); + nsim_dev_port_del_all(nsim_dev); nsim_dev_psample_exit(nsim_dev); nsim_dev_health_exit(nsim_dev); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 12f56f2f85e87..a1b49c8c01759 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -276,6 +276,7 @@ ssize_t nsim_bus_dev_max_vfs_read(struct file *file, ssize_t nsim_bus_dev_max_vfs_write(struct file *file, const char __user *data, size_t count, loff_t *ppos); +void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev); #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) void nsim_ipsec_init(struct netdevsim *ns); From 814b9ce65ec3b53404eeda7a11e1abb4af8d7df3 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:16 +0300 Subject: [PATCH 03/18] netdevsim: Implement port types and indexing Define type of ports, which netdevsim driver currently operates with as PF. Define new port type - VF, which will be implemented in following patches. Add helper functions to distinguish them. Add helper function to get VF index from port index. Add port indexing logic where PFs' indexes starts from 0, VFs' - from NSIM_DEV_VF_PORT_INDEX_BASE. All ports uses same index pool, which means that PF port may be created with index from VFs' indexes range. Maximum number of VFs, which the driver can allocate, is limited by UINT_MAX - BASE. Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/bus.c | 10 ++++++-- drivers/net/netdevsim/dev.c | 42 +++++++++++++++++++++++-------- drivers/net/netdevsim/netdevsim.h | 20 +++++++++++++++ 3 files changed, 60 insertions(+), 12 deletions(-) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index d5c547c35e2f8..e29146d9eddb9 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -141,6 +141,12 @@ ssize_t nsim_bus_dev_max_vfs_write(struct file *file, goto unlock; } + /* max_vfs limited by the maximum number of provided port indexes */ + if (val > NSIM_DEV_VF_PORT_INDEX_MAX - NSIM_DEV_VF_PORT_INDEX_BASE) { + ret = -ERANGE; + goto unlock; + } + vfconfigs = kcalloc(val, sizeof(struct nsim_vf_config), GFP_KERNEL | __GFP_NOWARN); if (!vfconfigs) { ret = -ENOMEM; @@ -178,7 +184,7 @@ new_port_store(struct device *dev, struct device_attribute *attr, mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); devlink_reload_disable(devlink); - ret = nsim_dev_port_add(nsim_bus_dev, port_index); + ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; @@ -207,7 +213,7 @@ del_port_store(struct device *dev, struct device_attribute *attr, mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); devlink_reload_disable(devlink); - ret = nsim_dev_port_del(nsim_bus_dev, port_index); + ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index cd50c05b1e6e1..93d6f3d54d11e 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -35,6 +35,25 @@ #include "netdevsim.h" +static unsigned int +nsim_dev_port_index(enum nsim_dev_port_type type, unsigned int port_index) +{ + switch (type) { + case NSIM_DEV_PORT_TYPE_VF: + port_index = NSIM_DEV_VF_PORT_INDEX_BASE + port_index; + break; + case NSIM_DEV_PORT_TYPE_PF: + break; + } + + return port_index; +} + +static inline unsigned int nsim_dev_port_index_to_vf_index(unsigned int port_index) +{ + return port_index - NSIM_DEV_VF_PORT_INDEX_BASE; +} + static struct dentry *nsim_dev_ddir; #define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32) @@ -923,7 +942,7 @@ static const struct devlink_ops nsim_dev_devlink_ops = { #define NSIM_DEV_MAX_MACS_DEFAULT 32 #define NSIM_DEV_TEST1_DEFAULT true -static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, +static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, unsigned int port_index) { struct devlink_port_attrs attrs = {}; @@ -934,7 +953,8 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); if (!nsim_dev_port) return -ENOMEM; - nsim_dev_port->port_index = port_index; + nsim_dev_port->port_index = nsim_dev_port_index(type, port_index); + nsim_dev_port->port_type = type; devlink_port = &nsim_dev_port->devlink_port; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; @@ -943,7 +963,7 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, attrs.switch_id.id_len = nsim_dev->switch_id.id_len; devlink_port_attrs_set(devlink_port, &attrs); err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port, - port_index); + nsim_dev_port->port_index); if (err) goto err_port_free; @@ -1000,7 +1020,7 @@ static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, int i, err; for (i = 0; i < port_count; i++) { - err = __nsim_dev_port_add(nsim_dev, i); + err = __nsim_dev_port_add(nsim_dev, NSIM_DEV_PORT_TYPE_PF, i); if (err) goto err_port_del_all; } @@ -1216,32 +1236,34 @@ void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) } static struct nsim_dev_port * -__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, unsigned int port_index) +__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, + unsigned int port_index) { struct nsim_dev_port *nsim_dev_port; + port_index = nsim_dev_port_index(type, port_index); list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) if (nsim_dev_port->port_index == port_index) return nsim_dev_port; return NULL; } -int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, +int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type, unsigned int port_index) { struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); int err; mutex_lock(&nsim_dev->port_list_lock); - if (__nsim_dev_port_lookup(nsim_dev, port_index)) + if (__nsim_dev_port_lookup(nsim_dev, type, port_index)) err = -EEXIST; else - err = __nsim_dev_port_add(nsim_dev, port_index); + err = __nsim_dev_port_add(nsim_dev, type, port_index); mutex_unlock(&nsim_dev->port_list_lock); return err; } -int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, +int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type, unsigned int port_index) { struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); @@ -1249,7 +1271,7 @@ int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, int err = 0; mutex_lock(&nsim_dev->port_list_lock); - nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, port_index); + nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, type, port_index); if (!nsim_dev_port) err = -ENOENT; else diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index a1b49c8c01759..e025c1bc1c26f 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -197,10 +197,19 @@ static inline void nsim_dev_psample_exit(struct nsim_dev *nsim_dev) } #endif +enum nsim_dev_port_type { + NSIM_DEV_PORT_TYPE_PF, + NSIM_DEV_PORT_TYPE_VF, +}; + +#define NSIM_DEV_VF_PORT_INDEX_BASE 128 +#define NSIM_DEV_VF_PORT_INDEX_MAX UINT_MAX + struct nsim_dev_port { struct list_head list; struct devlink_port devlink_port; unsigned int port_index; + enum nsim_dev_port_type port_type; struct dentry *ddir; struct netdevsim *ns; }; @@ -260,8 +269,10 @@ void nsim_dev_exit(void); int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev); void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev); int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, + enum nsim_dev_port_type type, unsigned int port_index); int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, + enum nsim_dev_port_type type, unsigned int port_index); struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, @@ -278,6 +289,15 @@ ssize_t nsim_bus_dev_max_vfs_write(struct file *file, size_t count, loff_t *ppos); void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev); +static inline bool nsim_dev_port_is_pf(struct nsim_dev_port *nsim_dev_port) +{ + return nsim_dev_port->port_type == NSIM_DEV_PORT_TYPE_PF; +} + +static inline bool nsim_dev_port_is_vf(struct nsim_dev_port *nsim_dev_port) +{ + return nsim_dev_port->port_type == NSIM_DEV_PORT_TYPE_VF; +} #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) void nsim_ipsec_init(struct netdevsim *ns); void nsim_ipsec_teardown(struct netdevsim *ns); From 92ba1f29e6e2f16eb93a0a2c7c01985920b89222 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:17 +0300 Subject: [PATCH 04/18] netdevsim: Implement VFs Allow creation of netdevsim ports for VFs along with allocations of corresponding net devices and devlink ports. Add enums and helpers to distinguish PFs' ports from VFs' ports. Ports creation/deletion debugfs API intended to be used with physical ports only. VFs instantiation will be done in one of the next patches. Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 14 +++++- drivers/net/netdevsim/netdev.c | 90 ++++++++++++++++++++++++---------- 2 files changed, 77 insertions(+), 27 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 93d6f3d54d11e..8bd7654f4dca4 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -945,11 +945,15 @@ static const struct devlink_ops nsim_dev_devlink_ops = { static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, unsigned int port_index) { + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; struct devlink_port_attrs attrs = {}; struct nsim_dev_port *nsim_dev_port; struct devlink_port *devlink_port; int err; + if (type == NSIM_DEV_PORT_TYPE_VF && !nsim_bus_dev->num_vfs) + return -EINVAL; + nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); if (!nsim_dev_port) return -ENOMEM; @@ -957,8 +961,14 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_typ nsim_dev_port->port_type = type; devlink_port = &nsim_dev_port->devlink_port; - attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = port_index + 1; + if (nsim_dev_port_is_pf(nsim_dev_port)) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = port_index + 1; + } else { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; + attrs.pci_vf.pf = 0; + attrs.pci_vf.vf = port_index; + } memcpy(attrs.switch_id.id, nsim_dev->switch_id.id, nsim_dev->switch_id.id_len); attrs.switch_id.id_len = nsim_dev->switch_id.id_len; devlink_port_attrs_set(devlink_port, &attrs); diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 659d3dceb687f..9352e18b4db90 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -261,6 +261,18 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_get_devlink_port = nsim_get_devlink_port, }; +static const struct net_device_ops nsim_vf_netdev_ops = { + .ndo_start_xmit = nsim_start_xmit, + .ndo_set_rx_mode = nsim_set_rx_mode, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = nsim_change_mtu, + .ndo_get_stats64 = nsim_get_stats64, + .ndo_setup_tc = nsim_setup_tc, + .ndo_set_features = nsim_set_features, + .ndo_get_devlink_port = nsim_get_devlink_port, +}; + static void nsim_setup(struct net_device *dev) { ether_setup(dev); @@ -280,6 +292,49 @@ static void nsim_setup(struct net_device *dev) dev->max_mtu = ETH_MAX_MTU; } +static int nsim_init_netdevsim(struct netdevsim *ns) +{ + int err; + + ns->netdev->netdev_ops = &nsim_netdev_ops; + + err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); + if (err) + return err; + + rtnl_lock(); + err = nsim_bpf_init(ns); + if (err) + goto err_utn_destroy; + + nsim_ipsec_init(ns); + + err = register_netdevice(ns->netdev); + if (err) + goto err_ipsec_teardown; + rtnl_unlock(); + return 0; + +err_ipsec_teardown: + nsim_ipsec_teardown(ns); + nsim_bpf_uninit(ns); +err_utn_destroy: + rtnl_unlock(); + nsim_udp_tunnels_info_destroy(ns->netdev); + return err; +} + +static int nsim_init_netdevsim_vf(struct netdevsim *ns) +{ + int err; + + ns->netdev->netdev_ops = &nsim_vf_netdev_ops; + rtnl_lock(); + err = register_netdevice(ns->netdev); + rtnl_unlock(); + return err; +} + struct netdevsim * nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { @@ -299,33 +354,15 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) ns->nsim_dev_port = nsim_dev_port; ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); - dev->netdev_ops = &nsim_netdev_ops; nsim_ethtool_init(ns); - - err = nsim_udp_tunnels_info_create(nsim_dev, dev); + if (nsim_dev_port_is_pf(nsim_dev_port)) + err = nsim_init_netdevsim(ns); + else + err = nsim_init_netdevsim_vf(ns); if (err) goto err_free_netdev; - - rtnl_lock(); - err = nsim_bpf_init(ns); - if (err) - goto err_utn_destroy; - - nsim_ipsec_init(ns); - - err = register_netdevice(dev); - if (err) - goto err_ipsec_teardown; - rtnl_unlock(); - return ns; -err_ipsec_teardown: - nsim_ipsec_teardown(ns); - nsim_bpf_uninit(ns); -err_utn_destroy: - rtnl_unlock(); - nsim_udp_tunnels_info_destroy(dev); err_free_netdev: free_netdev(dev); return ERR_PTR(err); @@ -337,10 +374,13 @@ void nsim_destroy(struct netdevsim *ns) rtnl_lock(); unregister_netdevice(dev); - nsim_ipsec_teardown(ns); - nsim_bpf_uninit(ns); + if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { + nsim_ipsec_teardown(ns); + nsim_bpf_uninit(ns); + } rtnl_unlock(); - nsim_udp_tunnels_info_destroy(dev); + if (nsim_dev_port_is_pf(ns->nsim_dev_port)) + nsim_udp_tunnels_info_destroy(dev); free_netdev(dev); } From 160dc373eead2143ea51b7f8e2a6bf1e383f24f8 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:18 +0300 Subject: [PATCH 05/18] netdevsim: Implement legacy/switchdev mode for VFs Implement callbacks to set/get eswitch mode value. Add helpers to check current mode. Instantiate VFs' net devices and devlink ports on switchdev enabling and remove them on legacy enabling. Changing number of VFs while in switchdev mode triggers VFs creation/deletion. Also disable NDO API callback to set VF rate, since it's legacy API. Switchdev API to set VF rate will be implemented in one of the next patches. Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/bus.c | 17 +++++++- drivers/net/netdevsim/dev.c | 69 +++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdev.c | 5 +++ drivers/net/netdevsim/netdevsim.h | 14 +++++++ 4 files changed, 104 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index e29146d9eddb9..b56003dfe3cc4 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -27,6 +27,9 @@ static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev) static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev, unsigned int num_vfs) { + struct nsim_dev *nsim_dev; + int err = 0; + if (nsim_bus_dev->max_vfs < num_vfs) return -ENOMEM; @@ -34,12 +37,24 @@ static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev, return -ENOMEM; nsim_bus_dev->num_vfs = num_vfs; - return 0; + nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); + if (nsim_esw_mode_is_switchdev(nsim_dev)) { + err = nsim_esw_switchdev_enable(nsim_dev, NULL); + if (err) + nsim_bus_dev->num_vfs = 0; + } + + return err; } void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev) { + struct nsim_dev *nsim_dev; + nsim_bus_dev->num_vfs = 0; + nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); + if (nsim_esw_mode_is_switchdev(nsim_dev)) + nsim_esw_legacy_enable(nsim_dev, NULL); } static ssize_t diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 8bd7654f4dca4..ed9ce083d0add 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -439,6 +439,72 @@ static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev) devlink_region_destroy(nsim_dev->dummy_region); } +static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port); +int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port, *tmp; + + mutex_lock(&nsim_dev->port_list_lock); + list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) + if (nsim_dev_port_is_vf(nsim_dev_port)) + __nsim_dev_port_del(nsim_dev_port); + mutex_unlock(&nsim_dev->port_list_lock); + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; + return 0; +} + +int nsim_esw_switchdev_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack) +{ + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; + int i, err; + + for (i = 0; i < nsim_bus_dev->num_vfs; i++) { + err = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_VF, i); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize VFs' netdevsim ports"); + pr_err("Failed to initialize VF id=%d. %d.\n", i, err); + goto err_port_add_vfs; + } + } + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + return 0; + +err_port_add_vfs: + for (i--; i >= 0; i--) + nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_VF, i); + return err; +} + +static int nsim_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + int err = 0; + + mutex_lock(&nsim_dev->nsim_bus_dev->vfs_lock); + if (mode == nsim_dev->esw_mode) + goto unlock; + + if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + err = nsim_esw_legacy_enable(nsim_dev, extack); + else if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + err = nsim_esw_switchdev_enable(nsim_dev, extack); + else + err = -EINVAL; + +unlock: + mutex_unlock(&nsim_dev->nsim_bus_dev->vfs_lock); + return err; +} + +static int nsim_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + *mode = nsim_dev->esw_mode; + return 0; +} + struct nsim_trap_item { void *trap_ctx; enum devlink_trap_action action; @@ -925,6 +991,8 @@ nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink, } static const struct devlink_ops nsim_dev_devlink_ops = { + .eswitch_mode_set = nsim_devlink_eswitch_mode_set, + .eswitch_mode_get = nsim_devlink_eswitch_mode_get, .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT | DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), @@ -1177,6 +1245,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) devlink_params_publish(devlink); devlink_reload_enable(devlink); + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; return 0; err_psample_exit: diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 9352e18b4db90..c3aeb15843e22 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -113,6 +113,11 @@ static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max) struct netdevsim *ns = netdev_priv(dev); struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev; + if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) { + pr_err("Not supported in switchdev mode. Please use devlink API.\n"); + return -EOPNOTSUPP; + } + if (vf >= nsim_bus_dev->num_vfs) return -EINVAL; diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index e025c1bc1c26f..13a0042124f74 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -257,8 +257,22 @@ struct nsim_dev { u32 sleep; } udp_ports; struct nsim_dev_psample *psample; + u16 esw_mode; }; +int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack); +int nsim_esw_switchdev_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack); + +static inline bool nsim_esw_mode_is_legacy(struct nsim_dev *nsim_dev) +{ + return nsim_dev->esw_mode == DEVLINK_ESWITCH_MODE_LEGACY; +} + +static inline bool nsim_esw_mode_is_switchdev(struct nsim_dev *nsim_dev) +{ + return nsim_dev->esw_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV; +} + static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev) { return devlink_net(priv_to_devlink(nsim_dev)); From 4677efc486e1872f62d4632c50f7183f82296fa6 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:19 +0300 Subject: [PATCH 06/18] devlink: Introduce rate object Allow registering rate object for devlink ports with dedicated devlink_rate_leaf_{create|destroy}() API. Implement new netlink DEVLINK_CMD_RATE_GET command that is used to retrieve rate object info. Add new DEVLINK_CMD_RATE_{NEW|DEL} commands that are used for notifications when creating/deleting leaf rate object. Rate API is intended to be used for rate limiting of individual devlink ports (leafs) and their aggregates (nodes). Example: $ devlink port show pci/0000:03:00.0/0 pci/0000:03:00.0/1 $ devlink port function rate show pci/0000:03:00.0/0: type leaf pci/0000:03:00.0/1: type leaf Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 14 +++ include/uapi/linux/devlink.h | 11 ++ net/core/devlink.c | 229 ++++++++++++++++++++++++++++++++++- 3 files changed, 253 insertions(+), 1 deletion(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 7c984cadfec42..2f5954d96c3ee 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -34,6 +34,7 @@ struct devlink_ops; struct devlink { struct list_head list; struct list_head port_list; + struct list_head rate_list; struct list_head sb_list; struct list_head dpipe_table_list; struct list_head resource_list; @@ -133,6 +134,15 @@ struct devlink_port_attrs { }; }; +struct devlink_rate { + struct list_head list; + enum devlink_rate_type type; + struct devlink *devlink; + void *priv; + + struct devlink_port *devlink_port; +}; + struct devlink_port { struct list_head list; struct list_head param_list; @@ -152,6 +162,8 @@ struct devlink_port { struct delayed_work type_warn_dw; struct list_head reporter_list; struct mutex reporters_lock; /* Protects reporter_list */ + + struct devlink_rate *devlink_rate; }; struct devlink_port_new_attrs { @@ -1512,6 +1524,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); +int devlink_rate_leaf_create(struct devlink_port *port, void *priv); +void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index f6008b2fa60ff..0c27b45c47db5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -126,6 +126,11 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_TEST, + DEVLINK_CMD_RATE_GET, /* can dump */ + DEVLINK_CMD_RATE_SET, + DEVLINK_CMD_RATE_NEW, + DEVLINK_CMD_RATE_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -206,6 +211,10 @@ enum devlink_port_flavour { */ }; +enum devlink_rate_type { + DEVLINK_RATE_TYPE_LEAF, +}; + enum devlink_param_cmode { DEVLINK_PARAM_CMODE_RUNTIME, DEVLINK_PARAM_CMODE_DRIVERINIT, @@ -534,6 +543,8 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ + + DEVLINK_ATTR_RATE_TYPE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 69681f19388e5..3b785f51156f3 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -190,6 +190,25 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink, return devlink_port_get_from_attrs(devlink, info->attrs); } +static inline bool +devlink_rate_is_leaf(struct devlink_rate *devlink_rate) +{ + return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF; +} + +static struct devlink_rate * +devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + struct devlink_rate *devlink_rate; + struct devlink_port *devlink_port; + + devlink_port = devlink_port_get_from_attrs(devlink, info->attrs); + if (IS_ERR(devlink_port)) + return ERR_CAST(devlink_port); + devlink_rate = devlink_port->devlink_rate; + return devlink_rate ?: ERR_PTR(-ENODEV); +} + struct devlink_sb { struct list_head list; unsigned int index; @@ -408,12 +427,13 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) #define DEVLINK_NL_FLAG_NEED_PORT BIT(0) #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_RATE BIT(2) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(2) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(3) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -442,6 +462,15 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, devlink_port = devlink_port_get_from_info(devlink, info); if (!IS_ERR(devlink_port)) info->user_ptr[1] = devlink_port; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) { + struct devlink_rate *devlink_rate; + + devlink_rate = devlink_rate_leaf_get_from_info(devlink, info); + if (IS_ERR(devlink_rate)) { + err = PTR_ERR(devlink_rate); + goto unlock; + } + info->user_ptr[1] = devlink_rate; } return 0; @@ -749,6 +778,39 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops * return 0; } +static int devlink_nl_rate_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_rate *devlink_rate, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, + struct netlink_ext_ack *extack) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type)) + goto nla_put_failure; + + if (devlink_rate_is_leaf(devlink_rate)) { + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, + devlink_rate->devlink_port->index)) + goto nla_put_failure; + } + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + static bool devlink_port_fn_state_valid(enum devlink_port_fn_state state) { @@ -920,6 +982,99 @@ static void devlink_port_notify(struct devlink_port *devlink_port, msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } +static void devlink_rate_notify(struct devlink_rate *devlink_rate, + enum devlink_command cmd) +{ + struct devlink *devlink = devlink_rate->devlink; + struct sk_buff *msg; + int err; + + WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && + cmd != DEVLINK_CMD_RATE_DEL); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + err = devlink_nl_rate_fill(msg, devlink, devlink_rate, + cmd, 0, 0, 0, NULL); + if (err) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); +} + +static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_rate *devlink_rate; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err = 0; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { + enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; + u32 id = NETLINK_CB(cb->skb).portid; + + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_rate_fill(msg, devlink, + devlink_rate, + cmd, id, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, NULL); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_rate *devlink_rate = info->user_ptr[1]; + struct devlink *devlink = devlink_rate->devlink; + struct sk_buff *msg; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_rate_fill(msg, devlink, devlink_rate, + DEVLINK_CMD_RATE_NEW, + info->snd_portid, info->snd_seq, 0, + info->extack); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; @@ -7802,6 +7957,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 }, [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, + [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -7827,6 +7983,13 @@ static const struct genl_small_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, + { + .cmd = DEVLINK_CMD_RATE_GET, + .doit = devlink_nl_cmd_rate_get_doit, + .dumpit = devlink_nl_cmd_rate_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, + /* can be retrieved by unprivileged users */ + }, { .cmd = DEVLINK_CMD_PORT_SPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, @@ -8202,6 +8365,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); __devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); + INIT_LIST_HEAD(&devlink->rate_list); INIT_LIST_HEAD(&devlink->sb_list); INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); INIT_LIST_HEAD(&devlink->resource_list); @@ -8304,6 +8468,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->resource_list)); WARN_ON(!list_empty(&devlink->dpipe_table_list)); WARN_ON(!list_empty(&devlink->sb_list)); + WARN_ON(!list_empty(&devlink->rate_list)); WARN_ON(!list_empty(&devlink->port_list)); xa_destroy(&devlink->snapshot_ids); @@ -8620,6 +8785,68 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro } EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); +/** + * devlink_rate_leaf_create - create devlink rate leaf + * + * @devlink_port: devlink port object to create rate object on + * @priv: driver private data + * + * Create devlink rate object of type leaf on provided @devlink_port. + * Throws call trace if @devlink_port already has a devlink rate object. + * + * Context: Takes and release devlink->lock . + * + * Return: -ENOMEM if failed to allocate rate object, 0 otherwise. + */ +int +devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv) +{ + struct devlink *devlink = devlink_port->devlink; + struct devlink_rate *devlink_rate; + + devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL); + if (!devlink_rate) + return -ENOMEM; + + mutex_lock(&devlink->lock); + WARN_ON(devlink_port->devlink_rate); + devlink_rate->type = DEVLINK_RATE_TYPE_LEAF; + devlink_rate->devlink = devlink; + devlink_rate->devlink_port = devlink_port; + devlink_rate->priv = priv; + list_add_tail(&devlink_rate->list, &devlink->rate_list); + devlink_port->devlink_rate = devlink_rate; + devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW); + mutex_unlock(&devlink->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_rate_leaf_create); + +/** + * devlink_rate_leaf_destroy - destroy devlink rate leaf + * + * @devlink_port: devlink port linked to the rate object + * + * Context: Takes and release devlink->lock . + */ +void devlink_rate_leaf_destroy(struct devlink_port *devlink_port) +{ + struct devlink_rate *devlink_rate = devlink_port->devlink_rate; + struct devlink *devlink = devlink_port->devlink; + + if (!devlink_rate) + return; + + mutex_lock(&devlink->lock); + devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL); + list_del(&devlink_rate->list); + devlink_port->devlink_rate = NULL; + mutex_unlock(&devlink->lock); + kfree(devlink_rate); +} +EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy); + static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, char *name, size_t len) { From 885dfe121b3862d0cc1de98a69f1ca37d18e3495 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:20 +0300 Subject: [PATCH 07/18] netdevsim: Register devlink rate leaf objects per VF Register devlink rate leaf objects per VF. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index ed9ce083d0add..356287a2c320d 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1055,11 +1055,20 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_typ goto err_port_debugfs_exit; } + if (nsim_dev_port_is_vf(nsim_dev_port)) { + err = devlink_rate_leaf_create(&nsim_dev_port->devlink_port, + nsim_dev_port); + if (err) + goto err_nsim_destroy; + } + devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev); list_add(&nsim_dev_port->list, &nsim_dev->port_list); return 0; +err_nsim_destroy: + nsim_destroy(nsim_dev_port->ns); err_port_debugfs_exit: nsim_dev_port_debugfs_exit(nsim_dev_port); err_dl_port_unregister: @@ -1074,6 +1083,8 @@ static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) struct devlink_port *devlink_port = &nsim_dev_port->devlink_port; list_del(&nsim_dev_port->list); + if (nsim_dev_port_is_vf(nsim_dev_port)) + devlink_rate_leaf_destroy(&nsim_dev_port->devlink_port); devlink_port_type_clear(devlink_port); nsim_destroy(nsim_dev_port->ns); nsim_dev_port_debugfs_exit(nsim_dev_port); From a27d8e352bf252eb44b04c9e628a8d759956bdd2 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:21 +0300 Subject: [PATCH 08/18] selftest: netdevsim: Add devlink rate test Test verifies that all netdevsim VF ports have rate leaf object created by default. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- .../drivers/net/netdevsim/devlink.sh | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 40909c254365b..c654be0e8300f 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -5,12 +5,13 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS="fw_flash_test params_test regions_test reload_test \ netns_reload_test resource_test dev_info_test \ - empty_reporter_test dummy_reporter_test" + empty_reporter_test dummy_reporter_test rate_test" NUM_NETIFS=0 source $lib_dir/lib.sh BUS_ADDR=10 PORT_COUNT=4 +VF_COUNT=4 DEV_NAME=netdevsim$BUS_ADDR SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/ @@ -507,6 +508,28 @@ dummy_reporter_test() log_test "dummy reporter test" } +rate_leafs_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))' +} + +rate_test() +{ + RET=0 + + echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs + devlink dev eswitch set $DL_HANDLE mode switchdev + local leafs=`rate_leafs_get $DL_HANDLE` + local num_leafs=`echo $leafs | wc -w` + [ "$num_leafs" == "$VF_COUNT" ] + check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs" + + log_test "rate test" +} + setup_prepare() { modprobe netdevsim From 1897db2ec3109eb1dd07b357c95c5e03d54e41b9 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:22 +0300 Subject: [PATCH 09/18] devlink: Allow setting tx rate for devlink rate leaf objects Implement support for DEVLINK_CMD_RATE_SET command with new attributes DEVLINK_ATTR_RATE_TX_{SHARE|MAX} that are used to set devlink rate shared/max tx rate values. Extend devlink ops with new callbacks rate_leaf_tx_{share|max}_set() to allow supporting drivers to implement rate control through devlink. New attributes are optional. Driver implementations are allowed to support either or both of them. Shared rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_share 10mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_share 10mbit Max rate example: $ devlink port function rate set netdevsim/netdevsim10/0 tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/0 netdevsim/netdevsim10/0: type leaf tx_max 100mbit Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 10 +++++ include/uapi/linux/devlink.h | 2 + net/core/devlink.c | 86 ++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index 2f5954d96c3ee..46d553545f83c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -139,6 +139,8 @@ struct devlink_rate { enum devlink_rate_type type; struct devlink *devlink; void *priv; + u64 tx_share; + u64 tx_max; struct devlink_port *devlink_port; }; @@ -1465,6 +1467,14 @@ struct devlink_ops { struct devlink_port *port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); + + /** + * Rate control callbacks. + */ + int (*rate_leaf_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); + int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0c27b45c47db5..ae94cd2a1078e 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -545,6 +545,8 @@ enum devlink_attr { DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ DEVLINK_ATTR_RATE_TYPE, /* u16 */ + DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ + DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 3b785f51156f3..37839fd5ca73f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -803,6 +803,14 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, goto nla_put_failure; } + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE, + devlink_rate->tx_share, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX, + devlink_rate->tx_max, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -1495,6 +1503,76 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, return devlink->ops->port_del(devlink, port_index, extack); } +static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, + const struct devlink_ops *ops, + struct genl_info *info) +{ + struct nlattr **attrs = info->attrs; + u64 rate; + int err; + + if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) { + rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]); + err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + if (err) + return err; + devlink_rate->tx_share = rate; + } + + if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) { + rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]); + err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + if (err) + return err; + devlink_rate->tx_max = rate; + } + + return 0; +} + +static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, + struct genl_info *info, + enum devlink_rate_type type) +{ + struct nlattr **attrs = info->attrs; + + if (type == DEVLINK_RATE_TYPE_LEAF) { + if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs"); + return false; + } + } else { + WARN_ON("Unknown type of rate object"); + return false; + } + + return true; +} + +static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_rate *devlink_rate = info->user_ptr[1]; + struct devlink *devlink = devlink_rate->devlink; + const struct devlink_ops *ops = devlink->ops; + int err; + + if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type)) + return -EOPNOTSUPP; + + err = devlink_nl_rate_set(devlink_rate, ops, info); + + if (!err) + devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW); + return err; +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -7958,6 +8036,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, + [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, + [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -7990,6 +8070,12 @@ static const struct genl_small_ops devlink_nl_ops[] = { .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, /* can be retrieved by unprivileged users */ }, + { + .cmd = DEVLINK_CMD_RATE_SET, + .doit = devlink_nl_cmd_rate_set_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, + }, { .cmd = DEVLINK_CMD_PORT_SPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, From 605c4f8f199b8374cf01624822aa0b5f2c09d1c7 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:23 +0300 Subject: [PATCH 10/18] netdevsim: Implement devlink rate leafs tx rate support Implement new devlink ops that allow shared and max tx rate control for devlink port rate objects (leafs) through devlink API. Expose rate values of VF ports to netdevsim debugfs. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 78 +++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 356287a2c320d..5be6f7e86711f 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -276,17 +276,26 @@ static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev) static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; + unsigned int port_index = nsim_dev_port->port_index; char port_ddir_name[16]; char dev_link_name[32]; - sprintf(port_ddir_name, "%u", nsim_dev_port->port_index); + sprintf(port_ddir_name, "%u", port_index); nsim_dev_port->ddir = debugfs_create_dir(port_ddir_name, nsim_dev->ports_ddir); if (IS_ERR(nsim_dev_port->ddir)) return PTR_ERR(nsim_dev_port->ddir); - sprintf(dev_link_name, "../../../" DRV_NAME "%u", - nsim_dev->nsim_bus_dev->dev.id); + sprintf(dev_link_name, "../../../" DRV_NAME "%u", nsim_bus_dev->dev.id); + if (nsim_dev_port_is_vf(nsim_dev_port)) { + unsigned int vf_id = nsim_dev_port_index_to_vf_index(port_index); + + debugfs_create_u16("tx_share", 0400, nsim_dev_port->ddir, + &nsim_bus_dev->vfconfigs[vf_id].min_tx_rate); + debugfs_create_u16("tx_max", 0400, nsim_dev_port->ddir, + &nsim_bus_dev->vfconfigs[vf_id].max_tx_rate); + } debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); return 0; @@ -990,6 +999,67 @@ nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink, return 0; } +#define NSIM_LINK_SPEED_MAX 5000 /* Mbps */ +#define NSIM_LINK_SPEED_UNIT 125000 /* 1 Mbps given in bytes/sec to avoid + * u64 overflow during conversion from + * bytes to bits. + */ + +static int nsim_rate_bytes_to_units(char *name, u64 *rate, struct netlink_ext_ack *extack) +{ + u64 val; + u32 rem; + + val = div_u64_rem(*rate, NSIM_LINK_SPEED_UNIT, &rem); + if (rem) { + pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", + name, *rate); + NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps."); + return -EINVAL; + } + + if (val > NSIM_LINK_SPEED_MAX) { + pr_err("%s rate value %lluMbps exceed link maximum speed 5000Mbps.\n", + name, val); + NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed 5000Mbps."); + return -EINVAL; + } + *rate = val; + return 0; +} + +static int nsim_leaf_tx_share_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv; + struct nsim_bus_dev *nsim_bus_dev = nsim_dev_port->ns->nsim_bus_dev; + int vf_id = nsim_dev_port_index_to_vf_index(nsim_dev_port->port_index); + int err; + + err = nsim_rate_bytes_to_units("tx_share", &tx_share, extack); + if (err) + return err; + + nsim_bus_dev->vfconfigs[vf_id].min_tx_rate = tx_share; + return 0; +} + +static int nsim_leaf_tx_max_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv; + struct nsim_bus_dev *nsim_bus_dev = nsim_dev_port->ns->nsim_bus_dev; + int vf_id = nsim_dev_port_index_to_vf_index(nsim_dev_port->port_index); + int err; + + err = nsim_rate_bytes_to_units("tx_max", &tx_max, extack); + if (err) + return err; + + nsim_bus_dev->vfconfigs[vf_id].max_tx_rate = tx_max; + return 0; +} + static const struct devlink_ops nsim_dev_devlink_ops = { .eswitch_mode_set = nsim_devlink_eswitch_mode_set, .eswitch_mode_get = nsim_devlink_eswitch_mode_get, @@ -1005,6 +1075,8 @@ static const struct devlink_ops nsim_dev_devlink_ops = { .trap_group_set = nsim_dev_devlink_trap_group_set, .trap_policer_set = nsim_dev_devlink_trap_policer_set, .trap_policer_counter_get = nsim_dev_devlink_trap_policer_counter_get, + .rate_leaf_tx_share_set = nsim_leaf_tx_share_set, + .rate_leaf_tx_max_set = nsim_leaf_tx_max_set, }; #define NSIM_DEV_MAX_MACS_DEFAULT 32 From 31f0723336063f20ce81cc16ca4775979ff0a26b Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:24 +0300 Subject: [PATCH 11/18] selftest: netdevsim: Add devlink port shared/max tx rate test Test verifies that netdevsim VFs can set and retrieve shared/max tx rate through new devlink API. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- .../drivers/net/netdevsim/devlink.sh | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index c654be0e8300f..05dcefc054d1a 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -516,6 +516,45 @@ rate_leafs_get() '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))' } +rate_attr_set() +{ + local handle=$1 + local name=$2 + local value=$3 + local units=$4 + + devlink port function rate set $handle $name $value$units +} + +rate_attr_get() +{ + local handle=$1 + local name=$2 + + cmd_jq "devlink port function rate show $handle -j" '.[][].'$name +} + +rate_attr_tx_rate_check() +{ + local handle=$1 + local name=$2 + local rate=$3 + local debug_file=$4 + + rate_attr_set $handle $name $rate mbit + check_err $? "Failed to set $name value" + + local debug_value=$(cat $debug_file) + check_err $? "Failed to read $name value from debugfs" + [ "$debug_value" == "$rate" ] + check_err $? "Unexpected $name debug value $debug_value != $rate" + + local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 )) + check_err $? "Failed to get $name attr value" + [ "$api_value" == "$rate" ] + check_err $? "Unexpected $name attr value $api_value != $rate" +} + rate_test() { RET=0 @@ -527,6 +566,22 @@ rate_test() [ "$num_leafs" == "$VF_COUNT" ] check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs" + rate=10 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_share $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share + rate=$(($rate+10)) + done + + rate=100 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_max $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max + rate=$(($rate+100)) + done + log_test "rate test" } From a8ecb93ef03de4c59fb6289f99bc9616a852c917 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:25 +0300 Subject: [PATCH 12/18] devlink: Introduce rate nodes Implement support for DEVLINK_CMD_RATE_{NEW|DEL} commands that are used to create and delete devlink rate nodes. Add new attribute DEVLINK_ATTR_RATE_NODE_NAME that specify node name string. The node name is an alphanumeric identifier. No valid node name can be a devlink port index, eg. decimal number. Extend devlink ops with new callbacks rate_node_{new|del}() and rate_node_tx_{share|max}_set() to allow supporting drivers to implement ports rate grouping and setting tx rate of rate nodes through devlink. Expose devlink_rate_nodes_destroy() function to allow vendor driver do proper cleanup of internally allocated resources for the nodes if the driver goes down or due to any other reasons which requires nodes to be destroyed. Disallow moving device from switchdev to legacy mode if any node exists on that device. User must explicitly delete nodes before switching mode. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate set netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit Add + set command can be combined: $ devlink port function rate add netdevsim/netdevsim10/group1 \ tx_share 10mbit tx_max 100mbit $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node tx_share 10mbit tx_max 100mbit $ devlink port function rate del netdevsim/netdevsim10/group1 Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 14 ++- include/uapi/linux/devlink.h | 3 + net/core/devlink.c | 238 +++++++++++++++++++++++++++++++++-- 3 files changed, 247 insertions(+), 8 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 46d553545f83c..13162b5791247 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -142,7 +142,10 @@ struct devlink_rate { u64 tx_share; u64 tx_max; - struct devlink_port *devlink_port; + union { + struct devlink_port *devlink_port; + char *name; + }; }; struct devlink_port { @@ -1475,6 +1478,14 @@ struct devlink_ops { u64 tx_share, struct netlink_ext_ack *extack); int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); + int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_node_new)(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack); + int (*rate_node_del)(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) @@ -1536,6 +1547,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, bool external); int devlink_rate_leaf_create(struct devlink_port *port, void *priv); void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); +void devlink_rate_nodes_destroy(struct devlink *devlink); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ae94cd2a1078e..7e15853b77fe4 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -213,6 +213,7 @@ enum devlink_port_flavour { enum devlink_rate_type { DEVLINK_RATE_TYPE_LEAF, + DEVLINK_RATE_TYPE_NODE, }; enum devlink_param_cmode { @@ -547,6 +548,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TYPE, /* u16 */ DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ + DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 37839fd5ca73f..589d750b70e4e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -196,6 +196,12 @@ devlink_rate_is_leaf(struct devlink_rate *devlink_rate) return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF; } +static inline bool +devlink_rate_is_node(struct devlink_rate *devlink_rate) +{ + return devlink_rate->type == DEVLINK_RATE_TYPE_NODE; +} + static struct devlink_rate * devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info) { @@ -209,6 +215,55 @@ devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info) return devlink_rate ?: ERR_PTR(-ENODEV); } +static struct devlink_rate * +devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name) +{ + static struct devlink_rate *devlink_rate; + + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { + if (devlink_rate_is_node(devlink_rate) && + !strcmp(node_name, devlink_rate->name)) + return devlink_rate; + } + return ERR_PTR(-ENODEV); +} + +static struct devlink_rate * +devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) +{ + const char *rate_node_name; + size_t len; + + if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME]) + return ERR_PTR(-EINVAL); + rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]); + len = strlen(rate_node_name); + /* Name cannot be empty or decimal number */ + if (!len || strspn(rate_node_name, "0123456789") == len) + return ERR_PTR(-EINVAL); + + return devlink_rate_node_get_by_name(devlink, rate_node_name); +} + +static struct devlink_rate * +devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + return devlink_rate_node_get_from_attrs(devlink, info->attrs); +} + +static struct devlink_rate * +devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + struct nlattr **attrs = info->attrs; + + if (attrs[DEVLINK_ATTR_PORT_INDEX]) + return devlink_rate_leaf_get_from_info(devlink, info); + else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME]) + return devlink_rate_node_get_from_info(devlink, info); + else + return ERR_PTR(-EINVAL); +} + struct devlink_sb { struct list_head list; unsigned int index; @@ -428,12 +483,13 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) #define DEVLINK_NL_FLAG_NEED_PORT BIT(0) #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) #define DEVLINK_NL_FLAG_NEED_RATE BIT(2) +#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(3) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(4) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -465,12 +521,21 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) { struct devlink_rate *devlink_rate; - devlink_rate = devlink_rate_leaf_get_from_info(devlink, info); + devlink_rate = devlink_rate_get_from_info(devlink, info); if (IS_ERR(devlink_rate)) { err = PTR_ERR(devlink_rate); goto unlock; } info->user_ptr[1] = devlink_rate; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) { + struct devlink_rate *rate_node; + + rate_node = devlink_rate_node_get_from_info(devlink, info); + if (IS_ERR(rate_node)) { + err = PTR_ERR(rate_node); + goto unlock; + } + info->user_ptr[1] = rate_node; } return 0; @@ -801,6 +866,10 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_rate->devlink_port->index)) goto nla_put_failure; + } else if (devlink_rate_is_node(devlink_rate)) { + if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME, + devlink_rate->name)) + goto nla_put_failure; } if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE, @@ -1508,13 +1577,17 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, struct genl_info *info) { struct nlattr **attrs = info->attrs; + int err = -EOPNOTSUPP; u64 rate; - int err; if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) { rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]); - err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv, - rate, info->extack); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv, + rate, info->extack); if (err) return err; devlink_rate->tx_share = rate; @@ -1522,8 +1595,12 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) { rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]); - err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv, - rate, info->extack); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv, + rate, info->extack); if (err) return err; devlink_rate->tx_max = rate; @@ -1547,6 +1624,15 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs"); return false; } + } else if (type == DEVLINK_RATE_TYPE_NODE) { + if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes"); + return false; + } } else { WARN_ON("Unknown type of rate object"); return false; @@ -1573,6 +1659,78 @@ static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, return err; } +static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_rate *rate_node; + const struct devlink_ops *ops; + int err; + + ops = devlink->ops; + if (!ops || !ops->rate_node_new || !ops->rate_node_del) { + NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported"); + return -EOPNOTSUPP; + } + + if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE)) + return -EOPNOTSUPP; + + rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs); + if (!IS_ERR(rate_node)) + return -EEXIST; + else if (rate_node == ERR_PTR(-EINVAL)) + return -EINVAL; + + rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL); + if (!rate_node) + return -ENOMEM; + + rate_node->devlink = devlink; + rate_node->type = DEVLINK_RATE_TYPE_NODE; + rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL); + if (!rate_node->name) { + err = -ENOMEM; + goto err_strdup; + } + + err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack); + if (err) + goto err_node_new; + + err = devlink_nl_rate_set(rate_node, ops, info); + if (err) + goto err_rate_set; + + list_add(&rate_node->list, &devlink->rate_list); + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); + return 0; + +err_rate_set: + ops->rate_node_del(rate_node, rate_node->priv, info->extack); +err_node_new: + kfree(rate_node->name); +err_strdup: + kfree(rate_node); + return err; +} + +static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_rate *rate_node = info->user_ptr[1]; + struct devlink *devlink = rate_node->devlink; + const struct devlink_ops *ops = devlink->ops; + int err; + + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL); + err = ops->rate_node_del(rate_node, rate_node->priv, info->extack); + list_del(&rate_node->list); + kfree(rate_node->name); + kfree(rate_node); + return err; +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -2441,6 +2599,30 @@ static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } +static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct devlink_rate *devlink_rate; + u16 old_mode; + int err; + + if (!devlink->ops->eswitch_mode_get) + return -EOPNOTSUPP; + err = devlink->ops->eswitch_mode_get(devlink, &old_mode); + if (err) + return err; + + if (old_mode == mode) + return 0; + + list_for_each_entry(devlink_rate, &devlink->rate_list, list) + if (devlink_rate_is_node(devlink_rate)) { + NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists."); + return -EBUSY; + } + return 0; +} + static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) { @@ -2455,6 +2637,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, if (!ops->eswitch_mode_set) return -EOPNOTSUPP; mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + err = devlink_rate_nodes_check(devlink, mode, info->extack); + if (err) + return err; err = ops->eswitch_mode_set(devlink, mode, info->extack); if (err) return err; @@ -8038,6 +8223,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -8076,6 +8262,17 @@ static const struct genl_small_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, }, + { + .cmd = DEVLINK_CMD_RATE_NEW, + .doit = devlink_nl_cmd_rate_new_doit, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = DEVLINK_CMD_RATE_DEL, + .doit = devlink_nl_cmd_rate_del_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE, + }, { .cmd = DEVLINK_CMD_PORT_SPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, @@ -8933,6 +9130,33 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port) } EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy); +/** + * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device + * + * @devlink: devlink instance + * + * Destroy all rate nodes on specified device + * + * Context: Takes and release devlink->lock . + */ +void devlink_rate_nodes_destroy(struct devlink *devlink) +{ + static struct devlink_rate *devlink_rate, *tmp; + const struct devlink_ops *ops = devlink->ops; + + mutex_lock(&devlink->lock); + list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) { + if (devlink_rate_is_node(devlink_rate)) { + ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL); + list_del(&devlink_rate->list); + kfree(devlink_rate->name); + kfree(devlink_rate); + } + } + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy); + static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, char *name, size_t len) { From 885226f5680e099ec54564332ea85412372b4958 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:26 +0300 Subject: [PATCH 13/18] netdevsim: Implement support for devlink rate nodes Implement new devlink ops that allow creation, deletion and setting of shared/max tx rate of devlink rate nodes through devlink API. Expose rate node and it's tx rates to netdevsim debugfs. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 80 +++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdevsim.h | 1 + 2 files changed, 81 insertions(+) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 5be6f7e86711f..9f01b6c04cfdd 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -263,12 +263,16 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) nsim_dev->ddir, nsim_dev->nsim_bus_dev, &nsim_dev_max_vfs_fops); + nsim_dev->nodes_ddir = debugfs_create_dir("rate_nodes", nsim_dev->ddir); + if (IS_ERR(nsim_dev->nodes_ddir)) + return PTR_ERR(nsim_dev->nodes_ddir); nsim_udp_tunnels_debugfs_create(nsim_dev); return 0; } static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev) { + debugfs_remove_recursive(nsim_dev->nodes_ddir); debugfs_remove_recursive(nsim_dev->ports_ddir); debugfs_remove_recursive(nsim_dev->ddir); } @@ -451,8 +455,10 @@ static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev) static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port); int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack) { + struct devlink *devlink = priv_to_devlink(nsim_dev); struct nsim_dev_port *nsim_dev_port, *tmp; + devlink_rate_nodes_destroy(devlink); mutex_lock(&nsim_dev->port_list_lock); list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) if (nsim_dev_port_is_vf(nsim_dev_port)) @@ -1060,6 +1066,76 @@ static int nsim_leaf_tx_max_set(struct devlink_rate *devlink_rate, void *priv, return 0; } +struct nsim_rate_node { + struct dentry *ddir; + u16 tx_share; + u16 tx_max; +}; + +static int nsim_node_tx_share_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + int err; + + err = nsim_rate_bytes_to_units("tx_share", &tx_share, extack); + if (err) + return err; + + nsim_node->tx_share = tx_share; + return 0; +} + +static int nsim_node_tx_max_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + int err; + + err = nsim_rate_bytes_to_units("tx_max", &tx_max, extack); + if (err) + return err; + + nsim_node->tx_max = tx_max; + return 0; +} + +static int nsim_rate_node_new(struct devlink_rate *node, void **priv, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(node->devlink); + struct nsim_rate_node *nsim_node; + + if (!nsim_esw_mode_is_switchdev(nsim_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Node creation allowed only in switchdev mode."); + return -EOPNOTSUPP; + } + + nsim_node = kzalloc(sizeof(*nsim_node), GFP_KERNEL); + if (!nsim_node) + return -ENOMEM; + + nsim_node->ddir = debugfs_create_dir(node->name, nsim_dev->nodes_ddir); + if (!nsim_node->ddir) { + kfree(nsim_node); + return -ENOMEM; + } + debugfs_create_u16("tx_share", 0400, nsim_node->ddir, &nsim_node->tx_share); + debugfs_create_u16("tx_max", 0400, nsim_node->ddir, &nsim_node->tx_max); + *priv = nsim_node; + return 0; +} + +static int nsim_rate_node_del(struct devlink_rate *node, void *priv, + struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + + debugfs_remove_recursive(nsim_node->ddir); + kfree(nsim_node); + return 0; +} + static const struct devlink_ops nsim_dev_devlink_ops = { .eswitch_mode_set = nsim_devlink_eswitch_mode_set, .eswitch_mode_get = nsim_devlink_eswitch_mode_get, @@ -1077,6 +1153,10 @@ static const struct devlink_ops nsim_dev_devlink_ops = { .trap_policer_counter_get = nsim_dev_devlink_trap_policer_counter_get, .rate_leaf_tx_share_set = nsim_leaf_tx_share_set, .rate_leaf_tx_max_set = nsim_leaf_tx_max_set, + .rate_node_tx_share_set = nsim_node_tx_share_set, + .rate_node_tx_max_set = nsim_node_tx_max_set, + .rate_node_new = nsim_rate_node_new, + .rate_node_del = nsim_rate_node_del, }; #define NSIM_DEV_MAX_MACS_DEFAULT 32 diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 13a0042124f74..d62a1386f9f1d 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -222,6 +222,7 @@ struct nsim_dev { struct dentry *ports_ddir; struct dentry *take_snapshot; struct dentry *max_vfs; + struct dentry *nodes_ddir; struct bpf_offload_dev *bpf_dev; bool bpf_bind_accept; bool bpf_bind_verifier_accept; From 413ee943d788610b0675cb343fac5a23d7877613 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:27 +0300 Subject: [PATCH 14/18] selftest: netdevsim: Add devlink rate nodes test Test verifies that it is possible to create, delete and set min/max tx rate of devlink rate node on netdevsim VF. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- .../drivers/net/netdevsim/devlink.sh | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 05dcefc054d1a..301d92069f996 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -516,6 +516,14 @@ rate_leafs_get() '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))' } +rate_nodes_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))' +} + rate_attr_set() { local handle=$1 @@ -555,6 +563,20 @@ rate_attr_tx_rate_check() check_err $? "Unexpected $name attr value $api_value != $rate" } +rate_node_add() +{ + local handle=$1 + + devlink port function rate add $handle +} + +rate_node_del() +{ + local handle=$1 + + devlink port function rate del $handle +} + rate_test() { RET=0 @@ -582,6 +604,29 @@ rate_test() rate=$(($rate+100)) done + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 1 ] + check_err $? "Expected 1 rate node in output but got $num_nodes" + + local node_tx_share=10 + rate_attr_tx_rate_check $node1 tx_share $node_tx_share \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share + + local node_tx_max=100 + rate_attr_tx_rate_check $node1 tx_max $node_tx_max \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max + + rate_node_del "$node1" + check_err $? "Failed to delete node $node1" + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 0 ] + check_err $? "Expected 0 rate node but got $num_nodes" + log_test "rate test" } From d7555984507822458b32a6405881038241d140be Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:28 +0300 Subject: [PATCH 15/18] devlink: Allow setting parent node of rate objects Refactor DEVLINK_CMD_RATE_{GET|SET} command handlers to support setting a node as a parent for another rate object (leaf or node) by means of new attribute DEVLINK_ATTR_RATE_PARENT_NODE_NAME. Extend devlink ops with new callbacks rate_{leaf|node}_parent_set() to set node as a parent for rate object to allow supporting drivers to implement rate grouping through devlink. Driver implementations are allowed to support leafs or node children only. Invoking callback with NULL as parent should be threated by the driver as unset parent action. Extend rate object struct with reference counter to disallow deleting a node with any child pointing to it. User should unset parent for the child explicitly. Example: $ devlink port function rate add netdevsim/netdevsim10/group1 $ devlink port function rate add netdevsim/netdevsim10/group2 $ devlink port function rate set netdevsim/netdevsim10/group1 parent group2 $ devlink port function rate show netdevsim/netdevsim10/group1 netdevsim/netdevsim10/group1: type node parent group2 $ devlink port function rate set netdevsim/netdevsim10/group1 noparent Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 14 +++- include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 125 ++++++++++++++++++++++++++++++++++- 3 files changed, 137 insertions(+), 3 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 13162b5791247..eb045f1b5d1df 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -142,9 +142,13 @@ struct devlink_rate { u64 tx_share; u64 tx_max; + struct devlink_rate *parent; union { struct devlink_port *devlink_port; - char *name; + struct { + char *name; + refcount_t refcnt; + }; }; }; @@ -1486,6 +1490,14 @@ struct devlink_ops { struct netlink_ext_ack *extack); int (*rate_node_del)(struct devlink_rate *rate_node, void *priv, struct netlink_ext_ack *extack); + int (*rate_leaf_parent_set)(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack); + int (*rate_node_parent_set)(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 7e15853b77fe4..32f53a0069d6c 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -549,6 +549,7 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + DEVLINK_ATTR_RATE_PARENT_NODE_NAME, /* string */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 589d750b70e4e..464f564082475 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -880,6 +880,11 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, devlink_rate->tx_max, DEVLINK_ATTR_PAD)) goto nla_put_failure; + if (devlink_rate->parent) + if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, + devlink_rate->parent->name)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -1152,6 +1157,18 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } +static bool +devlink_rate_is_parent_node(struct devlink_rate *devlink_rate, + struct devlink_rate *parent) +{ + while (parent) { + if (parent == devlink_rate) + return true; + parent = parent->parent; + } + return false; +} + static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; @@ -1572,11 +1589,75 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, return devlink->ops->port_del(devlink, port_index, extack); } +static int +devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate, + struct genl_info *info, + struct nlattr *nla_parent) +{ + struct devlink *devlink = devlink_rate->devlink; + const char *parent_name = nla_data(nla_parent); + const struct devlink_ops *ops = devlink->ops; + size_t len = strlen(parent_name); + struct devlink_rate *parent; + int err = -EOPNOTSUPP; + + parent = devlink_rate->parent; + if (parent && len) { + NL_SET_ERR_MSG_MOD(info->extack, "Rate object already has parent."); + return -EBUSY; + } else if (parent && !len) { + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_parent_set(devlink_rate, NULL, + devlink_rate->priv, NULL, + info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_parent_set(devlink_rate, NULL, + devlink_rate->priv, NULL, + info->extack); + if (err) + return err; + + refcount_dec(&parent->refcnt); + devlink_rate->parent = NULL; + } else if (!parent && len) { + parent = devlink_rate_node_get_by_name(devlink, parent_name); + if (IS_ERR(parent)) + return -ENODEV; + + if (parent == devlink_rate) { + NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed"); + return -EINVAL; + } + + if (devlink_rate_is_node(devlink_rate) && + devlink_rate_is_parent_node(devlink_rate, parent->parent)) { + NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node."); + return -EEXIST; + } + + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_parent_set(devlink_rate, parent, + devlink_rate->priv, parent->priv, + info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_parent_set(devlink_rate, parent, + devlink_rate->priv, parent->priv, + info->extack); + if (err) + return err; + + refcount_inc(&parent->refcnt); + devlink_rate->parent = parent; + } + + return 0; +} + static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, const struct devlink_ops *ops, struct genl_info *info) { - struct nlattr **attrs = info->attrs; + struct nlattr *nla_parent, **attrs = info->attrs; int err = -EOPNOTSUPP; u64 rate; @@ -1606,6 +1687,14 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, devlink_rate->tx_max = rate; } + nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME]; + if (nla_parent) { + err = devlink_nl_rate_parent_node_set(devlink_rate, info, + nla_parent); + if (err) + return err; + } + return 0; } @@ -1624,6 +1713,11 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs"); return false; } + if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && + !ops->rate_leaf_parent_set) { + NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs"); + return false; + } } else if (type == DEVLINK_RATE_TYPE_NODE) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes"); @@ -1633,6 +1727,11 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes"); return false; } + if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && + !ops->rate_node_parent_set) { + NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes"); + return false; + } } else { WARN_ON("Unknown type of rate object"); return false; @@ -1702,6 +1801,7 @@ static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, if (err) goto err_rate_set; + refcount_set(&rate_node->refcnt, 1); list_add(&rate_node->list, &devlink->rate_list); devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); return 0; @@ -1723,8 +1823,15 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, const struct devlink_ops *ops = devlink->ops; int err; + if (refcount_read(&rate_node->refcnt) > 1) { + NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node."); + return -EBUSY; + } + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL); err = ops->rate_node_del(rate_node, rate_node->priv, info->extack); + if (rate_node->parent) + refcount_dec(&rate_node->parent->refcnt); list_del(&rate_node->list); kfree(rate_node->name); kfree(rate_node); @@ -8224,6 +8331,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -9135,7 +9243,8 @@ EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy); * * @devlink: devlink instance * - * Destroy all rate nodes on specified device + * Unset parent for all rate objects and destroy all rate nodes + * on specified device. * * Context: Takes and release devlink->lock . */ @@ -9145,6 +9254,18 @@ void devlink_rate_nodes_destroy(struct devlink *devlink) const struct devlink_ops *ops = devlink->ops; mutex_lock(&devlink->lock); + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { + if (!devlink_rate->parent) + continue; + + refcount_dec(&devlink_rate->parent->refcnt); + if (devlink_rate_is_leaf(devlink_rate)) + ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv, + NULL, NULL); + else if (devlink_rate_is_node(devlink_rate)) + ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv, + NULL, NULL); + } list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) { if (devlink_rate_is_node(devlink_rate)) { ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL); From f3d101b485ca2c831088d72878fe6e7416676cb8 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:29 +0300 Subject: [PATCH 16/18] netdevsim: Allow setting parent node of rate objects Implement new devlink ops that allow setting rate node as a parent for devlink port (leaf) or another devlink node through devlink API. Expose parent names to netdevsim debugfs in read only mode. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 91 +++++++++++++++++++++++++++++-- drivers/net/netdevsim/netdevsim.h | 2 + 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 9f01b6c04cfdd..527b019ae0b2d 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -222,6 +222,7 @@ static const struct file_operations nsim_dev_max_vfs_fops = { static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) { char dev_ddir_name[sizeof(DRV_NAME) + 10]; + int err; sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id); nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir); @@ -264,10 +265,17 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) nsim_dev->nsim_bus_dev, &nsim_dev_max_vfs_fops); nsim_dev->nodes_ddir = debugfs_create_dir("rate_nodes", nsim_dev->ddir); - if (IS_ERR(nsim_dev->nodes_ddir)) - return PTR_ERR(nsim_dev->nodes_ddir); + if (IS_ERR(nsim_dev->nodes_ddir)) { + err = PTR_ERR(nsim_dev->nodes_ddir); + goto err_out; + } nsim_udp_tunnels_debugfs_create(nsim_dev); return 0; + +err_out: + debugfs_remove_recursive(nsim_dev->ports_ddir); + debugfs_remove_recursive(nsim_dev->ddir); + return err; } static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev) @@ -277,6 +285,27 @@ static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev) debugfs_remove_recursive(nsim_dev->ddir); } +static ssize_t nsim_dev_rate_parent_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos) +{ + char **name_ptr = file->private_data; + size_t len; + + if (!*name_ptr) + return 0; + + len = strlen(*name_ptr); + return simple_read_from_buffer(data, count, ppos, *name_ptr, len); +} + +static const struct file_operations nsim_dev_rate_parent_fops = { + .open = simple_open, + .read = nsim_dev_rate_parent_read, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { @@ -299,6 +328,11 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, &nsim_bus_dev->vfconfigs[vf_id].min_tx_rate); debugfs_create_u16("tx_max", 0400, nsim_dev_port->ddir, &nsim_bus_dev->vfconfigs[vf_id].max_tx_rate); + nsim_dev_port->rate_parent = debugfs_create_file("rate_parent", + 0400, + nsim_dev_port->ddir, + &nsim_dev_port->parent_name, + &nsim_dev_rate_parent_fops); } debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); @@ -1068,6 +1102,8 @@ static int nsim_leaf_tx_max_set(struct devlink_rate *devlink_rate, void *priv, struct nsim_rate_node { struct dentry *ddir; + struct dentry *rate_parent; + char *parent_name; u16 tx_share; u16 tx_max; }; @@ -1105,6 +1141,7 @@ static int nsim_rate_node_new(struct devlink_rate *node, void **priv, { struct nsim_dev *nsim_dev = devlink_priv(node->devlink); struct nsim_rate_node *nsim_node; + int err; if (!nsim_esw_mode_is_switchdev(nsim_dev)) { NL_SET_ERR_MSG_MOD(extack, "Node creation allowed only in switchdev mode."); @@ -1117,13 +1154,28 @@ static int nsim_rate_node_new(struct devlink_rate *node, void **priv, nsim_node->ddir = debugfs_create_dir(node->name, nsim_dev->nodes_ddir); if (!nsim_node->ddir) { - kfree(nsim_node); - return -ENOMEM; + err = -ENOMEM; + goto err_node; } debugfs_create_u16("tx_share", 0400, nsim_node->ddir, &nsim_node->tx_share); debugfs_create_u16("tx_max", 0400, nsim_node->ddir, &nsim_node->tx_max); + nsim_node->rate_parent = debugfs_create_file("rate_parent", 0400, + nsim_node->ddir, + &nsim_node->parent_name, + &nsim_dev_rate_parent_fops); + if (IS_ERR(nsim_node->rate_parent)) { + err = PTR_ERR(nsim_node->rate_parent); + goto err_ddir; + } + *priv = nsim_node; return 0; + +err_ddir: + debugfs_remove_recursive(nsim_node->ddir); +err_node: + kfree(nsim_node); + return err; } static int nsim_rate_node_del(struct devlink_rate *node, void *priv, @@ -1131,11 +1183,40 @@ static int nsim_rate_node_del(struct devlink_rate *node, void *priv, { struct nsim_rate_node *nsim_node = priv; + debugfs_remove(nsim_node->rate_parent); debugfs_remove_recursive(nsim_node->ddir); kfree(nsim_node); return 0; } +static int nsim_rate_leaf_parent_set(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv_child; + + if (parent) + nsim_dev_port->parent_name = parent->name; + else + nsim_dev_port->parent_name = NULL; + return 0; +} + +static int nsim_rate_node_parent_set(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv_child; + + if (parent) + nsim_node->parent_name = parent->name; + else + nsim_node->parent_name = NULL; + return 0; +} + static const struct devlink_ops nsim_dev_devlink_ops = { .eswitch_mode_set = nsim_devlink_eswitch_mode_set, .eswitch_mode_get = nsim_devlink_eswitch_mode_get, @@ -1157,6 +1238,8 @@ static const struct devlink_ops nsim_dev_devlink_ops = { .rate_node_tx_max_set = nsim_node_tx_max_set, .rate_node_new = nsim_rate_node_new, .rate_node_del = nsim_rate_node_del, + .rate_leaf_parent_set = nsim_rate_leaf_parent_set, + .rate_node_parent_set = nsim_rate_node_parent_set, }; #define NSIM_DEV_MAX_MACS_DEFAULT 32 diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index d62a1386f9f1d..cdfdf2a995780 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -211,6 +211,8 @@ struct nsim_dev_port { unsigned int port_index; enum nsim_dev_port_type port_type; struct dentry *ddir; + struct dentry *rate_parent; + char *parent_name; struct netdevsim *ns; }; From 1a9c0482f5557f5906294d3327a981bf842ba436 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:30 +0300 Subject: [PATCH 17/18] selftest: netdevsim: Add devlink rate grouping test Test verifies that netdevsim correctly implements devlink ops callbacks that set node as a parent of devlink leaf or node rate object. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- .../drivers/net/netdevsim/devlink.sh | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 301d92069f996..9de1d123f4f5d 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -563,6 +563,26 @@ rate_attr_tx_rate_check() check_err $? "Unexpected $name attr value $api_value != $rate" } +rate_attr_parent_check() +{ + local handle=$1 + local parent=$2 + local debug_file=$3 + + rate_attr_set $handle parent $parent + check_err $? "Failed to set parent" + + debug_value=$(cat $debug_file) + check_err $? "Failed to get parent debugfs value" + [ "$debug_value" == "$parent" ] + check_err $? "Unexpected parent debug value $debug_value != $parent" + + api_value=$(rate_attr_get $r_obj parent) + check_err $? "Failed to get parent attr value" + [ "$api_value" == "$parent" ] + check_err $? "Unexpected parent attr value $api_value != $parent" +} + rate_node_add() { local handle=$1 @@ -627,6 +647,28 @@ rate_test() [ $num_nodes == 0 ] check_err $? "Expected 0 rate node but got $num_nodes" + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + rate_attr_parent_check $r_obj $node1_name \ + $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent + + local node2_name='group2' + local node2="$DL_HANDLE/$node2_name" + rate_node_add "$node2" + check_err $? "Failed to add node $node2" + + rate_attr_parent_check $node2 $node1_name \ + $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent + rate_node_del "$node2" + check_err $? "Failed to delete node $node2" + rate_attr_set "$r_obj" noparent + check_err $? "Failed to unset $r_obj parent node" + rate_node_del "$node1" + check_err $? "Failed to delete node $node1" + log_test "rate test" } From b62767e7bab3a397166a2fa36b409e5e2859f100 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:31 +0300 Subject: [PATCH 18/18] Documentation: devlink rate objects Add devlink rate objects section at devlink port documentation. Add devlink rate support info at netdevsim devlink documentation. Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- .../networking/devlink/devlink-port.rst | 35 +++++++++++++++++++ .../networking/devlink/netdevsim.rst | 26 ++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index ab790e7980b81..7627b1da01f26 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -164,6 +164,41 @@ device to instantiate the subfunction device on particular PCI function. A subfunction device is created on the :ref:`Documentation/driver-api/auxiliary_bus.rst `. At this point a matching subfunction driver binds to the subfunction's auxiliary device. +Rate object management +====================== + +Devlink provides API to manage tx rates of single devlink port or a group. +This is done through rate objects, which can be one of the two types: + +``leaf`` + Represents a single devlink port; created/destroyed by the driver. Since leaf + have 1to1 mapping to its devlink port, in user space it is referred as + ``pci//``; + +``node`` + Represents a group of rate objects (leafs and/or nodes); created/deleted by + request from the userspace; initially empty (no rate objects added). In + userspace it is referred as ``pci//``, where + ``node_name`` can be any identifier, except decimal number, to avoid + collisions with leafs. + +API allows to configure following rate object's parameters: + +``tx_share`` + Minimum TX rate value shared among all other rate objects, or rate objects + that parts of the parent group, if it is a part of the same group. + +``tx_max`` + Maximum TX rate value. + +``parent`` + Parent node name. Parent node rate limits are considered as additional limits + to all node children limits. ``tx_max`` is an upper limit for children. + ``tx_share`` is a total bandwidth distributed among children. + +Driver implementations are allowed to support both or either rate object types +and setting methods of their parameters. + Terms and Definitions ===================== diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst index 02c2d20dc6732..8a292fb5aaea3 100644 --- a/Documentation/networking/devlink/netdevsim.rst +++ b/Documentation/networking/devlink/netdevsim.rst @@ -57,6 +57,32 @@ entries, FIB rule entries and nexthops that the driver will allow. $ devlink resource set netdevsim/netdevsim0 path /nexthops size 16 $ devlink dev reload netdevsim/netdevsim0 +Rate objects +============ + +The ``netdevsim`` driver supports rate objects management, which includes: + +- registerging/unregistering leaf rate objects per VF devlink port; +- creation/deletion node rate objects; +- setting tx_share and tx_max rate values for any rate object type; +- setting parent node for any rate object type. + +Rate nodes and it's parameters are exposed in ``netdevsim`` debugfs in RO mode. +For example created rate node with name ``some_group``: + +.. code:: shell + + $ ls /sys/kernel/debug/netdevsim/netdevsim0/rate_groups/some_group + rate_parent tx_max tx_share + +Same parameters are exposed for leaf objects in corresponding ports directories. +For ex.: + +.. code:: shell + + $ ls /sys/kernel/debug/netdevsim/netdevsim0/ports/1 + dev ethtool rate_parent tx_max tx_share + Driver-specific Traps =====================