From e52bc7c28ac9f54db6f86b19ed65c599def18c98 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 19 Feb 2016 09:23:59 -0500 Subject: [PATCH 1/8] lib/bitmap.c: conversion routines to/from u32 array Aimed at transferring bitmaps to/from user-space in a 32/64-bit agnostic way. Tested: unit tests (next patch) on qemu i386, x86_64, ppc, ppc64 BE and LE, ARM. Signed-off-by: David Decotigny Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/bitmap.h | 10 +++++ lib/bitmap.c | 89 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 9653fdb76a427..e9b0b9ab07e5a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -59,6 +59,8 @@ * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region + * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) + * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) */ /* @@ -163,6 +165,14 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); +extern unsigned int bitmap_from_u32array(unsigned long *bitmap, + unsigned int nbits, + const u32 *buf, + unsigned int nwords); +extern unsigned int bitmap_to_u32array(u32 *buf, + unsigned int nwords, + const unsigned long *bitmap, + unsigned int nbits); #ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else diff --git a/lib/bitmap.c b/lib/bitmap.c index 814814397cce3..c66da508cbf78 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -1059,6 +1061,93 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) } EXPORT_SYMBOL(bitmap_allocate_region); +/** + * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap + * @bitmap: array of unsigned longs, the destination bitmap, non NULL + * @nbits: number of bits in @bitmap + * @buf: array of u32 (in host byte order), the source bitmap, non NULL + * @nwords: number of u32 words in @buf + * + * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining + * bits between nword and nbits in @bitmap (if any) are cleared. In + * last word of @bitmap, the bits beyond nbits (if any) are kept + * unchanged. + * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits, + const u32 *buf, unsigned int nwords) +{ + unsigned int dst_idx, src_idx; + + for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) { + unsigned long part = 0; + + if (src_idx < nwords) + part = buf[src_idx++]; + +#if BITS_PER_LONG == 64 + if (src_idx < nwords) + part |= ((unsigned long) buf[src_idx++]) << 32; +#endif + + if (dst_idx < nbits/BITS_PER_LONG) + bitmap[dst_idx] = part; + else { + unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); + + bitmap[dst_idx] = (bitmap[dst_idx] & ~mask) + | (part & mask); + } + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_from_u32array); + +/** + * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap, non NULL + * @nwords: number of u32 words in @buf + * @bitmap: array of unsigned longs, the source bitmap, non NULL + * @nbits: number of bits in @bitmap + * + * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining + * bits after nbits in @buf (if any) are cleared. + * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_to_u32array(u32 *buf, unsigned int nwords, + const unsigned long *bitmap, unsigned int nbits) +{ + unsigned int dst_idx = 0, src_idx = 0; + + while (dst_idx < nwords) { + unsigned long part = 0; + + if (src_idx < BITS_TO_LONGS(nbits)) { + part = bitmap[src_idx]; + if (src_idx >= nbits/BITS_PER_LONG) + part &= BITMAP_LAST_WORD_MASK(nbits); + src_idx++; + } + + buf[dst_idx++] = part & 0xffffffffUL; + +#if BITS_PER_LONG == 64 + if (dst_idx < nwords) { + part >>= 32; + buf[dst_idx++] = part & 0xffffffffUL; + } +#endif + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_to_u32array); + /** * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. * @dst: destination buffer From 5fd003f56c2c584b62a0486ad25bbd4be02b8b6c Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 19 Feb 2016 09:24:00 -0500 Subject: [PATCH 2/8] test_bitmap: unit tests for lib/bitmap.c This is mainly testing bitmap construction and conversion to/from u32[] for now. Tested: qemu i386, x86_64, ppc, ppc64 BE and LE, ARM. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- lib/Kconfig.debug | 8 + lib/Makefile | 1 + lib/test_bitmap.c | 358 ++++++++++++++++++++++++++ tools/testing/selftests/lib/Makefile | 2 +- tools/testing/selftests/lib/bitmap.sh | 10 + 5 files changed, 378 insertions(+), 1 deletion(-) create mode 100644 lib/test_bitmap.c create mode 100755 tools/testing/selftests/lib/bitmap.sh diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ecb9e75614bf8..f890ee5e13859 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1738,6 +1738,14 @@ config TEST_KSTRTOX config TEST_PRINTF tristate "Test printf() family of functions at runtime" +config TEST_BITMAP + tristate "Test bitmap_*() family of functions at runtime" + default n + help + Enable this option to test the bitmap functions at boot. + + If unsure, say N. + config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" default n diff --git a/lib/Makefile b/lib/Makefile index a7c26a41a738b..dda4039588b1d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o +obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c new file mode 100644 index 0000000000000..e2cbd43d193cf --- /dev/null +++ b/lib/test_bitmap.c @@ -0,0 +1,358 @@ +/* + * Test cases for printf facility. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +static unsigned total_tests __initdata; +static unsigned failed_tests __initdata; + +static char pbl_buffer[PAGE_SIZE] __initdata; + + +static bool __init +__check_eq_uint(const char *srcfile, unsigned int line, + const unsigned int exp_uint, unsigned int x) +{ + if (exp_uint != x) { + pr_warn("[%s:%u] expected %u, got %u\n", + srcfile, line, exp_uint, x); + return false; + } + return true; +} + + +static bool __init +__check_eq_bitmap(const char *srcfile, unsigned int line, + const unsigned long *exp_bmap, unsigned int exp_nbits, + const unsigned long *bmap, unsigned int nbits) +{ + if (exp_nbits != nbits) { + pr_warn("[%s:%u] bitmap length mismatch: expected %u, got %u\n", + srcfile, line, exp_nbits, nbits); + return false; + } + + if (!bitmap_equal(exp_bmap, bmap, nbits)) { + pr_warn("[%s:%u] bitmaps contents differ: expected \"%*pbl\", got \"%*pbl\"\n", + srcfile, line, + exp_nbits, exp_bmap, nbits, bmap); + return false; + } + return true; +} + +static bool __init +__check_eq_pbl(const char *srcfile, unsigned int line, + const char *expected_pbl, + const unsigned long *bitmap, unsigned int nbits) +{ + snprintf(pbl_buffer, sizeof(pbl_buffer), "%*pbl", nbits, bitmap); + if (strcmp(expected_pbl, pbl_buffer)) { + pr_warn("[%s:%u] expected \"%s\", got \"%s\"\n", + srcfile, line, + expected_pbl, pbl_buffer); + return false; + } + return true; +} + +static bool __init +__check_eq_u32_array(const char *srcfile, unsigned int line, + const u32 *exp_arr, unsigned int exp_len, + const u32 *arr, unsigned int len) +{ + if (exp_len != len) { + pr_warn("[%s:%u] array length differ: expected %u, got %u\n", + srcfile, line, + exp_len, len); + return false; + } + + if (memcmp(exp_arr, arr, len*sizeof(*arr))) { + pr_warn("[%s:%u] array contents differ\n", srcfile, line); + print_hex_dump(KERN_WARNING, " exp: ", DUMP_PREFIX_OFFSET, + 32, 4, exp_arr, exp_len*sizeof(*exp_arr), false); + print_hex_dump(KERN_WARNING, " got: ", DUMP_PREFIX_OFFSET, + 32, 4, arr, len*sizeof(*arr), false); + return false; + } + + return true; +} + +#define __expect_eq(suffix, ...) \ + ({ \ + int result = 0; \ + total_tests++; \ + if (!__check_eq_ ## suffix(__FILE__, __LINE__, \ + ##__VA_ARGS__)) { \ + failed_tests++; \ + result = 1; \ + } \ + result; \ + }) + +#define expect_eq_uint(...) __expect_eq(uint, ##__VA_ARGS__) +#define expect_eq_bitmap(...) __expect_eq(bitmap, ##__VA_ARGS__) +#define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__) +#define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__) + +static void __init test_zero_fill_copy(void) +{ + DECLARE_BITMAP(bmap1, 1024); + DECLARE_BITMAP(bmap2, 1024); + + bitmap_zero(bmap1, 1024); + bitmap_zero(bmap2, 1024); + + /* single-word bitmaps */ + expect_eq_pbl("", bmap1, 23); + + bitmap_fill(bmap1, 19); + expect_eq_pbl("0-18", bmap1, 1024); + + bitmap_copy(bmap2, bmap1, 23); + expect_eq_pbl("0-18", bmap2, 1024); + + bitmap_fill(bmap2, 23); + expect_eq_pbl("0-22", bmap2, 1024); + + bitmap_copy(bmap2, bmap1, 23); + expect_eq_pbl("0-18", bmap2, 1024); + + bitmap_zero(bmap1, 23); + expect_eq_pbl("", bmap1, 1024); + + /* multi-word bitmaps */ + bitmap_zero(bmap1, 1024); + expect_eq_pbl("", bmap1, 1024); + + bitmap_fill(bmap1, 109); + expect_eq_pbl("0-108", bmap1, 1024); + + bitmap_copy(bmap2, bmap1, 1024); + expect_eq_pbl("0-108", bmap2, 1024); + + bitmap_fill(bmap2, 1024); + expect_eq_pbl("0-1023", bmap2, 1024); + + bitmap_copy(bmap2, bmap1, 1024); + expect_eq_pbl("0-108", bmap2, 1024); + + /* the following tests assume a 32- or 64-bit arch (even 128b + * if we care) + */ + + bitmap_fill(bmap2, 1024); + bitmap_copy(bmap2, bmap1, 109); /* ... but 0-padded til word length */ + expect_eq_pbl("0-108,128-1023", bmap2, 1024); + + bitmap_fill(bmap2, 1024); + bitmap_copy(bmap2, bmap1, 97); /* ... but aligned on word length */ + expect_eq_pbl("0-108,128-1023", bmap2, 1024); + + bitmap_zero(bmap2, 97); /* ... but 0-padded til word length */ + expect_eq_pbl("128-1023", bmap2, 1024); +} + +static void __init test_bitmap_u32_array_conversions(void) +{ + DECLARE_BITMAP(bmap1, 1024); + DECLARE_BITMAP(bmap2, 1024); + u32 exp_arr[32], arr[32]; + unsigned nbits; + + for (nbits = 0 ; nbits < 257 ; ++nbits) { + const unsigned int used_u32s = DIV_ROUND_UP(nbits, 32); + unsigned int i, rv; + + bitmap_zero(bmap1, nbits); + bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ + + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); + expect_eq_uint(nbits, rv); + + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, nbits, arr, used_u32s); + expect_eq_uint(nbits, rv); + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + for (i = 0 ; i < nbits ; ++i) { + /* + * test conversion bitmap -> u32[] + */ + + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ + + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); + expect_eq_uint(nbits, rv); + + /* 1st used u32 words contain expected bit set, the + * remaining words are left unchanged (0xff) + */ + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + + /* same, with longer array to fill + */ + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, 32, bmap1, nbits); + expect_eq_uint(nbits, rv); + + /* 1st used u32 words contain expected bit set, the + * remaining words are all 0s + */ + memset(exp_arr, 0, sizeof(exp_arr)); + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + /* + * test conversion u32[] -> bitmap + */ + + /* the 1st nbits of bmap2 are identical to + * bmap1, the remaining bits of bmap2 are left + * unchanged (all 1s) + */ + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, nbits, + exp_arr, used_u32s); + expect_eq_uint(nbits, rv); + + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + /* same, with more bits to fill + */ + memset(arr, 0xff, sizeof(arr)); /* garbage */ + memset(arr, 0, used_u32s*sizeof(u32)); + arr[i/32] = (1U<<(i%32)); + + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, 1024, arr, used_u32s); + expect_eq_uint(used_u32s*32, rv); + + /* the 1st nbits of bmap2 are identical to + * bmap1, the remaining bits of bmap2 are cleared + */ + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + + /* + * test short conversion bitmap -> u32[] (1 + * word too short) + */ + if (used_u32s > 1) { + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + bitmap_set(bmap1, nbits, + 1024 - nbits); /* garbage */ + memset(arr, 0xff, sizeof(arr)); + + rv = bitmap_to_u32array(arr, used_u32s - 1, + bmap1, nbits); + expect_eq_uint((used_u32s - 1)*32, rv); + + /* 1st used u32 words contain expected + * bit set, the remaining words are + * left unchanged (0xff) + */ + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, + (used_u32s-1)*sizeof(*exp_arr)); + if ((i/32) < (used_u32s - 1)) + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + } + + /* + * test short conversion u32[] -> bitmap (3 + * bits too short) + */ + if (nbits > 3) { + memset(arr, 0xff, sizeof(arr)); /* garbage */ + memset(arr, 0, used_u32s*sizeof(*arr)); + arr[i/32] = (1U<<(i%32)); + + bitmap_zero(bmap1, 1024); + rv = bitmap_from_u32array(bmap1, nbits - 3, + arr, used_u32s); + expect_eq_uint(nbits - 3, rv); + + /* we are expecting the bit < nbits - + * 3 (none otherwise), and the rest of + * bmap1 unchanged (0-filled) + */ + bitmap_zero(bmap2, 1024); + if (i < nbits - 3) + __set_bit(i, bmap2); + expect_eq_bitmap(bmap2, 1024, bmap1, 1024); + + /* do the same with bmap1 initially + * 1-filled + */ + + bitmap_fill(bmap1, 1024); + rv = bitmap_from_u32array(bmap1, nbits - 3, + arr, used_u32s); + expect_eq_uint(nbits - 3, rv); + + /* we are expecting the bit < nbits - + * 3 (none otherwise), and the rest of + * bmap1 unchanged (1-filled) + */ + bitmap_zero(bmap2, 1024); + if (i < nbits - 3) + __set_bit(i, bmap2); + bitmap_set(bmap2, nbits-3, 1024 - nbits + 3); + expect_eq_bitmap(bmap2, 1024, bmap1, 1024); + } + } + } +} + +static int __init test_bitmap_init(void) +{ + test_zero_fill_copy(); + test_bitmap_u32_array_conversions(); + + if (failed_tests == 0) + pr_info("all %u tests passed\n", total_tests); + else + pr_warn("failed %u out of %u tests\n", + failed_tests, total_tests); + + return failed_tests ? -EINVAL : 0; +} + +static void __exit test_bitmap_cleanup(void) +{ +} + +module_init(test_bitmap_init); +module_exit(test_bitmap_cleanup); + +MODULE_AUTHOR("david decotigny "); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index 47147b9685140..08360060ab146 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -3,6 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh +TEST_PROGS := printf.sh bitmap.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh new file mode 100755 index 0000000000000..2da187b6ddad5 --- /dev/null +++ b/tools/testing/selftests/lib/bitmap.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs bitmap infrastructure tests using test_bitmap kernel module + +if /sbin/modprobe -q test_bitmap; then + /sbin/modprobe -q -r test_bitmap + echo "bitmap: ok" +else + echo "bitmap: [FAIL]" + exit 1 +fi From ac2c7ad0e5d6030452c9af2fafd192e17fd04264 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Feb 2016 09:24:01 -0500 Subject: [PATCH 3/8] net/ethtool: introduce a new ioctl for per queue setting Introduce a new ioctl ETHTOOL_PERQUEUE for per queue parameters setting. The following patches will enable some SUB_COMMANDs for per queue setting. Signed-off-by: Kan Liang Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 17 +++++++++++++++++ net/core/ethtool.c | 27 +++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 190aea0faaf48..f15ae02621a19 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1202,6 +1202,21 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) #define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT) +#define MAX_NUM_QUEUE 4096 + +/** + * struct ethtool_per_queue_op - apply sub command to the queues in mask. + * @cmd: ETHTOOL_PERQUEUE + * @sub_command: the sub command which apply to each queues + * @queue_mask: Bitmap of the queues which sub command apply to + * @data: A complete command structure following for each of the queues addressed + */ +struct ethtool_per_queue_op { + __u32 cmd; + __u32 sub_command; + __u32 queue_mask[DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; + char data[]; +}; /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* Get settings. */ @@ -1285,6 +1300,8 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ #define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */ +#define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */ + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c2d3118b1395c..d640ecf71e744 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1888,13 +1888,27 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_per_queue_op per_queue_opt; + + if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) + return -EFAULT; + + switch (per_queue_opt.sub_command) { + + default: + return -EOPNOTSUPP; + }; +} + /* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) { struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); void __user *useraddr = ifr->ifr_data; - u32 ethcmd; + u32 ethcmd, sub_cmd; int rc; netdev_features_t old_features; @@ -1904,8 +1918,14 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) return -EFAULT; + if (ethcmd == ETHTOOL_PERQUEUE) { + if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd))) + return -EFAULT; + } else { + sub_cmd = ethcmd; + } /* Allow some commands to be done by anyone */ - switch (ethcmd) { + switch (sub_cmd) { case ETHTOOL_GSET: case ETHTOOL_GDRVINFO: case ETHTOOL_GMSGLVL: @@ -2135,6 +2155,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPHYSTATS: rc = ethtool_get_phy_stats(dev, useraddr); break; + case ETHTOOL_PERQUEUE: + rc = ethtool_set_per_queue(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } From 421797b1aa363cb897f29f7d365e068dc9d9db81 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Feb 2016 09:24:02 -0500 Subject: [PATCH 4/8] net/ethtool: support get coalesce per queue This patch implements sub command ETHTOOL_GCOALESCE for ioctl ETHTOOL_PERQUEUE. It introduces an interface get_per_queue_coalesce to get coalesce of each masked queue from device driver. Then the interrupt coalescing parameters will be copied back to user space one by one. Signed-off-by: Kan Liang Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 8 +++++++- net/core/ethtool.c | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 653dc9c4ebac7..de56600023a72 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -201,6 +201,11 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * @get_module_eeprom: Get the eeprom information from the plug-in module * @get_eee: Get Energy-Efficient (EEE) supported and status. * @set_eee: Set EEE status (enable/disable) as well as LPI timers. + * @get_per_queue_coalesce: Get interrupt coalescing parameters per queue. + * It must check that the given queue number is valid. If neither a RX nor + * a TX queue has this number, return -EINVAL. If only a RX queue or a TX + * queue has this number, set the inapplicable fields to ~0 and return 0. + * Returns a negative error code or zero. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -279,7 +284,8 @@ struct ethtool_ops { const struct ethtool_tunable *, void *); int (*set_tunable)(struct net_device *, const struct ethtool_tunable *, const void *); - + int (*get_per_queue_coalesce)(struct net_device *, u32, + struct ethtool_coalesce *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d640ecf71e744..2a6c3a26f63f7 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1888,6 +1888,38 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) +{ + u32 bit; + int ret; + DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); + + if (!dev->ethtool_ops->get_per_queue_coalesce) + return -EOPNOTSUPP; + + useraddr += sizeof(*per_queue_opt); + + bitmap_from_u32array(queue_mask, + MAX_NUM_QUEUE, + per_queue_opt->queue_mask, + DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + + for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { + struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; + + ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce); + if (ret != 0) + return ret; + if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) + return -EFAULT; + useraddr += sizeof(coalesce); + } + + return 0; +} + static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) { struct ethtool_per_queue_op per_queue_opt; @@ -1896,7 +1928,8 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) return -EFAULT; switch (per_queue_opt.sub_command) { - + case ETHTOOL_GCOALESCE: + return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); default: return -EOPNOTSUPP; }; From f38d138a7da6510a1184e3bc5f425deb187c3265 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Feb 2016 09:24:03 -0500 Subject: [PATCH 5/8] net/ethtool: support set coalesce per queue This patch implements sub command ETHTOOL_SCOALESCE for ioctl ETHTOOL_PERQUEUE. It introduces an interface set_per_queue_coalesce to set coalesce of each masked queue to device driver. The wanted coalesce information are stored in "data" for each masked queue, which can copy from userspace. If it fails to set coalesce to device driver, the value which already set to specific queue will be tried to rollback. Signed-off-by: Kan Liang Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 7 +++++ net/core/ethtool.c | 61 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index de56600023a72..472d7d7b01c2b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -206,6 +206,11 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * a TX queue has this number, return -EINVAL. If only a RX queue or a TX * queue has this number, set the inapplicable fields to ~0 and return 0. * Returns a negative error code or zero. + * @set_per_queue_coalesce: Set interrupt coalescing parameters per queue. + * It must check that the given queue number is valid. If neither a RX nor + * a TX queue has this number, return -EINVAL. If only a RX queue or a TX + * queue has this number, ignore the inapplicable fields. + * Returns a negative error code or zero. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -286,6 +291,8 @@ struct ethtool_ops { const struct ethtool_tunable *, const void *); int (*get_per_queue_coalesce)(struct net_device *, u32, struct ethtool_coalesce *); + int (*set_per_queue_coalesce)(struct net_device *, u32, + struct ethtool_coalesce *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 2a6c3a26f63f7..2406101002b1b 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1920,6 +1920,65 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev, return 0; } +static int ethtool_set_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) +{ + u32 bit; + int i, ret = 0; + int n_queue; + struct ethtool_coalesce *backup = NULL, *tmp = NULL; + DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); + + if ((!dev->ethtool_ops->set_per_queue_coalesce) || + (!dev->ethtool_ops->get_per_queue_coalesce)) + return -EOPNOTSUPP; + + useraddr += sizeof(*per_queue_opt); + + bitmap_from_u32array(queue_mask, + MAX_NUM_QUEUE, + per_queue_opt->queue_mask, + DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE); + tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL); + if (!backup) + return -ENOMEM; + + for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { + struct ethtool_coalesce coalesce; + + ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp); + if (ret != 0) + goto roll_back; + + tmp++; + + if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) { + ret = -EFAULT; + goto roll_back; + } + + ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce); + if (ret != 0) + goto roll_back; + + useraddr += sizeof(coalesce); + } + +roll_back: + if (ret != 0) { + tmp = backup; + for_each_set_bit(i, queue_mask, bit) { + dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp); + tmp++; + } + } + kfree(backup); + + return ret; +} + static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) { struct ethtool_per_queue_op per_queue_opt; @@ -1930,6 +1989,8 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) switch (per_queue_opt.sub_command) { case ETHTOOL_GCOALESCE: return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); + case ETHTOOL_SCOALESCE: + return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt); default: return -EOPNOTSUPP; }; From a75e8005d506f374554b17383c39aa82db0ea860 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Feb 2016 09:24:04 -0500 Subject: [PATCH 6/8] i40e: queue-specific settings for interrupt moderation For i40e driver, each vector has its own ITR register. However, there are no concept of queue-specific settings in the driver proper. Only global variable is used to store ITR values. That will cause problems especially when resetting the vector. The specific ITR values could be lost. This patch move rx_itr_setting and tx_itr_setting to i40e_ring to store specific ITR register for each queue. i40e_get_coalesce and i40e_set_coalesce are also modified accordingly to support queue-specific settings. To make it compatible with old ethtool, if user doesn't specify the queue number, i40e_get_coalesce will return queue 0's value. While i40e_set_coalesce will apply value to all queues. Signed-off-by: Kan Liang Acked-by: Shannon Nelson Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e.h | 7 - .../net/ethernet/intel/i40e/i40e_debugfs.c | 15 +- .../net/ethernet/intel/i40e/i40e_ethtool.c | 139 ++++++++++++------ drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 9 +- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 8 + 6 files changed, 120 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index e99be9f696c39..2f6210ae8ba0f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -521,13 +521,6 @@ struct i40e_vsi { struct i40e_ring **tx_rings; u16 work_limit; - /* high bit set means dynamic, use accessor routines to read/write. - * hardware only supports 2us resolution for the ITR registers. - * these values always store the USER setting, and must be converted - * before programming to a register. - */ - u16 rx_itr_setting; - u16 tx_itr_setting; u16 int_rate_limit; /* value in usecs */ u16 rss_table_size; /* HW RSS table size */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 2a44f2e25a26e..0c97733d253c4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -302,6 +302,10 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " rx_rings[%i]: vsi = %p, q_vector = %p\n", i, rx_ring->vsi, rx_ring->q_vector); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_itr_setting = %d (%s)\n", + i, rx_ring->rx_itr_setting, + ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); @@ -352,14 +356,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " tx_rings[%i]: DCB tc = %d\n", i, tx_ring->dcb_tc); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: tx_itr_setting = %d (%s)\n", + i, tx_ring->tx_itr_setting, + ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed"); } rcu_read_unlock(); dev_info(&pf->pdev->dev, - " work_limit = %d, rx_itr_setting = %d (%s), tx_itr_setting = %d (%s)\n", - vsi->work_limit, vsi->rx_itr_setting, - ITR_IS_DYNAMIC(vsi->rx_itr_setting) ? "dynamic" : "fixed", - vsi->tx_itr_setting, - ITR_IS_DYNAMIC(vsi->tx_itr_setting) ? "dynamic" : "fixed"); + " work_limit = %d\n", + vsi->work_limit); dev_info(&pf->pdev->dev, " max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n", vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a85bc94cdf87d..a4705999cbc95 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1879,8 +1879,9 @@ static int i40e_set_phys_id(struct net_device *netdev, * 125us (8000 interrupts per second) == ITR(62) */ -static int i40e_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) +static int __i40e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -1888,14 +1889,24 @@ static int i40e_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames_irq = vsi->work_limit; ec->rx_max_coalesced_frames_irq = vsi->work_limit; - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) + /* rx and tx usecs has per queue value. If user doesn't specify the queue, + * return queue 0's value to represent. + */ + if (queue < 0) { + queue = 0; + } else if (queue >= vsi->num_queue_pairs) { + return -EINVAL; + } + + if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) + if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite * fits the original intent of the ethtool variable, @@ -1908,15 +1919,57 @@ static int i40e_get_coalesce(struct net_device *netdev, return 0; } -static int i40e_set_coalesce(struct net_device *netdev, +static int i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { - struct i40e_netdev_priv *np = netdev_priv(netdev); + return __i40e_get_coalesce(netdev, ec, -1); +} + +static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; struct i40e_q_vector *q_vector; + u16 vector, intrl; + + intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit); + + vsi->rx_rings[queue]->rx_itr_setting = ec->rx_coalesce_usecs; + vsi->tx_rings[queue]->tx_itr_setting = ec->tx_coalesce_usecs; + + if (ec->use_adaptive_rx_coalesce) + vsi->rx_rings[queue]->rx_itr_setting |= I40E_ITR_DYNAMIC; + else + vsi->rx_rings[queue]->rx_itr_setting &= ~I40E_ITR_DYNAMIC; + + if (ec->use_adaptive_tx_coalesce) + vsi->tx_rings[queue]->tx_itr_setting |= I40E_ITR_DYNAMIC; + else + vsi->tx_rings[queue]->tx_itr_setting &= ~I40E_ITR_DYNAMIC; + + q_vector = vsi->rx_rings[queue]->q_vector; + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[queue]->rx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + + q_vector = vsi->tx_rings[queue]->q_vector; + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[queue]->tx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + + wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); + i40e_flush(hw); +} + +static int __i40e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u16 vector; int i; if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) @@ -1933,59 +1986,49 @@ static int i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - vector = vsi->base_vector; - if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { - vsi->rx_itr_setting = ec->rx_coalesce_usecs; - } else if (ec->rx_coalesce_usecs == 0) { - vsi->rx_itr_setting = ec->rx_coalesce_usecs; + if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(pf, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else { - netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); - return -EINVAL; + } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); + return -EINVAL; } vsi->int_rate_limit = ec->rx_coalesce_usecs_high; - if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { - vsi->tx_itr_setting = ec->tx_coalesce_usecs; - } else if (ec->tx_coalesce_usecs == 0) { - vsi->tx_itr_setting = ec->tx_coalesce_usecs; + if (ec->tx_coalesce_usecs == 0) { if (ec->use_adaptive_tx_coalesce) netif_info(pf, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); - } else { - netif_info(pf, drv, netdev, - "Invalid value, tx-usecs range is 0-8160\n"); - return -EINVAL; + } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); + return -EINVAL; } - if (ec->use_adaptive_rx_coalesce) - vsi->rx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC; - - if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; - - for (i = 0; i < vsi->num_q_vectors; i++, vector++) { - u16 intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit); - - q_vector = vsi->q_vectors[i]; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); - wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); - wr32(hw, I40E_PFINT_ITRN(1, vector - 1), q_vector->tx.itr); - wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); - i40e_flush(hw); + /* rx and tx usecs has per queue value. If user doesn't specify the queue, + * apply to all queues. + */ + if (queue < 0) { + for (i = 0; i < vsi->num_queue_pairs; i++) + i40e_set_itr_per_queue(vsi, ec, i); + } else if (queue < vsi->num_queue_pairs) { + i40e_set_itr_per_queue(vsi, ec, queue); + } else { + netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", + vsi->num_queue_pairs - 1); + return -EINVAL; } return 0; } +static int i40e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + return __i40e_set_coalesce(netdev, ec, -1); +} + /** * i40e_get_rss_hash_opts - Get RSS hash Input Set for each flow type * @pf: pointer to the physical function struct diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3c8d8c4491f80..70d9605a0d9e7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3124,11 +3124,11 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) struct i40e_q_vector *q_vector = vsi->q_vectors[i]; q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); q_vector->tx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); @@ -3220,10 +3220,10 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) /* set the ITR configuration */ q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); q_vector->tx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); @@ -7322,8 +7322,6 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) set_bit(__I40E_DOWN, &vsi->state); vsi->flags = 0; vsi->idx = vsi_idx; - vsi->rx_itr_setting = pf->rx_itr_default; - vsi->tx_itr_setting = pf->tx_itr_default; vsi->int_rate_limit = 0; vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ? pf->rss_table_size : 64; @@ -7490,6 +7488,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) tx_ring->dcb_tc = 0; if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; + tx_ring->tx_itr_setting = pf->tx_itr_default; vsi->tx_rings[i] = tx_ring; rx_ring = &tx_ring[1]; @@ -7506,6 +7505,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) set_ring_16byte_desc_enabled(rx_ring); else clear_ring_16byte_desc_enabled(rx_ring); + rx_ring->rx_itr_setting = pf->rx_itr_default; vsi->rx_rings[i] = rx_ring; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index cb52f39d514a4..084d0ab316b79 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1882,6 +1882,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, bool rx = false, tx = false; u32 rxval, txval; int vector; + int idx = q_vector->v_idx; vector = (q_vector->v_idx + vsi->base_vector); @@ -1891,17 +1892,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) && + !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 56009709528a1..cdd5dc00aec51 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -248,6 +248,14 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* high bit set means dynamic, use accessor routines to read/write. + * hardware only supports 2us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 rx_hdr_len; From be280bad15fafc0f7e7b90bdbd99170366f5e9bf Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Feb 2016 09:24:05 -0500 Subject: [PATCH 7/8] i40e/ethtool: support coalesce getting by queue This patch implements get_per_queue_coalesce for i40e driver. Signed-off-by: Kan Liang Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a4705999cbc95..dd572abddb8ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1925,6 +1925,12 @@ static int i40e_get_coalesce(struct net_device *netdev, return __i40e_get_coalesce(netdev, ec, -1); } +static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40e_get_coalesce(netdev, ec, queue); +} + static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct ethtool_coalesce *ec, int queue) @@ -2914,6 +2920,7 @@ static const struct ethtool_ops i40e_ethtool_ops = { .get_ts_info = i40e_get_ts_info, .get_priv_flags = i40e_get_priv_flags, .set_priv_flags = i40e_set_priv_flags, + .get_per_queue_coalesce = i40e_get_per_queue_coalesce, }; void i40e_set_ethtool_ops(struct net_device *netdev) From f3757a4d9e823c43ccfe4db02b6cda77414e25f7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Feb 2016 09:24:06 -0500 Subject: [PATCH 8/8] i40e/ethtool: support coalesce setting by queue This patch implements set_per_queue_coalesce for i40e driver. Signed-off-by: Kan Liang Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index dd572abddb8ea..784b1659457ad 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2035,6 +2035,12 @@ static int i40e_set_coalesce(struct net_device *netdev, return __i40e_set_coalesce(netdev, ec, -1); } +static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40e_set_coalesce(netdev, ec, queue); +} + /** * i40e_get_rss_hash_opts - Get RSS hash Input Set for each flow type * @pf: pointer to the physical function struct @@ -2921,6 +2927,7 @@ static const struct ethtool_ops i40e_ethtool_ops = { .get_priv_flags = i40e_get_priv_flags, .set_priv_flags = i40e_set_priv_flags, .get_per_queue_coalesce = i40e_get_per_queue_coalesce, + .set_per_queue_coalesce = i40e_set_per_queue_coalesce, }; void i40e_set_ethtool_ops(struct net_device *netdev)