From 5652e63df3303c2a702bac25fbf710b9cb64dfba Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 6 Feb 2020 13:46:06 -0800 Subject: [PATCH 1/5] taprio: Fix enabling offload with wrong number of traffic classes If the driver implementing taprio offloading depends on the value of the network device number of traffic classes (dev->num_tc) for whatever reason, it was going to receive the value zero. The value was only set after the offloading function is called. So, moving setting the number of traffic classes to before the offloading function is called fixes this issue. This is safe because this only happens when taprio is instantiated (we don't allow this configuration to be changed without first removing taprio). Fixes: 9c66d1564676 ("taprio: Add support for hardware offloading") Reported-by: Po Liu Signed-off-by: Vinicius Costa Gomes Acked-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index c609373c86618..ad0dadcfcdba8 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1444,6 +1444,19 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, taprio_set_picos_per_byte(dev, q); + if (mqprio) { + netdev_set_num_tc(dev, mqprio->num_tc); + for (i = 0; i < mqprio->num_tc; i++) + netdev_set_tc_queue(dev, i, + mqprio->count[i], + mqprio->offset[i]); + + /* Always use supplied priority mappings */ + for (i = 0; i <= TC_BITMASK; i++) + netdev_set_prio_tc_map(dev, i, + mqprio->prio_tc_map[i]); + } + if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) err = taprio_enable_offload(dev, mqprio, q, new_admin, extack); else @@ -1471,19 +1484,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, q->advance_timer.function = advance_sched; } - if (mqprio) { - netdev_set_num_tc(dev, mqprio->num_tc); - for (i = 0; i < mqprio->num_tc; i++) - netdev_set_tc_queue(dev, i, - mqprio->count[i], - mqprio->offset[i]); - - /* Always use supplied priority mappings */ - for (i = 0; i <= TC_BITMASK; i++) - netdev_set_prio_tc_map(dev, i, - mqprio->prio_tc_map[i]); - } - if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) { q->dequeue = taprio_dequeue_offload; q->peek = taprio_peek_offload; From a9d6227436f32142209f4428f2dc616761485112 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 6 Feb 2020 13:46:07 -0800 Subject: [PATCH 2/5] taprio: Fix still allowing changing the flags during runtime Because 'q->flags' starts as zero, and zero is a valid value, we aren't able to detect the transition from zero to something else during "runtime". The solution is to initialize 'q->flags' with an invalid value, so we can detect if 'q->flags' was set by the user or not. To better solidify the behavior, 'flags' handling is moved to a separate function. The behavior is: - 'flags' if unspecified by the user, is assumed to be zero; - 'flags' cannot change during "runtime" (i.e. a change() request cannot modify it); With this new function we can remove taprio_flags, which should reduce the risk of future accidents. Allowing flags to be changed was causing the following RCU stall: [ 1730.558249] rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: [ 1730.558258] rcu: 6-...0: (190 ticks this GP) idle=922/0/0x1 softirq=25580/25582 fqs=16250 [ 1730.558264] (detected by 2, t=65002 jiffies, g=33017, q=81) [ 1730.558269] Sending NMI from CPU 2 to CPUs 6: [ 1730.559277] NMI backtrace for cpu 6 [ 1730.559277] CPU: 6 PID: 0 Comm: swapper/6 Tainted: G E 5.5.0-rc6+ #35 [ 1730.559278] Hardware name: Gigabyte Technology Co., Ltd. Z390 AORUS ULTRA/Z390 AORUS ULTRA-CF, BIOS F7 03/14/2019 [ 1730.559278] RIP: 0010:__hrtimer_run_queues+0xe2/0x440 [ 1730.559278] Code: 48 8b 43 28 4c 89 ff 48 8b 75 c0 48 89 45 c8 e8 f4 bb 7c 00 0f 1f 44 00 00 65 8b 05 40 31 f0 68 89 c0 48 0f a3 05 3e 5c 25 01 <0f> 82 fc 01 00 00 48 8b 45 c8 48 89 df ff d0 89 45 c8 0f 1f 44 00 [ 1730.559279] RSP: 0018:ffff9970802d8f10 EFLAGS: 00000083 [ 1730.559279] RAX: 0000000000000006 RBX: ffff8b31645bff38 RCX: 0000000000000000 [ 1730.559280] RDX: 0000000000000000 RSI: ffffffff9710f2ec RDI: ffffffff978daf0e [ 1730.559280] RBP: ffff9970802d8f68 R08: 0000000000000000 R09: 0000000000000000 [ 1730.559280] R10: 0000018336d7944e R11: 0000000000000001 R12: ffff8b316e39f9c0 [ 1730.559281] R13: ffff8b316e39f940 R14: ffff8b316e39f998 R15: ffff8b316e39f7c0 [ 1730.559281] FS: 0000000000000000(0000) GS:ffff8b316e380000(0000) knlGS:0000000000000000 [ 1730.559281] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1730.559281] CR2: 00007f1105303760 CR3: 0000000227210005 CR4: 00000000003606e0 [ 1730.559282] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1730.559282] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1730.559282] Call Trace: [ 1730.559282] [ 1730.559283] ? taprio_dequeue_soft+0x2d0/0x2d0 [sch_taprio] [ 1730.559283] hrtimer_interrupt+0x104/0x220 [ 1730.559283] ? irqtime_account_irq+0x34/0xa0 [ 1730.559283] smp_apic_timer_interrupt+0x6d/0x230 [ 1730.559284] apic_timer_interrupt+0xf/0x20 [ 1730.559284] [ 1730.559284] RIP: 0010:cpu_idle_poll+0x35/0x1a0 [ 1730.559285] Code: 88 82 ff 65 44 8b 25 12 7d 73 68 0f 1f 44 00 00 e8 90 c3 89 ff fb 65 48 8b 1c 25 c0 7e 01 00 48 8b 03 a8 08 74 0b eb 1c f3 90 <48> 8b 03 a8 08 75 13 8b 05 be a8 a8 00 85 c0 75 ed e8 75 48 84 ff [ 1730.559285] RSP: 0018:ffff997080137ea8 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13 [ 1730.559285] RAX: 0000000000000001 RBX: ffff8b316bc3c580 RCX: 0000000000000000 [ 1730.559286] RDX: 0000000000000001 RSI: 000000002819aad9 RDI: ffffffff978da730 [ 1730.559286] RBP: ffff997080137ec0 R08: 0000018324a6d387 R09: 0000000000000000 [ 1730.559286] R10: 0000000000000400 R11: 0000000000000001 R12: 0000000000000006 [ 1730.559286] R13: ffff8b316bc3c580 R14: 0000000000000000 R15: 0000000000000000 [ 1730.559287] ? cpu_idle_poll+0x20/0x1a0 [ 1730.559287] ? cpu_idle_poll+0x20/0x1a0 [ 1730.559287] do_idle+0x4d/0x1f0 [ 1730.559287] ? complete+0x44/0x50 [ 1730.559288] cpu_startup_entry+0x1b/0x20 [ 1730.559288] start_secondary+0x142/0x180 [ 1730.559288] secondary_startup_64+0xb6/0xc0 [ 1776.686313] nvme nvme0: I/O 96 QID 1 timeout, completion polled Fixes: 4cfd5779bd6e ("taprio: Add support for txtime-assist mode") Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 61 ++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index ad0dadcfcdba8..e2d4283bea6da 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(taprio_list_lock); #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) +#define TAPRIO_FLAGS_INVALID U32_MAX struct sched_entry { struct list_head list; @@ -1367,6 +1368,33 @@ static int taprio_mqprio_cmp(const struct net_device *dev, return 0; } +/* The semantics of the 'flags' argument in relation to 'change()' + * requests, are interpreted following two rules (which are applied in + * this order): (1) an omitted 'flags' argument is interpreted as + * zero; (2) the 'flags' of a "running" taprio instance cannot be + * changed. + */ +static int taprio_new_flags(const struct nlattr *attr, u32 old, + struct netlink_ext_ack *extack) +{ + u32 new = 0; + + if (attr) + new = nla_get_u32(attr); + + if (old != TAPRIO_FLAGS_INVALID && old != new) { + NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); + return -EOPNOTSUPP; + } + + if (!taprio_flags_valid(new)) { + NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); + return -EINVAL; + } + + return new; +} + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1375,7 +1403,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; - u32 taprio_flags = 0; unsigned long flags; ktime_t start; int i, err; @@ -1388,21 +1415,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); - if (tb[TCA_TAPRIO_ATTR_FLAGS]) { - taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]); - - if (q->flags != 0 && q->flags != taprio_flags) { - NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); - return -EOPNOTSUPP; - } else if (!taprio_flags_valid(taprio_flags)) { - NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); - return -EINVAL; - } + err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS], + q->flags, extack); + if (err < 0) + return err; - q->flags = taprio_flags; - } + q->flags = err; - err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags); + err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); if (err < 0) return err; @@ -1457,7 +1477,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, mqprio->prio_tc_map[i]); } - if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) err = taprio_enable_offload(dev, mqprio, q, new_admin, extack); else err = taprio_disable_offload(dev, q, extack); @@ -1477,14 +1497,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]); } - if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) && - !FULL_OFFLOAD_IS_ENABLED(taprio_flags) && + if (!TXTIME_ASSIST_IS_ENABLED(q->flags) && + !FULL_OFFLOAD_IS_ENABLED(q->flags) && !hrtimer_active(&q->advance_timer)) { hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; } - if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) { + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { q->dequeue = taprio_dequeue_offload; q->peek = taprio_peek_offload; } else { @@ -1501,7 +1521,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto unlock; } - if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) { + if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { setup_txtime(q, new_admin, start); if (!oper) { @@ -1528,7 +1548,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, spin_unlock_irqrestore(&q->current_entry_lock, flags); - if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) taprio_offload_config_changed(q); } @@ -1597,6 +1617,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, * and get the valid one on taprio_change(). */ q->clockid = -1; + q->flags = TAPRIO_FLAGS_INVALID; spin_lock(&taprio_list_lock); list_add(&q->taprio_list, &taprio_list); From 49c684d79cfdc3032344bf6f3deeea81c4efedbf Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 6 Feb 2020 13:46:08 -0800 Subject: [PATCH 3/5] taprio: Add missing policy validation for flags netlink policy validation for the 'flags' argument was missing. Fixes: 4cfd5779bd6e ("taprio: Add support for txtime-assist mode") Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index e2d4283bea6da..b82a9769ab406 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -767,6 +767,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, }; static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, From 7c16680a08ee1e444a67d232c679ccf5b30fad16 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 6 Feb 2020 13:46:09 -0800 Subject: [PATCH 4/5] taprio: Use taprio_reset_tc() to reset Traffic Classes configuration When destroying the current taprio instance, which can happen when the creation of one fails, we should reset the traffic class configuration back to the default state. netdev_reset_tc() is a better way because in addition to setting the number of traffic classes to zero, it also resets the priority to traffic classes mapping to the default value. Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler") Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b82a9769ab406..21df69071df2b 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1588,7 +1588,7 @@ static void taprio_destroy(struct Qdisc *sch) } q->qdiscs = NULL; - netdev_set_num_tc(dev, 0); + netdev_reset_tc(dev); if (q->oper_sched) call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb); From bfabd41da34180d05382312533a3adc2e012dee0 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 6 Feb 2020 13:46:10 -0800 Subject: [PATCH 5/5] taprio: Fix dropping packets when using taprio + ETF offloading When using taprio offloading together with ETF offloading, configured like this, for example: $ tc qdisc replace dev $IFACE parent root handle 100 taprio \ num_tc 4 \ map 2 2 1 0 3 2 2 2 2 2 2 2 2 2 2 2 \ queues 1@0 1@1 1@2 1@3 \ base-time $BASE_TIME \ sched-entry S 01 1000000 \ sched-entry S 0e 1000000 \ flags 0x2 $ tc qdisc replace dev $IFACE parent 100:1 etf \ offload delta 300000 clockid CLOCK_TAI During enqueue, it works out that the verification added for the "txtime" assisted mode is run when using taprio + ETF offloading, the only thing missing is initializing the 'next_txtime' of all the cycle entries. (if we don't set 'next_txtime' all packets from SO_TXTIME sockets are dropped) Fixes: 4cfd5779bd6e ("taprio: Add support for txtime-assist mode") Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 21df69071df2b..660fc45ee40fc 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1522,9 +1522,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto unlock; } - if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { - setup_txtime(q, new_admin, start); + setup_txtime(q, new_admin, start); + if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { if (!oper) { rcu_assign_pointer(q->oper_sched, new_admin); err = 0;