Skip to content

Commit

Permalink
net_sched: sch_fq: add dctcp-like marking
Browse files Browse the repository at this point in the history
Similar to 80ba92f ("codel: add ce_threshold attribute")

After EDT adoption, it became easier to implement DCTCP-like CE marking.

In many cases, queues are not building in the network fabric but on
the hosts themselves.

If packets leaving fq missed their Earliest Departure Time by more than
XXX usec (the configured ce_threshold), we mark them with ECN CE. This gives
feedback (after one RTT) to the sender to slow down and find a better operating mode.

Example :

tc qd replace dev eth0 root fq ce_threshold 2.5ms

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eric Dumazet authored and David S. Miller committed Nov 11, 2018
1 parent c73e580 commit 48872c1
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
3 changes: 3 additions & 0 deletions include/uapi/linux/pkt_sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,8 @@ enum {

TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */

TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */

__TCA_FQ_MAX
};

Expand All @@ -882,6 +884,7 @@ struct tc_fq_qd_stats {
__u32 inactive_flows;
__u32 throttled_flows;
__u32 unthrottle_latency_ns;
__u64 ce_mark; /* packets above ce_threshold */
};

/* Heavy-Hitter Filter */
Expand Down
21 changes: 21 additions & 0 deletions net/sched/sch_fq.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ struct fq_sched_data {
u32 flow_refill_delay;
u32 flow_plimit; /* max packets per flow */
unsigned long flow_max_rate; /* optional max rate per flow */
u64 ce_threshold;
u32 orphan_mask; /* mask for orphaned skb */
u32 low_rate_threshold;
struct rb_root *fq_root;
Expand All @@ -107,6 +108,7 @@ struct fq_sched_data {
u64 stat_gc_flows;
u64 stat_internal_packets;
u64 stat_throttled;
u64 stat_ce_mark;
u64 stat_flows_plimit;
u64 stat_pkts_too_long;
u64 stat_allocation_errors;
Expand Down Expand Up @@ -454,6 +456,11 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
fq_flow_set_throttled(q, f);
goto begin;
}
if (time_next_packet &&
(s64)(now - time_next_packet - q->ce_threshold) > 0) {
INET_ECN_set_ce(skb);
q->stat_ce_mark++;
}
}

skb = fq_dequeue_head(sch, f);
Expand Down Expand Up @@ -650,6 +657,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
};

static int fq_change(struct Qdisc *sch, struct nlattr *opt,
Expand Down Expand Up @@ -729,6 +737,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_ORPHAN_MASK])
q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);

if (tb[TCA_FQ_CE_THRESHOLD])
q->ce_threshold = (u64)NSEC_PER_USEC *
nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);

if (!err) {
sch_tree_unlock(sch);
err = fq_resize(sch, fq_log);
Expand Down Expand Up @@ -779,6 +791,10 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
q->fq_trees_log = ilog2(1024);
q->orphan_mask = 1024 - 1;
q->low_rate_threshold = 550000 / 8;

/* Default ce_threshold of 4294 seconds */
q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;

qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);

if (opt)
Expand All @@ -792,6 +808,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_sched_data *q = qdisc_priv(sch);
u64 ce_threshold = q->ce_threshold;
struct nlattr *opts;

opts = nla_nest_start(skb, TCA_OPTIONS);
Expand All @@ -800,6 +817,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)

/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */

do_div(ce_threshold, NSEC_PER_USEC);

if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
Expand All @@ -812,6 +831,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
q->low_rate_threshold) ||
nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;

Expand Down Expand Up @@ -841,6 +861,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.throttled_flows = q->throttled_flows;
st.unthrottle_latency_ns = min_t(unsigned long,
q->unthrottle_latency_ns, ~0U);
st.ce_mark = q->stat_ce_mark;
sch_tree_unlock(sch);

return gnet_stats_copy_app(d, &st, sizeof(st));
Expand Down

0 comments on commit 48872c1

Please sign in to comment.