Skip to content

Commit

Permalink
net: sched: pie: enable timestamp based delay calculation
Browse files Browse the repository at this point in the history
RFC 8033 suggests an alternative approach to calculate the queue
delay in PIE by using a timestamp on every enqueued packet. This
patch adds an implementation of that approach and sets it as the
default method to calculate queue delay. The previous method (based
on Little's law) to calculate queue delay is set as optional.

Signed-off-by: Gautam Ramakrishnan <gautamramk@gmail.com>
Signed-off-by: Leslie Monis <lesliemonis@gmail.com>
Signed-off-by: Mohit P. Tahiliani <tahiliani@nitk.edu.in>
Acked-by: Dave Taht <dave.taht@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Gautam Ramakrishnan authored and David S. Miller committed Nov 20, 2019
1 parent f01b437 commit cec2975
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 29 deletions.
22 changes: 14 additions & 8 deletions include/uapi/linux/pkt_sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -950,19 +950,25 @@ enum {
TCA_PIE_BETA,
TCA_PIE_ECN,
TCA_PIE_BYTEMODE,
TCA_PIE_DQ_RATE_ESTIMATOR,
__TCA_PIE_MAX
};
#define TCA_PIE_MAX (__TCA_PIE_MAX - 1)

struct tc_pie_xstats {
__u64 prob; /* current probability */
__u32 delay; /* current delay in ms */
__u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */
__u32 packets_in; /* total number of packets enqueued */
__u32 dropped; /* packets dropped due to pie_action */
__u32 overlimit; /* dropped due to lack of space in queue */
__u32 maxq; /* maximum queue size */
__u32 ecn_mark; /* packets marked with ecn*/
__u64 prob; /* current probability */
__u32 delay; /* current delay in us */
__u32 avg_dq_rate; /* current average dq_rate in
* bits/pie_time
*/
__u32 dq_rate_estimating; /* is avg_dq_rate being calculated? */
__u32 packets_in; /* total number of packets enqueued */
__u32 dropped; /* packets dropped due to pie_action */
__u32 overlimit; /* dropped due to lack of space
* in queue
*/
__u32 maxq; /* maximum queue size */
__u32 ecn_mark; /* packets marked with ecn*/
};

/* CBS */
Expand Down
120 changes: 99 additions & 21 deletions net/sched/sch_pie.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#define QUEUE_THRESHOLD 16384
#define DQCOUNT_INVALID -1
#define DTIME_INVALID 0xffffffffffffffff
#define MAX_PROB 0xffffffffffffffff
#define PIE_SCALE 8

Expand All @@ -34,6 +35,7 @@ struct pie_params {
u32 beta; /* and are used for shift relative to 1 */
bool ecn; /* true if ecn is enabled */
bool bytemode; /* to scale drop early prob based on pkt size */
u8 dq_rate_estimator; /* to calculate delay using Little's law */
};

/* variables used */
Expand Down Expand Up @@ -77,11 +79,34 @@ static void pie_params_init(struct pie_params *params)
params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */
params->ecn = false;
params->bytemode = false;
params->dq_rate_estimator = false;
}

/* private skb vars
 *
 * Per-packet PIE state. get_pie_cb() obtains a pointer to this structure by
 * casting the data area returned through qdisc_skb_cb(skb).
 */
struct pie_skb_cb {
/* Value of psched_get_time() recorded by pie_set_enqueue_time(); read back
 * by pie_get_enqueue_time() to compute the packet's queue delay.
 */
psched_time_t enqueue_time;
};

/* Return the PIE private control block stored in @skb.
 *
 * The size of struct pie_skb_cb is first passed to
 * qdisc_cb_private_validate(); the returned pointer aliases the data area
 * of the skb's qdisc control block.
 */
static struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
{
	struct pie_skb_cb *cb;

	qdisc_cb_private_validate(skb, sizeof(*cb));
	cb = (struct pie_skb_cb *)qdisc_skb_cb(skb)->data;

	return cb;
}

static psched_time_t pie_get_enqueue_time(const struct sk_buff *skb)
{
return get_pie_cb(skb)->enqueue_time;
}

static void pie_set_enqueue_time(struct sk_buff *skb)
{
get_pie_cb(skb)->enqueue_time = psched_get_time();
}

static void pie_vars_init(struct pie_vars *vars)
{
vars->dq_count = DQCOUNT_INVALID;
vars->dq_tstamp = DTIME_INVALID;
vars->accu_prob = 0;
vars->avg_dq_rate = 0;
/* default of 150 ms in pschedtime */
Expand Down Expand Up @@ -172,6 +197,10 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,

/* we can enqueue the packet */
if (enqueue) {
/* Set enqueue time only when dq_rate_estimator is disabled. */
if (!q->params.dq_rate_estimator)
pie_set_enqueue_time(skb);

q->stats.packets_in++;
if (qdisc_qlen(sch) > q->stats.maxq)
q->stats.maxq = qdisc_qlen(sch);
Expand All @@ -194,6 +223,7 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
[TCA_PIE_BETA] = {.type = NLA_U32},
[TCA_PIE_ECN] = {.type = NLA_U32},
[TCA_PIE_BYTEMODE] = {.type = NLA_U32},
[TCA_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32},
};

static int pie_change(struct Qdisc *sch, struct nlattr *opt,
Expand Down Expand Up @@ -247,6 +277,10 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_PIE_BYTEMODE])
q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);

if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
q->params.dq_rate_estimator =
nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);

/* Drop excess packets if new limit is lower */
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
Expand All @@ -266,6 +300,28 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
{
struct pie_sched_data *q = qdisc_priv(sch);
int qlen = sch->qstats.backlog; /* current queue size in bytes */
psched_time_t now = psched_get_time();
u32 dtime = 0;

/* If dq_rate_estimator is disabled, calculate qdelay using the
* packet timestamp.
*/
if (!q->params.dq_rate_estimator) {
q->vars.qdelay = now - pie_get_enqueue_time(skb);

if (q->vars.dq_tstamp != DTIME_INVALID)
dtime = now - q->vars.dq_tstamp;

q->vars.dq_tstamp = now;

if (qlen == 0)
q->vars.qdelay = 0;

if (dtime == 0)
return;

goto burst_allowance_reduction;
}

/* If current queue is about 10 packets or more and dq_count is unset
* we have enough packets to calculate the drain rate. Save
Expand All @@ -289,10 +345,10 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
q->vars.dq_count += skb->len;

if (q->vars.dq_count >= QUEUE_THRESHOLD) {
psched_time_t now = psched_get_time();
u32 dtime = now - q->vars.dq_tstamp;
u32 count = q->vars.dq_count << PIE_SCALE;

dtime = now - q->vars.dq_tstamp;

if (dtime == 0)
return;

Expand All @@ -317,34 +373,45 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
q->vars.dq_tstamp = psched_get_time();
}

if (q->vars.burst_time > 0) {
if (q->vars.burst_time > dtime)
q->vars.burst_time -= dtime;
else
q->vars.burst_time = 0;
}
goto burst_allowance_reduction;
}
}

return;

burst_allowance_reduction:
if (q->vars.burst_time > 0) {
if (q->vars.burst_time > dtime)
q->vars.burst_time -= dtime;
else
q->vars.burst_time = 0;
}
}

static void calculate_probability(struct Qdisc *sch)
{
struct pie_sched_data *q = qdisc_priv(sch);
u32 qlen = sch->qstats.backlog; /* queue size in bytes */
psched_time_t qdelay = 0; /* in pschedtime */
psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */
psched_time_t qdelay_old = 0; /* in pschedtime */
s64 delta = 0; /* determines the change in probability */
u64 oldprob;
u64 alpha, beta;
u32 power;
bool update_prob = true;

q->vars.qdelay_old = q->vars.qdelay;
if (q->params.dq_rate_estimator) {
qdelay_old = q->vars.qdelay;
q->vars.qdelay_old = q->vars.qdelay;

if (q->vars.avg_dq_rate > 0)
qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
else
qdelay = 0;
if (q->vars.avg_dq_rate > 0)
qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
else
qdelay = 0;
} else {
qdelay = q->vars.qdelay;
qdelay_old = q->vars.qdelay_old;
}

/* If qdelay is zero and qlen is not, it means qlen is very small, less
* than dequeue_rate, so we do not update probability in this round
Expand Down Expand Up @@ -430,14 +497,18 @@ static void calculate_probability(struct Qdisc *sch)
/* We restart the measurement cycle if the following conditions are met
* 1. If the delay has been low for 2 consecutive Tupdate periods
* 2. Calculated drop probability is zero
* 3. We have atleast one estimate for the avg_dq_rate ie.,
* is a non-zero value
* 3. If dq_rate_estimator is enabled, we have at least one
* estimate for the avg_dq_rate, i.e., it is a non-zero value
*/
if ((q->vars.qdelay < q->params.target / 2) &&
(q->vars.qdelay_old < q->params.target / 2) &&
q->vars.prob == 0 &&
q->vars.avg_dq_rate > 0)
(!q->params.dq_rate_estimator || q->vars.avg_dq_rate > 0)) {
pie_vars_init(&q->vars);
}

if (!q->params.dq_rate_estimator)
q->vars.qdelay_old = qdelay;
}

static void pie_timer(struct timer_list *t)
Expand Down Expand Up @@ -497,7 +568,9 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode))
nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
q->params.dq_rate_estimator))
goto nla_put_failure;

return nla_nest_end(skb, opts);
Expand All @@ -514,16 +587,21 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.prob = q->vars.prob,
.delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
NSEC_PER_USEC,
/* unscale and return dq_rate in bytes per sec */
.avg_dq_rate = q->vars.avg_dq_rate *
(PSCHED_TICKS_PER_SEC) >> PIE_SCALE,
.packets_in = q->stats.packets_in,
.overlimit = q->stats.overlimit,
.maxq = q->stats.maxq,
.dropped = q->stats.dropped,
.ecn_mark = q->stats.ecn_mark,
};

/* avg_dq_rate is only valid if dq_rate_estimator is enabled */
st.dq_rate_estimating = q->params.dq_rate_estimator;

/* unscale and return dq_rate in bytes per sec */
if (q->params.dq_rate_estimator)
st.avg_dq_rate = q->vars.avg_dq_rate *
(PSCHED_TICKS_PER_SEC) >> PIE_SCALE;

return gnet_stats_copy_app(d, &st, sizeof(st));
}

Expand Down

0 comments on commit cec2975

Please sign in to comment.