From 0fc6432cc78d8dc683474d8e28ea30543ae033b3 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Tue, 29 Nov 2016 17:00:47 +0200 Subject: [PATCH 1/5] net: ethernet: ti: davinci_cpdma: add weight function for channels The weight of a channel is needed to split descriptors between channels. The weight can depend on maximum rate of channels, maximum rate of an interface or other reasons. The channel weight is in percentage and is independent for rx and tx channels. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_cpdma.c | 124 ++++++++++++++++++++++-- drivers/net/ethernet/ti/davinci_cpdma.h | 1 + 2 files changed, 115 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 56708a79a18a2..87456a937732b 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -122,6 +122,7 @@ struct cpdma_chan { struct cpdma_chan_stats stats; /* offsets into dmaregs */ int int_set, int_clear, td; + int weight; }; struct cpdma_control_info { @@ -474,29 +475,131 @@ u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr) } EXPORT_SYMBOL_GPL(cpdma_ctrl_txchs_state); +static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr, + int rx, int desc_num, + int per_ch_desc) +{ + struct cpdma_chan *chan, *most_chan = NULL; + int desc_cnt = desc_num; + int most_dnum = 0; + int min, max, i; + + if (!desc_num) + return; + + if (rx) { + min = rx_chan_num(0); + max = rx_chan_num(CPDMA_MAX_CHANNELS); + } else { + min = tx_chan_num(0); + max = tx_chan_num(CPDMA_MAX_CHANNELS); + } + + for (i = min; i < max; i++) { + chan = ctlr->channels[i]; + if (!chan) + continue; + + if (chan->weight) + chan->desc_num = (chan->weight * desc_num) / 100; + else + chan->desc_num = per_ch_desc; + + desc_cnt -= chan->desc_num; + + if (most_dnum < chan->desc_num) { + most_dnum = chan->desc_num; + most_chan = chan; + } + } + /* use remains */ + most_chan->desc_num 
+= desc_cnt; +} + /** * cpdma_chan_split_pool - Splits ctrl pool between all channels. * Has to be called under ctlr lock */ -static void cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) +static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) { + int tx_per_ch_desc = 0, rx_per_ch_desc = 0; struct cpdma_desc_pool *pool = ctlr->pool; + int free_rx_num = 0, free_tx_num = 0; + int rx_weight = 0, tx_weight = 0; + int tx_desc_num, rx_desc_num; struct cpdma_chan *chan; - int ch_desc_num; - int i; + int i, tx_num = 0; if (!ctlr->chan_num) - return; - - /* calculate average size of pool slice */ - ch_desc_num = pool->num_desc / ctlr->chan_num; + return 0; - /* split ctlr pool */ for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) { chan = ctlr->channels[i]; - if (chan) - chan->desc_num = ch_desc_num; + if (!chan) + continue; + + if (is_rx_chan(chan)) { + if (!chan->weight) + free_rx_num++; + rx_weight += chan->weight; + } else { + if (!chan->weight) + free_tx_num++; + tx_weight += chan->weight; + tx_num++; + } + } + + if (rx_weight > 100 || tx_weight > 100) + return -EINVAL; + + tx_desc_num = (tx_num * pool->num_desc) / ctlr->chan_num; + rx_desc_num = pool->num_desc - tx_desc_num; + + if (free_tx_num) { + tx_per_ch_desc = tx_desc_num - (tx_weight * tx_desc_num) / 100; + tx_per_ch_desc /= free_tx_num; + } + if (free_rx_num) { + rx_per_ch_desc = rx_desc_num - (rx_weight * rx_desc_num) / 100; + rx_per_ch_desc /= free_rx_num; } + + cpdma_chan_set_descs(ctlr, 0, tx_desc_num, tx_per_ch_desc); + cpdma_chan_set_descs(ctlr, 1, rx_desc_num, rx_per_ch_desc); + + return 0; +} + +/* cpdma_chan_set_weight - set weight of a channel in percentage. + * Tx and Rx channels have separate weights. That is 100% for RX + * and 100% for Tx. The weight is used to split cpdma resources + * in correct proportion required by the channels, including number + * of descriptors. The channel rate is not enough to know the + * weight of a channel as the maximum rate of an interface is needed. 
+ * If weight = 0, then channel uses rest of descriptors leaved by + * weighted channels. + */ +int cpdma_chan_set_weight(struct cpdma_chan *ch, int weight) +{ + struct cpdma_ctlr *ctlr = ch->ctlr; + unsigned long flags, ch_flags; + int ret; + + spin_lock_irqsave(&ctlr->lock, flags); + spin_lock_irqsave(&ch->lock, ch_flags); + if (ch->weight == weight) { + spin_unlock_irqrestore(&ch->lock, ch_flags); + spin_unlock_irqrestore(&ctlr->lock, flags); + return 0; + } + ch->weight = weight; + spin_unlock_irqrestore(&ch->lock, ch_flags); + + /* re-split pool using new channel weight */ + ret = cpdma_chan_split_pool(ctlr); + spin_unlock_irqrestore(&ctlr->lock, flags); + return ret; } struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, @@ -527,6 +630,7 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, chan->chan_num = chan_num; chan->handler = handler; chan->desc_num = ctlr->pool->num_desc / 2; + chan->weight = 0; if (is_rx_chan(chan)) { chan->hdp = ctlr->params.rxhdp + offset; diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index a07b22b12bc1f..629020cbc2057 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -90,6 +90,7 @@ int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable); u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr); u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr); bool cpdma_check_free_tx_desc(struct cpdma_chan *chan); +int cpdma_chan_set_weight(struct cpdma_chan *ch, int weight); enum cpdma_control { CPDMA_CMD_IDLE, /* write-only */ From 8f32b90981dcdb355516fb95953133f8d4e6b11d Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Tue, 29 Nov 2016 17:00:48 +0200 Subject: [PATCH 2/5] net: ethernet: ti: davinci_cpdma: add set rate for a channel The cpdma has 8 rate limited tx channels. This patch adds ability for cpdma driver to use 8 tx h/w shapers. 
If at least one channel is not rate limited then it must have a higher number, this is because the rate limited channels have to have higher priority than not rate limited channels. The channel priority is set in low-hi direction already, so that when a new channel is added with ethtool and it doesn't have a rate yet, it cannot affect rate limited channels. It can be useful for TSN streams and just in cases when h/w rate limited channels are needed. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_cpdma.c | 329 ++++++++++++++++++++---- drivers/net/ethernet/ti/davinci_cpdma.h | 5 + 2 files changed, 289 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 87456a937732b..c776e4575d2da 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -32,6 +32,7 @@ #define CPDMA_RXCONTROL 0x14 #define CPDMA_SOFTRESET 0x1c #define CPDMA_RXTEARDOWN 0x18 +#define CPDMA_TX_PRI0_RATE 0x30 #define CPDMA_TXINTSTATRAW 0x80 #define CPDMA_TXINTSTATMASKED 0x84 #define CPDMA_TXINTMASKSET 0x88 @@ -68,6 +69,8 @@ #define CPDMA_TEARDOWN_VALUE 0xfffffffc +#define CPDMA_MAX_RLIM_CNT 16384 + struct cpdma_desc { /* hardware fields */ u32 hw_next; @@ -123,6 +126,8 @@ struct cpdma_chan { /* offsets into dmaregs */ int int_set, int_clear, td; int weight; + u32 rate_factor; + u32 rate; }; struct cpdma_control_info { @@ -135,6 +140,7 @@ struct cpdma_control_info { }; static struct cpdma_control_info controls[] = { + [CPDMA_TX_RLIM] = {CPDMA_DMACONTROL, 8, 0xffff, ACCESS_RW}, [CPDMA_CMD_IDLE] = {CPDMA_DMACONTROL, 3, 1, ACCESS_WO}, [CPDMA_COPY_ERROR_FRAMES] = {CPDMA_DMACONTROL, 4, 1, ACCESS_RW}, [CPDMA_RX_OFF_LEN_UPDATE] = {CPDMA_DMACONTROL, 2, 1, ACCESS_RW}, @@ -302,6 +308,186 @@ static int _cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value) return 0; } +static int _cpdma_control_get(struct cpdma_ctlr *ctlr, int 
control) +{ + struct cpdma_control_info *info = &controls[control]; + int ret; + + if (!ctlr->params.has_ext_regs) + return -ENOTSUPP; + + if (ctlr->state != CPDMA_STATE_ACTIVE) + return -EINVAL; + + if (control < 0 || control >= ARRAY_SIZE(controls)) + return -ENOENT; + + if ((info->access & ACCESS_RO) != ACCESS_RO) + return -EPERM; + + ret = (dma_reg_read(ctlr, info->reg) >> info->shift) & info->mask; + return ret; +} + +/* cpdma_chan_set_chan_shaper - set shaper for a channel + * Has to be called under ctlr lock + */ +static int cpdma_chan_set_chan_shaper(struct cpdma_chan *chan) +{ + struct cpdma_ctlr *ctlr = chan->ctlr; + u32 rate_reg; + u32 rmask; + int ret; + + if (!chan->rate) + return 0; + + rate_reg = CPDMA_TX_PRI0_RATE + 4 * chan->chan_num; + dma_reg_write(ctlr, rate_reg, chan->rate_factor); + + rmask = _cpdma_control_get(ctlr, CPDMA_TX_RLIM); + rmask |= chan->mask; + + ret = _cpdma_control_set(ctlr, CPDMA_TX_RLIM, rmask); + return ret; +} + +static int cpdma_chan_on(struct cpdma_chan *chan) +{ + struct cpdma_ctlr *ctlr = chan->ctlr; + struct cpdma_desc_pool *pool = ctlr->pool; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state != CPDMA_STATE_IDLE) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EBUSY; + } + if (ctlr->state != CPDMA_STATE_ACTIVE) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + dma_reg_write(ctlr, chan->int_set, chan->mask); + chan->state = CPDMA_STATE_ACTIVE; + if (chan->head) { + chan_write(chan, hdp, desc_phys(pool, chan->head)); + if (chan->rxfree) + chan_write(chan, rxfree, chan->count); + } + + spin_unlock_irqrestore(&chan->lock, flags); + return 0; +} + +/* cpdma_chan_fit_rate - set rate for a channel and check if it's possible. 
+ * rmask - mask of rate limited channels + * Returns min rate in Kb/s + */ +static int cpdma_chan_fit_rate(struct cpdma_chan *ch, u32 rate, + u32 *rmask, int *prio_mode) +{ + struct cpdma_ctlr *ctlr = ch->ctlr; + struct cpdma_chan *chan; + u32 old_rate = ch->rate; + u32 new_rmask = 0; + int rlim = 1; + int i; + + *prio_mode = 0; + for (i = tx_chan_num(0); i < tx_chan_num(CPDMA_MAX_CHANNELS); i++) { + chan = ctlr->channels[i]; + if (!chan) { + rlim = 0; + continue; + } + + if (chan == ch) + chan->rate = rate; + + if (chan->rate) { + if (rlim) { + new_rmask |= chan->mask; + } else { + ch->rate = old_rate; + dev_err(ctlr->dev, "Prev channel of %dch is not rate limited\n", + chan->chan_num); + return -EINVAL; + } + } else { + *prio_mode = 1; + rlim = 0; + } + } + + *rmask = new_rmask; + return 0; +} + +static u32 cpdma_chan_set_factors(struct cpdma_ctlr *ctlr, + struct cpdma_chan *ch) +{ + u32 delta = UINT_MAX, prev_delta = UINT_MAX, best_delta = UINT_MAX; + u32 best_send_cnt = 0, best_idle_cnt = 0; + u32 new_rate, best_rate = 0, rate_reg; + u64 send_cnt, idle_cnt; + u32 min_send_cnt, freq; + u64 divident, divisor; + + if (!ch->rate) { + ch->rate_factor = 0; + goto set_factor; + } + + freq = ctlr->params.bus_freq_mhz * 1000 * 32; + if (!freq) { + dev_err(ctlr->dev, "The bus frequency is not set\n"); + return -EINVAL; + } + + min_send_cnt = freq - ch->rate; + send_cnt = DIV_ROUND_UP(min_send_cnt, ch->rate); + while (send_cnt <= CPDMA_MAX_RLIM_CNT) { + divident = ch->rate * send_cnt; + divisor = min_send_cnt; + idle_cnt = DIV_ROUND_CLOSEST_ULL(divident, divisor); + + divident = freq * idle_cnt; + divisor = idle_cnt + send_cnt; + new_rate = DIV_ROUND_CLOSEST_ULL(divident, divisor); + + delta = new_rate >= ch->rate ? 
new_rate - ch->rate : delta; + if (delta < best_delta) { + best_delta = delta; + best_send_cnt = send_cnt; + best_idle_cnt = idle_cnt; + best_rate = new_rate; + + if (!delta) + break; + } + + if (prev_delta >= delta) { + prev_delta = delta; + send_cnt++; + continue; + } + + idle_cnt++; + divident = freq * idle_cnt; + send_cnt = DIV_ROUND_CLOSEST_ULL(divident, ch->rate); + send_cnt -= idle_cnt; + prev_delta = UINT_MAX; + } + + ch->rate = best_rate; + ch->rate_factor = best_send_cnt | (best_idle_cnt << 16); + +set_factor: + rate_reg = CPDMA_TX_PRI0_RATE + 4 * ch->chan_num; + dma_reg_write(ctlr, rate_reg, ch->rate_factor); + return 0; +} + struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params) { struct cpdma_ctlr *ctlr; @@ -332,8 +518,9 @@ EXPORT_SYMBOL_GPL(cpdma_ctlr_create); int cpdma_ctlr_start(struct cpdma_ctlr *ctlr) { + struct cpdma_chan *chan; unsigned long flags; - int i; + int i, prio_mode; spin_lock_irqsave(&ctlr->lock, flags); if (ctlr->state != CPDMA_STATE_IDLE) { @@ -369,12 +556,20 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr) ctlr->state = CPDMA_STATE_ACTIVE; + prio_mode = 0; for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) { - if (ctlr->channels[i]) - cpdma_chan_start(ctlr->channels[i]); + chan = ctlr->channels[i]; + if (chan) { + cpdma_chan_set_chan_shaper(chan); + cpdma_chan_on(chan); + + /* off prio mode if all tx channels are rate limited */ + if (is_tx_chan(chan) && !chan->rate) + prio_mode = 1; + } } - _cpdma_control_set(ctlr, CPDMA_TX_PRIO_FIXED, 1); + _cpdma_control_set(ctlr, CPDMA_TX_PRIO_FIXED, prio_mode); _cpdma_control_set(ctlr, CPDMA_RX_BUFFER_OFFSET, 0); spin_unlock_irqrestore(&ctlr->lock, flags); @@ -602,6 +797,75 @@ int cpdma_chan_set_weight(struct cpdma_chan *ch, int weight) return ret; } +/* cpdma_chan_get_min_rate - get minimum allowed rate for channel + * Should be called before cpdma_chan_set_rate. 
+ * Returns min rate in Kb/s + */ +u32 cpdma_chan_get_min_rate(struct cpdma_ctlr *ctlr) +{ + unsigned int divident, divisor; + + divident = ctlr->params.bus_freq_mhz * 32 * 1000; + divisor = 1 + CPDMA_MAX_RLIM_CNT; + + return DIV_ROUND_UP(divident, divisor); +} + +/* cpdma_chan_set_rate - limits bandwidth for transmit channel. + * The bandwidth * limited channels have to be in order beginning from lowest. + * ch - transmit channel the bandwidth is configured for + * rate - bandwidth in Kb/s, if 0 - then off shaper + */ +int cpdma_chan_set_rate(struct cpdma_chan *ch, u32 rate) +{ + struct cpdma_ctlr *ctlr = ch->ctlr; + unsigned long flags, ch_flags; + int ret, prio_mode; + u32 rmask; + + if (!ch || !is_tx_chan(ch)) + return -EINVAL; + + if (ch->rate == rate) + return rate; + + spin_lock_irqsave(&ctlr->lock, flags); + spin_lock_irqsave(&ch->lock, ch_flags); + + ret = cpdma_chan_fit_rate(ch, rate, &rmask, &prio_mode); + if (ret) + goto err; + + ret = cpdma_chan_set_factors(ctlr, ch); + if (ret) + goto err; + + spin_unlock_irqrestore(&ch->lock, ch_flags); + + /* on shapers */ + _cpdma_control_set(ctlr, CPDMA_TX_RLIM, rmask); + _cpdma_control_set(ctlr, CPDMA_TX_PRIO_FIXED, prio_mode); + spin_unlock_irqrestore(&ctlr->lock, flags); + return ret; + +err: + spin_unlock_irqrestore(&ch->lock, ch_flags); + spin_unlock_irqrestore(&ctlr->lock, flags); + return ret; +} + +u32 cpdma_chan_get_rate(struct cpdma_chan *ch) +{ + unsigned long flags; + u32 rate; + + spin_lock_irqsave(&ch->lock, flags); + rate = ch->rate; + spin_unlock_irqrestore(&ch->lock, flags); + + return rate; +} + struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, cpdma_handler_fn handler, int rx_type) { @@ -629,6 +893,7 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, chan->state = CPDMA_STATE_IDLE; chan->chan_num = chan_num; chan->handler = handler; + chan->rate = 0; chan->desc_num = ctlr->pool->num_desc / 2; chan->weight = 0; @@ -924,28 +1189,20 @@ 
EXPORT_SYMBOL_GPL(cpdma_chan_process); int cpdma_chan_start(struct cpdma_chan *chan) { - struct cpdma_ctlr *ctlr = chan->ctlr; - struct cpdma_desc_pool *pool = ctlr->pool; - unsigned long flags; + struct cpdma_ctlr *ctlr = chan->ctlr; + unsigned long flags; + int ret; - spin_lock_irqsave(&chan->lock, flags); - if (chan->state != CPDMA_STATE_IDLE) { - spin_unlock_irqrestore(&chan->lock, flags); - return -EBUSY; - } - if (ctlr->state != CPDMA_STATE_ACTIVE) { - spin_unlock_irqrestore(&chan->lock, flags); - return -EINVAL; - } - dma_reg_write(ctlr, chan->int_set, chan->mask); - chan->state = CPDMA_STATE_ACTIVE; - if (chan->head) { - chan_write(chan, hdp, desc_phys(pool, chan->head)); - if (chan->rxfree) - chan_write(chan, rxfree, chan->count); - } + spin_lock_irqsave(&ctlr->lock, flags); + ret = cpdma_chan_set_chan_shaper(chan); + spin_unlock_irqrestore(&ctlr->lock, flags); + if (ret) + return ret; + + ret = cpdma_chan_on(chan); + if (ret) + return ret; - spin_unlock_irqrestore(&chan->lock, flags); return 0; } EXPORT_SYMBOL_GPL(cpdma_chan_start); @@ -1033,31 +1290,12 @@ int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable) int cpdma_control_get(struct cpdma_ctlr *ctlr, int control) { unsigned long flags; - struct cpdma_control_info *info = &controls[control]; int ret; spin_lock_irqsave(&ctlr->lock, flags); - - ret = -ENOTSUPP; - if (!ctlr->params.has_ext_regs) - goto unlock_ret; - - ret = -EINVAL; - if (ctlr->state != CPDMA_STATE_ACTIVE) - goto unlock_ret; - - ret = -ENOENT; - if (control < 0 || control >= ARRAY_SIZE(controls)) - goto unlock_ret; - - ret = -EPERM; - if ((info->access & ACCESS_RO) != ACCESS_RO) - goto unlock_ret; - - ret = (dma_reg_read(ctlr, info->reg) >> info->shift) & info->mask; - -unlock_ret: + ret = _cpdma_control_get(ctlr, control); spin_unlock_irqrestore(&ctlr->lock, flags); + return ret; } @@ -1069,6 +1307,7 @@ int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value) spin_lock_irqsave(&ctlr->lock, flags); ret = 
_cpdma_control_set(ctlr, control, value); spin_unlock_irqrestore(&ctlr->lock, flags); + return ret; } EXPORT_SYMBOL_GPL(cpdma_control_set); diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index 629020cbc2057..4a167db2ababf 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -36,6 +36,7 @@ struct cpdma_params { u32 desc_hw_addr; int desc_mem_size; int desc_align; + u32 bus_freq_mhz; /* * Some instances of embedded cpdma controllers have extra control and @@ -91,8 +92,12 @@ u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr); u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr); bool cpdma_check_free_tx_desc(struct cpdma_chan *chan); int cpdma_chan_set_weight(struct cpdma_chan *ch, int weight); +int cpdma_chan_set_rate(struct cpdma_chan *ch, u32 rate); +u32 cpdma_chan_get_rate(struct cpdma_chan *ch); +u32 cpdma_chan_get_min_rate(struct cpdma_ctlr *ctlr); enum cpdma_control { + CPDMA_TX_RLIM, /* read-write */ CPDMA_CMD_IDLE, /* write-only */ CPDMA_COPY_ERROR_FRAMES, /* read-write */ CPDMA_RX_OFF_LEN_UPDATE, /* read-write */ From 83fcad0c986d9dbbb71e8c433d6c40dc6cb810ad Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Tue, 29 Nov 2016 17:00:49 +0200 Subject: [PATCH 3/5] net: ethernet: ti: cpsw: add .ndo to set per-queue rate This patch allows rate limiting of tx queues for the cpsw interface. The rate is set in absolute Mb/s units and cannot be more than the speed the interface is connected with. 
The rate for a tx queue can be tested with: ethtool -L eth0 rx 4 tx 4 echo 100 > /sys/class/net/eth0/queues/tx-0/tx_maxrate echo 200 > /sys/class/net/eth0/queues/tx-1/tx_maxrate echo 50 > /sys/class/net/eth0/queues/tx-2/tx_maxrate echo 30 > /sys/class/net/eth0/queues/tx-3/tx_maxrate tc qdisc add dev eth0 root handle 1: multiq tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip\ dport 5001 0xffff action skbedit queue_mapping 0 tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip\ dport 5002 0xffff action skbedit queue_mapping 1 tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip\ dport 5003 0xffff action skbedit queue_mapping 2 tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip\ dport 5004 0xffff action skbedit queue_mapping 3 iperf -c 192.168.2.1 -b 110M -p 5001 -f m -t 60 iperf -c 192.168.2.1 -b 215M -p 5002 -f m -t 60 iperf -c 192.168.2.1 -b 55M -p 5003 -f m -t 60 iperf -c 192.168.2.1 -b 32M -p 5004 -f m -t 60 Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/cpsw.c | 87 ++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index caec6acd04d34..27379fa3d1b1a 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1872,6 +1872,88 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, return ret; } +static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + int tx_ch_num = ndev->real_num_tx_queues; + u32 consumed_rate, min_rate, max_rate; + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_slave *slave; + int ret, i, weight; + int rlim_num = 0; + u32 ch_rate; + + ch_rate = netdev_get_tx_queue(ndev, queue)->tx_maxrate; + if (ch_rate == rate) + return 0; + + if (cpsw->data.dual_emac) + slave = &cpsw->slaves[priv->emac_port]; + else + slave = &cpsw->slaves[cpsw->data.active_slave]; + max_rate = slave->phy->speed; + + consumed_rate = 0; + for (i = 0; i < tx_ch_num; i++) { + if (i == queue) + ch_rate = rate; + else + ch_rate = netdev_get_tx_queue(ndev, i)->tx_maxrate; + + if (!ch_rate) + continue; + + rlim_num++; + consumed_rate += ch_rate; + } + + if (consumed_rate > max_rate) + dev_info(priv->dev, "The common rate shouldn't be more than %dMbps", + max_rate); + + if (consumed_rate > max_rate) { + if (max_rate == 10 && consumed_rate <= 100) { + max_rate = 100; + } else if (max_rate <= 100 && consumed_rate <= 1000) { + max_rate = 1000; + } else { + dev_err(priv->dev, "The common rate cannot be more than %dMbps", + max_rate); + return -EINVAL; + } + } + + if (consumed_rate > max_rate) { + dev_err(priv->dev, "The common rate cannot be more than %dMbps", + max_rate); + return -EINVAL; + } + + rate *= 1000; + min_rate = cpdma_chan_get_min_rate(cpsw->dma); + if ((rate < min_rate && rate)) { + dev_err(priv->dev, "The common rate cannot be less than %dMbps", + min_rate); + return -EINVAL; + } + + 
ret = pm_runtime_get_sync(cpsw->dev); + if (ret < 0) { + pm_runtime_put_noidle(cpsw->dev); + return ret; + } + + if (rlim_num == tx_ch_num) + max_rate = consumed_rate; + + weight = (rate * 100) / (max_rate * 1000); + cpdma_chan_set_weight(cpsw->txch[queue], weight); + + ret = cpdma_chan_set_rate(cpsw->txch[queue], rate); + pm_runtime_put(cpsw->dev); + return ret; +} + static const struct net_device_ops cpsw_netdev_ops = { .ndo_open = cpsw_ndo_open, .ndo_stop = cpsw_ndo_stop, @@ -1881,6 +1963,7 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = cpsw_ndo_tx_timeout, .ndo_set_rx_mode = cpsw_ndo_set_rx_mode, + .ndo_set_tx_maxrate = cpsw_ndo_set_tx_maxrate, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = cpsw_ndo_poll_controller, #endif @@ -2100,6 +2183,7 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx) int (*poll)(struct napi_struct *, int); struct cpsw_common *cpsw = priv->cpsw; void (*handler)(void *, int, int); + struct netdev_queue *queue; struct cpdma_chan **chan; int ret, *ch; @@ -2117,6 +2201,8 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx) while (*ch < ch_num) { chan[*ch] = cpdma_chan_create(cpsw->dma, *ch, handler, rx); + queue = netdev_get_tx_queue(priv->ndev, *ch); + queue->tx_maxrate = 0; if (IS_ERR(chan[*ch])) return PTR_ERR(chan[*ch]); @@ -2791,6 +2877,7 @@ static int cpsw_probe(struct platform_device *pdev) dma_params.desc_align = 16; dma_params.has_ext_regs = true; dma_params.desc_hw_addr = dma_params.desc_mem_phys; + dma_params.bus_freq_mhz = cpsw->bus_freq_mhz; cpsw->dma = cpdma_ctlr_create(&dma_params); if (!cpsw->dma) { From 342934a55898f830c38f066e6776e8996589fef3 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Tue, 29 Nov 2016 17:00:50 +0200 Subject: [PATCH 4/5] net: ethernet: ti: cpsw: optimize end of poll cycle Check budget fullness only after it's updated and update channel mask only once to keep 
budget balance between channels. It's also needed for further changes. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 27379fa3d1b1a..5ea2e3dba21a8 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -788,19 +788,13 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) /* process every unprocessed channel */ ch_map = cpdma_ctrl_txchs_state(cpsw->dma); - for (ch = 0, num_tx = 0; num_tx < budget; ch_map >>= 1, ch++) { - if (!ch_map) { - ch_map = cpdma_ctrl_txchs_state(cpsw->dma); - if (!ch_map) - break; - - ch = 0; - } - + for (ch = 0, num_tx = 0; ch_map; ch_map >>= 1, ch++) { if (!(ch_map & 0x01)) continue; num_tx += cpdma_chan_process(cpsw->txch[ch], budget - num_tx); + if (num_tx >= budget) + break; } if (num_tx < budget) { @@ -823,19 +817,13 @@ static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) /* process every unprocessed channel */ ch_map = cpdma_ctrl_rxchs_state(cpsw->dma); - for (ch = 0, num_rx = 0; num_rx < budget; ch_map >>= 1, ch++) { - if (!ch_map) { - ch_map = cpdma_ctrl_rxchs_state(cpsw->dma); - if (!ch_map) - break; - - ch = 0; - } - + for (ch = 0, num_rx = 0; ch_map; ch_map >>= 1, ch++) { if (!(ch_map & 0x01)) continue; num_rx += cpdma_chan_process(cpsw->rxch[ch], budget - num_rx); + if (num_rx >= budget) + break; } if (num_rx < budget) { From 8feb0a1965072e2b19e05792b61f81f982eea5e8 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Tue, 29 Nov 2016 17:00:51 +0200 Subject: [PATCH 5/5] net: ethernet: ti: cpsw: split tx budget according between channels Split device budget between channels according to channel rate. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/cpsw.c | 159 +++++++++++++++++++++++++++------ 1 file changed, 130 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5ea2e3dba21a8..dd5d830868059 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -365,6 +365,11 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) __raw_writel(val, slave->regs + offset); } +struct cpsw_vector { + struct cpdma_chan *ch; + int budget; +}; + struct cpsw_common { struct device *dev; struct cpsw_platform_data data; @@ -380,8 +385,8 @@ struct cpsw_common { int rx_packet_max; struct cpsw_slave *slaves; struct cpdma_ctlr *dma; - struct cpdma_chan *txch[CPSW_MAX_QUEUES]; - struct cpdma_chan *rxch[CPSW_MAX_QUEUES]; + struct cpsw_vector txv[CPSW_MAX_QUEUES]; + struct cpsw_vector rxv[CPSW_MAX_QUEUES]; struct cpsw_ale *ale; bool quirk_irq; bool rx_irq_disabled; @@ -741,7 +746,7 @@ static void cpsw_rx_handler(void *token, int len, int status) return; } - ch = cpsw->rxch[skb_get_queue_mapping(new_skb)]; + ch = cpsw->rxv[skb_get_queue_mapping(new_skb)].ch; ret = cpdma_chan_submit(ch, new_skb, new_skb->data, skb_tailroom(new_skb), 0); if (WARN_ON(ret < 0)) @@ -783,8 +788,9 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) { u32 ch_map; - int num_tx, ch; + int num_tx, cur_budget, ch; struct cpsw_common *cpsw = napi_to_cpsw(napi_tx); + struct cpsw_vector *txv; /* process every unprocessed channel */ ch_map = cpdma_ctrl_txchs_state(cpsw->dma); @@ -792,7 +798,13 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) if (!(ch_map & 0x01)) continue; - num_tx += cpdma_chan_process(cpsw->txch[ch], budget - num_tx); + txv = &cpsw->txv[ch]; + if (unlikely(txv->budget > budget - num_tx)) + cur_budget = budget - num_tx; + else + cur_budget = txv->budget; + + num_tx += cpdma_chan_process(txv->ch, cur_budget); if 
(num_tx >= budget) break; } @@ -812,8 +824,9 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) { u32 ch_map; - int num_rx, ch; + int num_rx, cur_budget, ch; struct cpsw_common *cpsw = napi_to_cpsw(napi_rx); + struct cpsw_vector *rxv; /* process every unprocessed channel */ ch_map = cpdma_ctrl_rxchs_state(cpsw->dma); @@ -821,7 +834,13 @@ static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) if (!(ch_map & 0x01)) continue; - num_rx += cpdma_chan_process(cpsw->rxch[ch], budget - num_rx); + rxv = &cpsw->rxv[ch]; + if (unlikely(rxv->budget > budget - num_rx)) + cur_budget = budget - num_rx; + else + cur_budget = rxv->budget; + + num_rx += cpdma_chan_process(rxv->ch, cur_budget); if (num_rx >= budget) break; } @@ -1063,7 +1082,7 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev, cpsw_gstrings_stats[l].stat_offset); for (ch = 0; ch < cpsw->rx_ch_num; ch++) { - cpdma_chan_get_stats(cpsw->rxch[ch], &ch_stats); + cpdma_chan_get_stats(cpsw->rxv[ch].ch, &ch_stats); for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) { p = (u8 *)&ch_stats + cpsw_gstrings_ch_stats[i].stat_offset; @@ -1072,7 +1091,7 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev, } for (ch = 0; ch < cpsw->tx_ch_num; ch++) { - cpdma_chan_get_stats(cpsw->txch[ch], &ch_stats); + cpdma_chan_get_stats(cpsw->txv[ch].ch, &ch_stats); for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) { p = (u8 *)&ch_stats + cpsw_gstrings_ch_stats[i].stat_offset; @@ -1261,6 +1280,82 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) } } +/* split budget depending on channel rates */ +static void cpsw_split_budget(struct net_device *ndev) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_vector *txv = cpsw->txv; + u32 consumed_rate, bigest_rate = 0; + int budget, bigest_rate_ch = 0; + struct cpsw_slave *slave; + int i, rlim_ch_num = 0; + u32 ch_rate, max_rate; + int 
ch_budget = 0; + + if (cpsw->data.dual_emac) + slave = &cpsw->slaves[priv->emac_port]; + else + slave = &cpsw->slaves[cpsw->data.active_slave]; + + max_rate = slave->phy->speed * 1000; + + consumed_rate = 0; + for (i = 0; i < cpsw->tx_ch_num; i++) { + ch_rate = cpdma_chan_get_rate(txv[i].ch); + if (!ch_rate) + continue; + + rlim_ch_num++; + consumed_rate += ch_rate; + } + + if (cpsw->tx_ch_num == rlim_ch_num) { + max_rate = consumed_rate; + } else { + ch_budget = (consumed_rate * CPSW_POLL_WEIGHT) / max_rate; + ch_budget = (CPSW_POLL_WEIGHT - ch_budget) / + (cpsw->tx_ch_num - rlim_ch_num); + bigest_rate = (max_rate - consumed_rate) / + (cpsw->tx_ch_num - rlim_ch_num); + } + + /* split tx budget */ + budget = CPSW_POLL_WEIGHT; + for (i = 0; i < cpsw->tx_ch_num; i++) { + ch_rate = cpdma_chan_get_rate(txv[i].ch); + if (ch_rate) { + txv[i].budget = (ch_rate * CPSW_POLL_WEIGHT) / max_rate; + if (!txv[i].budget) + txv[i].budget = 1; + if (ch_rate > bigest_rate) { + bigest_rate_ch = i; + bigest_rate = ch_rate; + } + } else { + txv[i].budget = ch_budget; + if (!bigest_rate_ch) + bigest_rate_ch = i; + } + + budget -= txv[i].budget; + } + + if (budget) + txv[bigest_rate_ch].budget += budget; + + /* split rx budget */ + budget = CPSW_POLL_WEIGHT; + ch_budget = budget / cpsw->rx_ch_num; + for (i = 0; i < cpsw->rx_ch_num; i++) { + cpsw->rxv[i].budget = ch_budget; + budget -= ch_budget; + } + + if (budget) + cpsw->rxv[0].budget += budget; +} + static int cpsw_fill_rx_channels(struct cpsw_priv *priv) { struct cpsw_common *cpsw = priv->cpsw; @@ -1269,7 +1364,7 @@ static int cpsw_fill_rx_channels(struct cpsw_priv *priv) int ch, i, ret; for (ch = 0; ch < cpsw->rx_ch_num; ch++) { - ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxch[ch]); + ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch); for (i = 0; i < ch_buf_num; i++) { skb = __netdev_alloc_skb_ip_align(priv->ndev, cpsw->rx_packet_max, @@ -1280,8 +1375,9 @@ static int cpsw_fill_rx_channels(struct cpsw_priv *priv) } 
skb_set_queue_mapping(skb, ch); - ret = cpdma_chan_submit(cpsw->rxch[ch], skb, skb->data, - skb_tailroom(skb), 0); + ret = cpdma_chan_submit(cpsw->rxv[ch].ch, skb, + skb->data, skb_tailroom(skb), + 0); if (ret < 0) { cpsw_err(priv, ifup, "cannot submit skb to channel %d rx, error %d\n", @@ -1405,6 +1501,7 @@ static int cpsw_ndo_open(struct net_device *ndev) cpsw_set_coalesce(ndev, &coal); } + cpsw_split_budget(ndev); cpdma_ctlr_start(cpsw->dma); cpsw_intr_enable(cpsw); @@ -1474,7 +1571,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, if (q_idx >= cpsw->tx_ch_num) q_idx = q_idx % cpsw->tx_ch_num; - txch = cpsw->txch[q_idx]; + txch = cpsw->txv[q_idx].ch; ret = cpsw_tx_packet_submit(priv, skb, txch); if (unlikely(ret != 0)) { cpsw_err(priv, tx_err, "desc submit failed\n"); @@ -1681,8 +1778,8 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev) ndev->stats.tx_errors++; cpsw_intr_disable(cpsw); for (ch = 0; ch < cpsw->tx_ch_num; ch++) { - cpdma_chan_stop(cpsw->txch[ch]); - cpdma_chan_start(cpsw->txch[ch]); + cpdma_chan_stop(cpsw->txv[ch].ch); + cpdma_chan_start(cpsw->txv[ch].ch); } cpsw_intr_enable(cpsw); @@ -1887,7 +1984,6 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate) ch_rate = rate; else ch_rate = netdev_get_tx_queue(ndev, i)->tx_maxrate; - if (!ch_rate) continue; @@ -1935,9 +2031,12 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate) max_rate = consumed_rate; weight = (rate * 100) / (max_rate * 1000); - cpdma_chan_set_weight(cpsw->txch[queue], weight); + cpdma_chan_set_weight(cpsw->txv[queue].ch, weight); + ret = cpdma_chan_set_rate(cpsw->txv[queue].ch, rate); - ret = cpdma_chan_set_rate(cpsw->txch[queue], rate); + /* re-split budget between channels */ + if (!rate) + cpsw_split_budget(ndev); pm_runtime_put(cpsw->dev); return ret; } @@ -2172,30 +2271,30 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx) struct cpsw_common *cpsw = priv->cpsw; 
void (*handler)(void *, int, int); struct netdev_queue *queue; - struct cpdma_chan **chan; + struct cpsw_vector *vec; int ret, *ch; if (rx) { ch = &cpsw->rx_ch_num; - chan = cpsw->rxch; + vec = cpsw->rxv; handler = cpsw_rx_handler; poll = cpsw_rx_poll; } else { ch = &cpsw->tx_ch_num; - chan = cpsw->txch; + vec = cpsw->txv; handler = cpsw_tx_handler; poll = cpsw_tx_poll; } while (*ch < ch_num) { - chan[*ch] = cpdma_chan_create(cpsw->dma, *ch, handler, rx); + vec[*ch].ch = cpdma_chan_create(cpsw->dma, *ch, handler, rx); queue = netdev_get_tx_queue(priv->ndev, *ch); queue->tx_maxrate = 0; - if (IS_ERR(chan[*ch])) - return PTR_ERR(chan[*ch]); + if (IS_ERR(vec[*ch].ch)) + return PTR_ERR(vec[*ch].ch); - if (!chan[*ch]) + if (!vec[*ch].ch) return -EINVAL; cpsw_info(priv, ifup, "created new %d %s channel\n", *ch, @@ -2206,7 +2305,7 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx) while (*ch > ch_num) { (*ch)--; - ret = cpdma_chan_destroy(chan[*ch]); + ret = cpdma_chan_destroy(vec[*ch].ch); if (ret) return ret; @@ -2293,6 +2392,8 @@ static int cpsw_set_channels(struct net_device *ndev, if (ret) goto err; + cpsw_split_budget(ndev); + /* After this receive is started */ cpdma_ctlr_start(cpsw->dma); cpsw_intr_enable(cpsw); @@ -2874,9 +2975,9 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_dt_ret; } - cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0); - cpsw->rxch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1); - if (WARN_ON(!cpsw->rxch[0] || !cpsw->txch[0])) { + cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0); + cpsw->rxv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1); + if (WARN_ON(!cpsw->rxv[0].ch || !cpsw->txv[0].ch)) { dev_err(priv->dev, "error initializing dma channels\n"); ret = -ENOMEM; goto clean_dma_ret;