diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index 564427e8a6e80..1629b07f727b8 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -50,27 +50,6 @@ nfp_abm_ctrl_stat(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
 	return 0;
 }
 
-static int
-nfp_abm_ctrl_stat_all(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
-		      unsigned int stride, unsigned int offset, bool is_u64,
-		      u64 *res)
-{
-	u64 val, sum = 0;
-	unsigned int i;
-	int err;
-
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		err = nfp_abm_ctrl_stat(alink, sym, stride, offset, i,
-					is_u64, &val);
-		if (err)
-			return err;
-		sum += val;
-	}
-
-	*res = sum;
-	return 0;
-}
-
 int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val)
 {
 	struct nfp_cpp *cpp = abm->app->cpp;
@@ -155,42 +134,6 @@ int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
 				 i, true, &stats->overlimits);
 }
 
-int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
-			    struct nfp_alink_stats *stats)
-{
-	u64 pkts = 0, bytes = 0;
-	int i, err;
-
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		pkts += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i));
-		bytes += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i) + 8);
-	}
-	stats->tx_pkts = pkts;
-	stats->tx_bytes = bytes;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
-				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_BYTES,
-				    false, &stats->backlog_bytes);
-	if (err)
-		return err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
-				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_PKTS,
-				    false, &stats->backlog_pkts);
-	if (err)
-		return err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				    true, &stats->drops);
-	if (err)
-		return err;
-
-	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				     true, &stats->overlimits);
-}
-
 int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
 			       struct nfp_alink_xstats *xstats)
 {
@@ -207,22 +150,6 @@ int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
 				 i, true, &xstats->ecn_marked);
 }
 
-int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
-			     struct nfp_alink_xstats *xstats)
-{
-	int err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				    true, &xstats->pdrop);
-	if (err)
-		return err;
-
-	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				     true, &xstats->ecn_marked);
-}
-
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm)
 {
 	return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_ENABLE,
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index da5394886a78b..a5732d3bd1b76 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -7,6 +7,7 @@
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
 #include <linux/slab.h>
 
 #include "../nfpcore/nfp.h"
@@ -310,22 +311,14 @@ nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
 	alink->abm = abm;
 	alink->vnic = nn;
 	alink->id = id;
-	alink->parent = TC_H_ROOT;
 	alink->total_queues = alink->vnic->max_rx_rings;
-	alink->red_qdiscs = kvcalloc(alink->total_queues,
-				     sizeof(*alink->red_qdiscs),
-				     GFP_KERNEL);
-	if (!alink->red_qdiscs) {
-		err = -ENOMEM;
-		goto err_free_alink;
-	}
 
 	/* This is a multi-host app, make sure MAC/PHY is up, but don't
 	 * make the MAC/PHY state follow the state of any of the ports.
 	 */
 	err = nfp_eth_set_configured(app->cpp, eth_port->index, true);
 	if (err < 0)
-		goto err_free_qdiscs;
+		goto err_free_alink;
 
 	netif_keep_dst(nn->dp.netdev);
 
@@ -335,8 +328,6 @@ nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
 
 	return 0;
 
-err_free_qdiscs:
-	kvfree(alink->red_qdiscs);
 err_free_alink:
 	kfree(alink);
 	return err;
@@ -348,7 +339,6 @@ static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 
 	nfp_abm_kill_reprs(alink->abm, alink);
 	WARN(!radix_tree_empty(&alink->qdiscs), "left over qdiscs\n");
-	kvfree(alink->red_qdiscs);
 	kfree(alink);
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index d0d85f82bd1c4..240e2c8683fe8 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -9,6 +9,11 @@
 #include <net/devlink.h>
 #include <net/pkt_cls.h>
 
+/* Dump of 64 PRIOs and 256 REDs seems to take 850us on Xeon v4 @ 2.20GHz;
+ * 2.5ms / 400Hz seems more than sufficient for stats resolution.
+ */
+#define NFP_ABM_STATS_REFRESH_IVAL	(2500 * 1000) /* ns */
+
 #define NFP_ABM_LVL_INFINITY		S32_MAX
 
 struct nfp_app;
@@ -91,9 +96,19 @@ enum nfp_qdisc_type {
  * @children:		pointers to children
  *
  * @params_ok:		parameters of this Qdisc are OK for offload
+ * @offload_mark:	offload refresh state - selected for offload
+ * @offloaded:		Qdisc is currently offloaded to the HW
+ *
+ * @mq:			MQ Qdisc specific parameters and state
+ * @mq.stats:		current stats of the MQ Qdisc
+ * @mq.prev_stats:	previously reported @mq.stats
  *
  * @red:		RED Qdisc specific parameters and state
  * @red.threshold:	ECN marking threshold
+ * @red.stats:		current stats of the RED Qdisc
+ * @red.prev_stats:	previously reported @red.stats
+ * @red.xstats:		extended stats for RED - current
+ * @red.prev_xstats:	extended stats for RED - previously reported
  */
 struct nfp_qdisc {
 	struct net_device *netdev;
@@ -105,29 +120,26 @@ struct nfp_qdisc {
 	struct nfp_qdisc **children;
 
 	bool params_ok;
+	bool offload_mark;
+	bool offloaded;
 
 	union {
+		/* NFP_QDISC_MQ */
+		struct {
+			struct nfp_alink_stats stats;
+			struct nfp_alink_stats prev_stats;
+		} mq;
 		/* TC_SETUP_QDISC_RED */
 		struct {
 			u32 threshold;
+			struct nfp_alink_stats stats;
+			struct nfp_alink_stats prev_stats;
+			struct nfp_alink_xstats xstats;
+			struct nfp_alink_xstats prev_xstats;
 		} red;
 	};
 };
 
-/**
- * struct nfp_red_qdisc - representation of single RED Qdisc
- * @handle:	handle of currently offloaded RED Qdisc
- * @threshold:	marking threshold of this Qdisc
- * @stats:	statistics from last refresh
- * @xstats:	base of extended statistics
- */
-struct nfp_red_qdisc {
-	u32 handle;
-	u32 threshold;
-	struct nfp_alink_stats stats;
-	struct nfp_alink_xstats xstats;
-};
-
 /**
  * struct nfp_abm_link - port tuple of a ABM NIC
  * @abm:	back pointer to nfp_abm
@@ -135,9 +147,9 @@ struct nfp_red_qdisc {
  * @id:		id of the data vNIC
  * @queue_base:	id of base to host queue within PCIe (not QC idx)
  * @total_queues:	number of PF queues
- * @parent:	handle of expected parent, i.e. handle of MQ, or TC_H_ROOT
- * @num_qdiscs:	number of currently used qdiscs
- * @red_qdiscs:	array of qdiscs
+ *
+ * @last_stats_update:	ktime of last stats update
+ *
  * @root_qdisc:	pointer to the current root of the Qdisc hierarchy
  * @qdiscs:	all qdiscs recorded by major part of the handle
  */
@@ -147,13 +159,14 @@ struct nfp_abm_link {
 	unsigned int id;
 	unsigned int queue_base;
 	unsigned int total_queues;
-	u32 parent;
-	unsigned int num_qdiscs;
-	struct nfp_red_qdisc *red_qdiscs;
+
+	u64 last_stats_update;
+
 	struct nfp_qdisc *root_qdisc;
 	struct radix_tree_root qdiscs;
 };
 
+void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink);
 int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink,
 		       struct tc_root_qopt_offload *opt);
 int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
@@ -166,12 +179,8 @@ int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
 int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val);
 int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int queue,
 			   u32 val);
-int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
-			    struct nfp_alink_stats *stats);
 int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
 			      struct nfp_alink_stats *stats);
-int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
-			     struct nfp_alink_xstats *xstats);
 int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
 			       struct nfp_alink_xstats *xstats);
 u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i);
diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
index fb68038ec1da0..16c4afe3a37ff 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
@@ -13,6 +13,11 @@
 #include "../nfp_port.h"
 #include "main.h"
 
+static bool nfp_abm_qdisc_is_red(struct nfp_qdisc *qdisc)
+{
+	return qdisc->type == NFP_QDISC_RED;
+}
+
 static bool nfp_abm_qdisc_child_valid(struct nfp_qdisc *qdisc, unsigned int id)
 {
 	return qdisc->children[id] &&
@@ -24,6 +29,74 @@ static void *nfp_abm_qdisc_tree_deref_slot(void __rcu **slot)
 	return rtnl_dereference(*slot);
 }
 
+static void
+nfp_abm_stats_propagate(struct nfp_alink_stats *parent,
+			struct nfp_alink_stats *child)
+{
+	parent->tx_pkts		+= child->tx_pkts;
+	parent->tx_bytes	+= child->tx_bytes;
+	parent->backlog_pkts	+= child->backlog_pkts;
+	parent->backlog_bytes	+= child->backlog_bytes;
+	parent->overlimits	+= child->overlimits;
+	parent->drops		+= child->drops;
+}
+
+static void
+nfp_abm_stats_update_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+			 unsigned int queue)
+{
+	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	int err;
+
+	if (!qdisc->offloaded)
+		return;
+
+	err = nfp_abm_ctrl_read_q_stats(alink, queue, &qdisc->red.stats);
+	if (err)
+		nfp_err(cpp, "RED stats (%d) read failed with error %d\n",
+			queue, err);
+
+	err = nfp_abm_ctrl_read_q_xstats(alink, queue, &qdisc->red.xstats);
+	if (err)
+		nfp_err(cpp, "RED xstats (%d) read failed with error %d\n",
+			queue, err);
+}
+
+static void
+nfp_abm_stats_update_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
+{
+	unsigned int i;
+
+	if (qdisc->type != NFP_QDISC_MQ)
+		return;
+
+	for (i = 0; i < alink->total_queues; i++)
+		if (nfp_abm_qdisc_child_valid(qdisc, i))
+			nfp_abm_stats_update_red(alink, qdisc->children[i], i);
+}
+
+static void __nfp_abm_stats_update(struct nfp_abm_link *alink, u64 time_now)
+{
+	alink->last_stats_update = time_now;
+	if (alink->root_qdisc)
+		nfp_abm_stats_update_mq(alink, alink->root_qdisc);
+}
+
+static void nfp_abm_stats_update(struct nfp_abm_link *alink)
+{
+	u64 now;
+
+	/* Limit the frequency of updates - stats of non-leaf qdiscs are a sum
+	 * of all their leafs, so we would read the same stat multiple times
+	 * for every dump.
+	 */
+	now = ktime_get();
+	if (now - alink->last_stats_update < NFP_ABM_STATS_REFRESH_IVAL)
+		return;
+
+	__nfp_abm_stats_update(alink, now);
+}
+
 static void
 nfp_abm_qdisc_unlink_children(struct nfp_qdisc *qdisc,
 			      unsigned int start, unsigned int end)
@@ -38,89 +111,174 @@ nfp_abm_qdisc_unlink_children(struct nfp_qdisc *qdisc,
 }
 
 static void
-nfp_abm_qdisc_clear_mq(struct net_device *netdev, struct nfp_abm_link *alink,
-		       struct nfp_qdisc *qdisc)
+nfp_abm_qdisc_offload_stop(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
 {
-	struct radix_tree_iter iter;
-	unsigned int mq_refs = 0;
-	void __rcu **slot;
+	/* Don't complain when qdisc is getting unlinked */
+	if (qdisc->use_cnt)
+		nfp_warn(alink->abm->app->cpp, "Offload of '%08x' stopped\n",
+			 qdisc->handle);
 
-	if (!qdisc->use_cnt)
+	if (!nfp_abm_qdisc_is_red(qdisc))
 		return;
-	/* MQ doesn't notify well on destruction, we need special handling of
-	 * MQ's children.
+
+	qdisc->red.stats.backlog_pkts = 0;
+	qdisc->red.stats.backlog_bytes = 0;
+}
+
+static int
+__nfp_abm_stats_init(struct nfp_abm_link *alink,
+		     unsigned int queue, struct nfp_alink_stats *prev_stats,
+		     struct nfp_alink_xstats *prev_xstats)
+{
+	u64 backlog_pkts, backlog_bytes;
+	int err;
+
+	/* Don't touch the backlog, backlog can only be reset after it has
+	 * been reported back to the tc qdisc stats.
 	 */
-	if (qdisc->type == NFP_QDISC_MQ &&
-	    qdisc == alink->root_qdisc &&
-	    netdev->reg_state == NETREG_UNREGISTERING)
-		return;
+	backlog_pkts = prev_stats->backlog_pkts;
+	backlog_bytes = prev_stats->backlog_bytes;
 
-	/* Count refs held by MQ instances and clear pointers */
-	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
-		struct nfp_qdisc *mq = nfp_abm_qdisc_tree_deref_slot(slot);
-		unsigned int i;
+	err = nfp_abm_ctrl_read_q_stats(alink, queue, prev_stats);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"RED stats init (%d) failed with error %d\n",
+			queue, err);
+		return err;
+	}
 
-		if (mq->type != NFP_QDISC_MQ || mq->netdev != netdev)
-			continue;
-		for (i = 0; i < mq->num_children; i++)
-			if (mq->children[i] == qdisc) {
-				mq->children[i] = NULL;
-				mq_refs++;
-			}
+	err = nfp_abm_ctrl_read_q_xstats(alink, queue, prev_xstats);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"RED xstats init (%d) failed with error %d\n",
+			queue, err);
+		return err;
 	}
 
-	WARN(qdisc->use_cnt != mq_refs, "non-zero qdisc use count: %d (- %d)\n",
-	     qdisc->use_cnt, mq_refs);
+	prev_stats->backlog_pkts = backlog_pkts;
+	prev_stats->backlog_bytes = backlog_bytes;
+	return 0;
+}
+
+static int
+nfp_abm_stats_init(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+		   unsigned int queue)
+{
+	return __nfp_abm_stats_init(alink, queue,
+				    &qdisc->red.prev_stats,
+				    &qdisc->red.prev_xstats);
 }
 
 static void
-nfp_abm_offload_compile_red(struct nfp_abm_link *alink,
-			    struct nfp_red_qdisc *qdisc, unsigned int queue)
+nfp_abm_offload_compile_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+			    unsigned int queue)
 {
-	if (!qdisc->handle)
+	qdisc->offload_mark = qdisc->type == NFP_QDISC_RED &&
+			      qdisc->params_ok &&
+			      qdisc->use_cnt == 1 &&
+			      !qdisc->children[0];
+
+	/* If we are starting offload init prev_stats */
+	if (qdisc->offload_mark && !qdisc->offloaded)
+		if (nfp_abm_stats_init(alink, qdisc, queue))
+			qdisc->offload_mark = false;
+
+	if (!qdisc->offload_mark)
 		return;
 
-	nfp_abm_ctrl_set_q_lvl(alink, queue, qdisc->threshold);
+	nfp_abm_ctrl_set_q_lvl(alink, queue, qdisc->red.threshold);
 }
 
-static void nfp_abm_offload_compile_one(struct nfp_abm_link *alink)
+static void
+nfp_abm_offload_compile_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
 {
 	unsigned int i;
-	bool is_mq;
 
-	is_mq = alink->num_qdiscs > 1;
+	qdisc->offload_mark = qdisc->type == NFP_QDISC_MQ;
+	if (!qdisc->offload_mark)
+		return;
 
 	for (i = 0; i < alink->total_queues; i++) {
-		struct nfp_red_qdisc *next;
+		struct nfp_qdisc *child = qdisc->children[i];
 
-		if (is_mq && !alink->red_qdiscs[i].handle)
+		if (!nfp_abm_qdisc_child_valid(qdisc, i))
 			continue;
 
-		next = is_mq ? &alink->red_qdiscs[i] : &alink->red_qdiscs[0];
-		nfp_abm_offload_compile_red(alink, next, i);
+		nfp_abm_offload_compile_red(alink, child, i);
 	}
 }
 
-static void nfp_abm_offload_update(struct nfp_abm *abm)
+void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink)
 {
-	struct nfp_abm_link *alink = NULL;
-	struct nfp_pf *pf = abm->app->pf;
-	struct nfp_net *nn;
+	struct nfp_abm *abm = alink->abm;
+	struct radix_tree_iter iter;
+	struct nfp_qdisc *qdisc;
+	void __rcu **slot;
 	size_t i;
 
 	/* Mark all thresholds as unconfigured */
-	__bitmap_set(abm->threshold_undef, 0, abm->num_thresholds);
+	__bitmap_set(abm->threshold_undef,
+		     alink->queue_base, alink->total_queues);
 
-	/* Configure all offloads */
-	list_for_each_entry(nn, &pf->vnics, vnic_list) {
-		alink = nn->app_priv;
-		nfp_abm_offload_compile_one(alink);
+	/* Clear offload marks */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		qdisc = nfp_abm_qdisc_tree_deref_slot(slot);
+		qdisc->offload_mark = false;
+	}
+
+	if (alink->root_qdisc)
+		nfp_abm_offload_compile_mq(alink, alink->root_qdisc);
+
+	/* Refresh offload status */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		qdisc = nfp_abm_qdisc_tree_deref_slot(slot);
+		if (!qdisc->offload_mark && qdisc->offloaded)
+			nfp_abm_qdisc_offload_stop(alink, qdisc);
+		qdisc->offloaded = qdisc->offload_mark;
 	}
 
 	/* Reset the unconfigured thresholds */
 	for (i = 0; i < abm->num_thresholds; i++)
 		if (test_bit(i, abm->threshold_undef))
 			__nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY);
+
+	__nfp_abm_stats_update(alink, ktime_get());
+}
+
+static void
+nfp_abm_qdisc_clear_mq(struct net_device *netdev, struct nfp_abm_link *alink,
+		       struct nfp_qdisc *qdisc)
+{
+	struct radix_tree_iter iter;
+	unsigned int mq_refs = 0;
+	void __rcu **slot;
+
+	if (!qdisc->use_cnt)
+		return;
+	/* MQ doesn't notify well on destruction, we need special handling of
+	 * MQ's children.
+	 */
+	if (qdisc->type == NFP_QDISC_MQ &&
+	    qdisc == alink->root_qdisc &&
+	    netdev->reg_state == NETREG_UNREGISTERING)
+		return;
+
+	/* Count refs held by MQ instances and clear pointers */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		struct nfp_qdisc *mq = nfp_abm_qdisc_tree_deref_slot(slot);
+		unsigned int i;
+
+		if (mq->type != NFP_QDISC_MQ || mq->netdev != netdev)
+			continue;
+		for (i = 0; i < mq->num_children; i++)
+			if (mq->children[i] == qdisc) {
+				mq->children[i] = NULL;
+				mq_refs++;
+			}
+	}
+
+	WARN(qdisc->use_cnt != mq_refs, "non-zero qdisc use count: %d (- %d)\n",
+	     qdisc->use_cnt, mq_refs);
 }
 
 static void
@@ -221,8 +379,13 @@ nfp_abm_qdisc_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
 	nfp_abm_qdisc_unlink_children(qdisc, 0, qdisc->num_children);
 	nfp_abm_qdisc_free(netdev, alink, qdisc);
 
-	if (alink->root_qdisc == qdisc)
+	if (alink->root_qdisc == qdisc) {
 		alink->root_qdisc = NULL;
+		/* Only root change matters, other changes are acted upon on
+		 * the graft notification.
+		 */
+		nfp_abm_qdisc_offload_update(alink);
+	}
 }
 
 static int
@@ -249,66 +412,73 @@ nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle,
 		child = NFP_QDISC_UNTRACKED;
 	parent->children[id] = child;
 
+	nfp_abm_qdisc_offload_update(alink);
+
 	return 0;
 }
 
 static void
-__nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
-		     u32 handle, unsigned int qs, u32 init_val)
+nfp_abm_stats_calculate(struct nfp_alink_stats *new,
+			struct nfp_alink_stats *old,
+			struct gnet_stats_basic_packed *bstats,
+			struct gnet_stats_queue *qstats)
 {
-	memset(alink->red_qdiscs, 0,
-	       sizeof(*alink->red_qdiscs) * alink->num_qdiscs);
-
-	alink->parent = handle;
-	alink->num_qdiscs = qs;
+	_bstats_update(bstats, new->tx_bytes - old->tx_bytes,
+		       new->tx_pkts - old->tx_pkts);
+	qstats->qlen += new->backlog_pkts - old->backlog_pkts;
+	qstats->backlog += new->backlog_bytes - old->backlog_bytes;
+	qstats->overlimits += new->overlimits - old->overlimits;
+	qstats->drops += new->drops - old->drops;
 }
 
 static void
-nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
-		   u32 handle, unsigned int qs)
+nfp_abm_stats_red_calculate(struct nfp_alink_xstats *new,
+			    struct nfp_alink_xstats *old,
+			    struct red_stats *stats)
 {
-	__nfp_abm_reset_root(netdev, alink, handle, qs, NFP_ABM_LVL_INFINITY);
+	stats->forced_mark += new->ecn_marked - old->ecn_marked;
+	stats->pdrop += new->pdrop - old->pdrop;
 }
 
 static int
-nfp_abm_red_find(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
+nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
 {
-	unsigned int i = TC_H_MIN(opt->parent) - 1;
+	struct nfp_qdisc *qdisc;
 
-	if (opt->parent == TC_H_ROOT)
-		i = 0;
-	else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent))
-		i = TC_H_MIN(opt->parent) - 1;
-	else
-		return -EOPNOTSUPP;
+	nfp_abm_stats_update(alink);
 
-	if (i >= alink->num_qdiscs ||
-	    opt->handle != alink->red_qdiscs[i].handle)
+	qdisc = nfp_abm_qdisc_find(alink, opt->handle);
+	if (!qdisc || !qdisc->offloaded)
 		return -EOPNOTSUPP;
 
-	return i;
+	nfp_abm_stats_red_calculate(&qdisc->red.xstats,
+				    &qdisc->red.prev_xstats,
+				    opt->xstats);
+	qdisc->red.prev_xstats = qdisc->red.xstats;
+	return 0;
 }
 
-static void
-nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
-		    u32 handle)
+static int
+nfp_abm_red_stats(struct nfp_abm_link *alink, u32 handle,
+		  struct tc_qopt_offload_stats *stats)
 {
-	unsigned int i;
+	struct nfp_qdisc *qdisc;
 
-	nfp_abm_qdisc_destroy(netdev, alink, handle);
+	nfp_abm_stats_update(alink);
 
-	for (i = 0; i < alink->num_qdiscs; i++)
-		if (handle == alink->red_qdiscs[i].handle)
-			break;
-	if (i == alink->num_qdiscs)
-		return;
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return -EOPNOTSUPP;
+	/* If the qdisc offload has stopped we may need to adjust the backlog
+	 * counters back so carry on even if qdisc is not currently offloaded.
+	 */
 
-	if (alink->parent == TC_H_ROOT)
-		nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
-	else
-		memset(&alink->red_qdiscs[i], 0, sizeof(*alink->red_qdiscs));
+	nfp_abm_stats_calculate(&qdisc->red.stats,
+				&qdisc->red.prev_stats,
+				stats->bstats, stats->qstats);
+	qdisc->red.prev_stats = qdisc->red.stats;
 
-	nfp_abm_offload_update(alink->abm);
+	return qdisc->offloaded ? 0 : -EOPNOTSUPP;
 }
 
 static bool
@@ -347,20 +517,12 @@ nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
 		    struct tc_red_qopt_offload *opt)
 {
 	struct nfp_qdisc *qdisc;
-	bool existing;
-	int i, err;
 	int ret;
 
 	ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_RED, opt->parent,
 				    opt->handle, 1, &qdisc);
-
-	i = nfp_abm_red_find(alink, opt);
-	existing = i >= 0;
-
-	if (ret < 0) {
-		err = ret;
-		goto err_destroy;
-	}
+	if (ret < 0)
+		return ret;
 
 	/* If limit != 0 child gets reset */
 	if (opt->set.limit) {
@@ -376,127 +538,11 @@ nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
 	}
 
 	qdisc->params_ok = nfp_abm_red_check_params(alink, opt);
-	if (qdisc->params_ok) {
+	if (qdisc->params_ok)
 		qdisc->red.threshold = opt->set.min;
-	} else {
-		err = -EINVAL;
-		goto err_destroy;
-	}
-
-	if (existing) {
-		nfp_abm_offload_update(alink->abm);
-		return 0;
-	}
-
-	if (opt->parent == TC_H_ROOT) {
-		i = 0;
-		__nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 1, opt->set.min);
-	} else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent)) {
-		i = TC_H_MIN(opt->parent) - 1;
-	} else {
-		return -EINVAL;
-	}
-	alink->red_qdiscs[i].handle = opt->handle;
-
-	if (opt->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_stats(alink,
-					      &alink->red_qdiscs[i].stats);
-	else
-		err = nfp_abm_ctrl_read_q_stats(alink, i,
-						&alink->red_qdiscs[i].stats);
-	if (err)
-		goto err_destroy;
-
-	if (opt->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_xstats(alink,
-					       &alink->red_qdiscs[i].xstats);
-	else
-		err = nfp_abm_ctrl_read_q_xstats(alink, i,
-						 &alink->red_qdiscs[i].xstats);
-	if (err)
-		goto err_destroy;
-
-	alink->red_qdiscs[i].threshold = opt->set.min;
-	alink->red_qdiscs[i].stats.backlog_pkts = 0;
-	alink->red_qdiscs[i].stats.backlog_bytes = 0;
-
-	nfp_abm_offload_update(alink->abm);
-
-	return 0;
-err_destroy:
-	/* If the qdisc keeps on living, but we can't offload undo changes */
-	if (existing) {
-		opt->set.qstats->qlen -=
-			alink->red_qdiscs[i].stats.backlog_pkts;
-		opt->set.qstats->backlog -=
-			alink->red_qdiscs[i].stats.backlog_bytes;
-	}
-	nfp_abm_red_destroy(netdev, alink, opt->handle);
-
-	return err;
-}
-
-static void
-nfp_abm_update_stats(struct nfp_alink_stats *new, struct nfp_alink_stats *old,
-		     struct tc_qopt_offload_stats *stats)
-{
-	_bstats_update(stats->bstats, new->tx_bytes - old->tx_bytes,
-		       new->tx_pkts - old->tx_pkts);
-	stats->qstats->qlen += new->backlog_pkts - old->backlog_pkts;
-	stats->qstats->backlog += new->backlog_bytes - old->backlog_bytes;
-	stats->qstats->overlimits += new->overlimits - old->overlimits;
-	stats->qstats->drops += new->drops - old->drops;
-}
-
-static int
-nfp_abm_red_stats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
-{
-	struct nfp_alink_stats *prev_stats;
-	struct nfp_alink_stats stats;
-	int i, err;
-
-	i = nfp_abm_red_find(alink, opt);
-	if (i < 0)
-		return i;
-	prev_stats = &alink->red_qdiscs[i].stats;
-
-	if (alink->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_stats(alink, &stats);
-	else
-		err = nfp_abm_ctrl_read_q_stats(alink, i, &stats);
-	if (err)
-		return err;
-
-	nfp_abm_update_stats(&stats, prev_stats, &opt->stats);
 
-	*prev_stats = stats;
-
-	return 0;
-}
-
-static int
-nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
-{
-	struct nfp_alink_xstats *prev_xstats;
-	struct nfp_alink_xstats xstats;
-	int i, err;
-
-	i = nfp_abm_red_find(alink, opt);
-	if (i < 0)
-		return i;
-	prev_xstats = &alink->red_qdiscs[i].xstats;
-
-	if (alink->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_xstats(alink, &xstats);
-	else
-		err = nfp_abm_ctrl_read_q_xstats(alink, i, &xstats);
-	if (err)
-		return err;
-
-	opt->xstats->forced_mark += xstats.ecn_marked - prev_xstats->ecn_marked;
-	opt->xstats->pdrop += xstats.pdrop - prev_xstats->pdrop;
-
-	*prev_xstats = xstats;
+	if (qdisc->use_cnt == 1)
+		nfp_abm_qdisc_offload_update(alink);
 
 	return 0;
 }
@@ -508,10 +554,10 @@ int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 	case TC_RED_REPLACE:
 		return nfp_abm_red_replace(netdev, alink, opt);
 	case TC_RED_DESTROY:
-		nfp_abm_red_destroy(netdev, alink, opt->handle);
+		nfp_abm_qdisc_destroy(netdev, alink, opt->handle);
 		return 0;
 	case TC_RED_STATS:
-		return nfp_abm_red_stats(alink, opt);
+		return nfp_abm_red_stats(alink, opt->handle, &opt->stats);
 	case TC_RED_XSTATS:
 		return nfp_abm_red_xstats(alink, opt);
 	case TC_RED_GRAFT:
@@ -522,28 +568,6 @@ int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 	}
 }
 
-static int
-nfp_abm_mq_stats(struct nfp_abm_link *alink, struct tc_mq_qopt_offload *opt)
-{
-	struct nfp_alink_stats stats;
-	unsigned int i;
-	int err;
-
-	for (i = 0; i < alink->num_qdiscs; i++) {
-		if (alink->red_qdiscs[i].handle == TC_H_UNSPEC)
-			continue;
-
-		err = nfp_abm_ctrl_read_q_stats(alink, i, &stats);
-		if (err)
-			return err;
-
-		nfp_abm_update_stats(&stats, &alink->red_qdiscs[i].stats,
-				     &opt->stats);
-	}
-
-	return 0;
-}
-
 static int
 nfp_abm_mq_create(struct net_device *netdev, struct nfp_abm_link *alink,
 		  struct tc_mq_qopt_offload *opt)
@@ -556,27 +580,63 @@ nfp_abm_mq_create(struct net_device *netdev, struct nfp_abm_link *alink,
 				    &qdisc);
 	if (ret < 0)
 		return ret;
+
+	qdisc->params_ok = true;
+	qdisc->offloaded = true;
+	nfp_abm_qdisc_offload_update(alink);
 	return 0;
 }
 
+static int
+nfp_abm_mq_stats(struct nfp_abm_link *alink, u32 handle,
+		 struct tc_qopt_offload_stats *stats)
+{
+	struct nfp_qdisc *qdisc, *red;
+	unsigned int i;
+
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return -EOPNOTSUPP;
+
+	nfp_abm_stats_update(alink);
+
+	/* MQ stats are summed over the children in the core, so we need
+	 * to add up the unreported child values.
+	 */
+	memset(&qdisc->mq.stats, 0, sizeof(qdisc->mq.stats));
+	memset(&qdisc->mq.prev_stats, 0, sizeof(qdisc->mq.prev_stats));
+
+	for (i = 0; i < qdisc->num_children; i++) {
+		if (!nfp_abm_qdisc_child_valid(qdisc, i))
+			continue;
+
+		if (!nfp_abm_qdisc_is_red(qdisc->children[i]))
+			continue;
+		red = qdisc->children[i];
+
+		nfp_abm_stats_propagate(&qdisc->mq.stats,
+					&red->red.stats);
+		nfp_abm_stats_propagate(&qdisc->mq.prev_stats,
+					&red->red.prev_stats);
+	}
+
+	nfp_abm_stats_calculate(&qdisc->mq.stats, &qdisc->mq.prev_stats,
+				stats->bstats, stats->qstats);
+
+	return qdisc->offloaded ? 0 : -EOPNOTSUPP;
+}
+
 int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
 			struct tc_mq_qopt_offload *opt)
 {
 	switch (opt->command) {
 	case TC_MQ_CREATE:
-		nfp_abm_reset_root(netdev, alink, opt->handle,
-				   alink->total_queues);
-		nfp_abm_offload_update(alink->abm);
 		return nfp_abm_mq_create(netdev, alink, opt);
 	case TC_MQ_DESTROY:
 		nfp_abm_qdisc_destroy(netdev, alink, opt->handle);
-		if (opt->handle == alink->parent) {
-			nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
-			nfp_abm_offload_update(alink->abm);
-		}
 		return 0;
 	case TC_MQ_STATS:
-		return nfp_abm_mq_stats(alink, opt);
+		return nfp_abm_mq_stats(alink, opt->handle, &opt->stats);
 	case TC_MQ_GRAFT:
 		return nfp_abm_qdisc_graft(alink, opt->handle,
 					   opt->graft_params.child_handle,
@@ -597,5 +657,7 @@ int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink,
 	if (alink->root_qdisc)
 		alink->root_qdisc->use_cnt++;
 
+	nfp_abm_qdisc_offload_update(alink);
+
 	return 0;
 }