Skip to content

Commit

Permalink
writeback: don't embed root bdi_writeback_congested in bdi_writeback
Browse files Browse the repository at this point in the history
52ebea7 ("writeback: make backing_dev_info host cgroup-specific
bdi_writebacks") made bdi (backing_dev_info) host per-cgroup wb's
(bdi_writeback's).  As the congested state needs to be per-wb and
referenced from blkcg side and multiple wbs, the patch made all
non-root cong's (bdi_writeback_congested's) reference counted and
indexed on bdi.

When a bdi is destroyed, cgwb_bdi_destroy() tries to drain all
non-root cong's; however, this can hang indefinitely because cong's can
also be referenced from blkcg_gq's which are destroyed after bdi
destruction is complete.

To fix the bug, bdi destruction will be updated to not wait for cong's
to drain, which naturally means that cong's may outlive the associated
bdi.  This is fine for non-root cong's but is problematic for the root
cong's which are embedded in their bdi's as they may end up getting
dereferenced after the containing bdi's are freed.

This patch makes root cong's behave the same as non-root cong's.  They
are no longer embedded in their bdi's but allocated separately during
bdi initialization, indexed and reference counted the same way.

* As cong handling is the same for all wb's, wb->congested
  initialization is moved into wb_init().

* When !CONFIG_CGROUP_WRITEBACK, there was no indexing or refcnting.
  bdi->wb_congested is now a pointer pointing to the root cong
  allocated during bdi init and minimal refcnting operations are
  implemented.

* The above makes root wb init paths diverge depending on
  CONFIG_CGROUP_WRITEBACK.  root wb init is moved to cgwb_bdi_init().

This patch in itself shouldn't cause any consequential behavior
differences but prepares for the actual fix.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jon Christopherson <jon@jons.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=100681
Tested-by: Jon Christopherson <jon@jons.org>

Added <linux/slab.h> include to backing-dev.h for kfree() definition.

Signed-off-by: Jens Axboe <axboe@fb.com>
  • Loading branch information
Tejun Heo authored and Jens Axboe committed Jul 2, 2015
1 parent 4da3064 commit a13f35e
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 44 deletions.
5 changes: 3 additions & 2 deletions include/linux/backing-dev-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ enum wb_stat_item {
*/
struct bdi_writeback_congested {
unsigned long state; /* WB_[a]sync_congested flags */
atomic_t refcnt; /* nr of attached wb's and blkg */

#ifdef CONFIG_CGROUP_WRITEBACK
struct backing_dev_info *bdi; /* the associated bdi */
atomic_t refcnt; /* nr of attached wb's and blkg */
int blkcg_id; /* ID of the associated blkcg */
struct rb_node rb_node; /* on bdi->cgwb_congested_tree */
#endif
Expand Down Expand Up @@ -150,11 +150,12 @@ struct backing_dev_info {
atomic_long_t tot_write_bandwidth;

struct bdi_writeback wb; /* the root writeback info for this bdi */
struct bdi_writeback_congested wb_congested; /* its congested state */
#ifdef CONFIG_CGROUP_WRITEBACK
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
struct rb_root cgwb_congested_tree; /* their congested states */
atomic_t usage_cnt; /* counts both cgwbs and cgwb_congested's */
#else
struct bdi_writeback_congested *wb_congested;
#endif
wait_queue_head_t wb_waitq;

Expand Down
6 changes: 5 additions & 1 deletion include/linux/backing-dev.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>

int __must_check bdi_init(struct backing_dev_info *bdi);
void bdi_destroy(struct backing_dev_info *bdi);
Expand Down Expand Up @@ -465,11 +466,14 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
return bdi->wb.congested;
atomic_inc(&bdi->wb_congested->refcnt);
return bdi->wb_congested;
}

/*
 * Drop one reference on @congested.  When the reference count reaches
 * zero the structure is released with kfree(); otherwise nothing else
 * happens.
 */
static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
	/* Other holders remain: nothing to free yet. */
	if (!atomic_dec_and_test(&congested->refcnt))
		return;

	kfree(congested);
}

static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
Expand Down
87 changes: 46 additions & 41 deletions mm/backing-dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
#define INIT_BW (100 << (20 - PAGE_SHIFT))

static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
gfp_t gfp)
int blkcg_id, gfp_t gfp)
{
int i, err;

Expand All @@ -311,21 +311,29 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);

wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
if (!wb->congested)
return -ENOMEM;

err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
return err;
goto out_put_cong;

for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
err = percpu_counter_init(&wb->stat[i], 0, gfp);
if (err) {
while (--i)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
return err;
}
if (err)
goto out_destroy_stat;
}

return 0;

out_destroy_stat:
while (--i)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
out_put_cong:
wb_congested_put(wb->congested);
return err;
}

/*
Expand Down Expand Up @@ -361,6 +369,7 @@ static void wb_exit(struct bdi_writeback *wb)
percpu_counter_destroy(&wb->stat[i]);

fprop_local_destroy_percpu(&wb->completions);
wb_congested_put(wb->congested);
}

#ifdef CONFIG_CGROUP_WRITEBACK
Expand Down Expand Up @@ -392,9 +401,6 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
struct bdi_writeback_congested *new_congested = NULL, *congested;
struct rb_node **node, *parent;
unsigned long flags;

if (blkcg_id == 1)
return &bdi->wb_congested;
retry:
spin_lock_irqsave(&cgwb_lock, flags);

Expand Down Expand Up @@ -453,9 +459,6 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
struct backing_dev_info *bdi = congested->bdi;
unsigned long flags;

if (congested->blkcg_id == 1)
return;

local_irq_save(flags);
if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
local_irq_restore(flags);
Expand All @@ -480,7 +483,6 @@ static void cgwb_release_workfn(struct work_struct *work)

css_put(wb->memcg_css);
css_put(wb->blkcg_css);
wb_congested_put(wb->congested);

fprop_local_destroy_percpu(&wb->memcg_completions);
percpu_ref_exit(&wb->refcnt);
Expand Down Expand Up @@ -541,7 +543,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
if (!wb)
return -ENOMEM;

ret = wb_init(wb, bdi, gfp);
ret = wb_init(wb, bdi, blkcg_css->id, gfp);
if (ret)
goto err_free;

Expand All @@ -553,12 +555,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
if (ret)
goto err_ref_exit;

wb->congested = wb_congested_get_create(bdi, blkcg_css->id, gfp);
if (!wb->congested) {
ret = -ENOMEM;
goto err_fprop_exit;
}

wb->memcg_css = memcg_css;
wb->blkcg_css = blkcg_css;
INIT_WORK(&wb->release_work, cgwb_release_workfn);
Expand Down Expand Up @@ -588,12 +584,10 @@ static int cgwb_create(struct backing_dev_info *bdi,
if (ret) {
if (ret == -EEXIST)
ret = 0;
goto err_put_congested;
goto err_fprop_exit;
}
goto out_put;

err_put_congested:
wb_congested_put(wb->congested);
err_fprop_exit:
fprop_local_destroy_percpu(&wb->memcg_completions);
err_ref_exit:
Expand Down Expand Up @@ -662,14 +656,20 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
return wb;
}

static void cgwb_bdi_init(struct backing_dev_info *bdi)
static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
bdi->wb.memcg_css = mem_cgroup_root_css;
bdi->wb.blkcg_css = blkcg_root_css;
bdi->wb_congested.blkcg_id = 1;
int ret;

INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
bdi->cgwb_congested_tree = RB_ROOT;
atomic_set(&bdi->usage_cnt, 1);

ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (!ret) {
bdi->wb.memcg_css = mem_cgroup_root_css;
bdi->wb.blkcg_css = blkcg_root_css;
}
return ret;
}

static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
Expand Down Expand Up @@ -732,15 +732,28 @@ void wb_blkcg_offline(struct blkcg *blkcg)

#else /* CONFIG_CGROUP_WRITEBACK */

static void cgwb_bdi_init(struct backing_dev_info *bdi) { }
static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
int err;

bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
if (!bdi->wb_congested)
return -ENOMEM;

err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (err) {
kfree(bdi->wb_congested);
return err;
}
return 0;
}

static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }

#endif /* CONFIG_CGROUP_WRITEBACK */

int bdi_init(struct backing_dev_info *bdi)
{
int err;

bdi->dev = NULL;

bdi->min_ratio = 0;
Expand All @@ -749,15 +762,7 @@ int bdi_init(struct backing_dev_info *bdi)
INIT_LIST_HEAD(&bdi->bdi_list);
init_waitqueue_head(&bdi->wb_waitq);

err = wb_init(&bdi->wb, bdi, GFP_KERNEL);
if (err)
return err;

bdi->wb_congested.state = 0;
bdi->wb.congested = &bdi->wb_congested;

cgwb_bdi_init(bdi);
return 0;
return cgwb_bdi_init(bdi);
}
EXPORT_SYMBOL(bdi_init);

Expand Down

0 comments on commit a13f35e

Please sign in to comment.