From 57186f389cc79bcb0782ec6f9835af3266496cb1 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:59 -0800
Subject: [PATCH] --- yaml --- r: 347026 b: refs/heads/master c:
 22933152934f30de6f05b600c03f8a08f853a8d2 h: refs/heads/master v: v3

---
 [refs]                |  2 +-
 trunk/mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/[refs] b/[refs]
index 9f5effd328b6..dc2ef31755a1 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 7cf2798240a2a2230cb16a391beef98d8a7ad362
+refs/heads/master: 22933152934f30de6f05b600c03f8a08f853a8d2
diff --git a/trunk/mm/memcontrol.c b/trunk/mm/memcontrol.c
index 4b68ec2c8df6..7633e0d429e0 100644
--- a/trunk/mm/memcontrol.c
+++ b/trunk/mm/memcontrol.c
@@ -3080,7 +3080,27 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
 
 	cachep = memcg_params_to_cache(p);
 
-	if (!atomic_read(&cachep->memcg_params->nr_pages))
+	/*
+	 * If we get down to 0 after shrink, we could delete right away.
+	 * However, memcg_release_pages() already puts us back in the workqueue
+	 * in that case. If we proceed deleting, we'll get a dangling
+	 * reference, and removing the object from the workqueue in that case
+	 * is unnecessary complication. We are not a fast path.
+	 *
+	 * Note that this case is fundamentally different from racing with
+	 * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
+	 * kmem_cache_shrink, not only we would be reinserting a dead cache
+	 * into the queue, but doing so from inside the worker racing to
+	 * destroy it.
+	 *
+	 * So if we aren't down to zero, we'll just schedule a worker and try
+	 * again
+	 */
+	if (atomic_read(&cachep->memcg_params->nr_pages) != 0) {
+		kmem_cache_shrink(cachep);
+		if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+			return;
+	} else
 		kmem_cache_destroy(cachep);
 }
 
@@ -3089,6 +3109,26 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
 	if (!cachep->memcg_params->dead)
 		return;
 
+	/*
+	 * There are many ways in which we can get here.
+	 *
+	 * We can get to a memory-pressure situation while the delayed work is
+	 * still pending to run. The vmscan shrinkers can then release all
+	 * cache memory and get us to destruction. If this is the case, we'll
+	 * be executed twice, which is a bug (the second time will execute over
+	 * bogus data). In this case, cancelling the work should be fine.
+	 *
+	 * But we can also get here from the worker itself, if
+	 * kmem_cache_shrink is enough to shake all the remaining objects and
+	 * get the page count to 0. In this case, we'll deadlock if we try to
+	 * cancel the work (the worker runs with an internal lock held, which
+	 * is the same lock we would hold for cancel_work_sync().)
+	 *
+	 * Since we can't possibly know who got us here, just refrain from
+	 * running if there is already work pending
+	 */
+	if (work_pending(&cachep->memcg_params->destroy))
+		return;
 	/*
 	 * We have to defer the actual destroying to a workqueue, because
 	 * we might currently be in a context that cannot sleep.
@@ -3217,7 +3257,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		 * set, so flip it down to guarantee we are in control.
 		 */
 		c->memcg_params->dead = false;
-		cancel_delayed_work_sync(&c->memcg_params->destroy);
+		cancel_work_sync(&c->memcg_params->destroy);
 		kmem_cache_destroy(c);
 	}
 	mutex_unlock(&set_limit_mutex);
@@ -3242,7 +3282,7 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
 		cachep = memcg_params_to_cache(params);
 		cachep->memcg_params->dead = true;
 		INIT_WORK(&cachep->memcg_params->destroy,
-			  kmem_cache_destroy_work_func);
+				  kmem_cache_destroy_work_func);
 		schedule_work(&cachep->memcg_params->destroy);
 	}
 	mutex_unlock(&memcg->slab_caches_mutex);