From e86325b9677f19ebed6818e51e50c994e905bf19 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 10 Oct 2008 21:33:04 +0200 Subject: [PATCH] --- yaml --- r: 110804 b: refs/heads/master c: 4bcb3a37180ee4dffaef8298f373b334a7bedabb h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/Documentation/DocBook/kernel-api.tmpl | 1 - trunk/Documentation/RCU/checklist.txt | 2 +- trunk/Documentation/RCU/rcuref.txt | 16 +- trunk/Documentation/RCU/whatisRCU.txt | 2 + trunk/Documentation/SELinux.txt | 27 -- trunk/Documentation/kernel-doc-nano-HOWTO.txt | 4 +- .../scheduler/sched-design-CFS.txt | 395 +++++++----------- trunk/MAINTAINERS | 5 +- trunk/arch/alpha/kernel/smp.c | 3 - trunk/arch/arm/kernel/smp.c | 1 - trunk/arch/cris/arch-v32/kernel/smp.c | 1 - trunk/arch/ia64/kernel/smpboot.c | 1 - trunk/arch/m32r/kernel/smpboot.c | 2 - trunk/arch/mips/kernel/smp.c | 2 - trunk/arch/powerpc/kernel/smp.c | 1 - trunk/arch/s390/kernel/smp.c | 2 - trunk/arch/sh/kernel/smp.c | 2 - trunk/arch/sparc/kernel/sun4d_smp.c | 1 - trunk/arch/sparc/kernel/sun4m_smp.c | 2 - trunk/arch/um/kernel/smp.c | 1 - .../x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 13 +- trunk/arch/x86/kernel/cpu/cpufreq/elanfreq.c | 42 +- .../arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 41 +- trunk/arch/x86/kernel/smpboot.c | 1 - trunk/arch/x86/mach-voyager/voyager_smp.c | 2 - trunk/drivers/char/tpm/Kconfig | 1 - trunk/drivers/cpufreq/cpufreq.c | 30 +- trunk/drivers/cpufreq/cpufreq_conservative.c | 5 +- trunk/drivers/cpufreq/cpufreq_ondemand.c | 147 ++----- trunk/drivers/cpufreq/cpufreq_performance.c | 4 +- trunk/drivers/cpufreq/cpufreq_powersave.c | 4 +- trunk/drivers/cpufreq/cpufreq_userspace.c | 4 +- trunk/drivers/s390/cio/qdio.h | 8 + trunk/drivers/s390/cio/qdio_main.c | 6 + trunk/include/linux/compiler.h | 4 +- trunk/include/linux/completion.h | 41 -- trunk/include/linux/cpu.h | 1 - trunk/include/linux/cpufreq.h | 7 +- trunk/include/linux/notifier.h | 10 +- trunk/include/linux/proportions.h | 2 +- trunk/include/linux/rcuclassic.h | 37 +- trunk/include/linux/rculist.h | 14 + trunk/include/linux/rcupdate.h | 20 - trunk/include/linux/rcupreempt.h | 11 +- trunk/include/linux/sched.h | 9 +- trunk/include/linux/security.h | 54 ++- trunk/include/linux/tick.h | 2 +- trunk/kernel/cpu.c | 24 +- trunk/kernel/cpuset.c | 2 +- trunk/kernel/rcuclassic.c | 337 ++++----------- trunk/kernel/rcupreempt.c | 8 + trunk/kernel/rcupreempt_trace.c | 7 +- trunk/kernel/sched.c | 377 ++++++----------- trunk/kernel/sched_fair.c | 234 ++++++++--- trunk/kernel/sched_features.h | 1 - trunk/kernel/sched_idletask.c | 6 +- trunk/kernel/sched_rt.c | 57 +-- trunk/kernel/time/tick-sched.c | 11 +- trunk/kernel/user.c | 4 +- trunk/lib/Kconfig.debug | 13 - trunk/scripts/Makefile | 3 +- trunk/scripts/selinux/Makefile | 2 - trunk/scripts/selinux/README | 2 - trunk/scripts/selinux/install_policy.sh | 69 --- trunk/scripts/selinux/mdp/.gitignore | 2 - trunk/scripts/selinux/mdp/Makefile | 5 - trunk/scripts/selinux/mdp/dbus_contexts | 6 - trunk/scripts/selinux/mdp/mdp.c | 242 ----------- trunk/security/Kconfig | 8 - trunk/security/Makefile | 3 +- trunk/security/commoncap.c | 2 +- trunk/security/inode.c | 33 +- trunk/security/security.c | 8 +- trunk/security/selinux/Kconfig | 3 + trunk/security/selinux/avc.c | 2 +- trunk/security/selinux/hooks.c | 62 +-- trunk/security/selinux/include/avc.h | 4 - trunk/security/selinux/include/security.h | 15 +- trunk/security/selinux/ss/avtab.c | 8 +- trunk/security/selinux/ss/conditional.c | 18 +- trunk/security/selinux/ss/conditional.h | 2 +- 
trunk/security/selinux/ss/ebitmap.c | 4 +- trunk/security/selinux/ss/hashtab.c | 6 +- trunk/security/selinux/ss/mls.c | 14 +- trunk/security/selinux/ss/policydb.c | 225 ++-------- trunk/security/selinux/ss/policydb.h | 5 - trunk/security/selinux/ss/services.c | 180 +------- trunk/security/selinux/ss/sidtab.c | 12 +- trunk/security/smack/smack.h | 1 - trunk/security/smack/smack_access.c | 10 +- trunk/security/smack/smackfs.c | 92 ---- 92 files changed, 883 insertions(+), 2239 deletions(-) delete mode 100644 trunk/Documentation/SELinux.txt delete mode 100644 trunk/scripts/selinux/Makefile delete mode 100644 trunk/scripts/selinux/README delete mode 100644 trunk/scripts/selinux/install_policy.sh delete mode 100644 trunk/scripts/selinux/mdp/.gitignore delete mode 100644 trunk/scripts/selinux/mdp/Makefile delete mode 100644 trunk/scripts/selinux/mdp/dbus_contexts delete mode 100644 trunk/scripts/selinux/mdp/mdp.c diff --git a/[refs] b/[refs] index 639195a9998d..5334b3276c8e 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 098ef215b1e87cff51f983bae4e4e1358b932ec9 +refs/heads/master: 4bcb3a37180ee4dffaef8298f373b334a7bedabb diff --git a/trunk/Documentation/DocBook/kernel-api.tmpl b/trunk/Documentation/DocBook/kernel-api.tmpl index 9d0058e788e5..f5696ba9ae96 100644 --- a/trunk/Documentation/DocBook/kernel-api.tmpl +++ b/trunk/Documentation/DocBook/kernel-api.tmpl @@ -283,7 +283,6 @@ X!Earch/x86/kernel/mca_32.c Security Framework !Isecurity/security.c -!Esecurity/inode.c diff --git a/trunk/Documentation/RCU/checklist.txt b/trunk/Documentation/RCU/checklist.txt index 6e253407b3dc..cf5562cbe356 100644 --- a/trunk/Documentation/RCU/checklist.txt +++ b/trunk/Documentation/RCU/checklist.txt @@ -210,7 +210,7 @@ over a rather long period of time, but improvements are always welcome! number of updates per grace period. 9. All RCU list-traversal primitives, which include - rcu_dereference(), list_for_each_entry_rcu(), + rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(), list_for_each_continue_rcu(), and list_for_each_safe_rcu(), must be either within an RCU read-side critical section or must be protected by appropriate update-side locks. RCU diff --git a/trunk/Documentation/RCU/rcuref.txt b/trunk/Documentation/RCU/rcuref.txt index 4202ad093130..451de2ad8329 100644 --- a/trunk/Documentation/RCU/rcuref.txt +++ b/trunk/Documentation/RCU/rcuref.txt @@ -29,9 +29,9 @@ release_referenced() delete() } If this list/array is made lock free using RCU as in changing the -write_lock() in add() and delete() to spin_lock() and changing read_lock() -in search_and_reference() to rcu_read_lock(), the atomic_inc() in -search_and_reference() could potentially hold reference to an element which +write_lock() in add() and delete() to spin_lock and changing read_lock +in search_and_reference to rcu_read_lock(), the atomic_get in +search_and_reference could potentially hold reference to an element which has already been deleted from the list/array. Use atomic_inc_not_zero() in this scenario as follows: @@ -40,20 +40,20 @@ add() search_and_reference() { { alloc_object rcu_read_lock(); ... search_for_element - atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) { - spin_lock(&list_lock); rcu_read_unlock(); + atomic_set(&el->rc, 1); if (atomic_inc_not_zero(&el->rc)) { + write_lock(&list_lock); rcu_read_unlock(); return FAIL; add_element } ... ... - spin_unlock(&list_lock); rcu_read_unlock(); + write_unlock(&list_lock); rcu_read_unlock(); } } 3. 4. release_referenced() delete() { { - ... 
spin_lock(&list_lock); + ... write_lock(&list_lock); if (atomic_dec_and_test(&el->rc)) ... call_rcu(&el->head, el_free); delete_element - ... spin_unlock(&list_lock); + ... write_unlock(&list_lock); } ... if (atomic_dec_and_test(&el->rc)) call_rcu(&el->head, el_free); diff --git a/trunk/Documentation/RCU/whatisRCU.txt b/trunk/Documentation/RCU/whatisRCU.txt index 96170824a717..e04d643a9f57 100644 --- a/trunk/Documentation/RCU/whatisRCU.txt +++ b/trunk/Documentation/RCU/whatisRCU.txt @@ -786,6 +786,8 @@ RCU pointer/list traversal: list_for_each_entry_rcu hlist_for_each_entry_rcu + list_for_each_rcu (to be deprecated in favor of + list_for_each_entry_rcu) list_for_each_continue_rcu (to be deprecated in favor of new list_for_each_entry_continue_rcu) diff --git a/trunk/Documentation/SELinux.txt b/trunk/Documentation/SELinux.txt deleted file mode 100644 index 07eae00f3314..000000000000 --- a/trunk/Documentation/SELinux.txt +++ /dev/null @@ -1,27 +0,0 @@ -If you want to use SELinux, chances are you will want -to use the distro-provided policies, or install the -latest reference policy release from - http://oss.tresys.com/projects/refpolicy - -However, if you want to install a dummy policy for -testing, you can do using 'mdp' provided under -scripts/selinux. Note that this requires the selinux -userspace to be installed - in particular you will -need checkpolicy to compile a kernel, and setfiles and -fixfiles to label the filesystem. - - 1. Compile the kernel with selinux enabled. - 2. Type 'make' to compile mdp. - 3. Make sure that you are not running with - SELinux enabled and a real policy. If - you are, reboot with selinux disabled - before continuing. - 4. Run install_policy.sh: - cd scripts/selinux - sh install_policy.sh - -Step 4 will create a new dummy policy valid for your -kernel, with a single selinux user, role, and type. -It will compile the policy, will set your SELINUXTYPE to -dummy in /etc/selinux/config, install the compiled policy -as 'dummy', and relabel your filesystem. diff --git a/trunk/Documentation/kernel-doc-nano-HOWTO.txt b/trunk/Documentation/kernel-doc-nano-HOWTO.txt index c6841eee9598..0bd32748a467 100644 --- a/trunk/Documentation/kernel-doc-nano-HOWTO.txt +++ b/trunk/Documentation/kernel-doc-nano-HOWTO.txt @@ -168,10 +168,10 @@ if ($#ARGV < 0) { mkdir $ARGV[0],0777; $state = 0; while () { - if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) { + if (/^\.TH \"[^\"]*\" 4 \"([^\"]*)\"/) { if ($state == 1) { close OUT } $state = 1; - $fn = "$ARGV[0]/$1.9"; + $fn = "$ARGV[0]/$1.4"; print STDERR "Creating $fn\n"; open OUT, ">$fn" or die "can't open $fn: $!\n"; print OUT $_; diff --git a/trunk/Documentation/scheduler/sched-design-CFS.txt b/trunk/Documentation/scheduler/sched-design-CFS.txt index 9d8eb553884c..88bcb8767335 100644 --- a/trunk/Documentation/scheduler/sched-design-CFS.txt +++ b/trunk/Documentation/scheduler/sched-design-CFS.txt @@ -1,242 +1,151 @@ - ============= - CFS Scheduler - ============= - -1. OVERVIEW - -CFS stands for "Completely Fair Scheduler," and is the new "desktop" process -scheduler implemented by Ingo Molnar and merged in Linux 2.6.23. It is the -replacement for the previous vanilla scheduler's SCHED_OTHER interactivity -code. - -80% of CFS's design can be summed up in a single sentence: CFS basically models -an "ideal, precise multi-tasking CPU" on real hardware. - -"Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100% physical -power and which can run each task at precise equal speed, in parallel, each at -1/nr_running speed. 
For example: if there are 2 tasks running, then it runs -each at 50% physical power --- i.e., actually in parallel. - -On real hardware, we can run only a single task at once, so we have to -introduce the concept of "virtual runtime." The virtual runtime of a task -specifies when its next timeslice would start execution on the ideal -multi-tasking CPU described above. In practice, the virtual runtime of a task -is its actual runtime normalized to the total number of running tasks. - - - -2. FEW IMPLEMENTATION DETAILS - -In CFS the virtual runtime is expressed and tracked via the per-task -p->se.vruntime (nanosec-unit) value. This way, it's possible to accurately -timestamp and measure the "expected CPU time" a task should have gotten. - -[ small detail: on "ideal" hardware, at any time all tasks would have the same - p->se.vruntime value --- i.e., tasks would execute simultaneously and no task - would ever get "out of balance" from the "ideal" share of CPU time. ] - -CFS's task picking logic is based on this p->se.vruntime value and it is thus -very simple: it always tries to run the task with the smallest p->se.vruntime -value (i.e., the task which executed least so far). CFS always tries to split -up CPU time between runnable tasks as close to "ideal multitasking hardware" as -possible. - -Most of the rest of CFS's design just falls out of this really simple concept, -with a few add-on embellishments like nice levels, multiprocessing and various -algorithm variants to recognize sleepers. - - - -3. THE RBTREE - -CFS's design is quite radical: it does not use the old data structures for the -runqueues, but it uses a time-ordered rbtree to build a "timeline" of future -task execution, and thus has no "array switch" artifacts (by which both the -previous vanilla scheduler and RSDL/SD are affected). - -CFS also maintains the rq->cfs.min_vruntime value, which is a monotonic -increasing value tracking the smallest vruntime among all tasks in the -runqueue. The total amount of work done by the system is tracked using -min_vruntime; that value is used to place newly activated entities on the left -side of the tree as much as possible. - -The total number of running tasks in the runqueue is accounted through the -rq->cfs.load value, which is the sum of the weights of the tasks queued on the -runqueue. - -CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the -p->se.vruntime key (there is a subtraction using rq->cfs.min_vruntime to -account for possible wraparounds). CFS picks the "leftmost" task from this -tree and sticks to it. -As the system progresses forwards, the executed tasks are put into the tree -more and more to the right --- slowly but surely giving a chance for every task -to become the "leftmost task" and thus get on the CPU within a deterministic -amount of time. - -Summing up, CFS works like this: it runs a task a bit, and when the task -schedules (or a scheduler tick happens) the task's CPU usage is "accounted -for": the (small) time it just spent using the physical CPU is added to -p->se.vruntime. Once p->se.vruntime gets high enough so that another task -becomes the "leftmost task" of the time-ordered rbtree it maintains (plus a -small amount of "granularity" distance relative to the leftmost task so that we -do not over-schedule tasks and trash the cache), then the new leftmost task is -picked and the current task is preempted. - - - -4. 
SOME FEATURES OF CFS - -CFS uses nanosecond granularity accounting and does not rely on any jiffies or -other HZ detail. Thus the CFS scheduler has no notion of "timeslices" in the -way the previous scheduler had, and has no heuristics whatsoever. There is -only one central tunable (you have to switch on CONFIG_SCHED_DEBUG): - - /proc/sys/kernel/sched_granularity_ns - -which can be used to tune the scheduler from "desktop" (i.e., low latencies) to -"server" (i.e., good batching) workloads. It defaults to a setting suitable -for desktop workloads. SCHED_BATCH is handled by the CFS scheduler module too. - -Due to its design, the CFS scheduler is not prone to any of the "attacks" that -exist today against the heuristics of the stock scheduler: fiftyp.c, thud.c, -chew.c, ring-test.c, massive_intr.c all work fine and do not impact -interactivity and produce the expected behavior. - -The CFS scheduler has a much stronger handling of nice levels and SCHED_BATCH -than the previous vanilla scheduler: both types of workloads are isolated much -more aggressively. - -SMP load-balancing has been reworked/sanitized: the runqueue-walking -assumptions are gone from the load-balancing code now, and iterators of the -scheduling modules are used. The balancing code got quite a bit simpler as a -result. - - - -5. Scheduling policies - -CFS implements three scheduling policies: - - - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling - policy that is used for regular tasks. - - - SCHED_BATCH: Does not preempt nearly as often as regular tasks - would, thereby allowing tasks to run longer and make better use of - caches but at the cost of interactivity. This is well suited for - batch jobs. - - - SCHED_IDLE: This is even weaker than nice 19, but its not a true - idle timer scheduler in order to avoid to get into priority - inversion problems which would deadlock the machine. - -SCHED_FIFO/_RR are implemented in sched_rt.c and are as specified by -POSIX. - -The command chrt from util-linux-ng 2.13.1.1 can set all of these except -SCHED_IDLE. - - - -6. SCHEDULING CLASSES - -The new CFS scheduler has been designed in such a way to introduce "Scheduling -Classes," an extensible hierarchy of scheduler modules. These modules -encapsulate scheduling policy details and are handled by the scheduler core -without the core code assuming too much about them. - -sched_fair.c implements the CFS scheduler described above. - -sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler way than -the previous vanilla scheduler did. It uses 100 runqueues (for all 100 RT -priority levels, instead of 140 in the previous scheduler) and it needs no -expired array. - -Scheduling classes are implemented through the sched_class structure, which -contains hooks to functions that must be called whenever an interesting event -occurs. - -This is the (partial) list of the hooks: - - - enqueue_task(...) - - Called when a task enters a runnable state. - It puts the scheduling entity (task) into the red-black tree and - increments the nr_running variable. - - - dequeue_tree(...) - - When a task is no longer runnable, this function is called to keep the - corresponding scheduling entity out of the red-black tree. It decrements - the nr_running variable. - - - yield_task(...) - - This function is basically just a dequeue followed by an enqueue, unless the - compat_yield sysctl is turned on; in that case, it places the scheduling - entity at the right-most end of the red-black tree. - - - check_preempt_curr(...) 
- - This function checks if a task that entered the runnable state should - preempt the currently running task. - - - pick_next_task(...) - - This function chooses the most appropriate task eligible to run next. - - - set_curr_task(...) - - This function is called when a task changes its scheduling class or changes - its task group. - - - task_tick(...) - - This function is mostly called from time tick functions; it might lead to - process switch. This drives the running preemption. - - - task_new(...) - - The core scheduler gives the scheduling module an opportunity to manage new - task startup. The CFS scheduling module uses it for group scheduling, while - the scheduling module for a real-time task does not use it. - - - -7. GROUP SCHEDULER EXTENSIONS TO CFS - -Normally, the scheduler operates on individual tasks and strives to provide -fair CPU time to each task. Sometimes, it may be desirable to group tasks and -provide fair CPU time to each such task group. For example, it may be -desirable to first provide fair CPU time to each user on the system and then to -each task belonging to a user. - -CONFIG_GROUP_SCHED strives to achieve exactly that. It lets tasks to be -grouped and divides CPU time fairly among such groups. - -CONFIG_RT_GROUP_SCHED permits to group real-time (i.e., SCHED_FIFO and -SCHED_RR) tasks. - -CONFIG_FAIR_GROUP_SCHED permits to group CFS (i.e., SCHED_NORMAL and -SCHED_BATCH) tasks. - -At present, there are two (mutually exclusive) mechanisms to group tasks for -CPU bandwidth control purposes: - - - Based on user id (CONFIG_USER_SCHED) - - With this option, tasks are grouped according to their user id. - - - Based on "cgroup" pseudo filesystem (CONFIG_CGROUP_SCHED) - - This options needs CONFIG_CGROUPS to be defined, and lets the administrator - create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See - Documentation/cgroups.txt for more information about this filesystem. +This is the CFS scheduler. + +80% of CFS's design can be summed up in a single sentence: CFS basically +models an "ideal, precise multi-tasking CPU" on real hardware. + +"Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100% +physical power and which can run each task at precise equal speed, in +parallel, each at 1/nr_running speed. For example: if there are 2 tasks +running then it runs each at 50% physical power - totally in parallel. + +On real hardware, we can run only a single task at once, so while that +one task runs, the other tasks that are waiting for the CPU are at a +disadvantage - the current task gets an unfair amount of CPU time. In +CFS this fairness imbalance is expressed and tracked via the per-task +p->wait_runtime (nanosec-unit) value. "wait_runtime" is the amount of +time the task should now run on the CPU for it to become completely fair +and balanced. + +( small detail: on 'ideal' hardware, the p->wait_runtime value would + always be zero - no task would ever get 'out of balance' from the + 'ideal' share of CPU time. ) + +CFS's task picking logic is based on this p->wait_runtime value and it +is thus very simple: it always tries to run the task with the largest +p->wait_runtime value. In other words, CFS tries to run the task with +the 'gravest need' for more CPU time. So CFS always tries to split up +CPU time between runnable tasks as close to 'ideal multitasking +hardware' as possible. 
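( To make the pick rule above concrete, here is a minimal sketch in C.
  It is illustrative only: the real scheduler keeps entities in a
  time-ordered rbtree rather than a list, and the struct below is an
  assumption for the example, not the kernel's task_struct. )

    /* Pick the runnable task with the largest wait_runtime, i.e. the
     * task with the 'gravest need' for more CPU time. */
    struct task {
            long long wait_runtime;        /* ns this task is still owed */
            struct task *next;             /* next runnable task */
    };

    static struct task *pick_next_task(struct task *runnable)
    {
            struct task *p, *best = runnable;

            for (p = runnable; p; p = p->next)
                    if (p->wait_runtime > best->wait_runtime)
                            best = p;
            return best;                   /* NULL if nothing is runnable */
    }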
+ +Most of the rest of CFS's design just falls out of this really simple +concept, with a few add-on embellishments like nice levels, +multiprocessing and various algorithm variants to recognize sleepers. + +In practice it works like this: the system runs a task a bit, and when +the task schedules (or a scheduler tick happens) the task's CPU usage is +'accounted for': the (small) time it just spent using the physical CPU +is deducted from p->wait_runtime. [minus the 'fair share' it would have +gotten anyway]. Once p->wait_runtime gets low enough so that another +task becomes the 'leftmost task' of the time-ordered rbtree it maintains +(plus a small amount of 'granularity' distance relative to the leftmost +task so that we do not over-schedule tasks and trash the cache) then the +new leftmost task is picked and the current task is preempted. + +The rq->fair_clock value tracks the 'CPU time a runnable task would have +fairly gotten, had it been runnable during that time'. So by using +rq->fair_clock values we can accurately timestamp and measure the +'expected CPU time' a task should have gotten. All runnable tasks are +sorted in the rbtree by the "rq->fair_clock - p->wait_runtime" key, and +CFS picks the 'leftmost' task and sticks to it. As the system progresses +forwards, newly woken tasks are put into the tree more and more to the +right - slowly but surely giving a chance for every task to become the +'leftmost task' and thus get on the CPU within a deterministic amount of +time. + +Some implementation details: + + - the introduction of Scheduling Classes: an extensible hierarchy of + scheduler modules. These modules encapsulate scheduling policy + details and are handled by the scheduler core without the core + code assuming about them too much. + + - sched_fair.c implements the 'CFS desktop scheduler': it is a + replacement for the vanilla scheduler's SCHED_OTHER interactivity + code. + + I'd like to give credit to Con Kolivas for the general approach here: + he has proven via RSDL/SD that 'fair scheduling' is possible and that + it results in better desktop scheduling. Kudos Con! + + The CFS patch uses a completely different approach and implementation + from RSDL/SD. My goal was to make CFS's interactivity quality exceed + that of RSDL/SD, which is a high standard to meet :-) Testing + feedback is welcome to decide this one way or another. [ and, in any + case, all of SD's logic could be added via a kernel/sched_sd.c module + as well, if Con is interested in such an approach. ] + + CFS's design is quite radical: it does not use runqueues, it uses a + time-ordered rbtree to build a 'timeline' of future task execution, + and thus has no 'array switch' artifacts (by which both the vanilla + scheduler and RSDL/SD are affected). + + CFS uses nanosecond granularity accounting and does not rely on any + jiffies or other HZ detail. Thus the CFS scheduler has no notion of + 'timeslices' and has no heuristics whatsoever. There is only one + central tunable (you have to switch on CONFIG_SCHED_DEBUG): + + /proc/sys/kernel/sched_granularity_ns + + which can be used to tune the scheduler from 'desktop' (low + latencies) to 'server' (good batching) workloads. It defaults to a + setting suitable for desktop workloads. SCHED_BATCH is handled by the + CFS scheduler module too. 
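( The accounting step and the 'fair_clock - wait_runtime' rbtree key
  described above can be sketched as follows. This is a simplification
  under assumed struct and field names; in particular it ignores the
  'fair share' correction and the granularity distance mentioned
  earlier. )

    /* Charge delta_exec ns of CPU use to the running task. Its key
     * 'fair_clock - wait_runtime' grows, so the task drifts to the
     * right of the timeline and another task becomes leftmost. */
    struct rq_sketch { long long fair_clock; };
    struct se_sketch { long long wait_runtime; };

    static long long account_and_key(struct rq_sketch *rq,
                                     struct se_sketch *se,
                                     long long delta_exec)
    {
            se->wait_runtime -= delta_exec;
            return rq->fair_clock - se->wait_runtime;
    }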
+ + Due to its design, the CFS scheduler is not prone to any of the + 'attacks' that exist today against the heuristics of the stock + scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all + work fine and do not impact interactivity and produce the expected + behavior. + + the CFS scheduler has a much stronger handling of nice levels and + SCHED_BATCH: both types of workloads should be isolated much more + agressively than under the vanilla scheduler. + + ( another detail: due to nanosec accounting and timeline sorting, + sched_yield() support is very simple under CFS, and in fact under + CFS sched_yield() behaves much better than under any other + scheduler i have tested so far. ) + + - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler + way than the vanilla scheduler does. It uses 100 runqueues (for all + 100 RT priority levels, instead of 140 in the vanilla scheduler) + and it needs no expired array. + + - reworked/sanitized SMP load-balancing: the runqueue-walking + assumptions are gone from the load-balancing code now, and + iterators of the scheduling modules are used. The balancing code got + quite a bit simpler as a result. + + +Group scheduler extension to CFS +================================ + +Normally the scheduler operates on individual tasks and strives to provide +fair CPU time to each task. Sometimes, it may be desirable to group tasks +and provide fair CPU time to each such task group. For example, it may +be desirable to first provide fair CPU time to each user on the system +and then to each task belonging to a user. + +CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets +SCHED_NORMAL/BATCH tasks be be grouped and divides CPU time fairly among such +groups. At present, there are two (mutually exclusive) mechanisms to group +tasks for CPU bandwidth control purpose: + + - Based on user id (CONFIG_FAIR_USER_SCHED) + In this option, tasks are grouped according to their user id. + - Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED) + This options lets the administrator create arbitrary groups + of tasks, using the "cgroup" pseudo filesystem. See + Documentation/cgroups.txt for more information about this + filesystem. Only one of these options to group tasks can be chosen and not both. -When CONFIG_USER_SCHED is defined, a directory is created in sysfs for each new -user and a "cpu_share" file is added in that directory. +Group scheduler tunables: + +When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for +each new user and a "cpu_share" file is added in that directory. # cd /sys/kernel/uids # cat 512/cpu_share # Display user 512's CPU share @@ -246,14 +155,16 @@ user and a "cpu_share" file is added in that directory. 2048 # -CPU bandwidth between two users is divided in the ratio of their CPU shares. -For example: if you would like user "root" to get twice the bandwidth of user -"guest," then set the cpu_share for both the users such that "root"'s cpu_share -is twice "guest"'s cpu_share. +CPU bandwidth between two users are divided in the ratio of their CPU shares. +For ex: if you would like user "root" to get twice the bandwidth of user +"guest", then set the cpu_share for both the users such that "root"'s +cpu_share is twice "guest"'s cpu_share + -When CONFIG_CGROUP_SCHED is defined, a "cpu.shares" file is created for each -group created using the pseudo filesystem. See example steps below to create -task groups and modify their CPU share using the "cgroups" pseudo filesystem. 
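( The cpu_share tunable can of course also be driven from C rather than
  the shell; a minimal sketch, assuming CONFIG_FAIR_USER_SCHED is
  enabled and user 512 already has running tasks so that the sysfs
  directory exists: )

    #include <stdio.h>

    int main(void)
    {
            /* Give user 512 a share of 2048, as in the shell example
             * above. */
            FILE *f = fopen("/sys/kernel/uids/512/cpu_share", "w");

            if (!f) {
                    perror("cpu_share");
                    return 1;
            }
            fprintf(f, "2048\n");
            return fclose(f) ? 1 : 0;
    }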
+When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created +for each group created using the pseudo filesystem. See example steps +below to create task groups and modify their CPU share using the "cgroups" +pseudo filesystem # mkdir /dev/cpuctl # mount -t cgroup -ocpu none /dev/cpuctl diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 7a03bd5a91a3..8dae4555f10e 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -3649,9 +3649,8 @@ M: jmorris@namei.org P: Eric Paris M: eparis@parisplace.org L: linux-kernel@vger.kernel.org (kernel issues) -L: selinux@tycho.nsa.gov (subscribers-only, general discussion) -W: http://selinuxproject.org -T: git kernel.org:pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git +L: selinux@tycho.nsa.gov (subscribers-only, general discussion) +W: http://www.nsa.gov/selinux S: Supported SENSABLE PHANTOM diff --git a/trunk/arch/alpha/kernel/smp.c b/trunk/arch/alpha/kernel/smp.c index 06b6fdab639f..83df541650fc 100644 --- a/trunk/arch/alpha/kernel/smp.c +++ b/trunk/arch/alpha/kernel/smp.c @@ -149,9 +149,6 @@ smp_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - /* inform the notifiers about the new cpu */ - notify_cpu_starting(cpuid); - /* Must have completely accurate bogos. */ local_irq_enable(); diff --git a/trunk/arch/arm/kernel/smp.c b/trunk/arch/arm/kernel/smp.c index e42a749a56dd..e9842f6767f9 100644 --- a/trunk/arch/arm/kernel/smp.c +++ b/trunk/arch/arm/kernel/smp.c @@ -277,7 +277,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) /* * Enable local interrupts. */ - notify_cpu_starting(cpu); local_irq_enable(); local_fiq_enable(); diff --git a/trunk/arch/cris/arch-v32/kernel/smp.c b/trunk/arch/cris/arch-v32/kernel/smp.c index 52e16c6436f9..952a24b2f5a9 100644 --- a/trunk/arch/cris/arch-v32/kernel/smp.c +++ b/trunk/arch/cris/arch-v32/kernel/smp.c @@ -178,7 +178,6 @@ void __init smp_callin(void) unmask_irq(IPI_INTR_VECT); unmask_irq(TIMER0_INTR_VECT); preempt_disable(); - notify_cpu_starting(cpu); local_irq_enable(); cpu_set(cpu, cpu_online_map); diff --git a/trunk/arch/ia64/kernel/smpboot.c b/trunk/arch/ia64/kernel/smpboot.c index 1dcbb85fc4ee..d8f05e504fbf 100644 --- a/trunk/arch/ia64/kernel/smpboot.c +++ b/trunk/arch/ia64/kernel/smpboot.c @@ -401,7 +401,6 @@ smp_callin (void) spin_lock(&vector_lock); /* Setup the per cpu irq handling data structures */ __setup_vector_irq(cpuid); - notify_cpu_starting(cpuid); cpu_set(cpuid, cpu_online_map); per_cpu(cpu_state, cpuid) = CPU_ONLINE; spin_unlock(&vector_lock); diff --git a/trunk/arch/m32r/kernel/smpboot.c b/trunk/arch/m32r/kernel/smpboot.c index fc2994811f15..2c03ac1d005f 100644 --- a/trunk/arch/m32r/kernel/smpboot.c +++ b/trunk/arch/m32r/kernel/smpboot.c @@ -498,8 +498,6 @@ static void __init smp_online(void) { int cpu_id = smp_processor_id(); - notify_cpu_starting(cpu_id); - local_irq_enable(); /* Get our bogomips. 
*/ diff --git a/trunk/arch/mips/kernel/smp.c b/trunk/arch/mips/kernel/smp.c index 7b59cfb7e602..4410f172b8ab 100644 --- a/trunk/arch/mips/kernel/smp.c +++ b/trunk/arch/mips/kernel/smp.c @@ -121,8 +121,6 @@ asmlinkage __cpuinit void start_secondary(void) cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; - notify_cpu_starting(cpu); - mp_ops->smp_finish(); set_cpu_sibling_map(cpu); diff --git a/trunk/arch/powerpc/kernel/smp.c b/trunk/arch/powerpc/kernel/smp.c index c27b10a1bd79..5337ca7bb649 100644 --- a/trunk/arch/powerpc/kernel/smp.c +++ b/trunk/arch/powerpc/kernel/smp.c @@ -453,7 +453,6 @@ int __devinit start_secondary(void *unused) secondary_cpu_time_init(); ipi_call_lock(); - notify_cpu_starting(cpu); cpu_set(cpu, cpu_online_map); /* Update sibling maps */ base = cpu_first_thread_in_core(cpu); diff --git a/trunk/arch/s390/kernel/smp.c b/trunk/arch/s390/kernel/smp.c index 9e8b1f9b8f4d..00b9b4dec5eb 100644 --- a/trunk/arch/s390/kernel/smp.c +++ b/trunk/arch/s390/kernel/smp.c @@ -585,8 +585,6 @@ int __cpuinit start_secondary(void *cpuvoid) /* Enable pfault pseudo page faults on this cpu. */ pfault_init(); - /* call cpu notifiers */ - notify_cpu_starting(smp_processor_id()); /* Mark this cpu as online */ spin_lock(&call_lock); cpu_set(smp_processor_id(), cpu_online_map); diff --git a/trunk/arch/sh/kernel/smp.c b/trunk/arch/sh/kernel/smp.c index 001778f9adaf..60c50841143e 100644 --- a/trunk/arch/sh/kernel/smp.c +++ b/trunk/arch/sh/kernel/smp.c @@ -82,8 +82,6 @@ asmlinkage void __cpuinit start_secondary(void) preempt_disable(); - notify_cpu_starting(smp_processor_id()); - local_irq_enable(); calibrate_delay(); diff --git a/trunk/arch/sparc/kernel/sun4d_smp.c b/trunk/arch/sparc/kernel/sun4d_smp.c index 446767e8f569..69596402a500 100644 --- a/trunk/arch/sparc/kernel/sun4d_smp.c +++ b/trunk/arch/sparc/kernel/sun4d_smp.c @@ -88,7 +88,6 @@ void __init smp4d_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - notify_cpu_starting(cpuid); /* * Unblock the master CPU _only_ when the scheduler state * of all secondary CPUs will be up-to-date, so after diff --git a/trunk/arch/sparc/kernel/sun4m_smp.c b/trunk/arch/sparc/kernel/sun4m_smp.c index 9964890dc1db..a14a76ac7f36 100644 --- a/trunk/arch/sparc/kernel/sun4m_smp.c +++ b/trunk/arch/sparc/kernel/sun4m_smp.c @@ -71,8 +71,6 @@ void __cpuinit smp4m_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - notify_cpu_starting(cpuid); - /* Get our local ticker going. */ smp_setup_percpu_timer(); diff --git a/trunk/arch/um/kernel/smp.c b/trunk/arch/um/kernel/smp.c index 045772142844..be2d50c3aa95 100644 --- a/trunk/arch/um/kernel/smp.c +++ b/trunk/arch/um/kernel/smp.c @@ -85,7 +85,6 @@ static int idle_proc(void *cpup) while (!cpu_isset(cpu, smp_commenced_mask)) cpu_relax(); - notify_cpu_starting(cpu); cpu_set(cpu, cpu_online_map); default_idle(); return 0; diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index c24c4a487b7c..dd097b835839 100644 --- a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -256,8 +256,7 @@ static u32 get_cur_val(const cpumask_t *mask) * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and * no meaning should be associated with absolute values of these MSRs. 
*/ -static unsigned int get_measured_perf(struct cpufreq_policy *policy, - unsigned int cpu) +static unsigned int get_measured_perf(unsigned int cpu) { union { struct { @@ -327,7 +326,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, #endif - retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; + retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; put_cpu(); set_cpus_allowed_ptr(current, &saved_mask); @@ -786,11 +785,7 @@ static int __init acpi_cpufreq_init(void) if (ret) return ret; - ret = cpufreq_register_driver(&acpi_cpufreq_driver); - if (ret) - free_percpu(acpi_perf_data); - - return ret; + return cpufreq_register_driver(&acpi_cpufreq_driver); } static void __exit acpi_cpufreq_exit(void) @@ -800,6 +795,8 @@ static void __exit acpi_cpufreq_exit(void) cpufreq_unregister_driver(&acpi_cpufreq_driver); free_percpu(acpi_perf_data); + + return; } module_param(acpi_pstate_strict, uint, 0644); diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/trunk/arch/x86/kernel/cpu/cpufreq/elanfreq.c index fe613c93b366..e4a4bf870e94 100644 --- a/trunk/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/trunk/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -25,8 +25,8 @@ #include #include -#include -#include +#include +#include #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ @@ -82,7 +82,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) u8 clockspeed_reg; /* Clock Speed Register */ local_irq_disable(); - outb_p(0x80, REG_CSCIR); + outb_p(0x80,REG_CSCIR); clockspeed_reg = inb_p(REG_CSCDR); local_irq_enable(); @@ -98,10 +98,10 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) } /* 33 MHz is not 32 MHz... */ - if ((clockspeed_reg & 0xE0) == 0xA0) + if ((clockspeed_reg & 0xE0)==0xA0) return 33000; - return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000; + return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); } @@ -117,7 +117,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) * There is no return value. */ -static void elanfreq_set_cpu_state(unsigned int state) +static void elanfreq_set_cpu_state (unsigned int state) { struct cpufreq_freqs freqs; @@ -144,20 +144,20 @@ static void elanfreq_set_cpu_state(unsigned int state) */ local_irq_disable(); - outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */ - outb_p(0x00, REG_CSCDR); + outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ + outb_p(0x00,REG_CSCDR); local_irq_enable(); /* wait till internal pipelines and */ udelay(1000); /* buffers have cleaned up */ local_irq_disable(); /* now, set the CPU clock speed register (0x80) */ - outb_p(0x80, REG_CSCIR); - outb_p(elan_multiplier[state].val80h, REG_CSCDR); + outb_p(0x80,REG_CSCIR); + outb_p(elan_multiplier[state].val80h,REG_CSCDR); /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ - outb_p(0x40, REG_CSCIR); - outb_p(elan_multiplier[state].val40h, REG_CSCDR); + outb_p(0x40,REG_CSCIR); + outb_p(elan_multiplier[state].val40h,REG_CSCDR); udelay(10000); local_irq_enable(); @@ -173,12 +173,12 @@ static void elanfreq_set_cpu_state(unsigned int state) * for the hardware supported by the driver. 
*/ -static int elanfreq_verify(struct cpufreq_policy *policy) +static int elanfreq_verify (struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); } -static int elanfreq_target(struct cpufreq_policy *policy, +static int elanfreq_target (struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -205,7 +205,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) /* capability check */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model != 10)) + (c->x86 != 4) || (c->x86_model!=10)) return -ENODEV; /* max freq */ @@ -213,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) max_freq = elanfreq_get_cpu_frequency(0); /* table init */ - for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { if (elanfreq_table[i].frequency > max_freq) elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; } @@ -224,7 +224,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); if (result) - return result; + return (result); cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); return 0; @@ -260,7 +260,7 @@ __setup("elanfreq=", elanfreq_setup); #endif -static struct freq_attr *elanfreq_attr[] = { +static struct freq_attr* elanfreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -284,9 +284,9 @@ static int __init elanfreq_init(void) /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model != 10)) { + (c->x86 != 4) || (c->x86_model!=10)) { printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); - return -ENODEV; + return -ENODEV; } return cpufreq_register_driver(&elanfreq_driver); } @@ -298,7 +298,7 @@ static void __exit elanfreq_exit(void) } -module_param(max_freq, int, 0444); +module_param (max_freq, int, 0444); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/trunk/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index b5ced806a316..eb9b62b0830c 100644 --- a/trunk/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/trunk/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -15,11 +15,12 @@ #include #include -#include -#include +#include +#include -#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long - as it is unused */ + +#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long + as it is unused */ static unsigned int busfreq; /* FSB, in 10 kHz */ static unsigned int max_multiplier; @@ -52,7 +53,7 @@ static int powernow_k6_get_cpu_multiplier(void) msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue = inl(POWERNOW_IOPORT + 0x8); + invalue=inl(POWERNOW_IOPORT + 0x8); msrval = POWERNOW_IOPORT + 0x0; wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ @@ -66,9 +67,9 @@ static int powernow_k6_get_cpu_multiplier(void) * * Tries to change the PowerNow! 
multiplier */ -static void powernow_k6_set_state(unsigned int best_i) +static void powernow_k6_set_state (unsigned int best_i) { - unsigned long outvalue = 0, invalue = 0; + unsigned long outvalue=0, invalue=0; unsigned long msrval; struct cpufreq_freqs freqs; @@ -89,10 +90,10 @@ static void powernow_k6_set_state(unsigned int best_i) msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue = inl(POWERNOW_IOPORT + 0x8); + invalue=inl(POWERNOW_IOPORT + 0x8); invalue = invalue & 0xf; outvalue = outvalue | invalue; - outl(outvalue , (POWERNOW_IOPORT + 0x8)); + outl(outvalue ,(POWERNOW_IOPORT + 0x8)); msrval = POWERNOW_IOPORT + 0x0; wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ @@ -123,7 +124,7 @@ static int powernow_k6_verify(struct cpufreq_policy *policy) * * sets a new CPUFreq policy */ -static int powernow_k6_target(struct cpufreq_policy *policy, +static int powernow_k6_target (struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -151,7 +152,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) busfreq = cpu_khz / max_multiplier; /* table init */ - for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { if (clock_ratio[i].index > max_multiplier) clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; else @@ -164,7 +165,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); if (result) - return result; + return (result); cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); @@ -175,8 +176,8 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) { unsigned int i; - for (i = 0; i < 8; i++) { - if (i == max_multiplier) + for (i=0; i<8; i++) { + if (i==max_multiplier) powernow_k6_set_state(i); } cpufreq_frequency_table_put_attr(policy->cpu); @@ -188,7 +189,7 @@ static unsigned int powernow_k6_get(unsigned int cpu) return busfreq * powernow_k6_get_cpu_multiplier(); } -static struct freq_attr *powernow_k6_attr[] = { +static struct freq_attr* powernow_k6_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -226,7 +227,7 @@ static int __init powernow_k6_init(void) } if (cpufreq_register_driver(&powernow_k6_driver)) { - release_region(POWERNOW_IOPORT, 16); + release_region (POWERNOW_IOPORT, 16); return -EINVAL; } @@ -242,13 +243,13 @@ static int __init powernow_k6_init(void) static void __exit powernow_k6_exit(void) { cpufreq_unregister_driver(&powernow_k6_driver); - release_region(POWERNOW_IOPORT, 16); + release_region (POWERNOW_IOPORT, 16); } -MODULE_AUTHOR("Arjan van de Ven , Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); -MODULE_LICENSE("GPL"); +MODULE_AUTHOR ("Arjan van de Ven , Dave Jones , Dominik Brodowski "); +MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); +MODULE_LICENSE ("GPL"); module_init(powernow_k6_init); module_exit(powernow_k6_exit); diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index 4e7ccb0e2a9b..45531e3ba194 100644 --- a/trunk/arch/x86/kernel/smpboot.c +++ b/trunk/arch/x86/kernel/smpboot.c @@ -257,7 +257,6 @@ static void __cpuinit smp_callin(void) end_local_APIC_setup(); map_cpu_to_logical_apicid(); - notify_cpu_starting(cpuid); /* * Get our bogomips. 
* diff --git a/trunk/arch/x86/mach-voyager/voyager_smp.c b/trunk/arch/x86/mach-voyager/voyager_smp.c index 199a5f4a873c..ee0fba092157 100644 --- a/trunk/arch/x86/mach-voyager/voyager_smp.c +++ b/trunk/arch/x86/mach-voyager/voyager_smp.c @@ -448,8 +448,6 @@ static void __init start_secondary(void *unused) VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid)); - notify_cpu_starting(cpuid); - /* enable interrupts */ local_irq_enable(); diff --git a/trunk/drivers/char/tpm/Kconfig b/trunk/drivers/char/tpm/Kconfig index f5fc64f89c5c..3738cfa209ff 100644 --- a/trunk/drivers/char/tpm/Kconfig +++ b/trunk/drivers/char/tpm/Kconfig @@ -6,7 +6,6 @@ menuconfig TCG_TPM tristate "TPM Hardware Support" depends on HAS_IOMEM depends on EXPERIMENTAL - select SECURITYFS ---help--- If you have a TPM security chip in your system, which implements the Trusted Computing Group's specification, diff --git a/trunk/drivers/cpufreq/cpufreq.c b/trunk/drivers/cpufreq/cpufreq.c index 31d6f535a79d..8a67f16987db 100644 --- a/trunk/drivers/cpufreq/cpufreq.c +++ b/trunk/drivers/cpufreq/cpufreq.c @@ -1467,27 +1467,25 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - int ret = -EINVAL; + int ret; policy = cpufreq_cpu_get(policy->cpu); if (!policy) - goto no_policy; + return -EINVAL; if (unlikely(lock_policy_rwsem_write(policy->cpu))) - goto fail; + return -EINVAL; ret = __cpufreq_driver_target(policy, target_freq, relation); unlock_policy_rwsem_write(policy->cpu); -fail: cpufreq_cpu_put(policy); -no_policy: return ret; } EXPORT_SYMBOL_GPL(cpufreq_driver_target); -int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) +int __cpufreq_driver_getavg(struct cpufreq_policy *policy) { int ret = 0; @@ -1495,8 +1493,8 @@ int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) if (!policy) return -EINVAL; - if (cpu_online(cpu) && cpufreq_driver->getavg) - ret = cpufreq_driver->getavg(policy, cpu); + if (cpu_online(policy->cpu) && cpufreq_driver->getavg) + ret = cpufreq_driver->getavg(policy->cpu); cpufreq_cpu_put(policy); return ret; @@ -1719,17 +1717,13 @@ int cpufreq_update_policy(unsigned int cpu) { struct cpufreq_policy *data = cpufreq_cpu_get(cpu); struct cpufreq_policy policy; - int ret; + int ret = 0; - if (!data) { - ret = -ENODEV; - goto no_policy; - } + if (!data) + return -ENODEV; - if (unlikely(lock_policy_rwsem_write(cpu))) { - ret = -EINVAL; - goto fail; - } + if (unlikely(lock_policy_rwsem_write(cpu))) + return -EINVAL; dprintk("updating policy for CPU %u\n", cpu); memcpy(&policy, data, sizeof(struct cpufreq_policy)); @@ -1756,9 +1750,7 @@ int cpufreq_update_policy(unsigned int cpu) unlock_policy_rwsem_write(cpu); -fail: cpufreq_cpu_put(data); -no_policy: return ret; } EXPORT_SYMBOL(cpufreq_update_policy); diff --git a/trunk/drivers/cpufreq/cpufreq_conservative.c b/trunk/drivers/cpufreq/cpufreq_conservative.c index e2657837d954..ac0bbf2d234f 100644 --- a/trunk/drivers/cpufreq/cpufreq_conservative.c +++ b/trunk/drivers/cpufreq/cpufreq_conservative.c @@ -460,7 +460,6 @@ static void do_dbs_timer(struct work_struct *work) static inline void dbs_timer_init(void) { - init_timer_deferrable(&dbs_work.timer); schedule_delayed_work(&dbs_work, usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); return; @@ -576,15 +575,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE -static -#endif struct cpufreq_governor cpufreq_gov_conservative = { .name 
= "conservative", .governor = cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }; +EXPORT_SYMBOL(cpufreq_gov_conservative); static int __init cpufreq_gov_dbs_init(void) { diff --git a/trunk/drivers/cpufreq/cpufreq_ondemand.c b/trunk/drivers/cpufreq/cpufreq_ondemand.c index 2ab3c12b88af..33855cb3cf16 100644 --- a/trunk/drivers/cpufreq/cpufreq_ondemand.c +++ b/trunk/drivers/cpufreq/cpufreq_ondemand.c @@ -18,19 +18,13 @@ #include #include #include -#include -#include -#include /* * dbs is used in this file as a shortform for demandbased switching * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) #define DEF_FREQUENCY_UP_THRESHOLD (80) -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) -#define MICRO_FREQUENCY_UP_THRESHOLD (95) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -63,7 +57,6 @@ enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; struct cpu_dbs_info_s { cputime64_t prev_cpu_idle; cputime64_t prev_cpu_wall; - cputime64_t prev_cpu_nice; struct cpufreq_policy *cur_policy; struct delayed_work work; struct cpufreq_frequency_table *freq_table; @@ -93,24 +86,21 @@ static struct workqueue_struct *kondemand_wq; static struct dbs_tuners { unsigned int sampling_rate; unsigned int up_threshold; - unsigned int down_differential; unsigned int ignore_nice; unsigned int powersave_bias; } dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, - .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, }; -static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, - cputime64_t *wall) +static inline cputime64_t get_cpu_idle_time(unsigned int cpu) { cputime64_t idle_time; - cputime64_t cur_wall_time; + cputime64_t cur_jiffies; cputime64_t busy_time; - cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + cur_jiffies = jiffies64_to_cputime64(get_jiffies_64()); busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, kstat_cpu(cpu).cpustat.system); @@ -123,37 +113,7 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, kstat_cpu(cpu).cpustat.nice); } - idle_time = cputime64_sub(cur_wall_time, busy_time); - if (wall) - *wall = cur_wall_time; - - return idle_time; -} - -static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) -{ - u64 idle_time = get_cpu_idle_time_us(cpu, wall); - - if (idle_time == -1ULL) - return get_cpu_idle_time_jiffy(cpu, wall); - - if (dbs_tuners_ins.ignore_nice) { - cputime64_t cur_nice; - unsigned long cur_nice_jiffies; - struct cpu_dbs_info_s *dbs_info; - - dbs_info = &per_cpu(cpu_dbs_info, cpu); - cur_nice = cputime64_sub(kstat_cpu(cpu).cpustat.nice, - dbs_info->prev_cpu_nice); - /* - * Assumption: nice time between sampling periods will be - * less than 2^32 jiffies for 32 bit sys - */ - cur_nice_jiffies = (unsigned long) - cputime64_to_jiffies64(cur_nice); - dbs_info->prev_cpu_nice = kstat_cpu(cpu).cpustat.nice; - return idle_time + jiffies_to_usecs(cur_nice_jiffies); - } + idle_time = cputime64_sub(cur_jiffies, busy_time); return idle_time; } @@ -317,8 +277,8 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, for_each_online_cpu(j) { struct cpu_dbs_info_s *dbs_info; dbs_info = &per_cpu(cpu_dbs_info, j); - dbs_info->prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->prev_cpu_wall); + dbs_info->prev_cpu_idle = get_cpu_idle_time(j); + dbs_info->prev_cpu_wall = get_jiffies_64(); } mutex_unlock(&dbs_mutex); @@ -374,7 +334,9 @@ static struct 
attribute_group dbs_attr_group = { static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) { - unsigned int max_load_freq; + unsigned int idle_ticks, total_ticks; + unsigned int load = 0; + cputime64_t cur_jiffies; struct cpufreq_policy *policy; unsigned int j; @@ -384,7 +346,13 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) this_dbs_info->freq_lo = 0; policy = this_dbs_info->cur_policy; + cur_jiffies = jiffies64_to_cputime64(get_jiffies_64()); + total_ticks = (unsigned int) cputime64_sub(cur_jiffies, + this_dbs_info->prev_cpu_wall); + this_dbs_info->prev_cpu_wall = get_jiffies_64(); + if (!total_ticks) + return; /* * Every sampling_rate, we check, if current idle time is less * than 20% (default), then we try to increase frequency @@ -397,44 +365,27 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * 5% (default) of current frequency */ - /* Get Absolute Load - in terms of freq */ - max_load_freq = 0; - + /* Get Idle Time */ + idle_ticks = UINT_MAX; for_each_cpu_mask_nr(j, policy->cpus) { + cputime64_t total_idle_ticks; + unsigned int tmp_idle_ticks; struct cpu_dbs_info_s *j_dbs_info; - cputime64_t cur_wall_time, cur_idle_time; - unsigned int idle_time, wall_time; - unsigned int load, load_freq; - int freq_avg; j_dbs_info = &per_cpu(cpu_dbs_info, j); - - cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); - - wall_time = (unsigned int) cputime64_sub(cur_wall_time, - j_dbs_info->prev_cpu_wall); - j_dbs_info->prev_cpu_wall = cur_wall_time; - - idle_time = (unsigned int) cputime64_sub(cur_idle_time, + total_idle_ticks = get_cpu_idle_time(j); + tmp_idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks, j_dbs_info->prev_cpu_idle); - j_dbs_info->prev_cpu_idle = cur_idle_time; - - if (unlikely(!wall_time || wall_time < idle_time)) - continue; - - load = 100 * (wall_time - idle_time) / wall_time; - - freq_avg = __cpufreq_driver_getavg(policy, j); - if (freq_avg <= 0) - freq_avg = policy->cur; + j_dbs_info->prev_cpu_idle = total_idle_ticks; - load_freq = load * freq_avg; - if (load_freq > max_load_freq) - max_load_freq = load_freq; + if (tmp_idle_ticks < idle_ticks) + idle_ticks = tmp_idle_ticks; } + if (likely(total_ticks > idle_ticks)) + load = (100 * (total_ticks - idle_ticks)) / total_ticks; /* Check for frequency increase */ - if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { + if (load > dbs_tuners_ins.up_threshold) { /* if we are already at full speed then break out early */ if (!dbs_tuners_ins.powersave_bias) { if (policy->cur == policy->max) @@ -461,13 +412,15 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * can support the current CPU usage without triggering the up * policy. To be safe, we focus 10 points under the threshold. 
*/ - if (max_load_freq < - (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * - policy->cur) { - unsigned int freq_next; - freq_next = max_load_freq / - (dbs_tuners_ins.up_threshold - - dbs_tuners_ins.down_differential); + if (load < (dbs_tuners_ins.up_threshold - 10)) { + unsigned int freq_next, freq_cur; + + freq_cur = __cpufreq_driver_getavg(policy); + if (!freq_cur) + freq_cur = policy->cur; + + freq_next = (freq_cur * load) / + (dbs_tuners_ins.up_threshold - 10); if (!dbs_tuners_ins.powersave_bias) { __cpufreq_driver_target(policy, freq_next, @@ -573,8 +526,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; - j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, - &j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j); + j_dbs_info->prev_cpu_wall = get_jiffies_64(); } this_dbs_info->cpu = cpu; /* @@ -626,42 +579,22 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static -#endif struct cpufreq_governor cpufreq_gov_ondemand = { .name = "ondemand", .governor = cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }; +EXPORT_SYMBOL(cpufreq_gov_ondemand); static int __init cpufreq_gov_dbs_init(void) { - int err; - cputime64_t wall; - u64 idle_time; - int cpu = get_cpu(); - - idle_time = get_cpu_idle_time_us(cpu, &wall); - put_cpu(); - if (idle_time != -1ULL) { - /* Idle micro accounting is supported. Use finer thresholds */ - dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; - dbs_tuners_ins.down_differential = - MICRO_FREQUENCY_DOWN_DIFFERENTIAL; - } - kondemand_wq = create_workqueue("kondemand"); if (!kondemand_wq) { printk(KERN_ERR "Creation of kondemand failed\n"); return -EFAULT; } - err = cpufreq_register_governor(&cpufreq_gov_ondemand); - if (err) - destroy_workqueue(kondemand_wq); - - return err; + return cpufreq_register_governor(&cpufreq_gov_ondemand); } static void __exit cpufreq_gov_dbs_exit(void) diff --git a/trunk/drivers/cpufreq/cpufreq_performance.c b/trunk/drivers/cpufreq/cpufreq_performance.c index 7e2e515087f8..e8e1451ef1c1 100644 --- a/trunk/drivers/cpufreq/cpufreq_performance.c +++ b/trunk/drivers/cpufreq/cpufreq_performance.c @@ -36,14 +36,12 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, return 0; } -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE -static -#endif struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .governor = cpufreq_governor_performance, .owner = THIS_MODULE, }; +EXPORT_SYMBOL(cpufreq_gov_performance); static int __init cpufreq_gov_performance_init(void) diff --git a/trunk/drivers/cpufreq/cpufreq_powersave.c b/trunk/drivers/cpufreq/cpufreq_powersave.c index e6db5faf3eb1..88d2f44fba48 100644 --- a/trunk/drivers/cpufreq/cpufreq_powersave.c +++ b/trunk/drivers/cpufreq/cpufreq_powersave.c @@ -35,14 +35,12 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE -static -#endif struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .governor = cpufreq_governor_powersave, .owner = THIS_MODULE, }; +EXPORT_SYMBOL(cpufreq_gov_powersave); static int __init cpufreq_gov_powersave_init(void) { diff --git a/trunk/drivers/cpufreq/cpufreq_userspace.c b/trunk/drivers/cpufreq/cpufreq_userspace.c index 1442bbada053..32244aa7cc0c 100644 --- a/trunk/drivers/cpufreq/cpufreq_userspace.c 
+++ b/trunk/drivers/cpufreq/cpufreq_userspace.c @@ -187,9 +187,6 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE -static -#endif struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .governor = cpufreq_governor_userspace, @@ -197,6 +194,7 @@ struct cpufreq_governor cpufreq_gov_userspace = { .show_setspeed = show_speed, .owner = THIS_MODULE, }; +EXPORT_SYMBOL(cpufreq_gov_userspace); static int __init cpufreq_gov_userspace_init(void) { diff --git a/trunk/drivers/s390/cio/qdio.h b/trunk/drivers/s390/cio/qdio.h index c1a70985abfa..af867731a5f4 100644 --- a/trunk/drivers/s390/cio/qdio.h +++ b/trunk/drivers/s390/cio/qdio.h @@ -16,6 +16,14 @@ #define QDIO_BUSY_BIT_GIVE_UP 2000000 /* 2 seconds = eternity */ #define QDIO_INPUT_THRESHOLD 500 /* 500 microseconds */ +/* + * If an asynchronous HiperSockets queue runs full, waiting up to 10 seconds + * for the timer to give transmitted skbs back to the stack is too long. + * Therefore polling is started once the multicast queue is filled by more + * than 50 percent. + */ +#define QDIO_IQDIO_POLL_LVL 65 /* HS multicast queue */ + enum qdio_irq_states { QDIO_IRQ_STATE_INACTIVE, QDIO_IRQ_STATE_ESTABLISHED, diff --git a/trunk/drivers/s390/cio/qdio_main.c b/trunk/drivers/s390/cio/qdio_main.c index e6eabc853422..9307512132fe 100644 --- a/trunk/drivers/s390/cio/qdio_main.c +++ b/trunk/drivers/s390/cio/qdio_main.c @@ -851,6 +851,12 @@ static void __qdio_outbound_processing(struct qdio_q *q) if (queue_type(q) == QDIO_IQDIO_QFMT && !multicast_outbound(q)) return; + if ((queue_type(q) == QDIO_IQDIO_QFMT) && + (atomic_read(&q->nr_buf_used)) > QDIO_IQDIO_POLL_LVL) { + tasklet_schedule(&q->tasklet); + return; + } + if (q->u.out.pci_out_enabled) return; diff --git a/trunk/include/linux/compiler.h b/trunk/include/linux/compiler.h index 8322141ee480..c8bd2daf95ec 100644 --- a/trunk/include/linux/compiler.h +++ b/trunk/include/linux/compiler.h @@ -190,9 +190,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); * ACCESS_ONCE() in different C statements. * * This macro does absolutely -nothing- to prevent the CPU from reordering, - * merging, or refetching absolutely anything at any time. Its main intended - * use is to mediate communication between process-level code and irq/NMI - * handlers, all running on the same CPU. + * merging, or refetching absolutely anything at any time. */ #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) diff --git a/trunk/include/linux/completion.h b/trunk/include/linux/completion.h index 4a6b604ef7e4..02ef8835999c 100644 --- a/trunk/include/linux/completion.h +++ b/trunk/include/linux/completion.h @@ -10,18 +10,6 @@ #include -/** - * struct completion - structure used to maintain state for a "completion" - * - * This is the opaque structure used to maintain the state for a "completion". - * Completions currently use a FIFO to queue threads that have to wait for - * the "completion" event. - * - * See also: complete(), wait_for_completion() (and friends _timeout, - * _interruptible, _interruptible_timeout, and _killable), init_completion(), - * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and - * INIT_COMPLETION().
- */ struct completion { unsigned int done; wait_queue_head_t wait; @@ -33,14 +21,6 @@ struct completion { #define COMPLETION_INITIALIZER_ONSTACK(work) \ ({ init_completion(&work); work; }) -/** - * DECLARE_COMPLETION: - declare and initialize a completion structure - * @work: identifier for the completion structure - * - * This macro declares and initializes a completion structure. Generally used - * for static declarations. You should use the _ONSTACK variant for automatic - * variables. - */ #define DECLARE_COMPLETION(work) \ struct completion work = COMPLETION_INITIALIZER(work) @@ -49,13 +29,6 @@ struct completion { * completions - so we use the _ONSTACK() variant for those that * are on the kernel stack: */ -/** - * DECLARE_COMPLETION_ONSTACK: - declare and initialize a completion structure - * @work: identifier for the completion structure - * - * This macro declares and initializes a completion structure on the kernel - * stack. - */ #ifdef CONFIG_LOCKDEP # define DECLARE_COMPLETION_ONSTACK(work) \ struct completion work = COMPLETION_INITIALIZER_ONSTACK(work) @@ -63,13 +36,6 @@ struct completion { # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) #endif -/** - * init_completion: - Initialize a dynamically allocated completion - * @x: completion structure that is to be initialized - * - * This inline function will initialize a dynamically created completion - * structure. - */ static inline void init_completion(struct completion *x) { x->done = 0; @@ -89,13 +55,6 @@ extern bool completion_done(struct completion *x); extern void complete(struct completion *); extern void complete_all(struct completion *); -/** - * INIT_COMPLETION: - reinitialize a completion structure - * @x: completion structure to be reinitialized - * - * This macro should be used to reinitialize a completion structure so it can - * be reused. This is especially important after complete_all() is used. 
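For reference, since the kernel-doc for the completion API is being removed above, a minimal usage sketch in kernel C (the names my_done, my_thread_fn and my_wait are hypothetical, not part of this patch):

#include <linux/completion.h>

static DECLARE_COMPLETION(my_done);	/* statically initialized */

static int my_thread_fn(void *unused)
{
	/* ... produce the event ... */
	complete(&my_done);	/* wakes one waiter; waiters queue FIFO */
	return 0;
}

static void my_wait(void)
{
	/* block (uninterruptibly, no timeout) until my_thread_fn() signals */
	wait_for_completion(&my_done);

	/* re-arm before reuse; mandatory after complete_all() */
	INIT_COMPLETION(my_done);
}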
- */ #define INIT_COMPLETION(x) ((x).done = 0) diff --git a/trunk/include/linux/cpu.h b/trunk/include/linux/cpu.h index c2747ac2ae43..d7faf8808497 100644 --- a/trunk/include/linux/cpu.h +++ b/trunk/include/linux/cpu.h @@ -69,7 +69,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) #endif int cpu_up(unsigned int cpu); -void notify_cpu_starting(unsigned int cpu); extern void cpu_hotplug_init(void); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); diff --git a/trunk/include/linux/cpufreq.h b/trunk/include/linux/cpufreq.h index 1ee608fd7b77..6fd5668aa572 100644 --- a/trunk/include/linux/cpufreq.h +++ b/trunk/include/linux/cpufreq.h @@ -187,8 +187,7 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation); -extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy, - unsigned int cpu); +extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); @@ -227,9 +226,7 @@ struct cpufreq_driver { unsigned int (*get) (unsigned int cpu); /* optional */ - unsigned int (*getavg) (struct cpufreq_policy *policy, - unsigned int cpu); - + unsigned int (*getavg) (unsigned int cpu); int (*exit) (struct cpufreq_policy *policy); int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); diff --git a/trunk/include/linux/notifier.h b/trunk/include/linux/notifier.h index b86fa2ffca0c..da2698b0fdd1 100644 --- a/trunk/include/linux/notifier.h +++ b/trunk/include/linux/notifier.h @@ -213,16 +213,9 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, - * not handling interrupts, soon dead. - * Called on the dying cpu, interrupts - * are already disabled. Must not - * sleep, must not fail */ + * not handling interrupts, soon dead */ #define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug * lock is dropped */ -#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. - * Called on the new cpu, just before - * enabling interrupts. 
Must not sleep, - * must not fail */ /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend * operation in progress @@ -236,7 +229,6 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) #define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) -#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ diff --git a/trunk/include/linux/proportions.h b/trunk/include/linux/proportions.h index cf793bbbd05e..5afc1b23346d 100644 --- a/trunk/include/linux/proportions.h +++ b/trunk/include/linux/proportions.h @@ -104,8 +104,8 @@ struct prop_local_single { * snapshot of the last seen global state * and a lock protecting this state */ - unsigned long period; int shift; + unsigned long period; spinlock_t lock; /* protect the snapshot state */ }; diff --git a/trunk/include/linux/rcuclassic.h b/trunk/include/linux/rcuclassic.h index 5f89b62e6983..4ab843622727 100644 --- a/trunk/include/linux/rcuclassic.h +++ b/trunk/include/linux/rcuclassic.h @@ -40,21 +40,12 @@ #include #include -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* Global control variables for rcupdate callback mechanism. */ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ - long pending; /* Number of the last pending batch */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - unsigned long gp_start; /* Time at which GP started in jiffies. */ - unsigned long jiffies_stall; - /* Time at which to check for CPU stalls. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + int next_pending; /* Is the next batch already waiting? */ int signaled; @@ -75,7 +66,11 @@ static inline int rcu_batch_after(long a, long b) return (a - b) > 0; } -/* Per-CPU data for Read-Copy UPdate. */ +/* + * Per-CPU data for Read-Copy UPdate. + * nxtlist - new callbacks are added here + * curlist - current batch for which quiescent cycle started if any + */ struct rcu_data { /* 1) quiescent state handling : */ long quiescbatch; /* Batch # for grace period */ @@ -83,24 +78,12 @@ struct rcu_data { int qs_pending; /* core waits for quiesc state */ /* 2) batch handling */ - /* - * if nxtlist is not NULL, then: - * batch: - * The batch # for the last entry of nxtlist - * [*nxttail[1], NULL = *nxttail[2]): - * Entries that batch # <= batch - * [*nxttail[0], *nxttail[1]): - * Entries that batch # <= batch - 1 - * [nxtlist, *nxttail[0]): - * Entries that batch # <= batch - 2 - * The grace period for these entries has completed, and - * the other grace-period-completed entries may be moved - * here temporarily in rcu_process_callbacks(). 
- */ - long batch; + long batch; /* Batch # for current RCU batch */ struct rcu_head *nxtlist; - struct rcu_head **nxttail[3]; + struct rcu_head **nxttail; long qlen; /* # of queued callbacks */ + struct rcu_head *curlist; + struct rcu_head **curtail; struct rcu_head *donelist; struct rcu_head **donetail; long blimit; /* Upper limit on a processed batch */ diff --git a/trunk/include/linux/rculist.h b/trunk/include/linux/rculist.h index e649bd3f2c97..eb4443c7e05b 100644 --- a/trunk/include/linux/rculist.h +++ b/trunk/include/linux/rculist.h @@ -198,6 +198,20 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } +/** + * list_for_each_rcu - iterate over an rcu-protected list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + prefetch(pos->next), pos != (head); \ + pos = rcu_dereference(pos->next)) + #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ diff --git a/trunk/include/linux/rcupdate.h b/trunk/include/linux/rcupdate.h index 86f1f5e43e33..e8b4039cfb2f 100644 --- a/trunk/include/linux/rcupdate.h +++ b/trunk/include/linux/rcupdate.h @@ -132,26 +132,6 @@ struct rcu_head { */ #define rcu_read_unlock_bh() __rcu_read_unlock_bh() -/** - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section - * - * Should be used with either - * - synchronize_sched() - * or - * - call_rcu_sched() and rcu_barrier_sched() - * on the write-side to insure proper synchronization. - */ -#define rcu_read_lock_sched() preempt_disable() - -/* - * rcu_read_unlock_sched - marks the end of a RCU-classic critical section - * - * See rcu_read_lock_sched for more information. - */ -#define rcu_read_unlock_sched() preempt_enable() - - - /** * rcu_dereference - fetch an RCU-protected pointer in an * RCU read-side critical section. This pointer may later diff --git a/trunk/include/linux/rcupreempt.h b/trunk/include/linux/rcupreempt.h index 3e05c09b54a2..0967f03b0705 100644 --- a/trunk/include/linux/rcupreempt.h +++ b/trunk/include/linux/rcupreempt.h @@ -57,13 +57,7 @@ static inline void rcu_qsctr_inc(int cpu) rdssp->sched_qs++; } #define rcu_bh_qsctr_inc(cpu) - -/* - * Someone might want to pass call_rcu_bh as a function pointer. - * So this needs to just be a rename and not a macro function. - * (no parentheses) - */ -#define call_rcu_bh call_rcu +#define call_rcu_bh(head, rcu) call_rcu(head, rcu) /** * call_rcu_sched - Queue RCU callback for invocation after sched grace period. 
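As a reader-side illustration of the list_for_each_rcu() primitive restored above, a sketch with a hypothetical element type (not part of this patch). The traversal must sit inside rcu_read_lock()/rcu_read_unlock(), while writers use the _rcu mutation primitives such as list_add_rcu():

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct my_node {
	int key;
	struct list_head list;	/* linked into my_list */
};

static LIST_HEAD(my_list);

static int my_lookup(int key)
{
	struct list_head *pos;
	int found = 0;

	rcu_read_lock();
	list_for_each_rcu(pos, &my_list) {
		struct my_node *n = list_entry(pos, struct my_node, list);

		if (n->key == key) {
			found = 1;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}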
@@ -117,6 +111,7 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; #ifdef CONFIG_NO_HZ +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { @@ -131,8 +126,8 @@ static inline void rcu_exit_nohz(void) { static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); - __get_cpu_var(rcu_dyntick_sched).dynticks++; smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ + __get_cpu_var(rcu_dyntick_sched).dynticks++; WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), &rs); } diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index 5d0819ee442a..3d9120c5ad15 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -451,8 +451,8 @@ struct signal_struct { * - everyone except group_exit_task is stopped during signal delivery * of fatal signals, group_exit_task processes the signal. */ - int notify_count; struct task_struct *group_exit_task; + int notify_count; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; @@ -824,9 +824,6 @@ struct sched_domain { unsigned int ttwu_move_affine; unsigned int ttwu_move_balance; #endif -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif }; extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, @@ -900,7 +897,7 @@ struct sched_class { void (*yield_task) (struct rq *rq); int (*select_task_rq)(struct task_struct *p, int sync); - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync); + void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); struct task_struct * (*pick_next_task) (struct rq *rq); void (*put_prev_task) (struct rq *rq, struct task_struct *p); @@ -1013,8 +1010,8 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; - unsigned long timeout; unsigned int time_slice; + unsigned long timeout; int nr_cpus_allowed; struct sched_rt_entity *back; diff --git a/trunk/include/linux/security.h b/trunk/include/linux/security.h index f5c4a51eb42e..80c4d002864c 100644 --- a/trunk/include/linux/security.h +++ b/trunk/include/linux/security.h @@ -1560,6 +1560,11 @@ struct security_operations { extern int security_init(void); extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); +extern struct dentry *securityfs_create_file(const char *name, mode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); +extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); +extern void securityfs_remove(struct dentry *dentry); /* Security operations */ int security_ptrace_may_access(struct task_struct *child, unsigned int mode); @@ -2419,6 +2424,25 @@ static inline int security_netlink_recv(struct sk_buff *skb, int cap) return cap_netlink_recv(skb, cap); } +static inline struct dentry *securityfs_create_dir(const char *name, + struct dentry *parent) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *securityfs_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops) +{ + return ERR_PTR(-ENODEV); +} + +static inline void securityfs_remove(struct dentry *dentry) +{ +} + static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { return -EOPNOTSUPP; @@ -2782,35 +2806,5 @@ static inline void security_audit_rule_free(void *lsmrule) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_AUDIT */ -#ifdef CONFIG_SECURITYFS 
- -extern struct dentry *securityfs_create_file(const char *name, mode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); -extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); -extern void securityfs_remove(struct dentry *dentry); - -#else /* CONFIG_SECURITYFS */ - -static inline struct dentry *securityfs_create_dir(const char *name, - struct dentry *parent) -{ - return ERR_PTR(-ENODEV); -} - -static inline struct dentry *securityfs_create_file(const char *name, - mode_t mode, - struct dentry *parent, - void *data, - const struct file_operations *fops) -{ - return ERR_PTR(-ENODEV); -} - -static inline void securityfs_remove(struct dentry *dentry) -{} - -#endif - #endif /* ! __LINUX_SECURITY_H */ diff --git a/trunk/include/linux/tick.h b/trunk/include/linux/tick.h index 98921a3e1aa8..8cf8cfe2cc97 100644 --- a/trunk/include/linux/tick.h +++ b/trunk/include/linux/tick.h @@ -126,7 +126,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) return len; } static inline void tick_nohz_stop_idle(int cpu) { } -static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } +static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; } # endif /* !NO_HZ */ #endif diff --git a/trunk/kernel/cpu.c b/trunk/kernel/cpu.c index 86d49045daed..f17e9854c246 100644 --- a/trunk/kernel/cpu.c +++ b/trunk/kernel/cpu.c @@ -199,14 +199,13 @@ static int __ref take_cpu_down(void *_param) struct take_cpu_down_param *param = _param; int err; + raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, + param->hcpu); /* Ensure this CPU doesn't handle any more interrupts. */ err = __cpu_disable(); if (err < 0) return err; - raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, - param->hcpu); - /* Force idle task to run as soon as we yield: it should immediately notice cpu is offline and die quickly. */ sched_idle_next(); @@ -454,25 +453,6 @@ void __ref enable_nonboot_cpus(void) } #endif /* CONFIG_PM_SLEEP_SMP */ -/** - * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers - * @cpu: cpu that just started - * - * This function calls the cpu_chain notifiers with CPU_STARTING. - * It must be called by the arch code on the new cpu, before the new cpu - * enables interrupts and before the "boot" cpu returns from __cpu_up(). - */ -void notify_cpu_starting(unsigned int cpu) -{ - unsigned long val = CPU_STARTING; - -#ifdef CONFIG_PM_SLEEP_SMP - if (cpu_isset(cpu, frozen_cpus)) - val = CPU_STARTING_FROZEN; -#endif /* CONFIG_PM_SLEEP_SMP */ - raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu); -} - #endif /* CONFIG_SMP */ /* diff --git a/trunk/kernel/cpuset.c b/trunk/kernel/cpuset.c index eab7bd6628e0..827cd9adccb2 100644 --- a/trunk/kernel/cpuset.c +++ b/trunk/kernel/cpuset.c @@ -1921,7 +1921,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) * that has tasks along with an empty 'mems'. But if we did see such * a cpuset, we'd handle it just like we do if its 'cpus' was empty. 
*/ -static void scan_for_empty_cpusets(struct cpuset *root) +static void scan_for_empty_cpusets(const struct cpuset *root) { LIST_HEAD(queue); struct cpuset *cp; /* scans cpusets being updated */ diff --git a/trunk/kernel/rcuclassic.c b/trunk/kernel/rcuclassic.c index 37f72e551542..aad93cdc9f68 100644 --- a/trunk/kernel/rcuclassic.c +++ b/trunk/kernel/rcuclassic.c @@ -47,7 +47,6 @@ #include #include #include -#include #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -61,14 +60,12 @@ EXPORT_SYMBOL_GPL(rcu_lock_map); static struct rcu_ctrlblk rcu_ctrlblk = { .cur = -300, .completed = -300, - .pending = -300, .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), .cpumask = CPU_MASK_NONE, }; static struct rcu_ctrlblk rcu_bh_ctrlblk = { .cur = -300, .completed = -300, - .pending = -300, .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock), .cpumask = CPU_MASK_NONE, }; @@ -86,10 +83,7 @@ static void force_quiescent_state(struct rcu_data *rdp, { int cpu; cpumask_t cpumask; - unsigned long flags; - set_need_resched(); - spin_lock_irqsave(&rcp->lock, flags); if (unlikely(!rcp->signaled)) { rcp->signaled = 1; /* @@ -115,7 +109,6 @@ static void force_quiescent_state(struct rcu_data *rdp, for_each_cpu_mask_nr(cpu, cpumask) smp_send_reschedule(cpu); } - spin_unlock_irqrestore(&rcp->lock, flags); } #else static inline void force_quiescent_state(struct rcu_data *rdp, @@ -125,126 +118,6 @@ static inline void force_quiescent_state(struct rcu_data *rdp, } #endif -static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - long batch; - - head->next = NULL; - smp_mb(); /* Read of rcu->cur must happen after any change by caller. */ - - /* - * Determine the batch number of this callback. - * - * Using ACCESS_ONCE to avoid the following error when gcc eliminates - * local variable "batch" and emits codes like this: - * 1) rdp->batch = rcp->cur + 1 # gets old value - * ...... - * 2)rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value - * then [*nxttail[0], *nxttail[1]) may contain callbacks - * that batch# = rdp->batch, see the comment of struct rcu_data. - */ - batch = ACCESS_ONCE(rcp->cur) + 1; - - if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) { - /* process callbacks */ - rdp->nxttail[0] = rdp->nxttail[1]; - rdp->nxttail[1] = rdp->nxttail[2]; - if (rcu_batch_after(batch - 1, rdp->batch)) - rdp->nxttail[0] = rdp->nxttail[2]; - } - - rdp->batch = batch; - *rdp->nxttail[2] = head; - rdp->nxttail[2] = &head->next; - - if (unlikely(++rdp->qlen > qhimark)) { - rdp->blimit = INT_MAX; - force_quiescent_state(rdp, &rcu_ctrlblk); - } -} - -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - -static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp) -{ - rcp->gp_start = jiffies; - rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; -} - -static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) -{ - int cpu; - long delta; - unsigned long flags; - - /* Only let one CPU complain about others per time interval. */ - - spin_lock_irqsave(&rcp->lock, flags); - delta = jiffies - rcp->jiffies_stall; - if (delta < 2 || rcp->cur != rcp->completed) { - spin_unlock_irqrestore(&rcp->lock, flags); - return; - } - rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; - spin_unlock_irqrestore(&rcp->lock, flags); - - /* OK, time to rat on our buddy... 
*/ - - printk(KERN_ERR "RCU detected CPU stalls:"); - for_each_possible_cpu(cpu) { - if (cpu_isset(cpu, rcp->cpumask)) - printk(" %d", cpu); - } - printk(" (detected by %d, t=%ld jiffies)\n", - smp_processor_id(), (long)(jiffies - rcp->gp_start)); -} - -static void print_cpu_stall(struct rcu_ctrlblk *rcp) -{ - unsigned long flags; - - printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", - smp_processor_id(), jiffies, - jiffies - rcp->gp_start); - dump_stack(); - spin_lock_irqsave(&rcp->lock, flags); - if ((long)(jiffies - rcp->jiffies_stall) >= 0) - rcp->jiffies_stall = - jiffies + RCU_SECONDS_TILL_STALL_RECHECK; - spin_unlock_irqrestore(&rcp->lock, flags); - set_need_resched(); /* kick ourselves to get things going. */ -} - -static void check_cpu_stall(struct rcu_ctrlblk *rcp) -{ - long delta; - - delta = jiffies - rcp->jiffies_stall; - if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) { - - /* We haven't checked in, so go dump stack. */ - print_cpu_stall(rcp); - - } else if (rcp->cur != rcp->completed && delta >= 2) { - - /* They had two seconds to dump stack, so complain. */ - print_other_cpu_stall(rcp); - } -} - -#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp) -{ -} - -static inline void check_cpu_stall(struct rcu_ctrlblk *rcp) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - /** * call_rcu - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. @@ -260,10 +133,18 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { unsigned long flags; + struct rcu_data *rdp; head->func = func; + head->next = NULL; local_irq_save(flags); - __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data)); + rdp = &__get_cpu_var(rcu_data); + *rdp->nxttail = head; + rdp->nxttail = &head->next; + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_ctrlblk); + } local_irq_restore(flags); } EXPORT_SYMBOL_GPL(call_rcu); @@ -288,10 +169,20 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { unsigned long flags; + struct rcu_data *rdp; head->func = func; + head->next = NULL; local_irq_save(flags); - __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); + rdp = &__get_cpu_var(rcu_bh_data); + *rdp->nxttail = head; + rdp->nxttail = &head->next; + + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_bh_ctrlblk); + } + local_irq_restore(flags); } EXPORT_SYMBOL_GPL(call_rcu_bh); @@ -320,6 +211,12 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); static inline void raise_rcu_softirq(void) { raise_softirq(RCU_SOFTIRQ); + /* + * The smp_mb() here is required to ensure that this cpu's + * __rcu_process_callbacks() reads the most recently updated + * value of rcu->cur. 
+ */ + smp_mb(); } /* @@ -328,7 +225,6 @@ static inline void raise_rcu_softirq(void) */ static void rcu_do_batch(struct rcu_data *rdp) { - unsigned long flags; struct rcu_head *next, *list; int count = 0; @@ -343,9 +239,9 @@ static void rcu_do_batch(struct rcu_data *rdp) } rdp->donelist = list; - local_irq_save(flags); + local_irq_disable(); rdp->qlen -= count; - local_irq_restore(flags); + local_irq_enable(); if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) rdp->blimit = blimit; @@ -373,7 +269,6 @@ static void rcu_do_batch(struct rcu_data *rdp) * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace * period (if necessary). */ - /* * Register a new batch of callbacks, and start it up if there is currently no * active batch and the batch to be registered has not already occurred. @@ -381,10 +276,15 @@ static void rcu_do_batch(struct rcu_data *rdp) */ static void rcu_start_batch(struct rcu_ctrlblk *rcp) { - if (rcp->cur != rcp->pending && + if (rcp->next_pending && rcp->completed == rcp->cur) { + rcp->next_pending = 0; + /* + * next_pending == 0 must be visible in + * __rcu_process_callbacks() before it can see new value of cur. + */ + smp_wmb(); rcp->cur++; - record_gp_stall_check_time(rcp); /* * Accessing nohz_cpu_mask before incrementing rcp->cur needs a @@ -422,8 +322,6 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { - unsigned long flags; - if (rdp->quiescbatch != rcp->cur) { /* start new grace period: */ rdp->qs_pending = 1; @@ -447,7 +345,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, return; rdp->qs_pending = 0; - spin_lock_irqsave(&rcp->lock, flags); + spin_lock(&rcp->lock); /* * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync * during cpu startup. Ignore the quiescent state. @@ -455,7 +353,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, if (likely(rdp->quiescbatch == rcp->cur)) cpu_quiet(rdp->cpu, rcp); - spin_unlock_irqrestore(&rcp->lock, flags); + spin_unlock(&rcp->lock); } @@ -466,38 +364,33 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, * which is dead and hence not processing interrupts. 
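The next_pending/cur handshake restored in rcu_start_batch() above, and read back under smp_rmb() in __rcu_process_callbacks() below, is a plain wmb/rmb pairing. Stripped of the surrounding locking, the idea is (a sketch; the barrier declarations are assumed from this era's <asm/system.h>):

#include <asm/system.h>	/* smp_wmb()/smp_rmb() */

static int next_pending;
static long cur;

static void writer(void)	/* cf. rcu_start_batch() */
{
	next_pending = 0;
	smp_wmb();	/* order: next_pending = 0 before the new cur */
	cur++;
}

static void reader(void)	/* cf. __rcu_process_callbacks() */
{
	long batch = cur + 1;

	smp_rmb();	/* pairs with the smp_wmb() in writer() */
	if (!next_pending) {
		/*
		 * Having read the incremented cur, the rmb/wmb pair
		 * guarantees we also observe next_pending == 0 here,
		 * never a stale 1.
		 */
	}
	(void)batch;
}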
*/ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, - struct rcu_head **tail, long batch) + struct rcu_head **tail) { - unsigned long flags; - - if (list) { - local_irq_save(flags); - this_rdp->batch = batch; - *this_rdp->nxttail[2] = list; - this_rdp->nxttail[2] = tail; - local_irq_restore(flags); - } + local_irq_disable(); + *this_rdp->nxttail = list; + if (list) + this_rdp->nxttail = tail; + local_irq_enable(); } static void __rcu_offline_cpu(struct rcu_data *this_rdp, struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { - unsigned long flags; - - /* - * if the cpu going offline owns the grace period + /* if the cpu going offline owns the grace period * we can block indefinitely waiting for it, so flush * it here */ - spin_lock_irqsave(&rcp->lock, flags); + spin_lock_bh(&rcp->lock); if (rcp->cur != rcp->completed) cpu_quiet(rdp->cpu, rcp); - rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1); - rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1); - spin_unlock(&rcp->lock); + spin_unlock_bh(&rcp->lock); + rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); + rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); + rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); + local_irq_disable(); this_rdp->qlen += rdp->qlen; - local_irq_restore(flags); + local_irq_enable(); } static void rcu_offline_cpu(int cpu) @@ -527,52 +420,38 @@ static void rcu_offline_cpu(int cpu) static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { - unsigned long flags; - long completed_snap; + if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) { + *rdp->donetail = rdp->curlist; + rdp->donetail = rdp->curtail; + rdp->curlist = NULL; + rdp->curtail = &rdp->curlist; + } - if (rdp->nxtlist) { - local_irq_save(flags); - completed_snap = ACCESS_ONCE(rcp->completed); + if (rdp->nxtlist && !rdp->curlist) { + local_irq_disable(); + rdp->curlist = rdp->nxtlist; + rdp->curtail = rdp->nxttail; + rdp->nxtlist = NULL; + rdp->nxttail = &rdp->nxtlist; + local_irq_enable(); /* - * move the other grace-period-completed entries to - * [rdp->nxtlist, *rdp->nxttail[0]) temporarily + * start the next batch of callbacks */ - if (!rcu_batch_before(completed_snap, rdp->batch)) - rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2]; - else if (!rcu_batch_before(completed_snap, rdp->batch - 1)) - rdp->nxttail[0] = rdp->nxttail[1]; - /* - * the grace period for entries in - * [rdp->nxtlist, *rdp->nxttail[0]) has completed and - * move these entries to donelist + /* determine batch number */ + rdp->batch = rcp->cur + 1; + /* see the comment and corresponding wmb() in + * the rcu_start_batch() */ - if (rdp->nxttail[0] != &rdp->nxtlist) { - *rdp->donetail = rdp->nxtlist; - rdp->donetail = rdp->nxttail[0]; - rdp->nxtlist = *rdp->nxttail[0]; - *rdp->donetail = NULL; - - if (rdp->nxttail[1] == rdp->nxttail[0]) - rdp->nxttail[1] = &rdp->nxtlist; - if (rdp->nxttail[2] == rdp->nxttail[0]) - rdp->nxttail[2] = &rdp->nxtlist; - rdp->nxttail[0] = &rdp->nxtlist; - } - - local_irq_restore(flags); - - if (rcu_batch_after(rdp->batch, rcp->pending)) { - unsigned long flags2; + smp_rmb(); + if (!rcp->next_pending) { /* and start it/schedule start if it's a new batch */ - spin_lock_irqsave(&rcp->lock, flags2); - if (rcu_batch_after(rdp->batch, rcp->pending)) { - rcp->pending = rdp->batch; - rcu_start_batch(rcp); - } - spin_unlock_irqrestore(&rcp->lock, flags2); + spin_lock(&rcp->lock); + rcp->next_pending = 1; + rcu_start_batch(rcp); + 
spin_unlock(&rcp->lock); } } @@ -583,53 +462,21 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, static void rcu_process_callbacks(struct softirq_action *unused) { - /* - * Memory references from any prior RCU read-side critical sections - * executed by the interrupted code must be see before any RCU - * grace-period manupulations below. - */ - - smp_mb(); /* See above block comment. */ - __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); - - /* - * Memory references from any later RCU read-side critical sections - * executed by the interrupted code must be see after any RCU - * grace-period manupulations above. - */ - - smp_mb(); /* See above block comment. */ } static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { - /* Check for CPU stalls, if enabled. */ - check_cpu_stall(rcp); - - if (rdp->nxtlist) { - long completed_snap = ACCESS_ONCE(rcp->completed); - - /* - * This cpu has pending rcu entries and the grace period - * for them has completed. - */ - if (!rcu_batch_before(completed_snap, rdp->batch)) - return 1; - if (!rcu_batch_before(completed_snap, rdp->batch - 1) && - rdp->nxttail[0] != rdp->nxttail[1]) - return 1; - if (rdp->nxttail[0] != &rdp->nxtlist) - return 1; + /* This cpu has pending rcu entries and the grace period + * for them has completed. + */ + if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) + return 1; - /* - * This cpu has pending rcu entries and the new batch - * for then hasn't been started nor scheduled start - */ - if (rcu_batch_after(rdp->batch, rcp->pending)) - return 1; - } + /* This cpu has no pending entries, but there are new entries */ + if (!rdp->curlist && rdp->nxtlist) + return 1; /* This cpu has finished callbacks to invoke */ if (rdp->donelist) @@ -665,15 +512,9 @@ int rcu_needs_cpu(int cpu) struct rcu_data *rdp = &per_cpu(rcu_data, cpu); struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); - return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu); + return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); } -/* - * Top-level function driving RCU grace-period detection, normally - * invoked from the scheduler-clock interrupt. This function simply - * increments counters that are read only from softirq by this same - * CPU, so there are no memory barriers required. 
- */ void rcu_check_callbacks(int cpu, int user) { if (user || @@ -717,17 +558,14 @@ void rcu_check_callbacks(int cpu, int user) static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { - unsigned long flags; - - spin_lock_irqsave(&rcp->lock, flags); memset(rdp, 0, sizeof(*rdp)); - rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist; + rdp->curtail = &rdp->curlist; + rdp->nxttail = &rdp->nxtlist; rdp->donetail = &rdp->donelist; rdp->quiescbatch = rcp->completed; rdp->qs_pending = 0; rdp->cpu = cpu; rdp->blimit = blimit; - spin_unlock_irqrestore(&rcp->lock, flags); } static void __cpuinit rcu_online_cpu(int cpu) @@ -772,9 +610,6 @@ static struct notifier_block __cpuinitdata rcu_nb = { */ void __init __rcu_init(void) { -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)smp_processor_id()); /* Register notifier for non-boot CPUs */ diff --git a/trunk/kernel/rcupreempt.c b/trunk/kernel/rcupreempt.c index ca4bbbe04aa4..27827931ca0d 100644 --- a/trunk/kernel/rcupreempt.c +++ b/trunk/kernel/rcupreempt.c @@ -58,6 +58,14 @@ #include #include +/* + * Macro that prevents the compiler from reordering accesses, but does + * absolutely -nothing- to prevent CPUs from reordering. This is used + * only to mediate communication between mainline code and hardware + * interrupt and NMI handlers. + */ +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + /* * PREEMPT_RCU data structures. */ diff --git a/trunk/kernel/rcupreempt_trace.c b/trunk/kernel/rcupreempt_trace.c index 35c2d3360ecf..5edf82c34bbc 100644 --- a/trunk/kernel/rcupreempt_trace.c +++ b/trunk/kernel/rcupreempt_trace.c @@ -308,16 +308,11 @@ static int rcupreempt_debugfs_init(void) static int __init rcupreempt_trace_init(void) { - int ret; - mutex_init(&rcupreempt_trace_mutex); rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL); if (!rcupreempt_trace_buf) return 1; - ret = rcupreempt_debugfs_init(); - if (ret) - kfree(rcupreempt_trace_buf); - return ret; + return rcupreempt_debugfs_init(); } static void __exit rcupreempt_trace_cleanup(void) diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index 6f230596bd0c..ad1962dc0aa2 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -204,16 +204,11 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; } -static inline int rt_bandwidth_enabled(void) -{ - return sysctl_sched_rt_runtime >= 0; -} - static void start_rt_bandwidth(struct rt_bandwidth *rt_b) { ktime_t now; - if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) + if (rt_b->rt_runtime == RUNTIME_INF) return; if (hrtimer_active(&rt_b->rt_period_timer)) @@ -303,9 +298,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; #endif /* CONFIG_RT_GROUP_SCHED */ -#else /* !CONFIG_USER_SCHED */ +#else /* !CONFIG_FAIR_GROUP_SCHED */ #define root_task_group init_task_group -#endif /* CONFIG_USER_SCHED */ +#endif /* CONFIG_FAIR_GROUP_SCHED */ /* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. 
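Regarding the private ACCESS_ONCE() copy added to rcupreempt.c above: all it does is force the compiler to emit exactly one access per source reference, via the volatile cast. A minimal sketch of the intended use, with a hypothetical flag shared with an interrupt handler (no CPU-level ordering is implied):

#include <linux/compiler.h>	/* ACCESS_ONCE() in the mainline header */
#include <asm/processor.h>	/* cpu_relax() */

static int flag;	/* set from a hypothetical interrupt handler */

static void wait_for_flag(void)
{
	/*
	 * Without ACCESS_ONCE() the compiler may hoist the load out of
	 * the loop and spin on a register copy forever.
	 */
	while (!ACCESS_ONCE(flag))
		cpu_relax();
}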
@@ -609,9 +604,9 @@ struct rq { static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync) +static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) { - rq->curr->sched_class->check_preempt_curr(rq, p, sync); + rq->curr->sched_class->check_preempt_curr(rq, p); } static inline int cpu_of(struct rq *rq) @@ -1107,7 +1102,7 @@ static void hrtick_start(struct rq *rq, u64 delay) hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL); } -static inline void init_hrtick(void) +static void init_hrtick(void) { } #endif /* CONFIG_SMP */ @@ -1126,7 +1121,7 @@ static void init_rq_hrtick(struct rq *rq) rq->hrtick_timer.function = hrtick; rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; } -#else /* CONFIG_SCHED_HRTICK */ +#else static inline void hrtick_clear(struct rq *rq) { } @@ -1138,7 +1133,7 @@ static inline void init_rq_hrtick(struct rq *rq) static inline void init_hrtick(void) { } -#endif /* CONFIG_SCHED_HRTICK */ +#endif /* * resched_task - mark a task 'to be rescheduled now'. @@ -1385,24 +1380,38 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) update_load_sub(&rq->load, load); } -#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED) -typedef int (*tg_visitor)(struct task_group *, void *); +#ifdef CONFIG_SMP +static unsigned long source_load(int cpu, int type); +static unsigned long target_load(int cpu, int type); +static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); + +static unsigned long cpu_avg_load_per_task(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + if (rq->nr_running) + rq->avg_load_per_task = rq->load.weight / rq->nr_running; + + return rq->avg_load_per_task; +} + +#ifdef CONFIG_FAIR_GROUP_SCHED + +typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *); /* * Iterate the full tree, calling @down when first entering a node and @up when * leaving it for the final time. 
*/ -static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) +static void +walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd) { struct task_group *parent, *child; - int ret; rcu_read_lock(); parent = &root_task_group; down: - ret = (*down)(parent, data); - if (ret) - goto out_unlock; + (*down)(parent, cpu, sd); list_for_each_entry_rcu(child, &parent->children, siblings) { parent = child; goto down; @@ -1410,43 +1419,15 @@ static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) up: continue; } - ret = (*up)(parent, data); - if (ret) - goto out_unlock; + (*up)(parent, cpu, sd); child = parent; parent = parent->parent; if (parent) goto up; -out_unlock: rcu_read_unlock(); - - return ret; } -static int tg_nop(struct task_group *tg, void *data) -{ - return 0; -} -#endif - -#ifdef CONFIG_SMP -static unsigned long source_load(int cpu, int type); -static unsigned long target_load(int cpu, int type); -static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); - -static unsigned long cpu_avg_load_per_task(int cpu) -{ - struct rq *rq = cpu_rq(cpu); - - if (rq->nr_running) - rq->avg_load_per_task = rq->load.weight / rq->nr_running; - - return rq->avg_load_per_task; -} - -#ifdef CONFIG_FAIR_GROUP_SCHED - static void __set_se_shares(struct sched_entity *se, unsigned long shares); /* @@ -1505,11 +1486,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, * This needs to be done in a bottom-up fashion because the rq weight of a * parent group depends on the shares of its child groups. */ -static int tg_shares_up(struct task_group *tg, void *data) +static void +tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) { unsigned long rq_weight = 0; unsigned long shares = 0; - struct sched_domain *sd = data; int i; for_each_cpu_mask(i, sd->span) { @@ -1534,8 +1515,6 @@ static int tg_shares_up(struct task_group *tg, void *data) __update_group_shares_cpu(tg, i, shares, rq_weight); spin_unlock_irqrestore(&rq->lock, flags); } - - return 0; } /* @@ -1543,10 +1522,10 @@ static int tg_shares_up(struct task_group *tg, void *data) * This needs to be done in a top-down fashion because the load of a child * group is a fraction of its parents load. 
*/ -static int tg_load_down(struct task_group *tg, void *data) +static void +tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd) { unsigned long load; - long cpu = (long)data; if (!tg->parent) { load = cpu_rq(cpu)->load.weight; @@ -1557,8 +1536,11 @@ static int tg_load_down(struct task_group *tg, void *data) } tg->cfs_rq[cpu]->h_load = load; +} - return 0; +static void +tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd) +{ } static void update_shares(struct sched_domain *sd) @@ -1568,7 +1550,7 @@ static void update_shares(struct sched_domain *sd) if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { sd->last_update = now; - walk_tg_tree(tg_nop, tg_shares_up, sd); + walk_tg_tree(tg_nop, tg_shares_up, 0, sd); } } @@ -1579,9 +1561,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd) spin_lock(&rq->lock); } -static void update_h_load(long cpu) +static void update_h_load(int cpu) { - walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); + walk_tg_tree(tg_load_down, tg_nop, cpu, NULL); } #else @@ -1939,8 +1921,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) running = task_running(rq, p); on_rq = p->se.on_rq; ncsw = 0; - if (!match_state || p->state == match_state) - ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ + if (!match_state || p->state == match_state) { + ncsw = p->nivcsw + p->nvcsw; + if (unlikely(!ncsw)) + ncsw = 1; + } task_rq_unlock(rq, &flags); /* @@ -2300,7 +2285,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) trace_mark(kernel_sched_wakeup, "pid %d state %ld ## rq %p task %p rq->curr %p", p->pid, p->state, rq, p, rq->curr); - check_preempt_curr(rq, p, sync); + check_preempt_curr(rq, p); p->state = TASK_RUNNING; #ifdef CONFIG_SMP @@ -2435,7 +2420,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) trace_mark(kernel_sched_wakeup_new, "pid %d state %ld ## rq %p task %p rq->curr %p", p->pid, p->state, rq, p, rq->curr); - check_preempt_curr(rq, p, 0); + check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) p->sched_class->task_wake_up(rq, p); @@ -2895,7 +2880,7 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, * Note that idle threads have a prio of MAX_PRIO, for this test * to be always true for them. */ - check_preempt_curr(this_rq, p, 0); + check_preempt_curr(this_rq, p); } /* @@ -4642,15 +4627,6 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ -/** - * complete: - signals a single thread waiting on this completion - * @x: holds the state of this particular completion - * - * This will wake up a single thread waiting on this completion. Threads will be - * awakened in the same order in which they were queued. - * - * See also complete_all(), wait_for_completion() and related routines. - */ void complete(struct completion *x) { unsigned long flags; @@ -4662,12 +4638,6 @@ void complete(struct completion *x) } EXPORT_SYMBOL(complete); -/** - * complete_all: - signals all threads waiting on this completion - * @x: holds the state of this particular completion - * - * This will wake up all threads waiting on this particular completion event. 
- */ void complete_all(struct completion *x) { unsigned long flags; @@ -4688,7 +4658,10 @@ do_wait_for_common(struct completion *x, long timeout, int state) wait.flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue_tail(&x->wait, &wait); do { - if (signal_pending_state(state, current)) { + if ((state == TASK_INTERRUPTIBLE && + signal_pending(current)) || + (state == TASK_KILLABLE && + fatal_signal_pending(current))) { timeout = -ERESTARTSYS; break; } @@ -4716,31 +4689,12 @@ wait_for_common(struct completion *x, long timeout, int state) return timeout; } -/** - * wait_for_completion: - waits for completion of a task - * @x: holds the state of this particular completion - * - * This waits to be signaled for completion of a specific task. It is NOT - * interruptible and there is no timeout. - * - * See also similar routines (i.e. wait_for_completion_timeout()) with timeout - * and interrupt capability. Also see complete(). - */ void __sched wait_for_completion(struct completion *x) { wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(wait_for_completion); -/** - * wait_for_completion_timeout: - waits for completion of a task (w/timeout) - * @x: holds the state of this particular completion - * @timeout: timeout value in jiffies - * - * This waits for either a completion of a specific task to be signaled or for a - * specified timeout to expire. The timeout is in jiffies. It is not - * interruptible. - */ unsigned long __sched wait_for_completion_timeout(struct completion *x, unsigned long timeout) { @@ -4748,13 +4702,6 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout) } EXPORT_SYMBOL(wait_for_completion_timeout); -/** - * wait_for_completion_interruptible: - waits for completion of a task (w/intr) - * @x: holds the state of this particular completion - * - * This waits for completion of a specific task to be signaled. It is - * interruptible. - */ int __sched wait_for_completion_interruptible(struct completion *x) { long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE); @@ -4764,14 +4711,6 @@ int __sched wait_for_completion_interruptible(struct completion *x) } EXPORT_SYMBOL(wait_for_completion_interruptible); -/** - * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr)) - * @x: holds the state of this particular completion - * @timeout: timeout value in jiffies - * - * This waits for either a completion of a specific task to be signaled or for a - * specified timeout to expire. It is interruptible. The timeout is in jiffies. - */ unsigned long __sched wait_for_completion_interruptible_timeout(struct completion *x, unsigned long timeout) @@ -4780,13 +4719,6 @@ wait_for_completion_interruptible_timeout(struct completion *x, } EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); -/** - * wait_for_completion_killable: - waits for completion of a task (killable) - * @x: holds the state of this particular completion - * - * This waits to be signaled for completion of a specific task. It can be - * interrupted by a kill signal. - */ int __sched wait_for_completion_killable(struct completion *x) { long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE); @@ -5189,8 +5121,7 @@ static int __sched_setscheduler(struct task_struct *p, int policy, * Do not allow realtime tasks into groups that have no runtime * assigned. 
*/ - if (rt_bandwidth_enabled() && rt_policy(policy) && - task_group(p)->rt_bandwidth.rt_runtime == 0) + if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) return -EPERM; #endif @@ -6026,7 +5957,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) set_task_cpu(p, dest_cpu); if (on_rq) { activate_task(rq_dest, p, 0); - check_preempt_curr(rq_dest, p, 0); + check_preempt_curr(rq_dest, p); } done: ret = 1; @@ -6351,7 +6282,7 @@ set_table_entry(struct ctl_table *entry, static struct ctl_table * sd_alloc_ctl_domain_table(struct sched_domain *sd) { - struct ctl_table *table = sd_alloc_ctl_entry(13); + struct ctl_table *table = sd_alloc_ctl_entry(12); if (table == NULL) return NULL; @@ -6379,9 +6310,7 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) sizeof(int), 0644, proc_dointvec_minmax); set_table_entry(&table[10], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[11], "name", sd->name, - CORENAME_MAX_SIZE, 0444, proc_dostring); - /* &table[12] is terminator */ + /* &table[11] is terminator */ return table; } @@ -7265,21 +7194,13 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) * Non-inlined to reduce accumulated stack pressure in build_sched_domains() */ -#ifdef CONFIG_SCHED_DEBUG -# define SD_INIT_NAME(sd, type) sd->name = #type -#else -# define SD_INIT_NAME(sd, type) do { } while (0) -#endif - #define SD_INIT(sd, type) sd_init_##type(sd) - #define SD_INIT_FUNC(type) \ static noinline void sd_init_##type(struct sched_domain *sd) \ { \ memset(sd, 0, sizeof(*sd)); \ *sd = SD_##type##_INIT; \ sd->level = SD_LV_##type; \ - SD_INIT_NAME(sd, type); \ } SD_INIT_FUNC(CPU) @@ -8321,25 +8242,20 @@ void __might_sleep(char *file, int line) #ifdef in_atomic static unsigned long prev_jiffy; /* ratelimiting */ - if ((!in_atomic() && !irqs_disabled()) || - system_state != SYSTEM_RUNNING || oops_in_progress) - return; - if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) - return; - prev_jiffy = jiffies; - - printk(KERN_ERR - "BUG: sleeping function called from invalid context at %s:%d\n", - file, line); - printk(KERN_ERR - "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", - in_atomic(), irqs_disabled(), - current->pid, current->comm); - - debug_show_held_locks(current); - if (irqs_disabled()) - print_irqtrace_events(current); - dump_stack(); + if ((in_atomic() || irqs_disabled()) && + system_state == SYSTEM_RUNNING && !oops_in_progress) { + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) + return; + prev_jiffy = jiffies; + printk(KERN_ERR "BUG: sleeping function called from invalid" + " context at %s:%d\n", file, line); + printk("in_atomic():%d, irqs_disabled():%d\n", + in_atomic(), irqs_disabled()); + debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); + dump_stack(); + } #endif } EXPORT_SYMBOL(__might_sleep); @@ -8837,95 +8753,73 @@ static DEFINE_MUTEX(rt_constraints_mutex); static unsigned long to_ratio(u64 period, u64 runtime) { if (runtime == RUNTIME_INF) - return 1ULL << 20; - - return div64_u64(runtime << 20, period); -} - -/* Must be called with tasklist_lock held */ -static inline int tg_has_rt_tasks(struct task_group *tg) -{ - struct task_struct *g, *p; - - do_each_thread(g, p) { - if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) - return 1; - } while_each_thread(g, p); + return 1ULL << 16; - return 0; + return div64_u64(runtime << 16, period); } -struct rt_schedulable_data { - struct task_group *tg; - u64 rt_period; - u64 
rt_runtime; -}; - -static int tg_schedulable(struct task_group *tg, void *data) +#ifdef CONFIG_CGROUP_SCHED +static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) { - struct rt_schedulable_data *d = data; - struct task_group *child; - unsigned long total, sum = 0; - u64 period, runtime; + struct task_group *tgi, *parent = tg->parent; + unsigned long total = 0; - period = ktime_to_ns(tg->rt_bandwidth.rt_period); - runtime = tg->rt_bandwidth.rt_runtime; + if (!parent) { + if (global_rt_period() < period) + return 0; - if (tg == d->tg) { - period = d->rt_period; - runtime = d->rt_runtime; + return to_ratio(period, runtime) < + to_ratio(global_rt_period(), global_rt_runtime()); } - /* - * Cannot have more runtime than the period. - */ - if (runtime > period && runtime != RUNTIME_INF) - return -EINVAL; - - /* - * Ensure we don't starve existing RT tasks. - */ - if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg)) - return -EBUSY; + if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period) + return 0; - total = to_ratio(period, runtime); + rcu_read_lock(); + list_for_each_entry_rcu(tgi, &parent->children, siblings) { + if (tgi == tg) + continue; - /* - * Nobody can have more than the global setting allows. - */ - if (total > to_ratio(global_rt_period(), global_rt_runtime())) - return -EINVAL; + total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period), + tgi->rt_bandwidth.rt_runtime); + } + rcu_read_unlock(); - /* - * The sum of our children's runtime should not exceed our own. - */ - list_for_each_entry_rcu(child, &tg->children, siblings) { - period = ktime_to_ns(child->rt_bandwidth.rt_period); - runtime = child->rt_bandwidth.rt_runtime; + return total + to_ratio(period, runtime) <= + to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), + parent->rt_bandwidth.rt_runtime); +} +#elif defined CONFIG_USER_SCHED +static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) +{ + struct task_group *tgi; + unsigned long total = 0; + unsigned long global_ratio = + to_ratio(global_rt_period(), global_rt_runtime()); - if (child == d->tg) { - period = d->rt_period; - runtime = d->rt_runtime; - } + rcu_read_lock(); + list_for_each_entry_rcu(tgi, &task_groups, list) { + if (tgi == tg) + continue; - sum += to_ratio(period, runtime); + total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period), + tgi->rt_bandwidth.rt_runtime); } + rcu_read_unlock(); - if (sum > total) - return -EINVAL; - - return 0; + return total + to_ratio(period, runtime) < global_ratio; } +#endif -static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) +/* Must be called with tasklist_lock held */ +static inline int tg_has_rt_tasks(struct task_group *tg) { - struct rt_schedulable_data data = { - .tg = tg, - .rt_period = period, - .rt_runtime = runtime, - }; - - return walk_tg_tree(tg_schedulable, tg_nop, &data); + struct task_struct *g, *p; + do_each_thread(g, p) { + if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) + return 1; + } while_each_thread(g, p); + return 0; } static int tg_set_bandwidth(struct task_group *tg, @@ -8935,9 +8829,14 @@ static int tg_set_bandwidth(struct task_group *tg, mutex_lock(&rt_constraints_mutex); read_lock(&tasklist_lock); - err = __rt_schedulable(tg, rt_period, rt_runtime); - if (err) + if (rt_runtime == 0 && tg_has_rt_tasks(tg)) { + err = -EBUSY; goto unlock; + } + if (!__rt_schedulable(tg, rt_period, rt_runtime)) { + err = -EINVAL; + goto unlock; + } spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); tg->rt_bandwidth.rt_period = 
ns_to_ktime(rt_period); @@ -9006,25 +8905,19 @@ long sched_group_rt_period(struct task_group *tg) static int sched_rt_global_constraints(void) { - u64 runtime, period; + struct task_group *tg = &root_task_group; + u64 rt_runtime, rt_period; int ret = 0; if (sysctl_sched_rt_period <= 0) return -EINVAL; - runtime = global_rt_runtime(); - period = global_rt_period(); - - /* - * Sanity check on the sysctl variables. - */ - if (runtime > period && runtime != RUNTIME_INF) - return -EINVAL; + rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period); + rt_runtime = tg->rt_bandwidth.rt_runtime; mutex_lock(&rt_constraints_mutex); - read_lock(&tasklist_lock); - ret = __rt_schedulable(NULL, 0, 0); - read_unlock(&tasklist_lock); + if (!__rt_schedulable(tg, rt_period, rt_runtime)) + ret = -EINVAL; mutex_unlock(&rt_constraints_mutex); return ret; @@ -9098,6 +8991,7 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) if (!cgrp->parent) { /* This is early initialization for the top cgroup */ + init_task_group.css.cgroup = cgrp; return &init_task_group.css; } @@ -9106,6 +9000,9 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); + /* Bind the cgroup to the task_group object we just created */ + tg->css.cgroup = cgrp; + return &tg->css; } diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index 18fd17172eb6..fb8994c6d4bb 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -408,6 +408,64 @@ static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) return __sched_period(nr_running); } +/* + * The goal of calc_delta_asym() is to be asymmetric around NICE_0_LOAD, in + * that it favours >=0 over <0. + * + * -20 | + * | + * 0 --------+------- + * .' + * 19 .' + * + */ +static unsigned long +calc_delta_asym(unsigned long delta, struct sched_entity *se) +{ + struct load_weight lw = { + .weight = NICE_0_LOAD, + .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) + }; + + for_each_sched_entity(se) { + struct load_weight *se_lw = &se->load; + unsigned long rw = cfs_rq_of(se)->load.weight; + +#ifdef CONFIG_FAIR_GROUP_SCHED + struct cfs_rq *cfs_rq = se->my_q; + struct task_group *tg = NULL; + + if (cfs_rq) + tg = cfs_rq->tg; + + if (tg && tg->shares < NICE_0_LOAD) { + /* + * scale shares to what they would have been had + * tg->weight been NICE_0_LOAD: + * + * weight = 1024 * shares / tg->weight + */ + lw.weight *= se->load.weight; + lw.weight /= tg->shares; + + lw.inv_weight = 0; + + se_lw = &lw; + rw += lw.weight - se->load.weight; + } else +#endif + + if (se->load.weight < NICE_0_LOAD) { + se_lw = &lw; + rw += NICE_0_LOAD - se->load.weight; + } + + delta = calc_delta_mine(delta, rw, se_lw); + } + + return delta; +} + /* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class.
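The calc_delta_asym() helper above builds on calc_delta_mine(), which scales a delta by NICE_0_LOAD relative to a load weight in fixed point: multiply by a precomputed inverse, then shift. A standalone userspace sketch of that arithmetic, with WMULT_SHIFT and NICE_0_LOAD assumed from kernel/sched.c:

#include <stdint.h>
#include <stdio.h>

#define WMULT_SHIFT	32	/* fixed-point shift, as in kernel/sched.c */
#define NICE_0_LOAD	1024	/* load weight of a nice-0 task */

/* delta * NICE_0_LOAD / weight, computed as delta * inv_weight >> shift */
static unsigned long scale_delta(unsigned long delta, unsigned long weight)
{
	uint64_t inv_weight = ((uint64_t)1 << WMULT_SHIFT) / weight;

	return (unsigned long)(((uint64_t)delta * NICE_0_LOAD * inv_weight)
				>> WMULT_SHIFT);
}

int main(void)
{
	/* nice-0 weight: the granularity passes through unchanged */
	printf("%lu\n", scale_delta(4000000, 1024));	/* ~4000000 */
	/* heavier task (weight 2048) sees half the granularity */
	printf("%lu\n", scale_delta(4000000, 2048));	/* ~2000000 */
	return 0;
}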
@@ -528,12 +586,11 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) update_load_add(&cfs_rq->load, se->load.weight); if (!parent_entity(se)) inc_cpu_load(rq_of(cfs_rq), se->load.weight); - if (entity_is_task(se)) { + if (entity_is_task(se)) add_cfs_task_weight(cfs_rq, se->load.weight); - list_add(&se->group_node, &cfs_rq->tasks); - } cfs_rq->nr_running++; se->on_rq = 1; + list_add(&se->group_node, &cfs_rq->tasks); } static void @@ -542,12 +599,11 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) update_load_sub(&cfs_rq->load, se->load.weight); if (!parent_entity(se)) dec_cpu_load(rq_of(cfs_rq), se->load.weight); - if (entity_is_task(se)) { + if (entity_is_task(se)) add_cfs_task_weight(cfs_rq, -se->load.weight); - list_del_init(&se->group_node); - } cfs_rq->nr_running--; se->on_rq = 0; + list_del_init(&se->group_node); } static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -1029,6 +1085,7 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) { struct sched_entity *se = tg->se[cpu]; + long more_w; if (!tg->parent) return wl; @@ -1040,17 +1097,18 @@ static long effective_load(struct task_group *tg, int cpu, if (!wl && sched_feat(ASYM_EFF_LOAD)) return wl; + /* + * Instead of using this increment, also add the difference + * between when the shares were last updated and now. + */ + more_w = se->my_q->load.weight - se->my_q->rq_weight; + wl += more_w; + wg += more_w; + for_each_sched_entity(se) { - long S, rw, s, a, b; - long more_w; +#define D(n) (likely(n) ? (n) : 1) - /* - * Instead of using this increment, also add the difference - * between when the shares were last updated and now. - */ - more_w = se->my_q->load.weight - se->my_q->rq_weight; - wl += more_w; - wg += more_w; + long S, rw, s, a, b; S = se->my_q->tg->shares; s = se->my_q->shares; @@ -1059,11 +1117,7 @@ static long effective_load(struct task_group *tg, int cpu, a = S*(rw + wl); b = S*rw + s*wg; - wl = s*(a-b); - - if (likely(b)) - wl /= b; - + wl = s*(a-b)/D(b); /* * Assume the group is already running and will * thus already be accounted for in the weight. @@ -1072,6 +1126,7 @@ static long effective_load(struct task_group *tg, int cpu, * alter the group weight. 
*/ wg = 0; +#undef D } return wl; @@ -1088,7 +1143,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, #endif static int -wake_affine(struct sched_domain *this_sd, struct rq *this_rq, +wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, struct task_struct *p, int prev_cpu, int this_cpu, int sync, int idx, unsigned long load, unsigned long this_load, unsigned int imbalance) @@ -1103,11 +1158,6 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) return 0; - if (!sync && sched_feat(SYNC_WAKEUPS) && - curr->se.avg_overlap < sysctl_sched_migration_cost && - p->se.avg_overlap < sysctl_sched_migration_cost) - sync = 1; - /* * If sync wakeup then subtract the (maximum possible) * effect of the currently running task from the load @@ -1132,14 +1182,17 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, * a reasonable amount of time then attract this newly * woken task: */ - if (sync && balanced) - return 1; + if (sync && balanced) { + if (curr->se.avg_overlap < sysctl_sched_migration_cost && + p->se.avg_overlap < sysctl_sched_migration_cost) + return 1; + } schedstat_inc(p, se.nr_wakeups_affine_attempts); tl_per_task = cpu_avg_load_per_task(this_cpu); - if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <= - tl_per_task)) { + if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) || + balanced) { /* * This domain has SD_WAKE_AFFINE and * p is cache cold in this domain, and @@ -1158,17 +1211,16 @@ static int select_task_rq_fair(struct task_struct *p, int sync) struct sched_domain *sd, *this_sd = NULL; int prev_cpu, this_cpu, new_cpu; unsigned long load, this_load; - struct rq *this_rq; + struct rq *rq, *this_rq; unsigned int imbalance; int idx; prev_cpu = task_cpu(p); + rq = task_rq(p); this_cpu = smp_processor_id(); this_rq = cpu_rq(this_cpu); new_cpu = prev_cpu; - if (prev_cpu == this_cpu) - goto out; /* * 'this_sd' is the first domain that both * this_cpu and prev_cpu are present in: @@ -1196,10 +1248,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync) load = source_load(prev_cpu, idx); this_load = target_load(this_cpu, idx); - if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, + if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, load, this_load, imbalance)) return this_cpu; + if (prev_cpu == this_cpu) + goto out; + /* * Start passive balancing when half the imbalance_pct * limit is reached. @@ -1226,20 +1281,62 @@ static unsigned long wakeup_gran(struct sched_entity *se) * + nice tasks. */ if (sched_feat(ASYM_GRAN)) - gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); + gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); + else + gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); return gran; } +/* + * Should 'se' preempt 'curr'. 
+ * + * |s1 + * |s2 + * |s3 + * g + * |<--->|c + * + * w(c, s1) = -1 + * w(c, s2) = 0 + * w(c, s3) = 1 + * + */ +static int +wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) +{ + s64 gran, vdiff = curr->vruntime - se->vruntime; + + if (vdiff < 0) + return -1; + + gran = wakeup_gran(curr); + if (vdiff > gran) + return 1; + + return 0; +} + +/* return depth at which a sched entity is present in the hierarchy */ +static inline int depth_se(struct sched_entity *se) +{ + int depth = 0; + + for_each_sched_entity(se) + depth++; + + return depth; +} + /* * Preempt the current task with a newly woken task if needed: */ -static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) +static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) { struct task_struct *curr = rq->curr; struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se, *pse = &p->se; - s64 delta_exec; + int se_depth, pse_depth; if (unlikely(rt_prio(p->prio))) { update_rq_clock(rq); @@ -1253,13 +1350,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) cfs_rq_of(pse)->next = pse; - /* - * We can come here with TIF_NEED_RESCHED already set from new task - * wake up path. - */ - if (test_tsk_need_resched(curr)) - return; - /* * Batch tasks do not preempt (their preemption is driven by * the tick): @@ -1270,15 +1360,33 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) if (!sched_feat(WAKEUP_PREEMPT)) return; - if (sched_feat(WAKEUP_OVERLAP) && (sync || - (se->avg_overlap < sysctl_sched_migration_cost && - pse->avg_overlap < sysctl_sched_migration_cost))) { - resched_task(curr); - return; + /* + * preemption test can be made between sibling entities who are in the + * same cfs_rq i.e who have a common parent. Walk up the hierarchy of + * both tasks until we find their ancestors who are siblings of common + * parent. 
+ */ + + /* First walk up until both entities are at same depth */ + se_depth = depth_se(se); + pse_depth = depth_se(pse); + + while (se_depth > pse_depth) { + se_depth--; + se = parent_entity(se); + } + + while (pse_depth > se_depth) { + pse_depth--; + pse = parent_entity(pse); } - delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; - if (delta_exec > wakeup_gran(pse)) + while (!is_same_group(se, pse)) { + se = parent_entity(se); + pse = parent_entity(pse); + } + + if (wakeup_preempt_entity(se, pse) == 1) resched_task(curr); } @@ -1337,9 +1445,19 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next) if (next == &cfs_rq->tasks) return NULL; - se = list_entry(next, struct sched_entity, group_node); - p = task_of(se); - cfs_rq->balance_iterator = next->next; + /* Skip over entities that are not tasks */ + do { + se = list_entry(next, struct sched_entity, group_node); + next = next->next; + } while (next != &cfs_rq->tasks && !entity_is_task(se)); + + if (next == &cfs_rq->tasks) + return NULL; + + cfs_rq->balance_iterator = next; + + if (entity_is_task(se)) + p = task_of(se); return p; } @@ -1389,7 +1507,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, rcu_read_lock(); update_h_load(busiest_cpu); - list_for_each_entry_rcu(tg, &task_groups, list) { + list_for_each_entry(tg, &task_groups, list) { struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu]; unsigned long busiest_h_load = busiest_cfs_rq->h_load; unsigned long busiest_weight = busiest_cfs_rq->load.weight; @@ -1502,10 +1620,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); - resched_task(rq->curr); } enqueue_task_fair(rq, p, 0); + resched_task(rq->curr); } /* @@ -1524,7 +1642,7 @@ static void prio_changed_fair(struct rq *rq, struct task_struct *p, if (p->prio > oldprio) resched_task(rq->curr); } else - check_preempt_curr(rq, p, 0); + check_preempt_curr(rq, p); } /* @@ -1541,7 +1659,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p, if (running) resched_task(rq->curr); else - check_preempt_curr(rq, p, 0); + check_preempt_curr(rq, p); } /* Account for a task changing its policy or group. 
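The check_preempt_wakeup() rework added above combines two pieces: equalize the depth of the two entities, climb both to a common cfs_rq, then apply the three-way wakeup_preempt_entity() test from the diagram (-1: the newly woken entity is behind, 0: within the granularity, 1: preempt). A self-contained sketch of the same control flow over a toy parent-linked hierarchy; the struct and helper names are illustrative stand-ins, not the kernel's:

#include <stdio.h>

struct ent {
	struct ent *parent;	/* NULL at the root */
	long long vruntime;
};

static int depth(const struct ent *e)
{
	int d = 0;

	for (; e; e = e->parent)
		d++;
	return d;
}

/* 1: preempt, 0: within gran, -1: curr is behind anyway */
static int should_preempt(struct ent *curr, struct ent *se, long long gran)
{
	int dc = depth(curr), ds = depth(se);
	long long vdiff;

	/* first bring both entities to the same depth ... */
	while (dc > ds) {
		curr = curr->parent;
		dc--;
	}
	while (ds > dc) {
		se = se->parent;
		ds--;
	}
	/* ... then climb in lockstep until they are siblings */
	while (curr->parent != se->parent) {
		curr = curr->parent;
		se = se->parent;
	}

	vdiff = curr->vruntime - se->vruntime;
	if (vdiff < 0)
		return -1;
	if (vdiff > gran)
		return 1;
	return 0;
}

int main(void)
{
	struct ent group = { .parent = NULL, .vruntime = 500 };
	struct ent curr  = { .parent = &group, .vruntime = 400 };
	struct ent woken = { .parent = NULL, .vruntime = 100 };

	/* curr runs one level deeper, so the comparison happens between
	 * its group (vruntime 500) and the woken task (vruntime 100):
	 * 400 > gran of 200, so this prints 1 (preempt) */
	printf("%d\n", should_preempt(&curr, &woken, 200));
	return 0;
}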
diff --git a/trunk/kernel/sched_features.h b/trunk/kernel/sched_features.h index 7c9e8f4a049f..9353ca78154e 100644 --- a/trunk/kernel/sched_features.h +++ b/trunk/kernel/sched_features.h @@ -11,4 +11,3 @@ SCHED_FEAT(ASYM_GRAN, 1) SCHED_FEAT(LB_BIAS, 1) SCHED_FEAT(LB_WAKEUP_UPDATE, 1) SCHED_FEAT(ASYM_EFF_LOAD, 1) -SCHED_FEAT(WAKEUP_OVERLAP, 0) diff --git a/trunk/kernel/sched_idletask.c b/trunk/kernel/sched_idletask.c index dec4ccabe2f5..3a4f92dbbe66 100644 --- a/trunk/kernel/sched_idletask.c +++ b/trunk/kernel/sched_idletask.c @@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync) /* * Idle tasks are unconditionally rescheduled: */ -static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync) +static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p) { resched_task(rq->idle); } @@ -76,7 +76,7 @@ static void switched_to_idle(struct rq *rq, struct task_struct *p, if (running) resched_task(rq->curr); else - check_preempt_curr(rq, p, 0); + check_preempt_curr(rq, p); } static void prio_changed_idle(struct rq *rq, struct task_struct *p, @@ -93,7 +93,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p, if (p->prio > oldprio) resched_task(rq->curr); } else - check_preempt_curr(rq, p, 0); + check_preempt_curr(rq, p); } /* diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c index cdf5740ab03e..1113157b2058 100644 --- a/trunk/kernel/sched_rt.c +++ b/trunk/kernel/sched_rt.c @@ -102,12 +102,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se); static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { - struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; struct sched_rt_entity *rt_se = rt_rq->rt_se; - if (rt_rq->rt_nr_running) { - if (rt_se && !on_rt_rq(rt_se)) - enqueue_rt_entity(rt_se); + if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) { + struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; + + enqueue_rt_entity(rt_se); if (rt_rq->highest_prio < curr->prio) resched_task(curr); } @@ -231,9 +231,6 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) #endif /* CONFIG_RT_GROUP_SCHED */ #ifdef CONFIG_SMP -/* - * We ran out of runtime, see if we can borrow some from our neighbours. - */ static int do_balance_runtime(struct rt_rq *rt_rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); @@ -253,18 +250,9 @@ static int do_balance_runtime(struct rt_rq *rt_rq) continue; spin_lock(&iter->rt_runtime_lock); - /* - * Either all rqs have inf runtime and there's nothing to steal - * or __disable_runtime() below sets a specific rq to inf to - * indicate its been disabled and disalow stealing. - */ if (iter->rt_runtime == RUNTIME_INF) goto next; - /* - * From runqueues with spare time, take 1/n part of their - * spare time, but no more than our period. - */ diff = iter->rt_runtime - iter->rt_time; if (diff > 0) { diff = div_u64((u64)diff, weight); @@ -286,9 +274,6 @@ static int do_balance_runtime(struct rt_rq *rt_rq) return more; } -/* - * Ensure this RQ takes back all the runtime it lend to its neighbours. - */ static void __disable_runtime(struct rq *rq) { struct root_domain *rd = rq->rd; @@ -304,33 +289,17 @@ static void __disable_runtime(struct rq *rq) spin_lock(&rt_b->rt_runtime_lock); spin_lock(&rt_rq->rt_runtime_lock); - /* - * Either we're all inf and nobody needs to borrow, or we're - * already disabled and thus have nothing to do, or we have - * exactly the right amount of runtime to take out. 
- */ if (rt_rq->rt_runtime == RUNTIME_INF || rt_rq->rt_runtime == rt_b->rt_runtime) goto balanced; spin_unlock(&rt_rq->rt_runtime_lock); - /* - * Calculate the difference between what we started out with - * and what we current have, that's the amount of runtime - * we lend and now have to reclaim. - */ want = rt_b->rt_runtime - rt_rq->rt_runtime; - /* - * Greedy reclaim, take back as much as we can. - */ for_each_cpu_mask(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; - /* - * Can't reclaim from ourselves or disabled runqueues. - */ if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) continue; @@ -350,16 +319,8 @@ static void __disable_runtime(struct rq *rq) } spin_lock(&rt_rq->rt_runtime_lock); - /* - * We cannot be left wanting - that would mean some runtime - * leaked out of the system. - */ BUG_ON(want); balanced: - /* - * Disable all the borrow logic by pretending we have inf - * runtime - in which case borrowing doesn't make sense. - */ rt_rq->rt_runtime = RUNTIME_INF; spin_unlock(&rt_rq->rt_runtime_lock); spin_unlock(&rt_b->rt_runtime_lock); @@ -382,9 +343,6 @@ static void __enable_runtime(struct rq *rq) if (unlikely(!scheduler_running)) return; - /* - * Reset each runqueue's bandwidth settings - */ for_each_leaf_rt_rq(rt_rq, rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); @@ -431,7 +389,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) int i, idle = 1; cpumask_t span; - if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) + if (rt_b->rt_runtime == RUNTIME_INF) return 1; span = sched_rt_period_mask(); @@ -529,9 +487,6 @@ static void update_curr_rt(struct rq *rq) curr->se.exec_start = rq->clock; cpuacct_charge(curr, delta_exec); - if (!rt_bandwidth_enabled()) - return; - for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); @@ -829,7 +784,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) /* * Preempt the current task with a newly woken task if needed: */ -static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) +static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) { if (p->prio < rq->curr->prio) { resched_task(rq->curr); diff --git a/trunk/kernel/time/tick-sched.c b/trunk/kernel/time/tick-sched.c index a4d219398167..cb02324bdb88 100644 --- a/trunk/kernel/time/tick-sched.c +++ b/trunk/kernel/time/tick-sched.c @@ -20,7 +20,6 @@ #include #include #include -#include #include @@ -191,17 +190,9 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - if (!tick_nohz_enabled) - return -1; - - if (ts->idle_active) - *last_update_time = ktime_to_us(ts->idle_lastupdate); - else - *last_update_time = ktime_to_us(ktime_get()); - + *last_update_time = ktime_to_us(ts->idle_lastupdate); return ktime_to_us(ts->idle_sleeptime); } -EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); /** * tick_nohz_stop_sched_tick - stop the idle tick from the idle task diff --git a/trunk/kernel/user.c b/trunk/kernel/user.c index 39d6159fae43..865ecf57a096 100644 --- a/trunk/kernel/user.c +++ b/trunk/kernel/user.c @@ -169,7 +169,7 @@ static ssize_t cpu_rt_runtime_show(struct kobject *kobj, { struct user_struct *up = container_of(kobj, struct user_struct, kobj); - return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg)); + return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg)); } static ssize_t cpu_rt_runtime_store(struct kobject *kobj, @@ -180,7 +180,7 @@ static ssize_t 
cpu_rt_runtime_store(struct kobject *kobj, unsigned long rt_runtime; int rc; - sscanf(buf, "%ld", &rt_runtime); + sscanf(buf, "%lu", &rt_runtime); rc = sched_group_set_rt_runtime(up->tg, rt_runtime); diff --git a/trunk/lib/Kconfig.debug b/trunk/lib/Kconfig.debug index ce697e0b319e..7d7a31d0ddeb 100644 --- a/trunk/lib/Kconfig.debug +++ b/trunk/lib/Kconfig.debug @@ -597,19 +597,6 @@ config RCU_TORTURE_TEST_RUNNABLE Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. -config RCU_CPU_STALL_DETECTOR - bool "Check for stalled CPUs delaying RCU grace periods" - depends on CLASSIC_RCU - default n - help - This option causes RCU to printk information on which - CPUs are delaying the current grace period, but only when - the grace period extends for excessive time periods. - - Say Y if you want RCU to perform such checks. - - Say N if you are unsure. - config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL diff --git a/trunk/scripts/Makefile b/trunk/scripts/Makefile index aafdf064feef..1c73c5aea66b 100644 --- a/trunk/scripts/Makefile +++ b/trunk/scripts/Makefile @@ -20,7 +20,6 @@ hostprogs-y += unifdef subdir-$(CONFIG_MODVERSIONS) += genksyms subdir-y += mod -subdir-$(CONFIG_SECURITY_SELINUX) += selinux # Let clean descend into subdirs -subdir- += basic kconfig package selinux +subdir- += basic kconfig package diff --git a/trunk/scripts/selinux/Makefile b/trunk/scripts/selinux/Makefile deleted file mode 100644 index ca4b1ec01822..000000000000 --- a/trunk/scripts/selinux/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -subdir-y := mdp -subdir- += mdp diff --git a/trunk/scripts/selinux/README b/trunk/scripts/selinux/README deleted file mode 100644 index a936315ba2c8..000000000000 --- a/trunk/scripts/selinux/README +++ /dev/null @@ -1,2 +0,0 @@ -Please see Documentation/SELinux.txt for information on -installing a dummy SELinux policy. diff --git a/trunk/scripts/selinux/install_policy.sh b/trunk/scripts/selinux/install_policy.sh deleted file mode 100644 index 7b9ccf61f8f9..000000000000 --- a/trunk/scripts/selinux/install_policy.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/sh -if [ `id -u` -ne 0 ]; then - echo "$0: must be root to install the selinux policy" - exit 1 -fi -SF=`which setfiles` -if [ $? -eq 1 ]; then - if [ -f /sbin/setfiles ]; then - SF="/usr/setfiles" - else - echo "no selinux tools installed: setfiles" - exit 1 - fi -fi - -cd mdp - -CP=`which checkpolicy` -VERS=`$CP -V | awk '{print $1}'` - -./mdp policy.conf file_contexts -$CP -o policy.$VERS policy.conf - -mkdir -p /etc/selinux/dummy/policy -mkdir -p /etc/selinux/dummy/contexts/files - -cp file_contexts /etc/selinux/dummy/contexts/files -cp dbus_contexts /etc/selinux/dummy/contexts -cp policy.$VERS /etc/selinux/dummy/policy -FC_FILE=/etc/selinux/dummy/contexts/files/file_contexts - -if [ ! -d /etc/selinux ]; then - mkdir -p /etc/selinux -fi -if [ ! -f /etc/selinux/config ]; then - cat > /etc/selinux/config << EOF -SELINUX=enforcing -SELINUXTYPE=dummy -EOF -else - TYPE=`cat /etc/selinux/config | grep "^SELINUXTYPE" | tail -1 | awk -F= '{ print $2 '}` - if [ "eq$TYPE" != "eqdummy" ]; then - selinuxenabled - if [ $? -eq 0 ]; then - echo "SELinux already enabled with a non-dummy policy." - echo "Exiting. Please install policy by hand if that" - echo "is what you REALLY want." 
- exit 1 - fi - mv /etc/selinux/config /etc/selinux/config.mdpbak - grep -v "^SELINUXTYPE" /etc/selinux/config.mdpbak >> /etc/selinux/config - echo "SELINUXTYPE=dummy" >> /etc/selinux/config - fi -fi - -cd /etc/selinux/dummy/contexts/files -$SF file_contexts / - -mounts=`cat /proc/$$/mounts | egrep "ext2|ext3|xfs|jfs|ext4|ext4dev|gfs2" | awk '{ print $2 '}` -$SF file_contexts $mounts - - -dodev=`cat /proc/$$/mounts | grep "/dev "` -if [ "eq$dodev" != "eq" ]; then - mount --move /dev /mnt - $SF file_contexts /dev - mount --move /mnt /dev -fi - diff --git a/trunk/scripts/selinux/mdp/.gitignore b/trunk/scripts/selinux/mdp/.gitignore deleted file mode 100644 index 654546d8dffd..000000000000 --- a/trunk/scripts/selinux/mdp/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# Generated file -mdp diff --git a/trunk/scripts/selinux/mdp/Makefile b/trunk/scripts/selinux/mdp/Makefile deleted file mode 100644 index eb365b333441..000000000000 --- a/trunk/scripts/selinux/mdp/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -hostprogs-y := mdp -HOST_EXTRACFLAGS += -Isecurity/selinux/include - -always := $(hostprogs-y) -clean-files := $(hostprogs-y) policy.* file_contexts diff --git a/trunk/scripts/selinux/mdp/dbus_contexts b/trunk/scripts/selinux/mdp/dbus_contexts deleted file mode 100644 index 116e684f9fc1..000000000000 --- a/trunk/scripts/selinux/mdp/dbus_contexts +++ /dev/null @@ -1,6 +0,0 @@ - - - - - diff --git a/trunk/scripts/selinux/mdp/mdp.c b/trunk/scripts/selinux/mdp/mdp.c deleted file mode 100644 index ca757d486187..000000000000 --- a/trunk/scripts/selinux/mdp/mdp.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * - * mdp - make dummy policy - * - * When pointed at a kernel tree, builds a dummy policy for that kernel - * with exactly one type with full rights to itself. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2006 - * - * Authors: Serge E. 
Hallyn - */ - -#include -#include -#include -#include - -#include "flask.h" - -void usage(char *name) -{ - printf("usage: %s [-m] policy_file context_file\n", name); - exit(1); -} - -void find_common_name(char *cname, char *dest, int len) -{ - char *start, *end; - - start = strchr(cname, '_')+1; - end = strchr(start, '_'); - if (!start || !end || start-cname > len || end-start > len) { - printf("Error with commons defines\n"); - exit(1); - } - strncpy(dest, start, end-start); - dest[end-start] = '\0'; -} - -#define S_(x) x, -static char *classlist[] = { -#include "class_to_string.h" - NULL -}; -#undef S_ - -#include "initial_sid_to_string.h" - -#define TB_(x) char *x[] = { -#define TE_(x) NULL }; -#define S_(x) x, -#include "common_perm_to_string.h" -#undef TB_ -#undef TE_ -#undef S_ - -struct common { - char *cname; - char **perms; -}; -struct common common[] = { -#define TB_(x) { #x, x }, -#define S_(x) -#define TE_(x) -#include "common_perm_to_string.h" -#undef TB_ -#undef TE_ -#undef S_ -}; - -#define S_(x, y, z) {x, #y}, -struct av_inherit { - int class; - char *common; -}; -struct av_inherit av_inherit[] = { -#include "av_inherit.h" -}; -#undef S_ - -#include "av_permissions.h" -#define S_(x, y, z) {x, y, z}, -struct av_perms { - int class; - int perm_i; - char *perm_s; -}; -struct av_perms av_perms[] = { -#include "av_perm_to_string.h" -}; -#undef S_ - -int main(int argc, char *argv[]) -{ - int i, j, mls = 0; - char **arg, *polout, *ctxout; - int classlist_len, initial_sid_to_string_len; - FILE *fout; - - if (argc < 3) - usage(argv[0]); - arg = argv+1; - if (argc==4 && strcmp(argv[1], "-m") == 0) { - mls = 1; - arg++; - } - polout = *arg++; - ctxout = *arg; - - fout = fopen(polout, "w"); - if (!fout) { - printf("Could not open %s for writing\n", polout); - usage(argv[0]); - } - - classlist_len = sizeof(classlist) / sizeof(char *); - /* print out the classes */ - for (i=1; i < classlist_len; i++) { - if(classlist[i]) - fprintf(fout, "class %s\n", classlist[i]); - else - fprintf(fout, "class user%d\n", i); - } - fprintf(fout, "\n"); - - initial_sid_to_string_len = sizeof(initial_sid_to_string) / sizeof (char *); - /* print out the sids */ - for (i=1; i < initial_sid_to_string_len; i++) - fprintf(fout, "sid %s\n", initial_sid_to_string[i]); - fprintf(fout, "\n"); - - /* print out the commons */ - for (i=0; i< sizeof(common)/sizeof(struct common); i++) { - char cname[101]; - find_common_name(common[i].cname, cname, 100); - cname[100] = '\0'; - fprintf(fout, "common %s\n{\n", cname); - for (j=0; common[i].perms[j]; j++) - fprintf(fout, "\t%s\n", common[i].perms[j]); - fprintf(fout, "}\n\n"); - } - fprintf(fout, "\n"); - - /* print out the class permissions */ - for (i=1; i < classlist_len; i++) { - if (classlist[i]) { - int firstperm = -1, numperms = 0; - - fprintf(fout, "class %s\n", classlist[i]); - /* does it inherit from a common? 
*/ - for (j=0; j < sizeof(av_inherit)/sizeof(struct av_inherit); j++) - if (av_inherit[j].class == i) - fprintf(fout, "inherits %s\n", av_inherit[j].common); - - for (j=0; j < sizeof(av_perms)/sizeof(struct av_perms); j++) { - if (av_perms[j].class == i) { - if (firstperm == -1) - firstperm = j; - numperms++; - } - } - if (!numperms) { - fprintf(fout, "\n"); - continue; - } - - fprintf(fout, "{\n"); - /* print out the av_perms */ - for (j=0; j < numperms; j++) { - fprintf(fout, "\t%s\n", av_perms[firstperm+j].perm_s); - } - fprintf(fout, "}\n\n"); - } - } - fprintf(fout, "\n"); - - /* NOW PRINT OUT MLS STUFF */ - if (mls) { - printf("MLS not yet implemented\n"); - exit(1); - } - - /* types, roles, and allows */ - fprintf(fout, "type base_t;\n"); - fprintf(fout, "role base_r types { base_t };\n"); - for (i=1; i < classlist_len; i++) { - if (classlist[i]) - fprintf(fout, "allow base_t base_t:%s *;\n", classlist[i]); - else - fprintf(fout, "allow base_t base_t:user%d *;\n", i); - } - fprintf(fout, "user user_u roles { base_r };\n"); - fprintf(fout, "\n"); - - /* default sids */ - for (i=1; i < initial_sid_to_string_len; i++) - fprintf(fout, "sid %s user_u:base_r:base_t\n", initial_sid_to_string[i]); - fprintf(fout, "\n"); - - - fprintf(fout, "fs_use_xattr ext2 user_u:base_r:base_t;\n"); - fprintf(fout, "fs_use_xattr ext3 user_u:base_r:base_t;\n"); - fprintf(fout, "fs_use_xattr jfs user_u:base_r:base_t;\n"); - fprintf(fout, "fs_use_xattr xfs user_u:base_r:base_t;\n"); - fprintf(fout, "fs_use_xattr reiserfs user_u:base_r:base_t;\n"); - - fprintf(fout, "fs_use_task pipefs user_u:base_r:base_t;\n"); - fprintf(fout, "fs_use_task sockfs user_u:base_r:base_t;\n"); - - fprintf(fout, "fs_use_trans devpts user_u:base_r:base_t;\n"); - fprintf(fout, "fs_use_trans tmpfs user_u:base_r:base_t;\n"); - fprintf(fout, "fs_use_trans shm user_u:base_r:base_t;\n"); - - fprintf(fout, "genfscon proc / user_u:base_r:base_t\n"); - - fclose(fout); - - fout = fopen(ctxout, "w"); - if (!fout) { - printf("Wrote policy, but cannot open %s for writing\n", ctxout); - usage(argv[0]); - } - fprintf(fout, "/ user_u:base_r:base_t\n"); - fprintf(fout, "/.* user_u:base_r:base_t\n"); - fclose(fout); - - return 0; -} diff --git a/trunk/security/Kconfig b/trunk/security/Kconfig index d9f47ce7e207..559293922a47 100644 --- a/trunk/security/Kconfig +++ b/trunk/security/Kconfig @@ -51,14 +51,6 @@ config SECURITY If you are unsure how to answer this question, answer N. -config SECURITYFS - bool "Enable the securityfs filesystem" - help - This will build the securityfs filesystem. It is currently used by - the TPM bios character driver. It is not used by SELinux or SMACK. - - If you are unsure how to answer this question, answer N. - config SECURITY_NETWORK bool "Socket and Networking Security Hooks" depends on SECURITY diff --git a/trunk/security/Makefile b/trunk/security/Makefile index c05c127fff9a..f65426099aa6 100644 --- a/trunk/security/Makefile +++ b/trunk/security/Makefile @@ -10,8 +10,7 @@ subdir-$(CONFIG_SECURITY_SMACK) += smack obj-y += commoncap.o # Object file lists -obj-$(CONFIG_SECURITY) += security.o capability.o -obj-$(CONFIG_SECURITYFS) += inode.o +obj-$(CONFIG_SECURITY) += security.o capability.o inode.o # Must precede capability.o in order to stack properly. 
obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o diff --git a/trunk/security/commoncap.c b/trunk/security/commoncap.c index 399bfdb9e2da..e4c4b3fc0c04 100644 --- a/trunk/security/commoncap.c +++ b/trunk/security/commoncap.c @@ -541,7 +541,7 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, * yet with increased caps. * So we check for increased caps on the target process. */ -static int cap_safe_nice(struct task_struct *p) +static inline int cap_safe_nice(struct task_struct *p) { if (!cap_issubset(p->cap_permitted, current->cap_permitted) && !capable(CAP_SYS_NICE)) diff --git a/trunk/security/inode.c b/trunk/security/inode.c index ca4958ebad8d..acc6cf0d7900 100644 --- a/trunk/security/inode.c +++ b/trunk/security/inode.c @@ -190,7 +190,7 @@ static int create_by_name(const char *name, mode_t mode, * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a - * directory dentry if set. If this parameter is %NULL, then the + * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the securityfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on @@ -199,18 +199,18 @@ static int create_by_name(const char *name, mode_t mode, * this file. * * This is the basic "create a file" function for securityfs. It allows for a - * wide range of flexibility in creating a file, or a directory (if you + * wide range of flexibility in creating a file, or a directory (if you * want to create a directory, the securityfs_create_dir() function is - * recommended to be used instead). + * recommended to be used instead.) * - * This function returns a pointer to a dentry if it succeeds. This + * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the securityfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here). If an error occurs, %NULL is returned. + * you are responsible here.) If an error occurs, NULL will be returned. * - * If securityfs is not enabled in the kernel, the value %-ENODEV is + * If securityfs is not enabled in the kernel, the value -ENODEV will be * returned. It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling + * NULL or !NULL instead as to eliminate the need for #ifdef in the calling * code. */ struct dentry *securityfs_create_file(const char *name, mode_t mode, @@ -252,19 +252,19 @@ EXPORT_SYMBOL_GPL(securityfs_create_file); * @name: a pointer to a string containing the name of the directory to * create. * @parent: a pointer to the parent dentry for this file. This should be a - * directory dentry if set. If this parameter is %NULL, then the + * directory dentry if set. If this parameter is NULL, then the * directory will be created in the root of the securityfs filesystem. * - * This function creates a directory in securityfs with the given @name. + * This function creates a directory in securityfs with the given name. * - * This function returns a pointer to a dentry if it succeeds. This + * This function will return a pointer to a dentry if it succeeds.
This * pointer must be passed to the securityfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here). If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, NULL will be returned. * - * If securityfs is not enabled in the kernel, the value %-ENODEV is + * If securityfs is not enabled in the kernel, the value -ENODEV will be * returned. It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling + * NULL or !NULL instead as to eliminate the need for #ifdef in the calling * code. */ struct dentry *securityfs_create_dir(const char *name, struct dentry *parent) @@ -278,15 +278,16 @@ EXPORT_SYMBOL_GPL(securityfs_create_dir); /** * securityfs_remove - removes a file or directory from the securityfs filesystem * - * @dentry: a pointer to a the dentry of the file or directory to be removed. + * @dentry: a pointer to the dentry of the file or directory to be + * removed. * * This function removes a file or directory in securityfs that was previously * created with a call to another securityfs function (like * securityfs_create_file() or variants thereof.) * * This function is required to be called in order for the file to be - * removed. No automatic cleanup of files will happen when a module is - * removed; you are responsible here. + * removed; no automatic cleanup of files will happen when a module is + * removed; you are responsible here. */ void securityfs_remove(struct dentry *dentry) { diff --git a/trunk/security/security.c b/trunk/security/security.c index 255b08559b2b..3a4b4f55b33f 100644 --- a/trunk/security/security.c +++ b/trunk/security/security.c @@ -82,8 +82,8 @@ __setup("security=", choose_lsm); * * Return true if: * -The passed LSM is the one chosen by user at boot time, - * -or user didn't specify a specific LSM and we're the first to ask - * for registration permission, + * -or user didn't specify a specific LSM and we're the first to ask + * for registration permission, * -or the passed LSM is currently loaded. * Otherwise, return false. */ @@ -101,13 +101,13 @@ int __init security_module_enable(struct security_operations *ops) * register_security - registers a security framework with the kernel * @ops: a pointer to the struct security_options that is to be registered * - * This function allows a security module to register itself with the + * This function is to allow a security module to register itself with the * kernel security subsystem. Some rudimentary checking is done on the @ops * value passed to this function. You'll need to check first if your LSM * is allowed to register its @ops by calling security_module_enable(@ops). * * If there is already a security module registered with the kernel, - * an error will be returned. Otherwise %0 is returned on success. + * an error will be returned. Otherwise 0 is returned on success. */ int register_security(struct security_operations *ops) { diff --git a/trunk/security/selinux/Kconfig b/trunk/security/selinux/Kconfig index 26301dd651d3..a436d1cfa88b 100644 --- a/trunk/security/selinux/Kconfig +++ b/trunk/security/selinux/Kconfig @@ -6,6 +6,9 @@ config SECURITY_SELINUX help This selects NSA Security-Enhanced Linux (SELinux). You will also need a policy configuration and a labeled filesystem.
+ You can obtain the policy compiler (checkpolicy), the utility for + labeling filesystems (setfiles), and an example policy configuration + from . If you are unsure how to answer this question, answer N. config SECURITY_SELINUX_BOOTPARAM diff --git a/trunk/security/selinux/avc.c b/trunk/security/selinux/avc.c index cb30c7e350b3..114b4b4c97b2 100644 --- a/trunk/security/selinux/avc.c +++ b/trunk/security/selinux/avc.c @@ -136,7 +136,7 @@ static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass) * @tclass: target security class * @av: access vector */ -void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) +static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) { const char **common_pts = NULL; u32 common_base = 0; diff --git a/trunk/security/selinux/hooks.c b/trunk/security/selinux/hooks.c index 4a7374c12d9c..03fc6a81ae32 100644 --- a/trunk/security/selinux/hooks.c +++ b/trunk/security/selinux/hooks.c @@ -957,8 +957,7 @@ static int superblock_doinit(struct super_block *sb, void *data) return rc; } -static void selinux_write_opts(struct seq_file *m, - struct security_mnt_opts *opts) +void selinux_write_opts(struct seq_file *m, struct security_mnt_opts *opts) { int i; char *prefix; @@ -1291,7 +1290,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent /* Default to the fs superblock SID. */ isec->sid = sbsec->sid; - if (sbsec->proc && !S_ISLNK(inode->i_mode)) { + if (sbsec->proc) { struct proc_inode *proci = PROC_I(inode); if (proci->pde) { isec->sclass = inode_mode_to_security_class(inode->i_mode); @@ -3549,44 +3548,38 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb, #endif /* IPV6 */ static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad, - char **_addrp, int src, u8 *proto) + char **addrp, int src, u8 *proto) { - char *addrp; - int ret; + int ret = 0; switch (ad->u.net.family) { case PF_INET: ret = selinux_parse_skb_ipv4(skb, ad, proto); - if (ret) - goto parse_error; - addrp = (char *)(src ? &ad->u.net.v4info.saddr : - &ad->u.net.v4info.daddr); - goto okay; + if (ret || !addrp) + break; + *addrp = (char *)(src ? &ad->u.net.v4info.saddr : + &ad->u.net.v4info.daddr); + break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: ret = selinux_parse_skb_ipv6(skb, ad, proto); - if (ret) - goto parse_error; - addrp = (char *)(src ? &ad->u.net.v6info.saddr : - &ad->u.net.v6info.daddr); - goto okay; + if (ret || !addrp) + break; + *addrp = (char *)(src ? &ad->u.net.v6info.saddr : + &ad->u.net.v6info.daddr); + break; #endif /* IPV6 */ default: - addrp = NULL; - goto okay; + break; } -parse_error: - printk(KERN_WARNING - "SELinux: failure in selinux_parse_skb()," - " unable to parse packet\n"); - return ret; + if (unlikely(ret)) + printk(KERN_WARNING + "SELinux: failure in selinux_parse_skb()," + " unable to parse packet\n"); -okay: - if (_addrp) - *_addrp = addrp; - return 0; + return ret; } /** @@ -5226,12 +5219,8 @@ static int selinux_setprocattr(struct task_struct *p, if (sid == 0) return -EINVAL; - /* - * SELinux allows to change context in the following case only. - * - Single threaded processes. - * - Multi threaded processes intend to change its context into - * more restricted domain (defined by TYPEBOUNDS statement). 
- */ + + /* Only allow single threaded processes to change context */ if (atomic_read(&p->mm->mm_users) != 1) { struct task_struct *g, *t; struct mm_struct *mm = p->mm; @@ -5239,16 +5228,11 @@ static int selinux_setprocattr(struct task_struct *p, do_each_thread(g, t) { if (t->mm == mm && t != p) { read_unlock(&tasklist_lock); - error = security_bounded_transition(tsec->sid, sid); - if (!error) - goto boundary_ok; - - return error; + return -EPERM; } } while_each_thread(g, t); read_unlock(&tasklist_lock); } -boundary_ok: /* Check permissions for the transition. */ error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, diff --git a/trunk/security/selinux/include/avc.h b/trunk/security/selinux/include/avc.h index d12ff1a9c0aa..7b9769f5e775 100644 --- a/trunk/security/selinux/include/avc.h +++ b/trunk/security/selinux/include/avc.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -127,9 +126,6 @@ int avc_add_callback(int (*callback)(u32 event, u32 ssid, u32 tsid, u32 events, u32 ssid, u32 tsid, u16 tclass, u32 perms); -/* Shows permission in human readable form */ -void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av); - /* Exported to selinuxfs */ int avc_get_hash_stats(char *page); extern unsigned int avc_cache_threshold; diff --git a/trunk/security/selinux/include/security.h b/trunk/security/selinux/include/security.h index 72447370bc95..7c543003d653 100644 --- a/trunk/security/selinux/include/security.h +++ b/trunk/security/selinux/include/security.h @@ -27,14 +27,13 @@ #define POLICYDB_VERSION_RANGETRANS 21 #define POLICYDB_VERSION_POLCAP 22 #define POLICYDB_VERSION_PERMISSIVE 23 -#define POLICYDB_VERSION_BOUNDARY 24 /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE #ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX #define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE #else -#define POLICYDB_VERSION_MAX POLICYDB_VERSION_BOUNDARY +#define POLICYDB_VERSION_MAX POLICYDB_VERSION_PERMISSIVE #endif #define CONTEXT_MNT 0x01 @@ -63,16 +62,6 @@ enum { extern int selinux_policycap_netpeer; extern int selinux_policycap_openperm; -/* - * type_datum properties - * available at the kernel policy version >= POLICYDB_VERSION_BOUNDARY - */ -#define TYPEDATUM_PROPERTY_PRIMARY 0x0001 -#define TYPEDATUM_PROPERTY_ATTRIBUTE 0x0002 - -/* limitation of boundary depth */ -#define POLICYDB_BOUNDS_MAXDEPTH 4 - int security_load_policy(void *data, size_t len); int security_policycap_supported(unsigned int req_cap); @@ -128,8 +117,6 @@ int security_node_sid(u16 domain, void *addr, u32 addrlen, int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); -int security_bounded_transition(u32 oldsid, u32 newsid); - int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid); int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, diff --git a/trunk/security/selinux/ss/avtab.c b/trunk/security/selinux/ss/avtab.c index 1215b8e47dba..a1be97f8beea 100644 --- a/trunk/security/selinux/ss/avtab.c +++ b/trunk/security/selinux/ss/avtab.c @@ -98,7 +98,7 @@ struct avtab_node * avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datum *datum) { int hvalue; - struct avtab_node *prev, *cur; + struct avtab_node *prev, *cur, *newnode; u16 specified = key->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD); if (!h || !h->htable) @@ -122,7 +122,9 @@ avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datu key->target_class < cur->key.target_class) 
break; } - return avtab_insert_node(h, hvalue, prev, cur, key, datum); + newnode = avtab_insert_node(h, hvalue, prev, cur, key, datum); + + return newnode; } struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *key) @@ -229,7 +231,7 @@ void avtab_destroy(struct avtab *h) for (i = 0; i < h->nslot; i++) { cur = h->htable[i]; - while (cur) { + while (cur != NULL) { temp = cur; cur = cur->next; kmem_cache_free(avtab_node_cachep, temp); diff --git a/trunk/security/selinux/ss/conditional.c b/trunk/security/selinux/ss/conditional.c index 4a4e35cac22b..fb4efe4f4bc8 100644 --- a/trunk/security/selinux/ss/conditional.c +++ b/trunk/security/selinux/ss/conditional.c @@ -29,7 +29,7 @@ static int cond_evaluate_expr(struct policydb *p, struct cond_expr *expr) int s[COND_EXPR_MAXDEPTH]; int sp = -1; - for (cur = expr; cur; cur = cur->next) { + for (cur = expr; cur != NULL; cur = cur->next) { switch (cur->expr_type) { case COND_BOOL: if (sp == (COND_EXPR_MAXDEPTH - 1)) @@ -97,14 +97,14 @@ int evaluate_cond_node(struct policydb *p, struct cond_node *node) if (new_state == -1) printk(KERN_ERR "SELinux: expression result was undefined - disabling all rules.\n"); /* turn the rules on or off */ - for (cur = node->true_list; cur; cur = cur->next) { + for (cur = node->true_list; cur != NULL; cur = cur->next) { if (new_state <= 0) cur->node->key.specified &= ~AVTAB_ENABLED; else cur->node->key.specified |= AVTAB_ENABLED; } - for (cur = node->false_list; cur; cur = cur->next) { + for (cur = node->false_list; cur != NULL; cur = cur->next) { /* -1 or 1 */ if (new_state) cur->node->key.specified &= ~AVTAB_ENABLED; @@ -128,7 +128,7 @@ int cond_policydb_init(struct policydb *p) static void cond_av_list_destroy(struct cond_av_list *list) { struct cond_av_list *cur, *next; - for (cur = list; cur; cur = next) { + for (cur = list; cur != NULL; cur = next) { next = cur->next; /* the avtab_ptr_t node is destroy by the avtab */ kfree(cur); @@ -139,7 +139,7 @@ static void cond_node_destroy(struct cond_node *node) { struct cond_expr *cur_expr, *next_expr; - for (cur_expr = node->expr; cur_expr; cur_expr = next_expr) { + for (cur_expr = node->expr; cur_expr != NULL; cur_expr = next_expr) { next_expr = cur_expr->next; kfree(cur_expr); } @@ -155,7 +155,7 @@ static void cond_list_destroy(struct cond_node *list) if (list == NULL) return; - for (cur = list; cur; cur = next) { + for (cur = list; cur != NULL; cur = next) { next = cur->next; cond_node_destroy(cur); } @@ -239,7 +239,7 @@ int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto err; - key[len] = '\0'; + key[len] = 0; if (hashtab_insert(h, key, booldatum)) goto err; @@ -291,7 +291,7 @@ static int cond_insertf(struct avtab *a, struct avtab_key *k, struct avtab_datum goto err; } found = 0; - for (cur = other; cur; cur = cur->next) { + for (cur = other; cur != NULL; cur = cur->next) { if (cur->node == node_ptr) { found = 1; break; @@ -485,7 +485,7 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decisi if (!ctab || !key || !avd) return; - for (node = avtab_search_node(ctab, key); node; + for (node = avtab_search_node(ctab, key); node != NULL; node = avtab_search_node_next(node, key->specified)) { if ((u16)(AVTAB_ALLOWED|AVTAB_ENABLED) == (node->key.specified & (AVTAB_ALLOWED|AVTAB_ENABLED))) diff --git a/trunk/security/selinux/ss/conditional.h b/trunk/security/selinux/ss/conditional.h index 53ddb013ae57..65b9f8366e9c 100644 --- a/trunk/security/selinux/ss/conditional.h +++ 
b/trunk/security/selinux/ss/conditional.h @@ -28,7 +28,7 @@ struct cond_expr { #define COND_XOR 5 /* bool ^ bool */ #define COND_EQ 6 /* bool == bool */ #define COND_NEQ 7 /* bool != bool */ -#define COND_LAST COND_NEQ +#define COND_LAST 8 __u32 expr_type; __u32 bool; struct cond_expr *next; diff --git a/trunk/security/selinux/ss/ebitmap.c b/trunk/security/selinux/ss/ebitmap.c index 68c7348d1acc..ddc275490af8 100644 --- a/trunk/security/selinux/ss/ebitmap.c +++ b/trunk/security/selinux/ss/ebitmap.c @@ -109,7 +109,7 @@ int ebitmap_netlbl_export(struct ebitmap *ebmap, *catmap = c_iter; c_iter->startbit = e_iter->startbit & ~(NETLBL_CATMAP_SIZE - 1); - while (e_iter) { + while (e_iter != NULL) { for (i = 0; i < EBITMAP_UNIT_NUMS; i++) { unsigned int delta, e_startbit, c_endbit; @@ -197,7 +197,7 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap, } } c_iter = c_iter->next; - } while (c_iter); + } while (c_iter != NULL); if (e_iter != NULL) ebmap->highbit = e_iter->startbit + EBITMAP_SIZE; else diff --git a/trunk/security/selinux/ss/hashtab.c b/trunk/security/selinux/ss/hashtab.c index 933e735bb185..2e7788e13213 100644 --- a/trunk/security/selinux/ss/hashtab.c +++ b/trunk/security/selinux/ss/hashtab.c @@ -81,7 +81,7 @@ void *hashtab_search(struct hashtab *h, const void *key) hvalue = h->hash_value(h, key); cur = h->htable[hvalue]; - while (cur && h->keycmp(h, key, cur->key) > 0) + while (cur != NULL && h->keycmp(h, key, cur->key) > 0) cur = cur->next; if (cur == NULL || (h->keycmp(h, key, cur->key) != 0)) @@ -100,7 +100,7 @@ void hashtab_destroy(struct hashtab *h) for (i = 0; i < h->size; i++) { cur = h->htable[i]; - while (cur) { + while (cur != NULL) { temp = cur; cur = cur->next; kfree(temp); @@ -127,7 +127,7 @@ int hashtab_map(struct hashtab *h, for (i = 0; i < h->size; i++) { cur = h->htable[i]; - while (cur) { + while (cur != NULL) { ret = apply(cur->key, cur->datum, args); if (ret) return ret; diff --git a/trunk/security/selinux/ss/mls.c b/trunk/security/selinux/ss/mls.c index b5407f16c2a4..77d745da48bb 100644 --- a/trunk/security/selinux/ss/mls.c +++ b/trunk/security/selinux/ss/mls.c @@ -283,8 +283,8 @@ int mls_context_to_sid(struct policydb *pol, p++; delim = *p; - if (delim != '\0') - *p++ = '\0'; + if (delim != 0) + *p++ = 0; for (l = 0; l < 2; l++) { levdatum = hashtab_search(pol->p_levels.table, scontextp); @@ -302,14 +302,14 @@ int mls_context_to_sid(struct policydb *pol, while (*p && *p != ',' && *p != '-') p++; delim = *p; - if (delim != '\0') - *p++ = '\0'; + if (delim != 0) + *p++ = 0; /* Separate into range if exists */ rngptr = strchr(scontextp, '.'); if (rngptr != NULL) { /* Remove '.' 
*/ - *rngptr++ = '\0'; + *rngptr++ = 0; } catdatum = hashtab_search(pol->p_cats.table, @@ -357,8 +357,8 @@ int mls_context_to_sid(struct policydb *pol, p++; delim = *p; - if (delim != '\0') - *p++ = '\0'; + if (delim != 0) + *p++ = 0; } else break; } diff --git a/trunk/security/selinux/ss/policydb.c b/trunk/security/selinux/ss/policydb.c index 72e4a54973aa..2391761ae422 100644 --- a/trunk/security/selinux/ss/policydb.c +++ b/trunk/security/selinux/ss/policydb.c @@ -30,7 +30,6 @@ #include #include #include -#include #include "security.h" #include "policydb.h" @@ -117,12 +116,7 @@ static struct policydb_compat_info policydb_compat[] = { .version = POLICYDB_VERSION_PERMISSIVE, .sym_num = SYM_NUM, .ocon_num = OCON_NUM, - }, - { - .version = POLICYDB_VERSION_BOUNDARY, - .sym_num = SYM_NUM, - .ocon_num = OCON_NUM, - }, + } }; static struct policydb_compat_info *policydb_lookup_compat(int version) @@ -260,9 +254,7 @@ static int role_index(void *key, void *datum, void *datap) role = datum; p = datap; - if (!role->value - || role->value > p->p_roles.nprim - || role->bounds > p->p_roles.nprim) + if (!role->value || role->value > p->p_roles.nprim) return -EINVAL; p->p_role_val_to_name[role->value - 1] = key; p->role_val_to_struct[role->value - 1] = role; @@ -278,12 +270,9 @@ static int type_index(void *key, void *datum, void *datap) p = datap; if (typdatum->primary) { - if (!typdatum->value - || typdatum->value > p->p_types.nprim - || typdatum->bounds > p->p_types.nprim) + if (!typdatum->value || typdatum->value > p->p_types.nprim) return -EINVAL; p->p_type_val_to_name[typdatum->value - 1] = key; - p->type_val_to_struct[typdatum->value - 1] = typdatum; } return 0; @@ -296,9 +285,7 @@ static int user_index(void *key, void *datum, void *datap) usrdatum = datum; p = datap; - if (!usrdatum->value - || usrdatum->value > p->p_users.nprim - || usrdatum->bounds > p->p_users.nprim) + if (!usrdatum->value || usrdatum->value > p->p_users.nprim) return -EINVAL; p->p_user_val_to_name[usrdatum->value - 1] = key; p->user_val_to_struct[usrdatum->value - 1] = usrdatum; @@ -451,14 +438,6 @@ static int policydb_index_others(struct policydb *p) goto out; } - p->type_val_to_struct = - kmalloc(p->p_types.nprim * sizeof(*(p->type_val_to_struct)), - GFP_KERNEL); - if (!p->type_val_to_struct) { - rc = -ENOMEM; - goto out; - } - if (cond_init_bool_indexes(p)) { rc = -ENOMEM; goto out; @@ -646,7 +625,6 @@ void policydb_destroy(struct policydb *p) kfree(p->class_val_to_struct); kfree(p->role_val_to_struct); kfree(p->user_val_to_struct); - kfree(p->type_val_to_struct); avtab_destroy(&p->te_avtab); @@ -954,7 +932,7 @@ static int perm_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = '\0'; + key[len] = 0; rc = hashtab_insert(h, key, perdatum); if (rc) @@ -1001,7 +979,7 @@ static int common_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = '\0'; + key[len] = 0; for (i = 0; i < nel; i++) { rc = perm_read(p, comdatum->permissions.table, fp); @@ -1139,7 +1117,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = '\0'; + key[len] = 0; if (len2) { cladatum->comkey = kmalloc(len2 + 1, GFP_KERNEL); @@ -1150,7 +1128,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(cladatum->comkey, fp, len2); if (rc < 0) goto bad; - cladatum->comkey[len2] = '\0'; + cladatum->comkey[len2] = 
0; cladatum->comdatum = hashtab_search(p->p_commons.table, cladatum->comkey); @@ -1198,8 +1176,8 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp) { char *key = NULL; struct role_datum *role; - int rc, to_read = 2; - __le32 buf[3]; + int rc; + __le32 buf[2]; u32 len; role = kzalloc(sizeof(*role), GFP_KERNEL); @@ -1208,17 +1186,12 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp) goto out; } - if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) - to_read = 3; - - rc = next_entry(buf, fp, sizeof(buf[0]) * to_read); + rc = next_entry(buf, fp, sizeof buf); if (rc < 0) goto bad; len = le32_to_cpu(buf[0]); role->value = le32_to_cpu(buf[1]); - if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) - role->bounds = le32_to_cpu(buf[2]); key = kmalloc(len + 1, GFP_KERNEL); if (!key) { @@ -1228,7 +1201,7 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = '\0'; + key[len] = 0; rc = ebitmap_read(&role->dominates, fp); if (rc) @@ -1263,8 +1236,8 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp) { char *key = NULL; struct type_datum *typdatum; - int rc, to_read = 3; - __le32 buf[4]; + int rc; + __le32 buf[3]; u32 len; typdatum = kzalloc(sizeof(*typdatum), GFP_KERNEL); @@ -1273,27 +1246,13 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp) return rc; } - if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) - to_read = 4; - - rc = next_entry(buf, fp, sizeof(buf[0]) * to_read); + rc = next_entry(buf, fp, sizeof buf); if (rc < 0) goto bad; len = le32_to_cpu(buf[0]); typdatum->value = le32_to_cpu(buf[1]); - if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) { - u32 prop = le32_to_cpu(buf[2]); - - if (prop & TYPEDATUM_PROPERTY_PRIMARY) - typdatum->primary = 1; - if (prop & TYPEDATUM_PROPERTY_ATTRIBUTE) - typdatum->attribute = 1; - - typdatum->bounds = le32_to_cpu(buf[3]); - } else { - typdatum->primary = le32_to_cpu(buf[2]); - } + typdatum->primary = le32_to_cpu(buf[2]); key = kmalloc(len + 1, GFP_KERNEL); if (!key) { @@ -1303,7 +1262,7 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = '\0'; + key[len] = 0; rc = hashtab_insert(h, key, typdatum); if (rc) @@ -1350,8 +1309,8 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp) { char *key = NULL; struct user_datum *usrdatum; - int rc, to_read = 2; - __le32 buf[3]; + int rc; + __le32 buf[2]; u32 len; usrdatum = kzalloc(sizeof(*usrdatum), GFP_KERNEL); @@ -1360,17 +1319,12 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp) goto out; } - if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) - to_read = 3; - - rc = next_entry(buf, fp, sizeof(buf[0]) * to_read); + rc = next_entry(buf, fp, sizeof buf); if (rc < 0) goto bad; len = le32_to_cpu(buf[0]); usrdatum->value = le32_to_cpu(buf[1]); - if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) - usrdatum->bounds = le32_to_cpu(buf[2]); key = kmalloc(len + 1, GFP_KERNEL); if (!key) { @@ -1380,7 +1334,7 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = '\0'; + key[len] = 0; rc = ebitmap_read(&usrdatum->roles, fp); if (rc) @@ -1434,7 +1388,7 @@ static int sens_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = '\0'; + key[len] = 0; levdatum->level = kmalloc(sizeof(struct mls_level), GFP_ATOMIC); 
if (!levdatum->level) { @@ -1486,7 +1440,7 @@ static int cat_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = '\0'; + key[len] = 0; rc = hashtab_insert(h, key, catdatum); if (rc) @@ -1511,133 +1465,6 @@ static int (*read_f[SYM_NUM]) (struct policydb *p, struct hashtab *h, void *fp) cat_read, }; -static int user_bounds_sanity_check(void *key, void *datum, void *datap) -{ - struct user_datum *upper, *user; - struct policydb *p = datap; - int depth = 0; - - upper = user = datum; - while (upper->bounds) { - struct ebitmap_node *node; - unsigned long bit; - - if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { - printk(KERN_ERR "SELinux: user %s: " - "too deep or looped boundary", - (char *) key); - return -EINVAL; - } - - upper = p->user_val_to_struct[upper->bounds - 1]; - ebitmap_for_each_positive_bit(&user->roles, node, bit) { - if (ebitmap_get_bit(&upper->roles, bit)) - continue; - - printk(KERN_ERR - "SELinux: boundary violated policy: " - "user=%s role=%s bounds=%s\n", - p->p_user_val_to_name[user->value - 1], - p->p_role_val_to_name[bit], - p->p_user_val_to_name[upper->value - 1]); - - return -EINVAL; - } - } - - return 0; -} - -static int role_bounds_sanity_check(void *key, void *datum, void *datap) -{ - struct role_datum *upper, *role; - struct policydb *p = datap; - int depth = 0; - - upper = role = datum; - while (upper->bounds) { - struct ebitmap_node *node; - unsigned long bit; - - if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { - printk(KERN_ERR "SELinux: role %s: " - "too deep or looped bounds\n", - (char *) key); - return -EINVAL; - } - - upper = p->role_val_to_struct[upper->bounds - 1]; - ebitmap_for_each_positive_bit(&role->types, node, bit) { - if (ebitmap_get_bit(&upper->types, bit)) - continue; - - printk(KERN_ERR - "SELinux: boundary violated policy: " - "role=%s type=%s bounds=%s\n", - p->p_role_val_to_name[role->value - 1], - p->p_type_val_to_name[bit], - p->p_role_val_to_name[upper->value - 1]); - - return -EINVAL; - } - } - - return 0; -} - -static int type_bounds_sanity_check(void *key, void *datum, void *datap) -{ - struct type_datum *upper, *type; - struct policydb *p = datap; - int depth = 0; - - upper = type = datum; - while (upper->bounds) { - if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { - printk(KERN_ERR "SELinux: type %s: " - "too deep or looped boundary\n", - (char *) key); - return -EINVAL; - } - - upper = p->type_val_to_struct[upper->bounds - 1]; - if (upper->attribute) { - printk(KERN_ERR "SELinux: type %s: " - "bounded by attribute %s", - (char *) key, - p->p_type_val_to_name[upper->value - 1]); - return -EINVAL; - } - } - - return 0; -} - -static int policydb_bounds_sanity_check(struct policydb *p) -{ - int rc; - - if (p->policyvers < POLICYDB_VERSION_BOUNDARY) - return 0; - - rc = hashtab_map(p->p_users.table, - user_bounds_sanity_check, p); - if (rc) - return rc; - - rc = hashtab_map(p->p_roles.table, - role_bounds_sanity_check, p); - if (rc) - return rc; - - rc = hashtab_map(p->p_types.table, - type_bounds_sanity_check, p); - if (rc) - return rc; - - return 0; -} - extern int ss_initialized; /* @@ -1696,7 +1523,7 @@ int policydb_read(struct policydb *p, void *fp) kfree(policydb_str); goto bad; } - policydb_str[len] = '\0'; + policydb_str[len] = 0; if (strcmp(policydb_str, POLICYDB_STRING)) { printk(KERN_ERR "SELinux: policydb string %s does not match " "my string %s\n", policydb_str, POLICYDB_STRING); @@ -2134,10 +1961,6 @@ int policydb_read(struct policydb *p, void *fp) goto bad; } - rc = 
policydb_bounds_sanity_check(p); - if (rc) - goto bad; - rc = 0; out: return rc; diff --git a/trunk/security/selinux/ss/policydb.h b/trunk/security/selinux/ss/policydb.h index 55152d498b53..4253370fda6a 100644 --- a/trunk/security/selinux/ss/policydb.h +++ b/trunk/security/selinux/ss/policydb.h @@ -61,7 +61,6 @@ struct class_datum { /* Role attributes */ struct role_datum { u32 value; /* internal role value */ - u32 bounds; /* boundary of role */ struct ebitmap dominates; /* set of roles dominated by this role */ struct ebitmap types; /* set of authorized types for role */ }; @@ -82,15 +81,12 @@ struct role_allow { /* Type attributes */ struct type_datum { u32 value; /* internal type value */ - u32 bounds; /* boundary of type */ unsigned char primary; /* primary name? */ - unsigned char attribute;/* attribute ?*/ }; /* User attributes */ struct user_datum { u32 value; /* internal user value */ - u32 bounds; /* bounds of user */ struct ebitmap roles; /* set of authorized roles for user */ struct mls_range range; /* MLS range (min - max) for user */ struct mls_level dfltlevel; /* default login MLS level for user */ @@ -213,7 +209,6 @@ struct policydb { struct class_datum **class_val_to_struct; struct role_datum **role_val_to_struct; struct user_datum **user_val_to_struct; - struct type_datum **type_val_to_struct; /* type enforcement access vectors and transitions */ struct avtab te_avtab; diff --git a/trunk/security/selinux/ss/services.c b/trunk/security/selinux/ss/services.c index ab0cc0c7b944..8551952ef329 100644 --- a/trunk/security/selinux/ss/services.c +++ b/trunk/security/selinux/ss/services.c @@ -88,11 +88,6 @@ static u32 latest_granting; static int context_struct_to_string(struct context *context, char **scontext, u32 *scontext_len); -static int context_struct_compute_av(struct context *scontext, - struct context *tcontext, - u16 tclass, - u32 requested, - struct av_decision *avd); /* * Return the boolean value of a constraint expression * when it is applied to the specified source and target @@ -278,100 +273,6 @@ static int constraint_expr_eval(struct context *scontext, return s[0]; } -/* - * security_boundary_permission - drops violated permissions - * on boundary constraint. 
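The user/role/type bounds sanity checks being removed above all shared one walk: follow the ->bounds chain upward, cap the depth to reject loops, and require the bounded datum's permission set to stay a subset of its bound's at every level. A minimal sketch of that pattern, with a u32 bitmask assumed in place of the kernel's ebitmap and MAXDEPTH mirroring POLICYDB_BOUNDS_MAXDEPTH (struct datum and bounds_sanity_check are illustrative names):

#include <stdio.h>
#include <stdint.h>

#define MAXDEPTH 4

struct datum {
	uint32_t bounds;	/* 1-based index of bounding datum, 0 = none */
	uint32_t set;		/* authorized roles/types as a bitmask */
};

static int bounds_sanity_check(struct datum *table, uint32_t idx)
{
	struct datum *d = &table[idx - 1], *upper = d;
	int depth = 0;

	while (upper->bounds) {
		if (++depth == MAXDEPTH) {
			fprintf(stderr, "too deep or looped boundary\n");
			return -1;
		}
		upper = &table[upper->bounds - 1];
		if (d->set & ~upper->set) {	/* child exceeds its bound */
			fprintf(stderr, "boundary violated\n");
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct datum table[] = {
		{ .bounds = 0, .set = 0x7 },	/* 1: top of the chain */
		{ .bounds = 1, .set = 0x3 },	/* 2: bounded by 1, ok */
		{ .bounds = 1, .set = 0xf },	/* 3: exceeds its bound */
	};

	printf("2: %d\n", bounds_sanity_check(table, 2));	/* 0  */
	printf("3: %d\n", bounds_sanity_check(table, 3));	/* -1 */
	return 0;
}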
- */ -static void type_attribute_bounds_av(struct context *scontext, - struct context *tcontext, - u16 tclass, - u32 requested, - struct av_decision *avd) -{ - struct context lo_scontext; - struct context lo_tcontext; - struct av_decision lo_avd; - struct type_datum *source - = policydb.type_val_to_struct[scontext->type - 1]; - struct type_datum *target - = policydb.type_val_to_struct[tcontext->type - 1]; - u32 masked = 0; - - if (source->bounds) { - memset(&lo_avd, 0, sizeof(lo_avd)); - - memcpy(&lo_scontext, scontext, sizeof(lo_scontext)); - lo_scontext.type = source->bounds; - - context_struct_compute_av(&lo_scontext, - tcontext, - tclass, - requested, - &lo_avd); - if ((lo_avd.allowed & avd->allowed) == avd->allowed) - return; /* no masked permission */ - masked = ~lo_avd.allowed & avd->allowed; - } - - if (target->bounds) { - memset(&lo_avd, 0, sizeof(lo_avd)); - - memcpy(&lo_tcontext, tcontext, sizeof(lo_tcontext)); - lo_tcontext.type = target->bounds; - - context_struct_compute_av(scontext, - &lo_tcontext, - tclass, - requested, - &lo_avd); - if ((lo_avd.allowed & avd->allowed) == avd->allowed) - return; /* no masked permission */ - masked = ~lo_avd.allowed & avd->allowed; - } - - if (source->bounds && target->bounds) { - memset(&lo_avd, 0, sizeof(lo_avd)); - /* - * lo_scontext and lo_tcontext are already - * set up. - */ - - context_struct_compute_av(&lo_scontext, - &lo_tcontext, - tclass, - requested, - &lo_avd); - if ((lo_avd.allowed & avd->allowed) == avd->allowed) - return; /* no masked permission */ - masked = ~lo_avd.allowed & avd->allowed; - } - - if (masked) { - struct audit_buffer *ab; - char *stype_name - = policydb.p_type_val_to_name[source->value - 1]; - char *ttype_name - = policydb.p_type_val_to_name[target->value - 1]; - char *tclass_name - = policydb.p_class_val_to_name[tclass - 1]; - - /* mask violated permissions */ - avd->allowed &= ~masked; - - /* notice to userspace via audit message */ - ab = audit_log_start(current->audit_context, - GFP_ATOMIC, AUDIT_SELINUX_ERR); - if (!ab) - return; - - audit_log_format(ab, "av boundary violation: " - "source=%s target=%s tclass=%s", - stype_name, ttype_name, tclass_name); - avc_dump_av(ab, tclass, masked); - audit_log_end(ab); - } -} - /* * Compute access vectors based on a context structure pair for * the permissions in a particular class. @@ -455,7 +356,7 @@ static int context_struct_compute_av(struct context *scontext, avkey.source_type = i + 1; avkey.target_type = j + 1; for (node = avtab_search_node(&policydb.te_avtab, &avkey); - node; + node != NULL; node = avtab_search_node_next(node, avkey.specified)) { if (node->key.specified == AVTAB_ALLOWED) avd->allowed |= node->datum.data; @@ -503,14 +404,6 @@ static int context_struct_compute_av(struct context *scontext, PROCESS__DYNTRANSITION); } - /* - * If the given source and target types have boundary - * constraint, lazy checks have to mask any violated - * permission and notice it to userspace via audit. - */ - type_attribute_bounds_av(scontext, tcontext, - tclass, requested, avd); - return 0; inval_class: @@ -656,69 +549,6 @@ int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, return rc; } -/* - * security_bounded_transition - check whether the given - * transition is directed to bounded, or not. - * It returns 0, if @newsid is bounded by @oldsid. - * Otherwise, it returns error code. 
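The type_attribute_bounds_av() removal above drops the lazy boundary enforcement: the decision was recomputed with the bounding (parent) type substituted for source and/or target, and any permission granted beyond what the bound allows was masked off and audited. A sketch of just the masking arithmetic, assuming lo_allowed was already computed for the substituted contexts; plain u32 access vectors stand in for struct av_decision and apply_bounds is an illustrative name:

#include <stdio.h>
#include <stdint.h>

static uint32_t apply_bounds(uint32_t allowed, uint32_t lo_allowed)
{
	uint32_t masked = ~lo_allowed & allowed;	/* beyond the bound */

	if (masked)	/* the kernel raised an AUDIT_SELINUX_ERR here */
		fprintf(stderr, "av boundary violation: masked 0x%x\n",
			masked);
	return allowed & ~masked;
}

int main(void)
{
	/* child would get 0x7, its bound only allows 0x3 -> 0x4 dropped */
	printf("allowed=0x%x\n", apply_bounds(0x7, 0x3));
	return 0;
}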
- * - * @oldsid : current security identifier - * @newsid : destinated security identifier - */ -int security_bounded_transition(u32 old_sid, u32 new_sid) -{ - struct context *old_context, *new_context; - struct type_datum *type; - int index; - int rc = -EINVAL; - - read_lock(&policy_rwlock); - - old_context = sidtab_search(&sidtab, old_sid); - if (!old_context) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", - __func__, old_sid); - goto out; - } - - new_context = sidtab_search(&sidtab, new_sid); - if (!new_context) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", - __func__, new_sid); - goto out; - } - - /* type/domain unchaned */ - if (old_context->type == new_context->type) { - rc = 0; - goto out; - } - - index = new_context->type; - while (true) { - type = policydb.type_val_to_struct[index - 1]; - BUG_ON(!type); - - /* not bounded anymore */ - if (!type->bounds) { - rc = -EPERM; - break; - } - - /* @newsid is bounded by @oldsid */ - if (type->bounds == old_context->type) { - rc = 0; - break; - } - index = type->bounds; - } -out: - read_unlock(&policy_rwlock); - - return rc; -} - - /** * security_compute_av - Compute access vector decisions. * @ssid: source security identifier @@ -964,7 +794,7 @@ static int string_to_context_struct(struct policydb *pol, *p++ = 0; typdatum = hashtab_search(pol->p_types.table, scontextp); - if (!typdatum || typdatum->attribute) + if (!typdatum) goto out; ctx->type = typdatum->value; @@ -1207,7 +1037,7 @@ static int security_compute_sid(u32 ssid, /* If no permanent rule, also check for enabled conditional rules */ if (!avdatum) { node = avtab_search_node(&policydb.te_cond_avtab, &avkey); - for (; node; node = avtab_search_node_next(node, specified)) { + for (; node != NULL; node = avtab_search_node_next(node, specified)) { if (node->key.specified & AVTAB_ENABLED) { avdatum = &node->datum; break; @@ -2220,7 +2050,7 @@ int security_set_bools(int len, int *values) policydb.bool_val_to_struct[i]->state = 0; } - for (cur = policydb.cond_list; cur; cur = cur->next) { + for (cur = policydb.cond_list; cur != NULL; cur = cur->next) { rc = evaluate_cond_node(&policydb, cur); if (rc) goto out; @@ -2272,7 +2102,7 @@ static int security_preserve_bools(struct policydb *p) if (booldatum) booldatum->state = bvalues[i]; } - for (cur = p->cond_list; cur; cur = cur->next) { + for (cur = p->cond_list; cur != NULL; cur = cur->next) { rc = evaluate_cond_node(p, cur); if (rc) goto out; diff --git a/trunk/security/selinux/ss/sidtab.c b/trunk/security/selinux/ss/sidtab.c index e817989764cd..a81ded104129 100644 --- a/trunk/security/selinux/ss/sidtab.c +++ b/trunk/security/selinux/ss/sidtab.c @@ -43,7 +43,7 @@ int sidtab_insert(struct sidtab *s, u32 sid, struct context *context) hvalue = SIDTAB_HASH(sid); prev = NULL; cur = s->htable[hvalue]; - while (cur && sid > cur->sid) { + while (cur != NULL && sid > cur->sid) { prev = cur; cur = cur->next; } @@ -92,7 +92,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) hvalue = SIDTAB_HASH(sid); cur = s->htable[hvalue]; - while (cur && sid > cur->sid) + while (cur != NULL && sid > cur->sid) cur = cur->next; if (force && cur && sid == cur->sid && cur->context.len) @@ -103,7 +103,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) sid = SECINITSID_UNLABELED; hvalue = SIDTAB_HASH(sid); cur = s->htable[hvalue]; - while (cur && sid > cur->sid) + while (cur != NULL && sid > cur->sid) cur = cur->next; if (!cur || sid != cur->sid) return NULL; @@ -136,7 +136,7 @@ 
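The security_bounded_transition() body removed above reduces to a chain walk: starting from the new context's type, follow ->bounds upward until the old type is found (bounded, success) or the chain ends (-EPERM). A stand-alone model of that loop, with a plain array assumed in place of the policydb and sidtab lookups (bounded_transition and the local struct are illustrative):

#include <stdio.h>
#include <stdint.h>

#define EPERM 1

struct type_datum { uint32_t bounds; };	/* 1-based index, 0 = unbounded */

static int bounded_transition(struct type_datum *types,
			      uint32_t old_type, uint32_t new_type)
{
	uint32_t index = new_type;

	if (old_type == new_type)	/* type/domain unchanged */
		return 0;

	while (1) {
		struct type_datum *type = &types[index - 1];

		if (!type->bounds)		/* not bounded anymore */
			return -EPERM;
		if (type->bounds == old_type)	/* new is bounded by old */
			return 0;
		index = type->bounds;
	}
}

int main(void)
{
	struct type_datum types[] = { {0}, {1}, {2} };	/* chain: 3->2->1 */

	printf("3 under 1: %d\n", bounded_transition(types, 1, 3)); /* 0  */
	printf("1 under 3: %d\n", bounded_transition(types, 3, 1)); /* -1 */
	return 0;
}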
int sidtab_map(struct sidtab *s, for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; - while (cur) { + while (cur != NULL) { rc = apply(cur->sid, &cur->context, args); if (rc) goto out; @@ -155,7 +155,7 @@ static inline u32 sidtab_search_context(struct sidtab *s, for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; - while (cur) { + while (cur != NULL) { if (context_cmp(&cur->context, context)) return cur->sid; cur = cur->next; @@ -242,7 +242,7 @@ void sidtab_destroy(struct sidtab *s) for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; - while (cur) { + while (cur != NULL) { temp = cur; cur = cur->next; context_destroy(&temp->context); diff --git a/trunk/security/smack/smack.h b/trunk/security/smack/smack.h index 31dce559595a..4a4477f5afdc 100644 --- a/trunk/security/smack/smack.h +++ b/trunk/security/smack/smack.h @@ -178,7 +178,6 @@ u32 smack_to_secid(const char *); extern int smack_cipso_direct; extern int smack_net_nltype; extern char *smack_net_ambient; -extern char *smack_onlycap; extern struct smack_known *smack_known; extern struct smack_known smack_known_floor; diff --git a/trunk/security/smack/smack_access.c b/trunk/security/smack/smack_access.c index 79ff21ed4c3b..f6b5f6eed6dd 100644 --- a/trunk/security/smack/smack_access.c +++ b/trunk/security/smack/smack_access.c @@ -157,7 +157,7 @@ int smk_access(char *subject_label, char *object_label, int request) * * This function checks the current subject label/object label pair * in the access rule list and returns 0 if the access is permitted, - * non zero otherwise. It allows that current may have the capability + * non zero otherwise. It allows that current my have the capability * to override the rules. */ int smk_curacc(char *obj_label, u32 mode) @@ -168,14 +168,6 @@ int smk_curacc(char *obj_label, u32 mode) if (rc == 0) return 0; - /* - * Return if a specific label has been designated as the - * only one that gets privilege and current does not - * have that label. - */ - if (smack_onlycap != NULL && smack_onlycap != current->security) - return rc; - if (capable(CAP_MAC_OVERRIDE)) return 0; diff --git a/trunk/security/smack/smackfs.c b/trunk/security/smack/smackfs.c index e7c642458ec9..271a835fbbe3 100644 --- a/trunk/security/smack/smackfs.c +++ b/trunk/security/smack/smackfs.c @@ -39,7 +39,6 @@ enum smk_inos { SMK_DIRECT = 6, /* CIPSO level indicating direct label */ SMK_AMBIENT = 7, /* internet ambient label */ SMK_NLTYPE = 8, /* label scheme to use by default */ - SMK_ONLYCAP = 9, /* the only "capable" label */ }; /* @@ -69,16 +68,6 @@ int smack_net_nltype = NETLBL_NLTYPE_CIPSOV4; */ int smack_cipso_direct = SMACK_CIPSO_DIRECT_DEFAULT; -/* - * Unless a process is running with this label even - * having CAP_MAC_OVERRIDE isn't enough to grant - * privilege to violate MAC policy. If no label is - * designated (the NULL case) capabilities apply to - * everyone. It is expected that the hat (^) label - * will be used if any label is used. 
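The smk_curacc() hunk above removes the "onlycap" gate: when an onlycap label is configured, CAP_MAC_OVERRIDE only helps a task carrying exactly that label. Smack compares label pointers rather than strings because labels are interned via smk_import(); this small model (may_override is an illustrative name) assumes the same pointer-identity convention:

#include <stdio.h>

static const char *smack_onlycap;	/* NULL: capabilities apply to all */

static int may_override(const char *task_label, int has_cap_mac_override)
{
	if (smack_onlycap != NULL && smack_onlycap != task_label)
		return 0;		/* label mismatch: no override */
	return has_cap_mac_override;
}

int main(void)
{
	static const char hat[] = "^";	/* interned label, shared pointer */

	smack_onlycap = hat;
	printf("hat task:   %d\n", may_override(hat, 1));	/* 1 */
	printf("other task: %d\n", may_override("_", 1));	/* 0 */
	return 0;
}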
- */ -char *smack_onlycap; - static int smk_cipso_doi_value = SMACK_CIPSO_DOI_DEFAULT; struct smk_list_entry *smack_list; @@ -798,85 +787,6 @@ static const struct file_operations smk_ambient_ops = { .write = smk_write_ambient, }; -/** - * smk_read_onlycap - read() for /smack/onlycap - * @filp: file pointer, not actually used - * @buf: where to put the result - * @cn: maximum to send along - * @ppos: where to start - * - * Returns number of bytes read or error code, as appropriate - */ -static ssize_t smk_read_onlycap(struct file *filp, char __user *buf, - size_t cn, loff_t *ppos) -{ - char *smack = ""; - ssize_t rc = -EINVAL; - int asize; - - if (*ppos != 0) - return 0; - - if (smack_onlycap != NULL) - smack = smack_onlycap; - - asize = strlen(smack) + 1; - - if (cn >= asize) - rc = simple_read_from_buffer(buf, cn, ppos, smack, asize); - - return rc; -} - -/** - * smk_write_onlycap - write() for /smack/onlycap - * @filp: file pointer, not actually used - * @buf: where to get the data from - * @count: bytes sent - * @ppos: where to start - * - * Returns number of bytes written or error code, as appropriate - */ -static ssize_t smk_write_onlycap(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - char in[SMK_LABELLEN]; - char *sp = current->security; - - if (!capable(CAP_MAC_ADMIN)) - return -EPERM; - - /* - * This can be done using smk_access() but is done - * explicitly for clarity. The smk_access() implementation - * would use smk_access(smack_onlycap, MAY_WRITE) - */ - if (smack_onlycap != NULL && smack_onlycap != sp) - return -EPERM; - - if (count >= SMK_LABELLEN) - return -EINVAL; - - if (copy_from_user(in, buf, count) != 0) - return -EFAULT; - - /* - * Should the null string be passed in unset the onlycap value. - * This seems like something to be careful with as usually - * smk_import only expects to return NULL for errors. It - * is usually the case that a nullstring or "\n" would be - * bad to pass to smk_import but in fact this is useful here. - */ - smack_onlycap = smk_import(in, count); - - return count; -} - -static const struct file_operations smk_onlycap_ops = { - .read = smk_read_onlycap, - .write = smk_write_onlycap, -}; - struct option_names { int o_number; char *o_name; @@ -1009,8 +919,6 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent) {"ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR}, [SMK_NLTYPE] = {"nltype", &smk_nltype_ops, S_IRUGO|S_IWUSR}, - [SMK_ONLYCAP] = - {"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR}, /* last one */ {""} };
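For reference, the removed smk_read_onlycap() followed the usual pattern for tiny single-value smackfs files: expose one NUL-terminated label, return 0 on any re-read (*ppos != 0), and reject buffers that cannot hold the whole string with -EINVAL. A user-space sketch of that contract, with a plain memcpy assumed in place of simple_read_from_buffer()/copy_to_user() (read_onlycap is an illustrative name):

#include <stdio.h>
#include <string.h>

#define EINVAL 22

static const char *smack_onlycap;	/* NULL when no label is set */

static long read_onlycap(char *buf, size_t cn, long *ppos)
{
	const char *smack = smack_onlycap ? smack_onlycap : "";
	size_t asize = strlen(smack) + 1;	/* include the NUL */

	if (*ppos != 0)
		return 0;		/* EOF on a second read */
	if (cn < asize)
		return -EINVAL;		/* whole-string reads only */
	memcpy(buf, smack, asize);
	*ppos = (long)asize;
	return (long)asize;
}

int main(void)
{
	char buf[32];
	long pos = 0;

	smack_onlycap = "^";
	printf("read:   %ld bytes, \"%s\"\n",
	       read_onlycap(buf, sizeof(buf), &pos), buf);
	printf("reread: %ld\n", read_onlycap(buf, sizeof(buf), &pos));
	return 0;
}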