---

yaml --- r: 91415 b: refs/heads/master c: e80ab41 h: refs/heads/master i: 91413: 9d7b590 91411: 6efe2e0 91407: 95216b5 v: v3
git-mirror · Apr 21, 2008 · 79ea3e5 · 79ea3e5
1 parent fc70658
commit 79ea3e5
Show file tree

Hide file tree

Showing 801 changed files with 44,955 additions and 15,807 deletions.
diff --git a/[refs] b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: ee959b00c335d7780136c5abda37809191fe52c3
+refs/heads/master: e80ab411e589e00550e2e6e5a6a02d59cc730357
diff --git a/trunk/Documentation/DocBook/kernel-locking.tmpl b/trunk/Documentation/DocBook/kernel-locking.tmpl
@@ -241,7 +241,7 @@
    </para>
    <para>
      The third type is a semaphore
-     (<filename class="headerfile">include/asm/semaphore.h</filename>): it
+     (<filename class="headerfile">include/linux/semaphore.h</filename>): it
      can have more than one holder at any time (the number decided at
      initialization time), although it is most commonly used as a
      single-holder lock (a mutex).  If you can't get a semaphore, your
@@ -290,7 +290,7 @@
      <para>
        If you have a data structure which is only ever accessed from
        user context, then you can use a simple semaphore
-       (<filename>linux/asm/semaphore.h</filename>) to protect it.  This 
+       (<filename>linux/linux/semaphore.h</filename>) to protect it.  This
        is the most trivial case: you initialize the semaphore to the number 
        of resources available (usually 1), and call
        <function>down_interruptible()</function> to grab the semaphore, and 
@@ -1656,7 +1656,7 @@ the amount of locking which needs to be done.
  #include &lt;linux/slab.h&gt;
  #include &lt;linux/string.h&gt;
 +#include &lt;linux/rcupdate.h&gt;
- #include &lt;asm/semaphore.h&gt;
+ #include &lt;linux/semaphore.h&gt;
  #include &lt;asm/errno.h&gt;
 
  struct object

diff --git a/trunk/Documentation/cpusets.txt b/trunk/Documentation/cpusets.txt
@@ -8,6 +8,7 @@ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
 Modified by Paul Jackson <pj@sgi.com>
 Modified by Christoph Lameter <clameter@sgi.com>
 Modified by Paul Menage <menage@google.com>
+Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
 
 CONTENTS:
 =========
@@ -20,7 +21,8 @@ CONTENTS:
   1.5 What is memory_pressure ?
   1.6 What is memory spread ?
   1.7 What is sched_load_balance ?
-  1.8 How do I use cpusets ?
+  1.8 What is sched_relax_domain_level ?
+  1.9 How do I use cpusets ?
 2. Usage Examples and Syntax
   2.1 Basic Usage
   2.2 Adding/removing cpus
@@ -497,7 +499,73 @@ the cpuset code to update these sched domains, it compares the new
 partition requested with the current, and updates its sched domains,
 removing the old and adding the new, for each change.
 
-1.8 How do I use cpusets ?
+
+1.8 What is sched_relax_domain_level ?
+--------------------------------------
+
+In sched domain, the scheduler migrates tasks in 2 ways; periodic load
+balance on tick, and at time of some schedule events.
+
+When a task is woken up, scheduler try to move the task on idle CPU.
+For example, if a task A running on CPU X activates another task B
+on the same CPU X, and if CPU Y is X's sibling and performing idle,
+then scheduler migrate task B to CPU Y so that task B can start on
+CPU Y without waiting task A on CPU X.
+
+And if a CPU run out of tasks in its runqueue, the CPU try to pull
+extra tasks from other busy CPUs to help them before it is going to
+be idle.
+
+Of course it takes some searching cost to find movable tasks and/or
+idle CPUs, the scheduler might not search all CPUs in the domain
+everytime.  In fact, in some architectures, the searching ranges on
+events are limited in the same socket or node where the CPU locates,
+while the load balance on tick searchs all.
+
+For example, assume CPU Z is relatively far from CPU X.  Even if CPU Z
+is idle while CPU X and the siblings are busy, scheduler can't migrate
+woken task B from X to Z since it is out of its searching range.
+As the result, task B on CPU X need to wait task A or wait load balance
+on the next tick.  For some applications in special situation, waiting
+1 tick may be too long.
+
+The 'sched_relax_domain_level' file allows you to request changing
+this searching range as you like.  This file takes int value which
+indicates size of searching range in levels ideally as follows,
+otherwise initial value -1 that indicates the cpuset has no request.
+
+  -1  : no request. use system default or follow request of others.
+   0  : no search.
+   1  : search siblings (hyperthreads in a core).
+   2  : search cores in a package.
+   3  : search cpus in a node [= system wide on non-NUMA system]
+ ( 4  : search nodes in a chunk of node [on NUMA system] )
+ ( 5~ : search system wide [on NUMA system])
+
+This file is per-cpuset and affect the sched domain where the cpuset
+belongs to.  Therefore if the flag 'sched_load_balance' of a cpuset
+is disabled, then 'sched_relax_domain_level' have no effect since
+there is no sched domain belonging the cpuset.
+
+If multiple cpusets are overlapping and hence they form a single sched
+domain, the largest value among those is used.  Be careful, if one
+requests 0 and others are -1 then 0 is used.
+
+Note that modifying this file will have both good and bad effects,
+and whether it is acceptable or not will be depend on your situation.
+Don't modify this file if you are not sure.
+
+If your situation is:
+ - The migration costs between each cpu can be assumed considerably
+   small(for you) due to your special application's behavior or
+   special hardware support for CPU cache etc.
+ - The searching cost doesn't have impact(for you) or you can make
+   the searching cost enough small by managing cpuset to compact etc.
+ - The latency is required even it sacrifices cache hit rate etc.
+then increasing 'sched_relax_domain_level' would benefit you.
+
+
+1.9 How do I use cpusets ?
 --------------------------
 
 In order to minimize the impact of cpusets on critical kernel

diff --git a/trunk/Documentation/feature-removal-schedule.txt b/trunk/Documentation/feature-removal-schedule.txt
@@ -282,6 +282,13 @@ Why:	Not used in-tree. The current out-of-tree users used it to
 	out-of-tree driver.
 Who:	Thomas Gleixner <tglx@linutronix.de>
 
+----------------------------
+
+What:	usedac i386 kernel parameter
+When:	2.6.27
+Why:	replaced by allowdac and no dac combination
+Who:	Glauber Costa <gcosta@redhat.com>
+
 ---------------------------
 
 What:	/sys/o2cb symlink
@@ -291,3 +298,11 @@ Why:	/sys/fs/o2cb is the proper location for this information - /sys/o2cb
 	ocfs2-tools. 2 years should be sufficient time to phase in new versions
 	which know to look in /sys/fs/o2cb.
 Who:	ocfs2-devel@oss.oracle.com
+
+---------------------------
+
+What:	asm/semaphore.h
+When:	2.6.26
+Why:	Implementation became generic; users should now include
+	linux/semaphore.h instead.
+Who:	Matthew Wilcox <willy@linux.intel.com>
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
@@ -1280,8 +1280,16 @@ and is between 256 and 4096 characters. It is defined in the file
 	noexec		[IA-64]
 
 	noexec		[X86-32,X86-64]
+			On X86-32 available only on PAE configured kernels.
 			noexec=on: enable non-executable mappings (default)
-			noexec=off: disable nn-executable mappings
+			noexec=off: disable non-executable mappings
+
+	noexec32	[X86-64]
+			This affects only 32-bit executables.
+			noexec32=on: enable non-executable mappings (default)
+				read doesn't imply executable mappings
+			noexec32=off: disable non-executable mappings
+				read implies executable mappings
 
 	nofxsr		[BUGS=X86-32] Disables x86 floating point extended
 			register save and restore. The kernel will only save

diff --git a/trunk/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c b/trunk/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
@@ -0,0 +1,96 @@
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * at context switches
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE		1   /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV		2   /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+uint64_t rdtsc() {
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+void sigsegv_expect(int sig)
+{
+	/* */
+}
+
+void segvtask(void)
+{
+	if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+	{
+		perror("prctl");
+		exit(0);
+	}
+	signal(SIGSEGV, sigsegv_expect);
+	alarm(10);
+	rdtsc();
+	fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+	exit(0);
+}
+
+
+void sigsegv_fail(int sig)
+{
+	fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+	exit(0);
+}
+
+void rdtsctask(void)
+{
+	if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+	{
+		perror("prctl");
+		exit(0);
+	}
+	signal(SIGSEGV, sigsegv_fail);
+	alarm(10);
+	for(;;) rdtsc();
+}
+
+
+int main(int argc, char **argv)
+{
+	int n_tasks = 100, i;
+
+	fprintf(stderr, "[No further output means we're allright]\n");
+
+	for (i=0; i<n_tasks; i++)
+		if (fork() == 0)
+		{
+			if (i & 1)
+				segvtask();
+			else
+				rdtsctask();
+		}
+
+	for (i=0; i<n_tasks; i++)
+		wait(NULL);
+
+	exit(0);
+}
+
diff --git a/trunk/Documentation/prctl/disable-tsc-on-off-stress-test.c b/trunk/Documentation/prctl/disable-tsc-on-off-stress-test.c
@@ -0,0 +1,95 @@
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * when set with prctl()
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE		1   /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV		2   /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+/* snippet from wikipedia :-) */
+
+uint64_t rdtsc() {
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+int should_segv = 0;
+
+void sigsegv_cb(int sig)
+{
+	if (!should_segv)
+	{
+		fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+		exit(0);
+	}
+	if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+	{
+		perror("prctl");
+		exit(0);
+	}
+	should_segv = 0;
+
+	rdtsc();
+}
+
+void task(void)
+{
+	signal(SIGSEGV, sigsegv_cb);
+	alarm(10);
+	for(;;)
+	{
+		rdtsc();
+		if (should_segv)
+		{
+			fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+			exit(0);
+		}
+		if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+		{
+			perror("prctl");
+			exit(0);
+		}
+		should_segv = 1;
+	}
+}
+
+
+int main(int argc, char **argv)
+{
+	int n_tasks = 100, i;
+
+	fprintf(stderr, "[No further output means we're allright]\n");
+
+	for (i=0; i<n_tasks; i++)
+		if (fork() == 0)
+			task();
+
+	for (i=0; i<n_tasks; i++)
+		wait(NULL);
+
+	exit(0);
+}
+