---

yaml --- r: 96191 b: refs/heads/master c: dc5dc7e h: refs/heads/master i: 96189: 997416b 96187: 764adad 96183: 018d2c8 96175: ae43732 96159: 16f6fd8 96127: be75440 v: v3
git-mirror · May 8, 2008 · 95ef6ed · 95ef6ed
1 parent 37e6885
commit 95ef6ed
Show file tree

Hide file tree

Showing 244 changed files with 2,591 additions and 4,207 deletions.
diff --git a/[refs] b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 8d539108560ec121d59eee05160236488266221c
+refs/heads/master: dc5dc7e6d71ca9fd1ea01a1418150af3b2937489
diff --git a/trunk/Documentation/DocBook/kgdb.tmpl b/trunk/Documentation/DocBook/kgdb.tmpl
@@ -72,7 +72,7 @@
     kgdb is a source level debugger for linux kernel. It is used along
     with gdb to debug a linux kernel.  The expectation is that gdb can
     be used to "break in" to the kernel to inspect memory, variables
-    and look through call stack information similar to what an
+    and look through a cal stack information similar to what an
     application developer would use gdb for.  It is possible to place
     breakpoints in kernel code and perform some limited execution
     stepping.
@@ -93,10 +93,8 @@
   <chapter id="CompilingAKernel">
     <title>Compiling a kernel</title>
     <para>
-    To enable <symbol>CONFIG_KGDB</symbol> you should first turn on
-    "Prompt for development and/or incomplete code/drivers"
-    (CONFIG_EXPERIMENTAL) in  "General setup", then under the
-    "Kernel debugging" select "KGDB: kernel debugging with remote gdb".
+    To enable <symbol>CONFIG_KGDB</symbol>, look under the "Kernel debugging"
+    and then select "KGDB: kernel debugging with remote gdb".
     </para>
     <para>
     Next you should choose one of more I/O drivers to interconnect debugging

diff --git a/trunk/Documentation/filesystems/Locking b/trunk/Documentation/filesystems/Locking
@@ -92,6 +92,7 @@ prototypes:
 	void (*destroy_inode)(struct inode *);
 	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
+	void (*put_inode) (struct inode *);
 	void (*drop_inode) (struct inode *);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
@@ -114,6 +115,7 @@ alloc_inode:		no	no	no
 destroy_inode:		no
 dirty_inode:		no				(must not sleep)
 write_inode:		no
+put_inode:		no
 drop_inode:		no				!!!inode_lock!!!
 delete_inode:		no
 put_super:		yes	yes	no

diff --git a/trunk/Documentation/filesystems/vfs.txt b/trunk/Documentation/filesystems/vfs.txt
@@ -205,6 +205,7 @@ struct super_operations {
 
         void (*dirty_inode) (struct inode *);
         int (*write_inode) (struct inode *, int);
+        void (*put_inode) (struct inode *);
         void (*drop_inode) (struct inode *);
         void (*delete_inode) (struct inode *);
         void (*put_super) (struct super_block *);
@@ -245,6 +246,9 @@ or bottom half).
 	inode to disc.  The second parameter indicates whether the write
 	should be synchronous or not, not all filesystems check this flag.
 
+  put_inode: called when the VFS inode is removed from the inode
+	cache.
+
   drop_inode: called when the last access to the inode is dropped,
 	with the inode_lock spinlock held.
 

diff --git a/trunk/Documentation/kbuild/kconfig-language.txt b/trunk/Documentation/kbuild/kconfig-language.txt
@@ -377,3 +377,27 @@ config FOO
 
 limits FOO to module (=m) or disabled (=n).
 
+
+Build limited by a third config symbol which may be =y or =m
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A common idiom that we see (and sometimes have problems with) is this:
+
+When option C in B (module or subsystem) uses interfaces from A (module
+or subsystem), and both A and B are tristate (could be =y or =m if they
+were independent of each other, but they aren't), then we need to limit
+C such that it cannot be built statically if A is built as a loadable
+module.  (C already depends on B, so there is no dependency issue to
+take care of here.)
+
+If A is linked statically into the kernel image, C can be built
+statically or as loadable module(s).  However, if A is built as loadable
+module(s), then C must be restricted to loadable module(s) also.  This
+can be expressed in kconfig language as:
+
+config C
+	depends on A = y || A = B
+
+or for real examples, use this command in a kernel tree:
+
+$ find . -name Kconfig\* | xargs grep -ns "depends on.*=.*||.*=" | grep -v orig
+
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
@@ -1094,6 +1094,9 @@ and is between 256 and 4096 characters. It is defined in the file
 	mac5380=	[HW,SCSI] Format:
 			<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
 
+	mac53c9x=	[HW,SCSI] Format:
+			<num_esps>,<disconnect>,<nosync>,<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
+
 	machvec=	[IA64] Force the use of a particular machine-vector
 			(machvec) in a generic kernel.
 			Example: machvec=hpzx1_swiotlb
@@ -1522,8 +1525,6 @@ and is between 256 and 4096 characters. It is defined in the file
 				This is normally done in pci_enable_device(),
 				so this option is a temporary workaround
 				for broken drivers that don't call it.
-		skip_isa_align	[X86] do not align io start addr, so can
-				handle more pci cards
 		firmware	[ARM] Do not re-enumerate the bus but instead
 				just use the configuration from the
 				bootloader. This is currently used on

diff --git a/trunk/Documentation/s390/CommonIO b/trunk/Documentation/s390/CommonIO
@@ -8,6 +8,17 @@ Command line parameters
 
   Enable logging of debug information in case of ccw device timeouts.
 
+
+* cio_msg = yes | no
+
+  Determines whether information on found devices and sensed device 
+  characteristics should be shown during startup or when new devices are
+  found, i. e. messages of the types "Detected device 0.0.4711 on subchannel
+  0.0.0042" and "SenseID: Device 0.0.4711 reports: ...".
+
+  Default is off.
+
+
 * cio_ignore = {all} |
 	       {<device> | <range of devices>} |
 	       {!<device> | !<range of devices>}

diff --git a/trunk/Documentation/scheduler/sched-design.txt b/trunk/Documentation/scheduler/sched-design.txt
@@ -0,0 +1,165 @@
+		   Goals, Design and Implementation of the
+		      new ultra-scalable O(1) scheduler
+
+
+  This is an edited version of an email Ingo Molnar sent to
+  lkml on 4 Jan 2002.  It describes the goals, design, and
+  implementation of Ingo's new ultra-scalable O(1) scheduler.
+  Last Updated: 18 April 2002.
+
+
+Goal
+====
+
+The main goal of the new scheduler is to keep all the good things we know
+and love about the current Linux scheduler:
+
+ - good interactive performance even during high load: if the user
+   types or clicks then the system must react instantly and must execute
+   the user tasks smoothly, even during considerable background load.
+
+ - good scheduling/wakeup performance with 1-2 runnable processes.
+
+ - fairness: no process should stay without any timeslice for any
+   unreasonable amount of time. No process should get an unjustly high
+   amount of CPU time.
+
+ - priorities: less important tasks can be started with lower priority,
+   more important tasks with higher priority.
+
+ - SMP efficiency: no CPU should stay idle if there is work to do.
+
+ - SMP affinity: processes which run on one CPU should stay affine to
+   that CPU. Processes should not bounce between CPUs too frequently.
+
+ - plus additional scheduler features: RT scheduling, CPU binding.
+
+and the goal is also to add a few new things:
+
+ - fully O(1) scheduling. Are you tired of the recalculation loop
+   blowing the L1 cache away every now and then? Do you think the goodness
+   loop is taking a bit too long to finish if there are lots of runnable
+   processes? This new scheduler takes no prisoners: wakeup(), schedule(),
+   the timer interrupt are all O(1) algorithms. There is no recalculation
+   loop. There is no goodness loop either.
+
+ - 'perfect' SMP scalability. With the new scheduler there is no 'big'
+   runqueue_lock anymore - it's all per-CPU runqueues and locks - two
+   tasks on two separate CPUs can wake up, schedule and context-switch
+   completely in parallel, without any interlocking. All
+   scheduling-relevant data is structured for maximum scalability.
+
+ - better SMP affinity. The old scheduler has a particular weakness that
+   causes the random bouncing of tasks between CPUs if/when higher
+   priority/interactive tasks, this was observed and reported by many
+   people. The reason is that the timeslice recalculation loop first needs
+   every currently running task to consume its timeslice. But when this
+   happens on eg. an 8-way system, then this property starves an
+   increasing number of CPUs from executing any process. Once the last
+   task that has a timeslice left has finished using up that timeslice,
+   the recalculation loop is triggered and other CPUs can start executing
+   tasks again - after having idled around for a number of timer ticks.
+   The more CPUs, the worse this effect.
+
+   Furthermore, this same effect causes the bouncing effect as well:
+   whenever there is such a 'timeslice squeeze' of the global runqueue,
+   idle processors start executing tasks which are not affine to that CPU.
+   (because the affine tasks have finished off their timeslices already.)
+
+   The new scheduler solves this problem by distributing timeslices on a
+   per-CPU basis, without having any global synchronization or
+   recalculation.
+
+ - batch scheduling. A significant proportion of computing-intensive tasks
+   benefit from batch-scheduling, where timeslices are long and processes
+   are roundrobin scheduled. The new scheduler does such batch-scheduling
+   of the lowest priority tasks - so nice +19 jobs will get
+   'batch-scheduled' automatically. With this scheduler, nice +19 jobs are
+   in essence SCHED_IDLE, from an interactiveness point of view.
+
+ - handle extreme loads more smoothly, without breakdown and scheduling
+   storms.
+
+ - O(1) RT scheduling. For those RT folks who are paranoid about the
+   O(nr_running) property of the goodness loop and the recalculation loop.
+
+ - run fork()ed children before the parent. Andrea has pointed out the
+   advantages of this a few months ago, but patches for this feature
+   do not work with the old scheduler as well as they should,
+   because idle processes often steal the new child before the fork()ing
+   CPU gets to execute it.
+
+
+Design
+======
+
+The core of the new scheduler contains the following mechanisms:
+
+ - *two* priority-ordered 'priority arrays' per CPU. There is an 'active'
+   array and an 'expired' array. The active array contains all tasks that
+   are affine to this CPU and have timeslices left. The expired array
+   contains all tasks which have used up their timeslices - but this array
+   is kept sorted as well. The active and expired array is not accessed
+   directly, it's accessed through two pointers in the per-CPU runqueue
+   structure. If all active tasks are used up then we 'switch' the two
+   pointers and from now on the ready-to-go (former-) expired array is the
+   active array - and the empty active array serves as the new collector
+   for expired tasks.
+
+ - there is a 64-bit bitmap cache for array indices. Finding the highest
+   priority task is thus a matter of two x86 BSFL bit-search instructions.
+
+the split-array solution enables us to have an arbitrary number of active
+and expired tasks, and the recalculation of timeslices can be done
+immediately when the timeslice expires. Because the arrays are always
+access through the pointers in the runqueue, switching the two arrays can
+be done very quickly.
+
+this is a hybride priority-list approach coupled with roundrobin
+scheduling and the array-switch method of distributing timeslices.
+
+ - there is a per-task 'load estimator'.
+
+one of the toughest things to get right is good interactive feel during
+heavy system load. While playing with various scheduler variants i found
+that the best interactive feel is achieved not by 'boosting' interactive
+tasks, but by 'punishing' tasks that want to use more CPU time than there
+is available. This method is also much easier to do in an O(1) fashion.
+
+to establish the actual 'load' the task contributes to the system, a
+complex-looking but pretty accurate method is used: there is a 4-entry
+'history' ringbuffer of the task's activities during the last 4 seconds.
+This ringbuffer is operated without much overhead. The entries tell the
+scheduler a pretty accurate load-history of the task: has it used up more
+CPU time or less during the past N seconds. [the size '4' and the interval
+of 4x 1 seconds was found by lots of experimentation - this part is
+flexible and can be changed in both directions.]
+
+the penalty a task gets for generating more load than the CPU can handle
+is a priority decrease - there is a maximum amount to this penalty
+relative to their static priority, so even fully CPU-bound tasks will
+observe each other's priorities, and will share the CPU accordingly.
+
+the SMP load-balancer can be extended/switched with additional parallel
+computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs
+can be supported easily by changing the load-balancer. Right now it's
+tuned for my SMP systems.
+
+i skipped the prev->mm == next->mm advantage - no workload i know of shows
+any sensitivity to this. It can be added back by sacrificing O(1)
+schedule() [the current and one-lower priority list can be searched for a
+that->mm == current->mm condition], but costs a fair number of cycles
+during a number of important workloads, so i wanted to avoid this as much
+as possible.
+
+- the SMP idle-task startup code was still racy and the new scheduler
+triggered this. So i streamlined the idle-setup code a bit. We do not call
+into schedule() before all processors have started up fully and all idle
+threads are in place.
+
+- the patch also cleans up a number of aspects of sched.c - moves code
+into other areas of the kernel where it's appropriate, and simplifies
+certain code paths and data constructs. As a result, the new scheduler's
+code is smaller than the old one.
+
+	Ingo
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
@@ -2112,10 +2112,12 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 
 INTEL ETHERNET DRIVERS (e100/e1000/e1000e/igb/ixgb/ixgbe)
-P:	Jeff Kirsher
-M:	jeffrey.t.kirsher@intel.com
+P:	Auke Kok
+M:	auke-jan.h.kok@intel.com
 P:	Jesse Brandeburg
 M:	jesse.brandeburg@intel.com
+P:	Jeff Kirsher
+M:	jeffrey.t.kirsher@intel.com
 P:	Bruce Allan
 M:	bruce.w.allan@intel.com
 P:	John Ronciak

diff --git a/trunk/arch/cris/kernel/sys_cris.c b/trunk/arch/cris/kernel/sys_cris.c
@@ -40,11 +40,8 @@ asmlinkage int sys_pipe(unsigned long __user * fildes)
         error = do_pipe(fd);
         unlock_kernel();
         if (!error) {
-                if (copy_to_user(fildes, fd, 2*sizeof(int))) {
-			sys_close(fd[0]);
-			sys_close(fd[1]);
+                if (copy_to_user(fildes, fd, 2*sizeof(int)))
                         error = -EFAULT;
-		}
         }
         return error;
 }

diff --git a/trunk/arch/m32r/kernel/sys_m32r.c b/trunk/arch/m32r/kernel/sys_m32r.c
@@ -90,11 +90,8 @@ sys_pipe(unsigned long r0, unsigned long r1, unsigned long r2,
 
 	error = do_pipe(fd);
 	if (!error) {
-		if (copy_to_user((void __user *)r0, fd, 2*sizeof(int))) {
-			sys_close(fd[0]);
-			sys_close(fd[1]);
+		if (copy_to_user((void __user *)r0, fd, 2*sizeof(int)))
 			error = -EFAULT;
-		}
 	}
 	return error;
 }

diff --git a/trunk/arch/m68k/kernel/traps.c b/trunk/arch/m68k/kernel/traps.c
@@ -468,26 +468,15 @@ static inline void access_error040(struct frame *fp)
 			 * (if do_page_fault didn't fix the mapping,
                          * the writeback won't do good)
 			 */
-disable_wb:
 #ifdef DEBUG
 			printk(".. disabling wb2\n");
 #endif
 			if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr)
 				fp->un.fmt7.wb2s &= ~WBV_040;
-			if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr)
-				fp->un.fmt7.wb3s &= ~WBV_040;
 		}
-	} else {
-		/* In case of a bus error we either kill the process or expect
-		 * the kernel to catch the fault, which then is also responsible
-		 * for cleaning up the mess.
-		 */
-		current->thread.signo = SIGBUS;
-		current->thread.faddr = fp->un.fmt7.faddr;
-		if (send_fault_sig(&fp->ptregs) >= 0)
-			printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
-			       fp->un.fmt7.faddr);
-		goto disable_wb;
+	} else if (send_fault_sig(&fp->ptregs) > 0) {
+		printk("68040 access error, ssw=%x\n", ssw);
+		trap_c(fp);
 	}
 
 	do_040writebacks(fp);

diff --git a/trunk/arch/m68k/mac/config.c b/trunk/arch/m68k/mac/config.c
@@ -48,6 +48,9 @@
 struct mac_booter_data mac_bi_data;
 int mac_bisize = sizeof mac_bi_data;
 
+struct mac_hw_present mac_hw_present;
+EXPORT_SYMBOL(mac_hw_present);
+
 /* New m68k bootinfo stuff and videobase */
 
 extern int m68k_num_memory;
@@ -814,6 +817,27 @@ void __init mac_identify(void)
 		m68k_ramdisk.addr, m68k_ramdisk.size);
 #endif
 
+	/*
+	 * TODO: set the various fields in macintosh_config->hw_present here!
+	 */
+	switch (macintosh_config->scsi_type) {
+	case MAC_SCSI_OLD:
+		MACHW_SET(MAC_SCSI_80);
+		break;
+	case MAC_SCSI_QUADRA:
+	case MAC_SCSI_QUADRA2:
+	case MAC_SCSI_QUADRA3:
+		MACHW_SET(MAC_SCSI_96);
+		if ((macintosh_config->ident == MAC_MODEL_Q900) ||
+		    (macintosh_config->ident == MAC_MODEL_Q950))
+			MACHW_SET(MAC_SCSI_96_2);
+		break;
+	default:
+		printk(KERN_WARNING "config.c: wtf: unknown scsi, using 53c80\n");
+		MACHW_SET(MAC_SCSI_80);
+		break;
+	}
+
 	iop_init();
 	via_init();
 	oss_init();