---

yaml --- r: 181399 b: refs/heads/master c: 9df5f74 h: refs/heads/master i: 181397: 0c26c80 181395: 0f00208 181391: cc6cb36 v: v3
git-mirror · Jan 25, 2010 · 1bc9a69 · 1bc9a69
1 parent 99df0da
commit 1bc9a69
Show file tree

Hide file tree

Showing 2,498 changed files with 31,137 additions and 76,729 deletions.
diff --git a/[refs] b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 654451748b779b28077d9058442d0f354251870d
+refs/heads/master: 9df5f74194871ebd0e51ef5ad2eca5084acaaaba
diff --git a/trunk/.gitignore b/trunk/.gitignore
@@ -37,7 +37,6 @@ modules.builtin
 tags
 TAGS
 vmlinux
-vmlinuz
 System.map
 Module.markers
 Module.symvers

diff --git a/trunk/Documentation/ABI/testing/ima_policy b/trunk/Documentation/ABI/testing/ima_policy
@@ -20,7 +20,7 @@ Description:
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
 
-		base: 	func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK]
+		base: 	func:= [BPRM_CHECK][FILE_MMAP][INODE_PERMISSION]
 			mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC]
 			fsmagic:= hex value
 			uid:= decimal value
@@ -40,11 +40,11 @@ Description:
 
 			measure func=BPRM_CHECK
 			measure func=FILE_MMAP mask=MAY_EXEC
-			measure func=FILE_CHECK mask=MAY_READ uid=0
+			measure func=INODE_PERM mask=MAY_READ uid=0
 
 		The default policy measures all executables in bprm_check,
 		all files mmapped executable in file_mmap, and all files
-		open for read by root in do_filp_open.
+		open for read by root in inode_permission.
 
 		Examples of LSM specific definitions:
 
@@ -54,8 +54,8 @@ Description:
 
 			dont_measure obj_type=var_log_t
 			dont_measure obj_type=auditd_log_t
-			measure subj_user=system_u func=FILE_CHECK mask=MAY_READ
-			measure subj_role=system_r func=FILE_CHECK mask=MAY_READ
+			measure subj_user=system_u func=INODE_PERM mask=MAY_READ
+			measure subj_role=system_r func=INODE_PERM mask=MAY_READ
 
 		Smack:
-			measure subj_user=_ func=FILE_CHECK mask=MAY_READ
+			measure subj_user=_ func=INODE_PERM mask=MAY_READ
diff --git a/trunk/Documentation/PCI/PCI-DMA-mapping.txt → trunk/Documentation/DMA-mapping.txt b/trunk/Documentation/PCI/PCI-DMA-mapping.txt → trunk/Documentation/DMA-mapping.txt
diff --git a/trunk/Documentation/DocBook/mtdnand.tmpl b/trunk/Documentation/DocBook/mtdnand.tmpl
@@ -174,15 +174,15 @@
 		</para>
 		<programlisting>
 static struct mtd_info *board_mtd;
-static void __iomem *baseaddr;
+static unsigned long baseaddr;
 		</programlisting>
 		<para>
 			Static example
 		</para>
 		<programlisting>
 static struct mtd_info board_mtd;
 static struct nand_chip board_chip;
-static void __iomem *baseaddr;
+static unsigned long baseaddr;
 		</programlisting>
 	</sect1>
 	<sect1 id="Partition_defines">
@@ -283,8 +283,8 @@ int __init board_init (void)
 	}
 
 	/* map physical address */
-	baseaddr = ioremap(CHIP_PHYSICAL_ADDRESS, 1024);
-	if (!baseaddr) {
+	baseaddr = (unsigned long)ioremap(CHIP_PHYSICAL_ADDRESS, 1024);
+	if(!baseaddr){
 		printk("Ioremap to access NAND chip failed\n");
 		err = -EIO;
 		goto out_mtd;
@@ -316,7 +316,7 @@ int __init board_init (void)
 	goto out;
 
 out_ior:
-	iounmap(baseaddr);
+	iounmap((void *)baseaddr);
 out_mtd:
 	kfree (board_mtd);
 out:
@@ -341,7 +341,7 @@ static void __exit board_cleanup (void)
 	nand_release (board_mtd);
 
 	/* unmap physical address */
-	iounmap(baseaddr);
+	iounmap((void *)baseaddr);
 
 	/* Free the MTD device structure */
 	kfree (board_mtd);

diff --git a/trunk/Documentation/IO-mapping.txt b/trunk/Documentation/IO-mapping.txt
@@ -157,7 +157,7 @@ For such memory, you can do things like
 	 * access only the 640k-1MB area, so anything else
 	 * has to be remapped.
 	 */
-	void __iomem *baseptr = ioremap(0xFC000000, 1024*1024);
+	char * baseptr = ioremap(0xFC000000, 1024*1024);
 
 	/* write a 'A' to the offset 10 of the area */
 	writeb('A',baseptr+10);

diff --git a/trunk/Documentation/block/00-INDEX b/trunk/Documentation/block/00-INDEX
@@ -1,5 +1,7 @@
 00-INDEX
 	- This file
+as-iosched.txt
+	- Anticipatory IO scheduler
 barrier.txt
 	- I/O Barriers
 biodoc.txt

diff --git a/trunk/Documentation/block/as-iosched.txt b/trunk/Documentation/block/as-iosched.txt
@@ -0,0 +1,172 @@
+Anticipatory IO scheduler
+-------------------------
+Nick Piggin <piggin@cyberone.com.au>    13 Sep 2003
+
+Attention! Database servers, especially those using "TCQ" disks should
+investigate performance with the 'deadline' IO scheduler. Any system with high
+disk performance requirements should do so, in fact.
+
+If you see unusual performance characteristics of your disk systems, or you
+see big performance regressions versus the deadline scheduler, please email
+me. Database users don't bother unless you're willing to test a lot of patches
+from me ;) its a known issue.
+
+Also, users with hardware RAID controllers, doing striping, may find
+highly variable performance results with using the as-iosched. The
+as-iosched anticipatory implementation is based on the notion that a disk
+device has only one physical seeking head.  A striped RAID controller
+actually has a head for each physical device in the logical RAID device.
+
+However, setting the antic_expire (see tunable parameters below) produces
+very similar behavior to the deadline IO scheduler.
+
+Selecting IO schedulers
+-----------------------
+Refer to Documentation/block/switching-sched.txt for information on
+selecting an io scheduler on a per-device basis.
+
+Anticipatory IO scheduler Policies
+----------------------------------
+The as-iosched implementation implements several layers of policies
+to determine when an IO request is dispatched to the disk controller.
+Here are the policies outlined, in order of application.
+
+1. one-way Elevator algorithm.
+
+The elevator algorithm is similar to that used in deadline scheduler, with
+the addition that it allows limited backward movement of the elevator
+(i.e. seeks backwards).  A seek backwards can occur when choosing between
+two IO requests where one is behind the elevator's current position, and
+the other is in front of the elevator's position. If the seek distance to
+the request in back of the elevator is less than half the seek distance to
+the request in front of the elevator, then the request in back can be chosen.
+Backward seeks are also limited to a maximum of MAXBACK (1024*1024) sectors.
+This favors forward movement of the elevator, while allowing opportunistic
+"short" backward seeks.
+
+2. FIFO expiration times for reads and for writes.
+
+This is again very similar to the deadline IO scheduler.  The expiration
+times for requests on these lists is tunable using the parameters read_expire
+and write_expire discussed below.  When a read or a write expires in this way,
+the IO scheduler will interrupt its current elevator sweep or read anticipation
+to service the expired request.
+
+3. Read and write request batching
+
+A batch is a collection of read requests or a collection of write
+requests.  The as scheduler alternates dispatching read and write batches
+to the driver.  In the case a read batch, the scheduler submits read
+requests to the driver as long as there are read requests to submit, and
+the read batch time limit has not been exceeded (read_batch_expire).
+The read batch time limit begins counting down only when there are
+competing write requests pending.
+
+In the case of a write batch, the scheduler submits write requests to
+the driver as long as there are write requests available, and the
+write batch time limit has not been exceeded (write_batch_expire).
+However, the length of write batches will be gradually shortened
+when read batches frequently exceed their time limit.
+
+When changing between batch types, the scheduler waits for all requests
+from the previous batch to complete before scheduling requests for the
+next batch.
+
+The read and write fifo expiration times described in policy 2 above
+are checked only when in scheduling IO of a batch for the corresponding
+(read/write) type.  So for example, the read FIFO timeout values are
+tested only during read batches.  Likewise, the write FIFO timeout
+values are tested only during write batches.  For this reason,
+it is generally not recommended for the read batch time
+to be longer than the write expiration time, nor for the write batch
+time to exceed the read expiration time (see tunable parameters below).
+
+When the IO scheduler changes from a read to a write batch,
+it begins the elevator from the request that is on the head of the
+write expiration FIFO.  Likewise, when changing from a write batch to
+a read batch, scheduler begins the elevator from the first entry
+on the read expiration FIFO.
+
+4. Read anticipation.
+
+Read anticipation occurs only when scheduling a read batch.
+This implementation of read anticipation allows only one read request
+to be dispatched to the disk controller at a time.  In
+contrast, many write requests may be dispatched to the disk controller
+at a time during a write batch.  It is this characteristic that can make
+the anticipatory scheduler perform anomalously with controllers supporting
+TCQ, or with hardware striped RAID devices. Setting the antic_expire
+queue parameter (see below) to zero disables this behavior, and the 
+anticipatory scheduler behaves essentially like the deadline scheduler.
+
+When read anticipation is enabled (antic_expire is not zero), reads
+are dispatched to the disk controller one at a time.
+At the end of each read request, the IO scheduler examines its next
+candidate read request from its sorted read list.  If that next request
+is from the same process as the request that just completed,
+or if the next request in the queue is "very close" to the
+just completed request, it is dispatched immediately.  Otherwise,
+statistics (average think time, average seek distance) on the process
+that submitted the just completed request are examined.  If it seems
+likely that that process will submit another request soon, and that
+request is likely to be near the just completed request, then the IO
+scheduler will stop dispatching more read requests for up to (antic_expire)
+milliseconds, hoping that process will submit a new request near the one
+that just completed.  If such a request is made, then it is dispatched
+immediately.  If the antic_expire wait time expires, then the IO scheduler
+will dispatch the next read request from the sorted read queue.
+
+To decide whether an anticipatory wait is worthwhile, the scheduler
+maintains statistics for each process that can be used to compute
+mean "think time" (the time between read requests), and mean seek
+distance for that process.  One observation is that these statistics
+are associated with each process, but those statistics are not associated
+with a specific IO device.  So for example, if a process is doing IO
+on several file systems on separate devices, the statistics will be
+a combination of IO behavior from all those devices.
+
+
+Tuning the anticipatory IO scheduler
+------------------------------------
+When using 'as', the anticipatory IO scheduler there are 5 parameters under
+/sys/block/*/queue/iosched/. All are units of milliseconds.
+
+The parameters are:
+* read_expire
+    Controls how long until a read request becomes "expired". It also controls the
+    interval between which expired requests are served, so set to 50, a request
+    might take anywhere < 100ms to be serviced _if_ it is the next on the
+    expired list. Obviously request expiration strategies won't make the disk
+    go faster. The result basically equates to the timeslice a single reader
+    gets in the presence of other IO. 100*((seek time / read_expire) + 1) is
+    very roughly the % streaming read efficiency your disk should get with
+    multiple readers.
+
+* read_batch_expire
+    Controls how much time a batch of reads is given before pending writes are
+    served. A higher value is more efficient. This might be set below read_expire
+    if writes are to be given higher priority than reads, but reads are to be
+    as efficient as possible when there are no writes. Generally though, it
+    should be some multiple of read_expire.
+
+* write_expire, and
+* write_batch_expire are equivalent to the above, for writes.
+
+* antic_expire
+    Controls the maximum amount of time we can anticipate a good read (one
+    with a short seek distance from the most recently completed request) before
+    giving up. Many other factors may cause anticipation to be stopped early,
+    or some processes will not be "anticipated" at all. Should be a bit higher
+    for big seek time devices though not a linear correspondence - most
+    processes have only a few ms thinktime.
+
+In addition to the tunables above there is a read-only file named est_time
+which, when read, will show:
+
+    - The probability of a task exiting without a cooperating task
+      submitting an anticipated IO.
+
+    - The current mean think time.
+
+    - The seek distance used to determine if an incoming IO is better.
+
diff --git a/trunk/Documentation/block/biodoc.txt b/trunk/Documentation/block/biodoc.txt
@@ -186,7 +186,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address
 do not have a corresponding kernel virtual address space mapping) and
 low-memory pages.
 
-Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion
+Note: Please refer to Documentation/DMA-mapping.txt for a discussion
 on PCI high mem DMA aspects and mapping of scatter gather lists, and support
 for 64 bit PCI.
 

diff --git a/trunk/Documentation/cachetlb.txt b/trunk/Documentation/cachetlb.txt
@@ -377,3 +377,27 @@ maps this page at its virtual address.
 	All the functionality of flush_icache_page can be implemented in
 	flush_dcache_page and update_mmu_cache. In 2.7 the hope is to
 	remove this interface completely.
+
+The final category of APIs is for I/O to deliberately aliased address
+ranges inside the kernel.  Such aliases are set up by use of the
+vmap/vmalloc API.  Since kernel I/O goes via physical pages, the I/O
+subsystem assumes that the user mapping and kernel offset mapping are
+the only aliases.  This isn't true for vmap aliases, so anything in
+the kernel trying to do I/O to vmap areas must manually manage
+coherency.  It must do this by flushing the vmap range before doing
+I/O and invalidating it after the I/O returns.
+
+  void flush_kernel_vmap_range(void *vaddr, int size)
+       flushes the kernel cache for a given virtual address range in
+       the vmap area.  This is to make sure that any data the kernel
+       modified in the vmap range is made visible to the physical
+       page.  The design is to make this area safe to perform I/O on.
+       Note that this API does *not* also flush the offset map alias
+       of the area.
+
+  void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates
+       the cache for a given virtual address range in the vmap area
+       which prevents the processor from making the cache stale by
+       speculatively reading data while the I/O was occurring to the
+       physical pages.  This is only necessary for data reads into the
+       vmap area.
diff --git a/trunk/Documentation/cpu-freq/governors.txt b/trunk/Documentation/cpu-freq/governors.txt
@@ -145,8 +145,8 @@ show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT.
 up_threshold: defines what the average CPU usage between the samplings
 of 'sampling_rate' needs to be for the kernel to make a decision on
 whether it should increase the frequency.  For example when it is set
-to its default value of '95' it means that between the checking
-intervals the CPU needs to be on average more than 95% in use to then
+to its default value of '80' it means that between the checking
+intervals the CPU needs to be on average more than 80% in use to then
 decide that the CPU frequency needs to be increased.  
 
 ignore_nice_load: this parameter takes a value of '0' or '1'. When

diff --git a/trunk/Documentation/dontdiff b/trunk/Documentation/dontdiff
@@ -69,6 +69,7 @@ av_permissions.h
 bbootsect
 bin2c
 binkernel.spec
+binoffset
 bootsect
 bounds.h
 bsetup

diff --git a/trunk/Documentation/fault-injection/fault-injection.txt b/trunk/Documentation/fault-injection/fault-injection.txt
@@ -143,8 +143,8 @@ o provide a way to configure fault attributes
   failslab, fail_page_alloc, and fail_make_request use this way.
   Helper functions:
 
-	init_fault_attr_dentries(entries, attr, name);
-	void cleanup_fault_attr_dentries(entries);
+	init_fault_attr_entries(entries, attr, name);
+	void cleanup_fault_attr_entries(entries);
 
 - module parameters
-Original file line number
+Diff line change
@@ Expand Up / @@ -37,7 +37,6 @@ modules.builtin @@
     tags
     TAGS
     vmlinux
-    vmlinuz
     System.map
     Module.markers
     Module.symvers
@@ Expand Down @@