---

yaml --- r: 58107 b: refs/heads/master c: 2524363 h: refs/heads/master i: 58105: c971780 58103: b95d65e v: v3
git-mirror · Jun 29, 2007 · 9533e3a · 9533e3a
1 parent 50820cb
commit 9533e3a
Show file tree

Hide file tree

Showing 207 changed files with 4,161 additions and 3,112 deletions.
diff --git a/[refs] b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 5131a184a3458d9ac47d9eba032cf4c4d3295afd
+refs/heads/master: 25243633c29b72c4edd5fe9cfcbd76aa5eef8b36
diff --git a/trunk/CREDITS b/trunk/CREDITS
@@ -3301,14 +3301,6 @@ S: 12725 SW Millikan Way, Suite 400
 S: Beaverton, Oregon 97005
 S: USA
 
-N: Li Yang
-E: leoli@freescale.com
-D: Freescale Highspeed USB device driver
-D: Freescale QE SoC support and Ethernet driver
-S: B-1206 Jingmao Guojigongyu
-S: 16 Baliqiao Nanjie, Beijing 101100
-S: People's Repulic of China
-
 N: Marcelo Tosatti
 E: marcelo@kvack.org
 D: v2.4 kernel maintainer
@@ -3726,6 +3718,14 @@ S: 542 West 112th Street, 5N
 S: New York, New York 10025
 S: USA
 
+N: Li Yang
+E: leoli@freescale.com
+D: Freescale Highspeed USB device driver
+D: Freescale QE SoC support and Ethernet driver
+S: B-1206 Jingmao Guojigongyu
+S: 16 Baliqiao Nanjie, Beijing 101100
+S: People's Republic of China
+
 N: Victor Yodaiken
 E: yodaiken@fsmlabs.com
 D: RTLinux (RealTime Linux)

diff --git a/trunk/Documentation/SM501.txt b/trunk/Documentation/SM501.txt
@@ -0,0 +1,66 @@
+			SM501 Driver
+			============
+
+Copyright 2006, 2007 Simtec Electronics
+
+Core
+----
+
+The core driver in drivers/mfd provides common services for the
+drivers which manage the specific hardware blocks. These services
+include locking for common registers, clock control and resource
+management.
+
+The core registers drivers for both PCI and generic bus based
+chips via the platform device and driver system.
+
+On detection of a device, the core initialises the chip (which may
+be specified by the platform data) and then exports the selected
+peripheral set as platform devices for the specific drivers.
+
+The core re-uses the platform device system as the platform device
+system provides enough features to support the drivers without the
+need to create a new bus-type and the associated code to go with it.
+
+
+Resources
+---------
+
+Each peripheral has a view of the device which is implicitly narrowed to
+the specific set of resources that peripheral requires in order to
+function correctly.
+
+The centralised memory allocation allows the driver to ensure that the
+maximum possible resource allocation can be made to the video subsystem
+as this is by-far the most resource-sensitive of the on-chip functions.
+
+The primary issue with memory allocation is that of moving the video
+buffers once a display mode is chosen. Indeed when a video mode change
+occurs the memory footprint of the video subsystem changes.
+
+Since video memory is difficult to move without changing the display
+(unless sufficient contiguous memory can be provided for the old and new
+modes simultaneously) the video driver fully utilises the memory area
+given to it by aligning fb0 to the start of the area and fb1 to the end
+of it. Any memory left over in the middle is used for the acceleration
+functions, which are transient and thus their location is less critical
+as it can be moved.
+
+
+Configuration
+-------------
+
+The platform device driver uses a set of platform data to pass
+configurations through to the core and the subsidiary drivers
+so that there can be support for more than one system carrying
+an SM501 built into a single kernel image.
+
+The PCI driver assumes that the PCI card behaves as per the Silicon
+Motion reference design.
+
+There is an erratum (AB-5) affecting the selection of the
+M1XCLK and M1CLK frequencies. These two clocks
+must be sourced from the same PLL, although they can then
+be divided down individually. If this is not set, then SM501 may
+lock and hang the whole system. The driver will refuse to
+attach if the PLL selection is different.
diff --git a/trunk/Documentation/volatile-considered-harmful.txt b/trunk/Documentation/volatile-considered-harmful.txt
@@ -0,0 +1,119 @@
+Why the "volatile" type class should not be used
+------------------------------------------------
+
+C programmers have often taken volatile to mean that the variable could be
+changed outside of the current thread of execution; as a result, they are
+sometimes tempted to use it in kernel code when shared data structures are
+being used.  In other words, they have been known to treat volatile types
+as a sort of easy atomic variable, which they are not.  The use of volatile in
+kernel code is almost never correct; this document describes why.
+
+The key point to understand with regard to volatile is that its purpose is
+to suppress optimization, which is almost never what one really wants to
+do.  In the kernel, one must protect shared data structures against
+unwanted concurrent access, which is very much a different task.  The
+process of protecting against unwanted concurrency will also avoid almost
+all optimization-related problems in a more efficient way.
+
+Like volatile, the kernel primitives which make concurrent access to data
+safe (spinlocks, mutexes, memory barriers, etc.) are designed to prevent
+unwanted optimization.  If they are being used properly, there will be no
+need to use volatile as well.  If volatile is still necessary, there is
+almost certainly a bug in the code somewhere.  In properly-written kernel
+code, volatile can only serve to slow things down.
+
+Consider a typical block of kernel code:
+
+    spin_lock(&the_lock);
+    do_something_on(&shared_data);
+    do_something_else_with(&shared_data);
+    spin_unlock(&the_lock);
+
+If all the code follows the locking rules, the value of shared_data cannot
+change unexpectedly while the_lock is held.  Any other code which might
+want to play with that data will be waiting on the lock.  The spinlock
+primitives act as memory barriers - they are explicitly written to do so -
+meaning that data accesses will not be optimized across them.  So the
+compiler might think it knows what will be in shared_data, but the
+spin_lock() call, since it acts as a memory barrier, will force it to
+forget anything it knows.  There will be no optimization problems with
+accesses to that data.
+
+If shared_data were declared volatile, the locking would still be
+necessary.  But the compiler would also be prevented from optimizing access
+to shared_data _within_ the critical section, when we know that nobody else
+can be working with it.  While the lock is held, shared_data is not
+volatile.  When dealing with shared data, proper locking makes volatile
+unnecessary - and potentially harmful.
+
+The volatile type qualifier was originally meant for memory-mapped I/O
+registers.  Within the kernel, register accesses, too, should be protected
+by locks, but one also does not want the compiler "optimizing" register
+accesses within a critical section.  But, within the kernel, I/O memory
+accesses are always done through accessor functions; accessing I/O memory
+directly through pointers is frowned upon and does not work on all
+architectures.  Those accessors are written to prevent unwanted
+optimization, so, once again, volatile is unnecessary.
+
+Another situation where one might be tempted to use volatile is
+when the processor is busy-waiting on the value of a variable.  The right
+way to perform a busy wait is:
+
+    while (my_variable != what_i_want)
+        cpu_relax();
+
+The cpu_relax() call can lower CPU power consumption or yield to a
+hyperthreaded twin processor; it also happens to serve as a memory barrier,
+so, once again, volatile is unnecessary.  Of course, busy-waiting is
+generally an anti-social act to begin with.
+
+There are still a few rare situations where volatile makes sense in the
+kernel:
+
+  - The above-mentioned accessor functions might use volatile on
+    architectures where direct I/O memory access does work.  Essentially,
+    each accessor call becomes a little critical section on its own and
+    ensures that the access happens as expected by the programmer.
+
+  - Inline assembly code which changes memory, but which has no other
+    visible side effects, risks being deleted by GCC.  Adding the volatile
+    keyword to asm statements will prevent this removal.
+
+  - The jiffies variable is special in that it can have a different value
+    every time it is referenced, but it can be read without any special
+    locking.  So jiffies can be volatile, but the addition of other
+    variables of this type is strongly frowned upon.  Jiffies is considered
+    to be a "stupid legacy" issue (Linus's words) in this regard; fixing it
+    would be more trouble than it is worth.
+
+  - Pointers to data structures in coherent memory which might be modified
+    by I/O devices can, sometimes, legitimately be volatile.  A ring buffer
+    used by a network adapter, where that adapter changes pointers to
+    indicate which descriptors have been processed, is an example of this
+    type of situation.
+
+For most code, none of the above justifications for volatile apply.  As a
+result, the use of volatile is likely to be seen as a bug and will bring
+additional scrutiny to the code.  Developers who are tempted to use
+volatile should take a step back and think about what they are truly trying
+to accomplish.
+
+Patches to remove volatile variables are generally welcome - as long as
+they come with a justification which shows that the concurrency issues have
+been properly thought through.
+
+
+NOTES
+-----
+
+[1] http://lwn.net/Articles/233481/
+[2] http://lwn.net/Articles/233482/
+
+CREDITS
+-------
+
+Original impetus and research by Randy Dunlap
+Written by Jonathan Corbet
+Improvements via comments from Satyam Sharma, Johannes Stezenbach, Jesper
+	Juhl, Heikki Orsila, H. Peter Anvin, Philipp Hahn, and Stefan
+	Richter.
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
@@ -4022,11 +4022,11 @@ S:	Supported
 
 XFS FILESYSTEM
 P:	Silicon Graphics Inc
-P:	Tim Shimmin, David Chatterton
+P:	Tim Shimmin
 M:	xfs-masters@oss.sgi.com
 L:	xfs@oss.sgi.com
 W:	http://oss.sgi.com/projects/xfs
-T:	git git://oss.sgi.com:8090/xfs/xfs-2.6
+T:	git git://oss.sgi.com:8090/xfs/xfs-2.6.git
 S:	Supported
 
 XILINX UARTLITE SERIAL DRIVER

diff --git a/trunk/Makefile b/trunk/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 22
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
 NAME = Holy Dancing Manatees, Batman!
 
 # *DOCUMENTATION*

diff --git a/trunk/arch/alpha/lib/csum_ipv6_magic.S b/trunk/arch/alpha/lib/csum_ipv6_magic.S
@@ -7,6 +7,9 @@
  *                                __u32 len,
  *                                unsigned short proto,
  *                                unsigned int csum);
+ *
+ * Misalignment handling (which costs 16 instructions / 8 cycles)
+ * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
  */
 
 	.globl csum_ipv6_magic
@@ -16,37 +19,57 @@
 csum_ipv6_magic:
 	.prologue 0
 
-	ldq	$0,0($16)	# e0    : load src & dst addr words
+	ldq_u	$0,0($16)	# e0    : load src & dst addr words
 	zapnot	$20,15,$20	# .. e1 : zero extend incoming csum
 	extqh	$18,1,$4	# e0    : byte swap len & proto while we wait
-	ldq	$1,8($16)	# .. e1 :
+	ldq_u	$21,7($16)	# .. e1 : handle misalignment
 
 	extbl	$18,1,$5	# e0	:
-	ldq	$2,0($17)	# .. e1 :
+	ldq_u	$1,8($16)	# .. e1 :
 	extbl	$18,2,$6	# e0 	:
-	ldq	$3,8($17)	# .. e1 :
+	ldq_u	$22,15($16)	# .. e1 :
 
 	extbl	$18,3,$18	# e0	:
+	ldq_u	$2,0($17)	# .. e1 :
 	sra	$4,32,$4	# e0	:
+	ldq_u	$23,7($17)	# .. e1 :
+
+	extql	$0,$16,$0	# e0	:
+	ldq_u	$3,8($17)	# .. e1 :
+	extqh	$21,$16,$21	# e0	:
+	ldq_u	$24,15($17)	# .. e1 :
+
 	sll	$5,16,$5	# e0	:
+	or	$0,$21,$0	# .. e1 : 1st src word complete
+	extql	$1,$16,$1	# e0	:
 	addq	$20,$0,$20	# .. e1 : begin summing the words
 
-	sll	$6,8,$6		# e0	:
+	extqh	$22,$16,$22	# e0	:
 	cmpult	$20,$0,$0	# .. e1 :
-	extwh	$19,7,$7	# e0    :
-	or	$4,$18,$18	# .. e1 :
+	sll	$6,8,$6		# e0	:
+	or	$1,$22,$1	# .. e1 : 2nd src word complete
 
-	extbl	$19,1,$19	# e0    :
+	extql	$2,$17,$2	# e0	:
+	or	$4,$18,$18	# .. e1 :
+	extqh	$23,$17,$23	# e0	:
 	or	$5,$6,$5	# .. e1 :
-	or	$18,$5,$18	# e0    : len complete
-	or	$19,$7,$19	# .. e1 :
 
-	sll	$19,48,$19	# e0    :
+	extql	$3,$17,$3	# e0	:
+	or	$2,$23,$2	# .. e1 : 1st dst word complete
+	extqh	$24,$17,$24	# e0	:
+	or	$18,$5,$18	# .. e1 : len complete
+
+	extwh	$19,7,$7	# e0    :
+	or	$3,$24,$3	# .. e1 : 2nd dst word complete
+	extbl	$19,1,$19	# e0    :
 	addq	$20,$1,$20	# .. e1 :
-	sra	$19,32,$19	# e0    : proto complete
+
+	or	$19,$7,$19	# e0    :
 	cmpult	$20,$1,$1	# .. e1 :
+	sll	$19,48,$19	# e0    :
+	nop			# .. e0 :
 
-	nop			# e0    :
+	sra	$19,32,$19	# e0    : proto complete
 	addq	$20,$2,$20	# .. e1 :
 	cmpult	$20,$2,$2	# e0    :
 	addq	$20,$3,$20	# .. e1 :
@@ -84,7 +107,7 @@ csum_ipv6_magic:
 	extwl	$0,2,$1		# e0    : fold 17-bit value
 	zapnot	$0,3,$0		# .. e1 :
 	addq	$0,$1,$0	# e0    :
-	not	$0,$0		# e1    : and complement.
+	not	$0,$0		# .. e1 : and complement.
 
 	zapnot	$0,3,$0		# e0    :
 	ret			# .. e1 :

diff --git a/trunk/arch/alpha/lib/ev6-csum_ipv6_magic.S b/trunk/arch/alpha/lib/ev6-csum_ipv6_magic.S
@@ -46,6 +46,10 @@
  * add the 3 low ushorts together, generating a uint
  * a final add of the 2 lower ushorts
  * truncating the result.
+ *
+ * Misalignment handling added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ * The cost is 16 instructions (~8 cycles), including two extra loads which
+ * may cause additional delay in rare cases (load-load replay traps).
  */
 
 	.globl csum_ipv6_magic
@@ -55,25 +59,45 @@
 csum_ipv6_magic:
 	.prologue 0
 
-	ldq	$0,0($16)	# L : Latency: 3
+	ldq_u	$0,0($16)	# L : Latency: 3
 	inslh	$18,7,$4	# U : 0000000000AABBCC
-	ldq	$1,8($16)	# L : Latency: 3
+	ldq_u	$1,8($16)	# L : Latency: 3
 	sll	$19,8,$7	# U : U L U L : 0x00000000 00aabb00
 
+	and	$16,7,$6	# E : src misalignment
+	ldq_u	$5,15($16)	# L : Latency: 3
 	zapnot	$20,15,$20	# U : zero extend incoming csum
-	ldq	$2,0($17)	# L : Latency: 3
-	sll	$19,24,$19	# U : U L L U : 0x000000aa bb000000
+	ldq_u	$2,0($17)	# L : U L U L : Latency: 3
+
+	extql	$0,$6,$0	# U :
+	extqh	$1,$6,$22	# U :
+	ldq_u	$3,8($17)	# L : Latency: 3
+	sll	$19,24,$19	# U : U U L U : 0x000000aa bb000000
+
+	cmoveq	$6,$31,$22	# E : src aligned?
+	ldq_u	$23,15($17)	# L : Latency: 3
 	inswl	$18,3,$18	# U : 000000CCDD000000
+	addl	$19,$7,$19	# E : U L U L : <sign bits>bbaabb00
 
-	ldq	$3,8($17)	# L : Latency: 3
-	bis	$18,$4,$18	# E : 000000CCDDAABBCC
-	addl	$19,$7,$19	# E : <sign bits>bbaabb00
-	nop			# E : U L U L
+	or	$0,$22,$0	# E : 1st src word complete
+	extql	$1,$6,$1	# U :
+	or	$18,$4,$18	# E : 000000CCDDAABBCC
+	extqh	$5,$6,$5	# U : L U L U
 
+	and	$17,7,$6	# E : dst misalignment
+	extql	$2,$6,$2	# U :
+	or	$1,$5,$1	# E : 2nd src word complete
+	extqh	$3,$6,$22	# U : L U L U :
+
+	cmoveq	$6,$31,$22	# E : dst aligned?
+	extql	$3,$6,$3	# U :
 	addq	$20,$0,$20	# E : begin summing the words
+	extqh	$23,$6,$23	# U : L U L U :
+
 	srl	$18,16,$4	# U : 0000000000CCDDAA
+	or	$2,$22,$2	# E : 1st dst word complete
 	zap	$19,0x3,$19	# U : <sign bits>bbaa0000
-	nop			# E : L U U L
+	or	$3,$23,$3	# E : U L U L : 2nd dst word complete
 
 	cmpult	$20,$0,$0	# E :
 	addq	$20,$1,$20	# E :