diff --git a/[refs] b/[refs] index b2e0784fd7ef..3eb085f9dfe3 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: ece928df16494becd43f999aff9bd530182e7e81 +refs/heads/master: a74b81b0aff4a01e0816df5915c854fb52c5e87f diff --git a/trunk/.mailmap b/trunk/.mailmap index 5a6dd592eedc..353ad5607156 100644 --- a/trunk/.mailmap +++ b/trunk/.mailmap @@ -32,6 +32,7 @@ Brian Avery Brian King Christoph Hellwig Corey Minyard +Damian Hobson-Garcia David Brownell David Woodhouse Dmitry Eremin-Solenikov diff --git a/trunk/CREDITS b/trunk/CREDITS index 95c469c610bc..a7ea8e343836 100644 --- a/trunk/CREDITS +++ b/trunk/CREDITS @@ -2943,6 +2943,10 @@ S: Kasarmikatu 11 A4 S: 70110 Kuopio S: Finland +N: Tobias Ringström +E: tori@unhappy.mine.nu +D: Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver + N: Luca Risolia E: luca.risolia@studio.unibo.it P: 1024D/FCE635A4 88E8 F32F 7244 68BA 3958 5D40 99DA 5D2A FCE6 35A4 @@ -3913,6 +3917,10 @@ S: Flandernstrasse 101 S: D-73732 Esslingen S: Germany +N: Roman Zippel +E: zippel@linux-m68k.org +D: AFFS and HFS filesystems, m68k maintainer, new kernel configuration in 2.5 + N: Leonard N. Zubkoff W: http://www.dandelion.com/Linux/ D: BusLogic SCSI driver diff --git a/trunk/Documentation/ABI/testing/sysfs-block b/trunk/Documentation/ABI/testing/sysfs-block index 4873c759d535..c1eb41cb9876 100644 --- a/trunk/Documentation/ABI/testing/sysfs-block +++ b/trunk/Documentation/ABI/testing/sysfs-block @@ -142,3 +142,67 @@ Description: with the previous I/O request are enabled. When set to 2, all merge tries are disabled. The default value is 0 - which enables all types of merge tries. + +What: /sys/block//discard_alignment +Date: May 2011 +Contact: Martin K. Petersen +Description: + Devices that support discard functionality may + internally allocate space in units that are bigger than + the exported logical block size. 
The discard_alignment + parameter indicates how many bytes the beginning of the + device is offset from the internal allocation unit's + natural alignment. + +What: /sys/block///discard_alignment +Date: May 2011 +Contact: Martin K. Petersen +Description: + Devices that support discard functionality may + internally allocate space in units that are bigger than + the exported logical block size. The discard_alignment + parameter indicates how many bytes the beginning of the + partition is offset from the internal allocation unit's + natural alignment. + +What: /sys/block//queue/discard_granularity +Date: May 2011 +Contact: Martin K. Petersen +Description: + Devices that support discard functionality may + internally allocate space using units that are bigger + than the logical block size. The discard_granularity + parameter indicates the size of the internal allocation + unit in bytes if reported by the device. Otherwise the + discard_granularity will be set to match the device's + physical block size. A discard_granularity of 0 means + that the device does not support discard functionality. + +What: /sys/block//queue/discard_max_bytes +Date: May 2011 +Contact: Martin K. Petersen +Description: + Devices that support discard functionality may have + internal limits on the number of bytes that can be + trimmed or unmapped in a single operation. Some storage + protocols also have inherent limits on the number of + blocks that can be described in a single command. The + discard_max_bytes parameter is set by the device driver + to the maximum number of bytes that can be discarded in + a single operation. Discard requests issued to the + device must not exceed this limit. A discard_max_bytes + value of 0 means that the device does not support + discard functionality. + +What: /sys/block//queue/discard_zeroes_data +Date: May 2011 +Contact: Martin K. 
Petersen +Description: + Devices that support discard functionality may return + stale or random data when a previously discarded block + is read back. This can cause problems if the filesystem + expects discarded blocks to be explicitly cleared. If a + device reports that it deterministically returns zeroes + when a discarded area is read the discard_zeroes_data + parameter will be set to one. Otherwise it will be 0 and + the result of reading a discarded area is undefined. diff --git a/trunk/Documentation/ABI/testing/sysfs-kernel-mm-cleancache b/trunk/Documentation/ABI/testing/sysfs-kernel-mm-cleancache new file mode 100644 index 000000000000..662ae646ea12 --- /dev/null +++ b/trunk/Documentation/ABI/testing/sysfs-kernel-mm-cleancache @@ -0,0 +1,11 @@ +What: /sys/kernel/mm/cleancache/ +Date: April 2011 +Contact: Dan Magenheimer +Description: + /sys/kernel/mm/cleancache/ contains a number of files which + record a count of various cleancache operations + (sum across all filesystems): + succ_gets + failed_gets + puts + flushes diff --git a/trunk/Documentation/ABI/testing/sysfs-ptp b/trunk/Documentation/ABI/testing/sysfs-ptp new file mode 100644 index 000000000000..d40d2b550502 --- /dev/null +++ b/trunk/Documentation/ABI/testing/sysfs-ptp @@ -0,0 +1,98 @@ +What: /sys/class/ptp/ +Date: September 2010 +Contact: Richard Cochran +Description: + This directory contains files and directories + providing a standardized interface to the ancillary + features of PTP hardware clocks. + +What: /sys/class/ptp/ptpN/ +Date: September 2010 +Contact: Richard Cochran +Description: + This directory contains the attributes of the Nth PTP + hardware clock registered into the PTP class driver + subsystem. + +What: /sys/class/ptp/ptpN/clock_name +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the name of the PTP hardware clock + as a human readable string. 
+ +What: /sys/class/ptp/ptpN/max_adjustment +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the PTP hardware clock's maximum + frequency adjustment value (a positive integer) in + parts per billion. + +What: /sys/class/ptp/ptpN/n_alarms +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of periodic or one shot + alarms offered by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/n_external_timestamps +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of external timestamp + channels offered by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/n_periodic_outputs +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of programmable periodic + output channels offered by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/pps_available +Date: September 2010 +Contact: Richard Cochran +Description: + This file indicates whether the PTP hardware clock + supports a Pulse Per Second to the host CPU. Reading + "1" means that the PPS is supported, while "0" means + not supported. + +What: /sys/class/ptp/ptpN/extts_enable +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables external + timestamps. To enable external timestamps, write the + channel index followed by a "1" into the file. + To disable external timestamps, write the channel + index followed by a "0" into the file. + +What: /sys/class/ptp/ptpN/fifo +Date: September 2010 +Contact: Richard Cochran +Description: + This file provides timestamps on external events, in + the form of three integers: channel index, seconds, + and nanoseconds. + +What: /sys/class/ptp/ptpN/period +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables periodic + outputs. 
To enable a periodic output, write five + integers into the file: channel index, start time + seconds, start time nanoseconds, period seconds, and + period nanoseconds. To disable a periodic output, set + all the seconds and nanoseconds values to zero. + +What: /sys/class/ptp/ptpN/pps_enable +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables delivery of + PPS events to the Linux PPS subsystem. To enable PPS + events, write a "1" into the file. To disable events, + write a "0" into the file. diff --git a/trunk/Documentation/IRQ-affinity.txt b/trunk/Documentation/IRQ-affinity.txt index b4a615b78403..7890fae18529 100644 --- a/trunk/Documentation/IRQ-affinity.txt +++ b/trunk/Documentation/IRQ-affinity.txt @@ -4,10 +4,11 @@ ChangeLog: SMP IRQ affinity -/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted -for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed -to turn off all CPUs, and if an IRQ controller does not support IRQ -affinity then the value will not change from the default 0xffffffff. +/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify +which target CPUs are permitted for a given IRQ source. It's a bitmask +(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not +allowed to turn off all CPUs, and if an IRQ controller does not support +IRQ affinity then the value will not change from the default of all cpus. /proc/irq/default_smp_affinity specifies default affinity mask that applies to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask @@ -54,3 +55,11 @@ round-trip min/avg/max = 0.1/0.5/585.4 ms This time around IRQ44 was delivered only to the last four processors. i.e counters for the CPU0-3 did not change. 
+Here is an example of limiting that same irq (44) to cpus 1024 to 1031: + +[root@moon 44]# echo 1024-1031 > smp_affinity_list +[root@moon 44]# cat smp_affinity_list +1024-1031 + +Note that to do this with a bitmask would require 32 bitmasks of zero +to follow the pertinent one. diff --git a/trunk/Documentation/blockdev/cciss.txt b/trunk/Documentation/blockdev/cciss.txt index 89698e8df7d4..c00c6a5ab21f 100644 --- a/trunk/Documentation/blockdev/cciss.txt +++ b/trunk/Documentation/blockdev/cciss.txt @@ -169,3 +169,18 @@ is issued which positions the tape to a known position. Typically you must rewind the tape (by issuing "mt -f /dev/st0 rewind" for example) before i/o can proceed again to a tape drive which was reset. +There is a cciss_tape_cmds module parameter which can be used to make cciss +allocate more commands for use by tape drives. Ordinarily only a few commands +(6) are allocated for tape drives because tape drives are slow and +infrequently used and the primary purpose of Smart Array controllers is to +act as a RAID controller for disk drives, so the vast majority of commands +are allocated for disk devices. However, if you have more than a few tape +drives attached to a smart array, the default number of commands may not be +enough (for example, if you have 8 tape drives, you could only rewind 6 +at one time with the default number of commands.) The cciss_tape_cmds module +parameter allows more commands (up to 16 more) to be allocated for use by +tape drives. For example: + + insmod cciss.ko cciss_tape_cmds=16 + +Or, as a kernel boot parameter passed in via grub: cciss.cciss_tape_cmds=8 diff --git a/trunk/Documentation/cachetlb.txt b/trunk/Documentation/cachetlb.txt index 9164ae3b83bc..9b728dc17535 100644 --- a/trunk/Documentation/cachetlb.txt +++ b/trunk/Documentation/cachetlb.txt @@ -16,7 +16,7 @@ on all processors in the system. 
Don't let this scare you into thinking SMP cache/tlb flushing must be so inefficient, this is in fact an area where many optimizations are possible. For example, if it can be proven that a user address space has never executed -on a cpu (see vma->cpu_vm_mask), one need not perform a flush +on a cpu (see mm_cpumask()), one need not perform a flush for this address space on that cpu. First, the TLB flushing interfaces, since they are the simplest. The diff --git a/trunk/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/trunk/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt index edb7ae19e868..2c6be0377f55 100644 --- a/trunk/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt +++ b/trunk/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt @@ -74,3 +74,57 @@ Example: interrupt-parent = <&mpic>; phy-handle = <&phy0> }; + +* Gianfar PTP clock nodes + +General Properties: + + - compatible Should be "fsl,etsec-ptp" + - reg Offset and length of the register set for the device + - interrupts There should be at least two interrupts. Some devices + have as many as four PTP related interrupts. + +Clock Properties: + + - fsl,tclk-period Timer reference clock period in nanoseconds. + - fsl,tmr-prsc Prescaler, divides the output clock. + - fsl,tmr-add Frequency compensation value. + - fsl,tmr-fiper1 Fixed interval period pulse generator. + - fsl,tmr-fiper2 Fixed interval period pulse generator. + - fsl,max-adj Maximum frequency adjustment in parts per billion. + + These properties set the operational parameters for the PTP + clock. You must choose these carefully for the clock to work right. 
+ Here is how to figure good values: + + TimerOsc = system clock MHz + tclk_period = desired clock period nanoseconds + NominalFreq = 1000 / tclk_period MHz + FreqDivRatio = TimerOsc / NominalFreq (must be greater than 1.0) + tmr_add = ceil(2^32 / FreqDivRatio) + OutputClock = NominalFreq / tmr_prsc MHz + PulseWidth = 1 / OutputClock microseconds + FiperFreq1 = desired frequency in Hz + FiperDiv1 = 1000000 * OutputClock / FiperFreq1 + tmr_fiper1 = tmr_prsc * tclk_period * FiperDiv1 - tclk_period + max_adj = 1000000000 * (FreqDivRatio - 1.0) - 1 + + The calculation for tmr_fiper2 is the same as for tmr_fiper1. The + driver expects that tmr_fiper1 will be correctly set to produce a 1 + Pulse Per Second (PPS) signal, since this will be offered to the PPS + subsystem to synchronize the Linux clock. + +Example: + + ptp_clock@24E00 { + compatible = "fsl,etsec-ptp"; + reg = <0x24E00 0xB0>; + interrupts = <12 0x8 13 0x8>; + interrupt-parent = < &ipic >; + fsl,tclk-period = <10>; + fsl,tmr-prsc = <100>; + fsl,tmr-add = <0x999999A4>; + fsl,tmr-fiper1 = <0x3B9AC9F6>; + fsl,tmr-fiper2 = <0x00018696>; + fsl,max-adj = <659999998>; + }; diff --git a/trunk/Documentation/filesystems/9p.txt b/trunk/Documentation/filesystems/9p.txt index b22abba78fed..13de64c7f0ab 100644 --- a/trunk/Documentation/filesystems/9p.txt +++ b/trunk/Documentation/filesystems/9p.txt @@ -25,6 +25,8 @@ Other applications are described in the following papers: http://xcpu.org/papers/cellfs-talk.pdf * PROSE I/O: Using 9p to enable Application Partitions http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf + * VirtFS: A Virtualization Aware File System pass-through + http://goo.gl/3WPDg USAGE ===== @@ -130,31 +132,20 @@ OPTIONS RESOURCES ========= -Our current recommendation is to use Inferno (http://www.vitanuova.com/nferno/index.html) -as the 9p server. 
You can start a 9p server under Inferno by issuing the -following command: - ; styxlisten -A tcp!*!564 export '#U*' +Protocol specifications are maintained on github: +http://ericvh.github.com/9p-rfc/ -The -A specifies an unauthenticated export. The 564 is the port # (you may -have to choose a higher port number if running as a normal user). The '#U*' -specifies exporting the root of the Linux name space. You may specify a -subset of the namespace by extending the path: '#U*'/tmp would just export -/tmp. For more information, see the Inferno manual pages covering styxlisten -and export. +9p client and server implementations are listed on +http://9p.cat-v.org/implementations -A Linux version of the 9p server is now maintained under the npfs project -on sourceforge (http://sourceforge.net/projects/npfs). The currently -maintained version is the single-threaded version of the server (named spfs) -available from the same SVN repository. +A 9p2000.L server is being developed by LLNL and can be found +at http://code.google.com/p/diod/ There are user and developer mailing lists available through the v9fs project on sourceforge (http://sourceforge.net/projects/v9fs). -A stand-alone version of the module (which should build for any 2.6 kernel) -is available via (http://github.com/ericvh/9p-sac/tree/master) - -News and other information is maintained on SWiK (http://swik.net/v9fs) -and the Wiki (http://sf.net/apps/mediawiki/v9fs/index.php). +News and other information is maintained on a Wiki. +(http://sf.net/apps/mediawiki/v9fs/index.php). Bug reports may be issued through the kernel.org bugzilla (http://bugzilla.kernel.org) diff --git a/trunk/Documentation/filesystems/ext4.txt b/trunk/Documentation/filesystems/ext4.txt index c79ec58fd7f6..3ae9bc94352a 100644 --- a/trunk/Documentation/filesystems/ext4.txt +++ b/trunk/Documentation/filesystems/ext4.txt @@ -226,10 +226,6 @@ acl Enables POSIX Access Control Lists support. 
noacl This option disables POSIX Access Control List support. -reservation - -noreservation - bsddf (*) Make 'df' act like BSD. minixdf Make 'df' act like Minix. diff --git a/trunk/Documentation/filesystems/proc.txt b/trunk/Documentation/filesystems/proc.txt index 60740e8ecb37..f48178024067 100644 --- a/trunk/Documentation/filesystems/proc.txt +++ b/trunk/Documentation/filesystems/proc.txt @@ -574,6 +574,12 @@ The contents of each smp_affinity file is the same by default: > cat /proc/irq/0/smp_affinity ffffffff +There is an alternate interface, smp_affinity_list which allows specifying +a cpu range instead of a bitmask: + + > cat /proc/irq/0/smp_affinity_list + 1024-1031 + The default_smp_affinity mask applies to all non-active IRQs, which are the IRQs which have not yet been allocated/activated, and hence which lack a /proc/irq/[0-9]* directory. @@ -583,12 +589,13 @@ reports itself as being attached. This hardware locality information does not include information about any possible driver locality preference. prof_cpu_mask specifies which CPUs are to be profiled by the system wide -profiler. Default value is ffffffff (all cpus). +profiler. Default value is ffffffff (all cpus if there are only 32 of them). The way IRQs are routed is handled by the IO-APIC, and it's Round Robin between all the CPUs which are allowed to handle it. As usual the kernel has more info than you and does a better job than you, so the defaults are the -best choice for almost everyone. +best choice for almost everyone. [Note this applies only to those IO-APIC's +that support "Round Robin" interrupt distribution.] There are three more important subdirectories in /proc: net, scsi, and sys. 
The general rule is that the contents, or even the existence of these diff --git a/trunk/Documentation/filesystems/xfs.txt b/trunk/Documentation/filesystems/xfs.txt index 7bff3e4f35df..3fc0c31a6f5d 100644 --- a/trunk/Documentation/filesystems/xfs.txt +++ b/trunk/Documentation/filesystems/xfs.txt @@ -39,6 +39,12 @@ When mounting an XFS filesystem, the following options are accepted. drive level write caching to be enabled, for devices that support write barriers. + discard + Issue command to let the block device reclaim space freed by the + filesystem. This is useful for SSD devices, thinly provisioned + LUNs and virtual machine images, but may have a performance + impact. This option is incompatible with the nodelaylog option. + dmapi Enable the DMAPI (Data Management API) event callouts. Use with the "mtpt" option. diff --git a/trunk/Documentation/hwmon/emc6w201 b/trunk/Documentation/hwmon/emc6w201 new file mode 100644 index 000000000000..32f355aaf56b --- /dev/null +++ b/trunk/Documentation/hwmon/emc6w201 @@ -0,0 +1,42 @@ +Kernel driver emc6w201 +====================== + +Supported chips: + * SMSC EMC6W201 + Prefix: 'emc6w201' + Addresses scanned: I2C 0x2c, 0x2d, 0x2e + Datasheet: Not public + +Author: Jean Delvare + + +Description +----------- + +From the datasheet: + +"The EMC6W201 is an environmental monitoring device with automatic fan +control capability and enhanced system acoustics for noise suppression. +This ACPI compliant device provides hardware monitoring for up to six +voltages (including its own VCC) and five external thermal sensors, +measures the speed of up to five fans, and controls the speed of +multiple DC fans using three Pulse Width Modulator (PWM) outputs. Note +that it is possible to control more than three fans by connecting two +fans to one PWM output. The EMC6W201 will be available in a 36-pin +QFN package." + +The device is functionally close to the EMC6D100 series, but is +register-incompatible. 
+ +The driver currently only supports the monitoring of the voltages, +temperatures and fan speeds. Limits can be changed. Alarms are not +supported, and neither is fan speed control. + + +Known Systems With EMC6W201 +--------------------------- + +The EMC6W201 is a rare device, only found on a few systems, made in +2005 and 2006. Known systems with this device: +* Dell Precision 670 workstation +* Gigabyte 2CEWH mainboard diff --git a/trunk/Documentation/hwmon/f71882fg b/trunk/Documentation/hwmon/f71882fg index df02245d1419..84d2623810f3 100644 --- a/trunk/Documentation/hwmon/f71882fg +++ b/trunk/Documentation/hwmon/f71882fg @@ -6,6 +6,10 @@ Supported chips: Prefix: 'f71808e' Addresses scanned: none, address read from Super I/O config space Datasheet: Not public + * Fintek F71808A + Prefix: 'f71808a' + Addresses scanned: none, address read from Super I/O config space + Datasheet: Not public * Fintek F71858FG Prefix: 'f71858fg' Addresses scanned: none, address read from Super I/O config space diff --git a/trunk/Documentation/hwmon/fam15h_power b/trunk/Documentation/hwmon/fam15h_power new file mode 100644 index 000000000000..a92918e0bd69 --- /dev/null +++ b/trunk/Documentation/hwmon/fam15h_power @@ -0,0 +1,37 @@ +Kernel driver fam15h_power +========================== + +Supported chips: +* AMD Family 15h Processors + + Prefix: 'fam15h_power' + Addresses scanned: PCI space + Datasheets: + BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors + (not yet published) + +Author: Andreas Herrmann + +Description +----------- + +This driver permits reading of registers providing power information +of AMD Family 15h processors. + +For AMD Family 15h processors the following power values can be +calculated using different processor northbridge function registers: + +* BasePwrWatts: Specifies in watts the maximum amount of power + consumed by the processor for NB and logic external to the core. 
+* ProcessorPwrWatts: Specifies in watts the maximum amount of power + the processor can support. +* CurrPwrWatts: Specifies in watts the current amount of power being + consumed by the processor. + +This driver provides ProcessorPwrWatts and CurrPwrWatts: +* power1_crit (ProcessorPwrWatts) +* power1_input (CurrPwrWatts) + +On multi-node processors the calculated value is for the entire +package and not for a single node. Thus the driver creates sysfs +attributes only for internal node0 of a multi-node processor. diff --git a/trunk/Documentation/hwmon/k10temp b/trunk/Documentation/hwmon/k10temp index d2b56a4fd1f5..0393c89277c0 100644 --- a/trunk/Documentation/hwmon/k10temp +++ b/trunk/Documentation/hwmon/k10temp @@ -11,6 +11,7 @@ Supported chips: Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra) * AMD Family 12h processors: "Llano" * AMD Family 14h processors: "Brazos" (C/E/G-Series) +* AMD Family 15h processors: "Bulldozer" Prefix: 'k10temp' Addresses scanned: PCI space @@ -40,7 +41,7 @@ Description ----------- This driver permits reading of the internal temperature sensor of AMD -Family 10h/11h/12h/14h processors. +Family 10h/11h/12h/14h/15h processors. All these processors have a sensor, but on those for Socket F or AM2+, the sensor may return inconsistent values (erratum 319). 
The driver diff --git a/trunk/Documentation/hwmon/max6650 b/trunk/Documentation/hwmon/max6650 index c565650fcfc6..58d9644a2bde 100644 --- a/trunk/Documentation/hwmon/max6650 +++ b/trunk/Documentation/hwmon/max6650 @@ -2,9 +2,13 @@ Kernel driver max6650 ===================== Supported chips: - * Maxim 6650 / 6651 + * Maxim MAX6650 Prefix: 'max6650' - Addresses scanned: I2C 0x1b, 0x1f, 0x48, 0x4b + Addresses scanned: none + Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6650-MAX6651.pdf + * Maxim MAX6651 + Prefix: 'max6651' + Addresses scanned: none Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6650-MAX6651.pdf Authors: @@ -15,10 +19,10 @@ Authors: Description ----------- -This driver implements support for the Maxim 6650/6651 +This driver implements support for the Maxim MAX6650 and MAX6651. -The 2 devices are very similar, but the Maxim 6550 has a reduced feature -set, e.g. only one fan-input, instead of 4 for the 6651. +The 2 devices are very similar, but the MAX6650 has a reduced feature +set, e.g. only one fan-input, instead of 4 for the MAX6651. The driver is not able to distinguish between the 2 devices. @@ -36,6 +40,13 @@ fan1_div rw sets the speed range the inputs can handle. Legal values are 1, 2, 4, and 8. Use lower values for faster fans. +Usage notes +----------- + +This driver does not auto-detect devices. You will have to instantiate the +devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. 
+ Module parameters ----------------- diff --git a/trunk/Documentation/ioctl/ioctl-number.txt b/trunk/Documentation/ioctl/ioctl-number.txt index 2d1ad12e2b3e..3a46e360496d 100644 --- a/trunk/Documentation/ioctl/ioctl-number.txt +++ b/trunk/Documentation/ioctl/ioctl-number.txt @@ -304,6 +304,7 @@ Code Seq#(hex) Include File Comments 0xB0 all RATIO devices in development: 0xB1 00-1F PPPoX +0xB3 00 linux/mmc/ioctl.h 0xC0 00-0F linux/usb/iowarrior.h 0xCB 00-1F CBM serial IEC bus in development: diff --git a/trunk/Documentation/kbuild/kconfig-language.txt b/trunk/Documentation/kbuild/kconfig-language.txt index b507d61fd41c..44e2649fbb29 100644 --- a/trunk/Documentation/kbuild/kconfig-language.txt +++ b/trunk/Documentation/kbuild/kconfig-language.txt @@ -113,6 +113,13 @@ applicable everywhere (see syntax). That will limit the usefulness but on the other hand avoid the illegal configurations all over. +- limiting menu display: "visible if" + This attribute is only applicable to menu blocks, if the condition is + false, the menu block is not displayed to the user (the symbols + contained there can still be selected by other symbols, though). It is + similar to a conditional "prompt" attribute for individual menu + entries. Default value of "visible" is true. + - numerical ranges: "range" ["if" ] This allows to limit the range of possible input values for int and hex symbols. The user can only input a value which is larger than @@ -303,7 +310,8 @@ menu: "endmenu" This defines a menu block, see "Menu structure" above for more -information. The only possible options are dependencies. +information. The only possible options are dependencies and "visible" +attributes. if: @@ -381,3 +389,25 @@ config FOO limits FOO to module (=m) or disabled (=n). +Kconfig symbol existence +~~~~~~~~~~~~~~~~~~~~~~~~ +The following two methods produce the same kconfig symbol dependencies +but differ greatly in kconfig symbol existence (production) in the +generated config file. 
+ +case 1: + +config FOO + tristate "about foo" + depends on BAR + +vs. case 2: + +if BAR +config FOO + tristate "about foo" +endif + +In case 1, the symbol FOO will always exist in the config file (given +no other dependencies). In case 2, the symbol FOO will only exist in +the config file if BAR is enabled. diff --git a/trunk/Documentation/kbuild/kconfig.txt b/trunk/Documentation/kbuild/kconfig.txt index cca46b1a0f6c..c313d71324b4 100644 --- a/trunk/Documentation/kbuild/kconfig.txt +++ b/trunk/Documentation/kbuild/kconfig.txt @@ -48,11 +48,6 @@ KCONFIG_OVERWRITECONFIG If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not break symlinks when .config is a symlink to somewhere else. -KCONFIG_NOTIMESTAMP --------------------------------------------------- -If this environment variable exists and is non-null, the timestamp line -in generated .config files is omitted. - ______________________________________________________________________ Environment variables for '{allyes/allmod/allno/rand}config' diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 7c6624e7a5cb..5438a2d7907f 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -1777,9 +1777,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nosoftlockup [KNL] Disable the soft-lockup detector. - noswapaccount [KNL] Disable accounting of swap in memory resource - controller. (See Documentation/cgroups/memory.txt) - nosync [HW,M68K] Disables sync negotiation for all devices. 
notsc [BUGS=X86-32] Disable Time Stamp Counter diff --git a/trunk/Documentation/lockstat.txt b/trunk/Documentation/lockstat.txt index 65f4c795015d..9c0a80d17a23 100644 --- a/trunk/Documentation/lockstat.txt +++ b/trunk/Documentation/lockstat.txt @@ -136,7 +136,7 @@ View the top contending locks: dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 &inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74 &zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06 - &inode->i_data.i_mmap_lock: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 + &inode->i_data.i_mmap_mutex: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 &q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52 &rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62 &rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63 diff --git a/trunk/Documentation/mmc/00-INDEX b/trunk/Documentation/mmc/00-INDEX index fca586f5b853..93dd7a714075 100644 --- a/trunk/Documentation/mmc/00-INDEX +++ b/trunk/Documentation/mmc/00-INDEX @@ -2,3 +2,5 @@ - this file mmc-dev-attrs.txt - info on SD and MMC device attributes +mmc-dev-parts.txt + - info on SD and MMC device partitions diff --git a/trunk/Documentation/mmc/mmc-dev-attrs.txt b/trunk/Documentation/mmc/mmc-dev-attrs.txt index ff2bd685bced..8898a95b41e5 100644 --- a/trunk/Documentation/mmc/mmc-dev-attrs.txt +++ b/trunk/Documentation/mmc/mmc-dev-attrs.txt @@ -1,3 +1,13 @@ +SD and MMC Block Device Attributes +================================== + +These attributes are defined for the block devices associated with the +SD or MMC device. + +The following attributes are read/write. + + force_ro Enforce read-only access even if write protect switch is off. 
+ SD and MMC Device Attributes ============================ diff --git a/trunk/Documentation/mmc/mmc-dev-parts.txt b/trunk/Documentation/mmc/mmc-dev-parts.txt new file mode 100644 index 000000000000..2db28b8e662f --- /dev/null +++ b/trunk/Documentation/mmc/mmc-dev-parts.txt @@ -0,0 +1,27 @@ +SD and MMC Device Partitions +============================ + +Device partitions are additional logical block devices present on the +SD/MMC device. + +As of this writing, MMC boot partitions are supported and exposed as +/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the +parent /dev/mmcblkX. + +MMC Boot Partitions +=================== + +Read and write access is provided to the two MMC boot partitions. Due to +the sensitive nature of the boot partition contents, which often store +a bootloader or bootloader configuration tables crucial to booting the +platform, write access is disabled by default to reduce the chance of +accidental bricking. + +To enable write access to /dev/mmcblkXbootY, disable the forced read-only +access with: + +echo 0 > /sys/block/mmcblkXbootY/force_ro + +To re-enable read-only access: + +echo 1 > /sys/block/mmcblkXbootY/force_ro diff --git a/trunk/Documentation/networking/bonding.txt b/trunk/Documentation/networking/bonding.txt index 1f45bd887d65..675612ff41ae 100644 --- a/trunk/Documentation/networking/bonding.txt +++ b/trunk/Documentation/networking/bonding.txt @@ -770,8 +770,17 @@ resend_igmp a failover event. One membership report is issued immediately after the failover, subsequent packets are sent in each 200ms interval. - The valid range is 0 - 255; the default value is 1. This option - was added for bonding version 3.7.0. + The valid range is 0 - 255; the default value is 1. A value of 0 + prevents the IGMP membership report from being issued in response + to the failover event. 
+ + This option is useful for bonding modes balance-rr (0), active-backup + (1), balance-tlb (5) and balance-alb (6), in which a failover can + switch the IGMP traffic from one slave to another. Therefore a fresh + IGMP report must be issued to cause the switch to forward the incoming + IGMP traffic over the newly selected slave. + + This option was added for bonding version 3.7.0. 3. Configuring Bonding Devices ============================== diff --git a/trunk/Documentation/ptp/ptp.txt b/trunk/Documentation/ptp/ptp.txt new file mode 100644 index 000000000000..ae8fef86b832 --- /dev/null +++ b/trunk/Documentation/ptp/ptp.txt @@ -0,0 +1,89 @@ + +* PTP hardware clock infrastructure for Linux + + This patch set introduces support for IEEE 1588 PTP clocks in + Linux. Together with the SO_TIMESTAMPING socket options, this + presents a standardized method for developing PTP user space + programs, synchronizing Linux with external clocks, and using the + ancillary features of PTP hardware clocks. + + A new class driver exports a kernel interface for specific clock + drivers and a user space interface. The infrastructure supports a + complete set of PTP hardware clock functionality. + + + Basic clock operations + - Set time + - Get time + - Shift the clock by a given offset atomically + - Adjust clock frequency + + + Ancillary clock features + - One shot or periodic alarms, with signal delivery to user program + - Time stamp external events + - Periodic output signals configurable from user space + - Synchronization of the Linux system time via the PPS subsystem + +** PTP hardware clock kernel API + + A PTP clock driver registers itself with the class driver. The + class driver handles all of the dealings with user space. The + author of a clock driver need only implement the details of + programming the clock hardware. The clock driver notifies the class + driver of asynchronous events (alarms and external time stamps) via + a simple message passing interface.
+ + The class driver supports multiple PTP clock drivers. In normal use + cases, only one PTP clock is needed. However, for testing and + development, it can be useful to have more than one clock in a + single system, in order to allow performance comparisons. + +** PTP hardware clock user space API + + The class driver also creates a character device for each + registered clock. User space can use an open file descriptor from + the character device as a POSIX clock id and may call + clock_gettime, clock_settime, and clock_adjtime. These calls + implement the basic clock operations. + + User space programs may control the clock using standardized + ioctls. A program may query, enable, configure, and disable the + ancillary clock features. User space can receive time stamped + events via blocking read() and poll(). One shot and periodic + signals may be configured via the POSIX timer_settime() system + call. + +** Writing clock drivers + + Clock drivers include include/linux/ptp_clock_kernel.h and register + themselves by presenting a 'struct ptp_clock_info' to the + registration method. Clock drivers must implement all of the + functions in the interface. If a clock does not offer a particular + ancillary feature, then the driver should just return -EOPNOTSUPP + from those functions. + + Drivers must ensure that all of the methods in interface are + reentrant. Since most hardware implementations treat the time value + as a 64 bit integer accessed as two 32 bit registers, drivers + should use spin_lock_irqsave/spin_unlock_irqrestore to protect + against concurrent access. This locking cannot be accomplished in + class driver, since the lock may also be needed by the clock + driver's interrupt service routine. + +** Supported hardware + + + Freescale eTSEC gianfar + - 2 Time stamp external triggers, programmable polarity (opt. 
interrupt) + - 2 Alarm registers (optional interrupt) + - 3 Periodic signals (optional interrupt) + + + National DP83640 + - 6 GPIOs programmable as inputs or outputs + - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be + used as general inputs or outputs + - GPIO inputs can time stamp external triggers + - GPIO outputs can produce periodic signals + - 1 interrupt pin + + + Intel IXP465 + - Auxiliary Slave/Master Mode Snapshot (optional interrupt) + - Target Time (optional interrupt) diff --git a/trunk/Documentation/ptp/testptp.c b/trunk/Documentation/ptp/testptp.c new file mode 100644 index 000000000000..f59ded066108 --- /dev/null +++ b/trunk/Documentation/ptp/testptp.c @@ -0,0 +1,381 @@ +/* + * PTP 1588 clock support - User space test program + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DEVICE "/dev/ptp0" + +#ifndef ADJ_SETOFFSET +#define ADJ_SETOFFSET 0x0100 +#endif + +#ifndef CLOCK_INVALID +#define CLOCK_INVALID -1 +#endif + +/* When glibc offers the syscall, this will go away. 
*/ +#include +static int clock_adjtime(clockid_t id, struct timex *tx) +{ + return syscall(__NR_clock_adjtime, id, tx); +} + +static clockid_t get_clockid(int fd) +{ +#define CLOCKFD 3 +#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) + + return FD_TO_CLOCKID(fd); +} + +static void handle_alarm(int s) +{ + printf("received signal %d\n", s); +} + +static int install_handler(int signum, void (*handler)(int)) +{ + struct sigaction action; + sigset_t mask; + + /* Unblock the signal. */ + sigemptyset(&mask); + sigaddset(&mask, signum); + sigprocmask(SIG_UNBLOCK, &mask, NULL); + + /* Install the signal handler. */ + action.sa_handler = handler; + action.sa_flags = 0; + sigemptyset(&action.sa_mask); + sigaction(signum, &action, NULL); + + return 0; +} + +static long ppb_to_scaled_ppm(int ppb) +{ + /* + * The 'freq' field in the 'struct timex' is in parts per + * million, but with a 16 bit binary fractional field. + * Instead of calculating either one of + * + * scaled_ppm = (ppb / 1000) << 16 [1] + * scaled_ppm = (ppb << 16) / 1000 [2] + * + * we simply use double precision math, in order to avoid the + * truncation in [1] and the possible overflow in [2]. 
+ */ + return (long) (ppb * 65.536); +} + +static void usage(char *progname) +{ + fprintf(stderr, + "usage: %s [options]\n" + " -a val request a one-shot alarm after 'val' seconds\n" + " -A val request a periodic alarm every 'val' seconds\n" + " -c query the ptp clock's capabilities\n" + " -d name device to open\n" + " -e val read 'val' external time stamp events\n" + " -f val adjust the ptp clock frequency by 'val' ppb\n" + " -g get the ptp clock time\n" + " -h prints this message\n" + " -p val enable output with a period of 'val' nanoseconds\n" + " -P val enable or disable (val=1|0) the system clock PPS\n" + " -s set the ptp clock time from the system time\n" + " -S set the system time from the ptp clock time\n" + " -t val shift the ptp clock time by 'val' seconds\n", + progname); +} + +int main(int argc, char *argv[]) +{ + struct ptp_clock_caps caps; + struct ptp_extts_event event; + struct ptp_extts_request extts_request; + struct ptp_perout_request perout_request; + struct timespec ts; + struct timex tx; + + static timer_t timerid; + struct itimerspec timeout; + struct sigevent sigevent; + + char *progname; + int c, cnt, fd; + + char *device = DEVICE; + clockid_t clkid; + int adjfreq = 0x7fffffff; + int adjtime = 0; + int capabilities = 0; + int extts = 0; + int gettime = 0; + int oneshot = 0; + int periodic = 0; + int perout = -1; + int pps = -1; + int settime = 0; + + progname = strrchr(argv[0], '/'); + progname = progname ? 
1+progname : argv[0]; + while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghp:P:sSt:v"))) { + switch (c) { + case 'a': + oneshot = atoi(optarg); + break; + case 'A': + periodic = atoi(optarg); + break; + case 'c': + capabilities = 1; + break; + case 'd': + device = optarg; + break; + case 'e': + extts = atoi(optarg); + break; + case 'f': + adjfreq = atoi(optarg); + break; + case 'g': + gettime = 1; + break; + case 'p': + perout = atoi(optarg); + break; + case 'P': + pps = atoi(optarg); + break; + case 's': + settime = 1; + break; + case 'S': + settime = 2; + break; + case 't': + adjtime = atoi(optarg); + break; + case 'h': + usage(progname); + return 0; + case '?': + default: + usage(progname); + return -1; + } + } + + fd = open(device, O_RDWR); + if (fd < 0) { + fprintf(stderr, "opening %s: %s\n", device, strerror(errno)); + return -1; + } + + clkid = get_clockid(fd); + if (CLOCK_INVALID == clkid) { + fprintf(stderr, "failed to read clock id\n"); + return -1; + } + + if (capabilities) { + if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) { + perror("PTP_CLOCK_GETCAPS"); + } else { + printf("capabilities:\n" + " %d maximum frequency adjustment (ppb)\n" + " %d programmable alarms\n" + " %d external time stamp channels\n" + " %d programmable periodic signals\n" + " %d pulse per second\n", + caps.max_adj, + caps.n_alarm, + caps.n_ext_ts, + caps.n_per_out, + caps.pps); + } + } + + if (0x7fffffff != adjfreq) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_FREQUENCY; + tx.freq = ppb_to_scaled_ppm(adjfreq); + if (clock_adjtime(clkid, &tx)) { + perror("clock_adjtime"); + } else { + puts("frequency adjustment okay"); + } + } + + if (adjtime) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_SETOFFSET; + tx.time.tv_sec = adjtime; + tx.time.tv_usec = 0; + if (clock_adjtime(clkid, &tx) < 0) { + perror("clock_adjtime"); + } else { + puts("time shift okay"); + } + } + + if (gettime) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + } else { + printf("clock time: %ld.%09ld 
or %s", + ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec)); + } + } + + if (settime == 1) { + clock_gettime(CLOCK_REALTIME, &ts); + if (clock_settime(clkid, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (settime == 2) { + clock_gettime(clkid, &ts); + if (clock_settime(CLOCK_REALTIME, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (extts) { + memset(&extts_request, 0, sizeof(extts_request)); + extts_request.index = 0; + extts_request.flags = PTP_ENABLE_FEATURE; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + extts = 0; + } else { + puts("external time stamp request okay"); + } + for (; extts; extts--) { + cnt = read(fd, &event, sizeof(event)); + if (cnt != sizeof(event)) { + perror("read"); + break; + } + printf("event index %u at %lld.%09u\n", event.index, + event.t.sec, event.t.nsec); + fflush(stdout); + } + /* Disable the feature again. */ + extts_request.flags = 0; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + } + } + + if (oneshot) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. */ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_value.tv_sec = oneshot; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + pause(); + timer_delete(timerid); + } + + if (periodic) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. 
*/ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_interval.tv_sec = periodic; + timeout.it_value.tv_sec = periodic; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + while (1) { + pause(); + } + timer_delete(timerid); + } + + if (perout >= 0) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + return -1; + } + memset(&perout_request, 0, sizeof(perout_request)); + perout_request.index = 0; + perout_request.start.sec = ts.tv_sec + 2; + perout_request.start.nsec = 0; + perout_request.period.sec = 0; + perout_request.period.nsec = perout; + if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) { + perror("PTP_PEROUT_REQUEST"); + } else { + puts("periodic output request okay"); + } + } + + if (pps != -1) { + int enable = pps ? 1 : 0; + if (ioctl(fd, PTP_ENABLE_PPS, enable)) { + perror("PTP_ENABLE_PPS"); + } else { + puts("pps for system time request okay"); + } + } + + close(fd); + return 0; +} diff --git a/trunk/Documentation/ptp/testptp.mk b/trunk/Documentation/ptp/testptp.mk new file mode 100644 index 000000000000..4ef2d9755421 --- /dev/null +++ b/trunk/Documentation/ptp/testptp.mk @@ -0,0 +1,33 @@ +# PTP 1588 clock support - User space test program +# +# Copyright (C) 2010 OMICRON electronics GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ +CC = $(CROSS_COMPILE)gcc +INC = -I$(KBUILD_OUTPUT)/usr/include +CFLAGS = -Wall $(INC) +LDLIBS = -lrt +PROGS = testptp + +all: $(PROGS) + +testptp: testptp.o + +clean: + rm -f testptp.o + +distclean: clean + rm -f $(PROGS) diff --git a/trunk/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/trunk/Documentation/virtual/uml/UserModeLinux-HOWTO.txt index 9b7e1904db1c..5d0fc8bfcdb9 100644 --- a/trunk/Documentation/virtual/uml/UserModeLinux-HOWTO.txt +++ b/trunk/Documentation/virtual/uml/UserModeLinux-HOWTO.txt @@ -1182,6 +1182,16 @@ forge.net/> and explains these in detail, as well as some other issues. + There is also a related point-to-point only "ucast" transport. + This is useful when your network does not support multicast, and + all network connections are simple point to point links. + + The full set of command line options for this transport are + + + ethn=ucast,ethernet address,remote address,listen port,remote port + + 66..66.. TTUUNN//TTAAPP wwiitthh tthhee uummll__nneett hheellppeerr diff --git a/trunk/Documentation/vm/cleancache.txt b/trunk/Documentation/vm/cleancache.txt new file mode 100644 index 000000000000..36c367c73084 --- /dev/null +++ b/trunk/Documentation/vm/cleancache.txt @@ -0,0 +1,278 @@ +MOTIVATION + +Cleancache is a new optional feature provided by the VFS layer that +potentially dramatically increases page cache effectiveness for +many workloads in many environments at a negligible cost. + +Cleancache can be thought of as a page-granularity victim cache for clean +pages that the kernel's pageframe replacement algorithm (PFRA) would like +to keep around, but can't since there isn't enough memory. So when the +PFRA "evicts" a page, it first attempts to use cleancache code to +put the data contained in that page into "transcendent memory", memory +that is not directly accessible or addressable by the kernel and is +of unknown and possibly time-varying size. 
+ +Later, when a cleancache-enabled filesystem wishes to access a page +in a file on disk, it first checks cleancache to see if it already +contains it; if it does, the page of data is copied into the kernel +and a disk access is avoided. + +Transcendent memory "drivers" for cleancache are currently implemented +in Xen (using hypervisor memory) and zcache (using in-kernel compressed +memory) and other implementations are in development. + +FAQs are included below. + +IMPLEMENTATION OVERVIEW + +A cleancache "backend" that provides transcendent memory registers itself +to the kernel's cleancache "frontend" by calling cleancache_register_ops, +passing a pointer to a cleancache_ops structure with funcs set appropriately. +Note that cleancache_register_ops returns the previous settings so that +chaining can be performed if desired. The functions provided must conform to +certain semantics as follows: + +Most important, cleancache is "ephemeral". Pages which are copied into +cleancache have an indefinite lifetime which is completely unknowable +by the kernel and so may or may not still be in cleancache at any later time. +Thus, as its name implies, cleancache is not suitable for dirty pages. +Cleancache has complete discretion over what pages to preserve and what +pages to discard and when. + +Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a +pool id which, if positive, must be saved in the filesystem's superblock; +a negative return value indicates failure. A "put_page" will copy a +(presumably about-to-be-evicted) page into cleancache and associate it with +the pool id, a file key, and a page index into the file. (The combination +of a pool id, a file key, and an index is sometimes called a "handle".) +A "get_page" will copy the page, if found, from cleancache into kernel memory. 
+A "flush_page" will ensure the page no longer is present in cleancache; +a "flush_inode" will flush all pages associated with the specified file; +and, when a filesystem is unmounted, a "flush_fs" will flush all pages in +all files specified by the given pool id and also surrender the pool id. + +An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache +to treat the pool as shared using a 128-bit UUID as a key. On systems +that may run multiple kernels (such as hard partitioned or virtualized +systems) that may share a clustered filesystem, and where cleancache +may be shared among those kernels, calls to init_shared_fs that specify the +same UUID will receive the same pool id, thus allowing the pages to +be shared. Note that any security requirements must be imposed outside +of the kernel (e.g. by "tools" that control cleancache). Or a +cleancache implementation can simply disable shared_init by always +returning a negative value. + +If a get_page is successful on a non-shared pool, the page is flushed (thus +making cleancache an "exclusive" cache). On a shared pool, the page +is NOT flushed on a successful get_page so that it remains accessible to +other sharers. The kernel is responsible for ensuring coherency between +cleancache (shared or not), the page cache, and the filesystem, using +cleancache flush operations as required. + +Note that cleancache must enforce put-put-get coherency and get-get +coherency. For the former, if two puts are made to the same handle but +with different data, say AAA by the first put and BBB by the second, a +subsequent get can never return the stale data (AAA). For get-get coherency, +if a get for a given handle fails, subsequent gets for that handle will +never succeed unless preceded by a successful put with that handle. 
+ +Last, cleancache provides no SMP serialization guarantees; if two +different Linux threads are simultaneously putting and flushing a page +with the same handle, the results are indeterminate. Callers must +lock the page to ensure serial behavior. + +CLEANCACHE PERFORMANCE METRICS + +Cleancache monitoring is done by sysfs files in the +/sys/kernel/mm/cleancache directory. The effectiveness of cleancache +can be measured (across all filesystems) with: + +succ_gets - number of gets that were successful +failed_gets - number of gets that failed +puts - number of puts attempted (all "succeed") +flushes - number of flushes attempted + +A backend implementation may provide additional metrics. + +FAQ + +1) Where's the value? (Andrew Morton) + +Cleancache provides a significant performance benefit to many workloads +in many environments with negligible overhead by improving the +effectiveness of the pagecache. Clean pagecache pages are +saved in transcendent memory (RAM that is otherwise not directly +addressable to the kernel); fetching those pages later avoids "refaults" +and thus disk reads. + +Cleancache (and its sister code "frontswap") provide interfaces for +this transcendent memory (aka "tmem"), which conceptually lies between +fast kernel-directly-addressable RAM and slower DMA/asynchronous devices. +Disallowing direct kernel or userland reads/writes to tmem +is ideal when data is transformed to a different form and size (such +as with compression) or secretly moved (as might be useful for write- +balancing for some RAM-like devices). Evicted page-cache pages (and +swap pages) are a great use for this kind of slower-than-RAM-but-much- +faster-than-disk transcendent memory, and the cleancache (and frontswap) +"page-object-oriented" specification provides a nice way to read and +write -- and indirectly "name" -- the pages.
+ +In the virtual case, the whole point of virtualization is to statistically +multiplex physical resources across the varying demands of multiple +virtual machines. This is really hard to do with RAM and efforts to +do it well with no kernel change have essentially failed (except in some +well-publicized special-case workloads). Cleancache -- and frontswap -- +with a fairly small impact on the kernel, provide a huge amount +of flexibility for more dynamic, flexible RAM multiplexing. +Specifically, the Xen Transcendent Memory backend allows otherwise +"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple +virtual machines, but the pages can be compressed and deduplicated to +optimize RAM utilization. And when guest OS's are induced to surrender +underutilized RAM (e.g. with "self-ballooning"), page cache pages +are the first to go, and cleancache allows those pages to be +saved and reclaimed if overall host system memory conditions allow. + +And the identical interface used for cleancache can be used in +physical systems as well. The zcache driver acts as a memory-hungry +device that stores pages of data in a compressed state. And +the proposed "RAMster" driver shares RAM across multiple physical +systems. + +2) Why does cleancache have its sticky fingers so deep inside the + filesystems and VFS? (Andrew Morton and Christoph Hellwig) + +The core hooks for cleancache in VFS are in most cases a single line +and the minimum set are placed precisely where needed to maintain +coherency (via cleancache_flush operations) between cleancache, +the page cache, and disk. All hooks compile into nothingness if +cleancache is config'ed off and turn into a function-pointer- +compare-to-NULL if config'ed on but no backend claims the ops +functions, or to a compare-struct-element-to-negative if a +backend claims the ops functions but a filesystem doesn't enable +cleancache. 
+ +Some filesystems are built entirely on top of VFS and the hooks +in VFS are sufficient, so don't require an "init_fs" hook; the +initial implementation of cleancache didn't provide this hook. +But for some filesystems (such as btrfs), the VFS hooks are +incomplete and one or more hooks in fs-specific code are required. +And for some other filesystems, such as tmpfs, cleancache may +be counterproductive. So it seemed prudent to require a filesystem +to "opt in" to use cleancache, which requires adding a hook in +each filesystem. Not all filesystems are supported by cleancache +only because they haven't been tested. The existing set should +be sufficient to validate the concept, the opt-in approach means +that untested filesystems are not affected, and the hooks in the +existing filesystems should make it very easy to add more +filesystems in the future. + +The total impact of the hooks to existing fs and mm files is only +about 40 lines added (not counting comments and blank lines). + +3) Why not make cleancache asynchronous and batched so it can + more easily interface with real devices with DMA instead + of copying each individual page? (Minchan Kim) + +The one-page-at-a-time copy semantics simplifies the implementation +on both the frontend and backend and also allows the backend to +do fancy things on-the-fly like page compression and +page deduplication. And since the data is "gone" (copied into/out +of the pageframe) before the cleancache get/put call returns, +a great deal of race conditions and potential coherency issues +are avoided. While the interface seems odd for a "real device" +or for real kernel-addressable RAM, it makes perfect sense for +transcendent memory. + +4) Why is non-shared cleancache "exclusive"? And where is the + page "flushed" after a "get"? (Minchan Kim) + +The main reason is to free up space in transcendent memory and +to avoid unnecessary cleancache_flush calls. 
If you want inclusive, +the page can be "put" immediately following the "get". If +put-after-get for inclusive becomes common, the interface could +be easily extended to add a "get_no_flush" call. + +The flush is done by the cleancache backend implementation. + +5) What's the performance impact? + +Performance analysis has been presented at OLS'09 and LCA'10. +Briefly, performance gains can be significant on most workloads, +especially when memory pressure is high (e.g. when RAM is +overcommitted in a virtual workload); and because the hooks are +invoked primarily in place of or in addition to a disk read/write, +overhead is negligible even in worst case workloads. Basically +cleancache replaces I/O with memory-copy-CPU-overhead; on older +single-core systems with slow memory-copy speeds, cleancache +has little value, but in newer multicore machines, especially +consolidated/virtualized machines, it has great value. + +6) How do I add cleancache support for filesystem X? (Boaz Harrash) + +Filesystems that are well-behaved and conform to certain +restrictions can utilize cleancache simply by making a call to +cleancache_init_fs at mount time. Unusual, misbehaving, or +poorly layered filesystems must either add additional hooks +and/or undergo extensive additional testing... or should just +not enable the optional cleancache. + +Some points for a filesystem to consider: + +- The FS should be block-device-based (e.g. a ram-based FS such + as tmpfs should not enable cleancache) +- To ensure coherency/correctness, the FS must ensure that all + file removal or truncation operations either go through VFS or + add hooks to do the equivalent cleancache "flush" operations +- To ensure coherency/correctness, either inode numbers must + be unique across the lifetime of the on-disk file OR the + FS must provide an "encode_fh" function. +- The FS must call the VFS superblock alloc and deactivate routines + or add hooks to do the equivalent cleancache calls done there. 
+- To maximize performance, all pages fetched from the FS should + go through the do_mpage_readpage routine or the FS should add + hooks to do the equivalent (cf. btrfs) +- Currently, the FS blocksize must be the same as PAGESIZE. This + is not an architectural restriction, but no backends currently + support anything different. +- A clustered FS should invoke the "init_shared_fs" cleancache + hook to get best performance for some backends. + +7) Why not use the KVA of the inode as the key? (Christoph Hellwig) + +If cleancache would use the inode virtual address instead of +inode/filehandle, the pool id could be eliminated. But, this +won't work because cleancache retains pagecache data pages +persistently even when the inode has been pruned from the +inode unused list, and only flushes the data page if the file +gets removed/truncated. So if cleancache used the inode kva, +there would be potential coherency issues if/when the inode +kva is reused for a different file. Alternately, if cleancache +flushed the pages when the inode kva was freed, much of the value +of cleancache would be lost because the cache of pages in cleancache +is potentially much larger than the kernel pagecache and is most +useful if the pages survive inode cache removal. + +8) Why is a global variable required? + +The cleancache_enabled flag is checked in all of the frequently-used +cleancache hooks. The alternative is a function call to check a static +variable. Since cleancache is enabled dynamically at runtime, systems +that don't enable cleancache would suffer thousands (possibly +tens-of-thousands) of unnecessary function calls per second. So the +global variable allows cleancache to be enabled by default at compile +time, but have insignificant performance impact when cleancache remains +disabled at runtime. + +9) Does cleancache work with KVM? + +The memory model of KVM is sufficiently different that a cleancache +backend may have less value for KVM.
This remains to be tested, +especially in an overcommitted system. + +10) Does cleancache work in userspace? It sounds useful for + memory hungry caches like web browsers. (Jamie Lokier) + +No plans yet, though we agree it sounds useful, at least for +apps that bypass the page cache (e.g. O_DIRECT). + +Last updated: Dan Magenheimer, April 13 2011 diff --git a/trunk/Documentation/vm/locking b/trunk/Documentation/vm/locking index 25fadb448760..f61228bd6395 100644 --- a/trunk/Documentation/vm/locking +++ b/trunk/Documentation/vm/locking @@ -66,7 +66,7 @@ in some cases it is not really needed. Eg, vm_start is modified by expand_stack(), it is hard to come up with a destructive scenario without having the vmlist protection in this case. -The page_table_lock nests with the inode i_mmap_lock and the kmem cache +The page_table_lock nests with the inode i_mmap_mutex and the kmem cache c_spinlock spinlocks. This is okay, since the kmem code asks for pages after dropping c_spinlock. The page_table_lock also nests with pagecache_lock and pagemap_lru_lock spinlocks, and no code asks for memory with these locks diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 98c324b07a16..d54d551004f7 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -287,35 +287,35 @@ F: sound/pci/ad1889.* AD525X ANALOG DEVICES DIGITAL POTENTIOMETERS DRIVER M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD5254 S: Supported F: drivers/misc/ad525x_dpot.c AD5398 CURRENT REGULATOR DRIVER (AD5398/AD5821) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD5398 S: Supported F: drivers/regulator/ad5398.c AD714X CAPACITANCE TOUCH SENSOR DRIVER (AD7142/3/7/8/7A) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD7142 S: Supported F: 
drivers/input/misc/ad714x.c AD7877 TOUCHSCREEN DRIVER M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD7877 S: Supported F: drivers/input/touchscreen/ad7877.c AD7879 TOUCHSCREEN DRIVER (AD7879/AD7889) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD7879 S: Supported F: drivers/input/touchscreen/ad7879.c @@ -341,7 +341,7 @@ F: drivers/net/wireless/adm8211.* ADP5520 BACKLIGHT DRIVER WITH IO EXPANDER (ADP5520/ADP5501) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/ADP5520 S: Supported F: drivers/mfd/adp5520.c @@ -352,7 +352,7 @@ F: drivers/input/keyboard/adp5520-keys.c ADP5588 QWERTY KEYPAD AND IO EXPANDER DRIVER (ADP5588/ADP5587) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/ADP5588 S: Supported F: drivers/input/keyboard/adp5588-keys.c @@ -360,7 +360,7 @@ F: drivers/gpio/adp5588-gpio.c ADP8860 BACKLIGHT DRIVER (ADP8860/ADP8861/ADP8863) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/ADP8860 S: Supported F: drivers/video/backlight/adp8860_bl.c @@ -387,7 +387,7 @@ F: drivers/hwmon/adt7475.c ADXL34X THREE-AXIS DIGITAL ACCELEROMETER DRIVER (ADXL345/ADXL346) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/ADXL345 S: Supported F: drivers/input/misc/adxl34x.c @@ -483,6 +483,13 @@ F: drivers/tty/serial/altera_jtaguart.c F: include/linux/altera_uart.h F: include/linux/altera_jtaguart.h +AMD FAM15H PROCESSOR POWER MONITORING DRIVER +M: Andreas Herrmann +L: lm-sensors@lm-sensors.org +S: Maintained +F: Documentation/hwmon/fam15h_power +F: 
drivers/hwmon/fam15h_power.c + AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER M: Thomas Dahlmann L: linux-geode@lists.infradead.org (moderated for non-subscribers) @@ -526,7 +533,7 @@ S: Maintained F: drivers/infiniband/hw/amso1100/ ANALOG DEVICES INC ASOC CODEC DRIVERS -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org L: alsa-devel@alsa-project.org (moderated for non-subscribers) W: http://wiki.analog.com/ S: Supported @@ -2034,9 +2041,8 @@ F: net/ax25/ax25_timer.c F: net/ax25/sysctl_net_ax25.c DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER -M: Tobias Ringstrom L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/networking/dmfe.txt F: drivers/net/tulip/dmfe.c @@ -3566,9 +3572,16 @@ M: Andrew Morton M: Jan Kara L: linux-ext4@vger.kernel.org S: Maintained -F: fs/jbd*/ -F: include/linux/ext*jbd*.h -F: include/linux/jbd*.h +F: fs/jbd/ +F: include/linux/ext3_jbd.h +F: include/linux/jbd.h + +JOURNALLING LAYER FOR BLOCK DEVICES (JBD2) +M: "Theodore Ts'o" +L: linux-ext4@vger.kernel.org +S: Maintained +F: fs/jbd2/ +F: include/linux/jbd2.h JSM Neo PCI based serial card M: Breno Leitao @@ -3591,10 +3604,9 @@ F: Documentation/hwmon/k8temp F: drivers/hwmon/k8temp.c KCONFIG -M: Roman Zippel +M: Michal Marek L: linux-kbuild@vger.kernel.org -Q: http://patchwork.kernel.org/project/linux-kbuild/list/ -S: Maintained +S: Odd Fixes F: Documentation/kbuild/kconfig-language.txt F: scripts/kconfig/ @@ -3898,7 +3910,6 @@ F: drivers/*/*/*pasemi* LINUX SECURITY MODULE (LSM) FRAMEWORK M: Chris Wright L: linux-security-module@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/chrisw/lsm-2.6.git S: Supported LIS3LV02D ACCELEROMETER DRIVER @@ -6796,6 +6807,13 @@ L: lm-sensors@lm-sensors.org S: Maintained F: drivers/hwmon/vt8231.c +VUB300 USB to SDIO/SD/MMC bridge chip +M: Tony Olech +L: linux-mmc@vger.kernel.org +L: linux-usb@vger.kernel.org +S: Supported +F: drivers/mmc/host/vub300.c + W1 DALLAS'S 1-WIRE BUS M: Evgeniy 
Polyakov S: Maintained diff --git a/trunk/Makefile b/trunk/Makefile index 6b73d1eed1ea..529d93fa2430 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -220,6 +220,14 @@ ifeq ($(ARCH),sh64) SRCARCH := sh endif +# Additional ARCH settings for tile +ifeq ($(ARCH),tilepro) + SRCARCH := tile +endif +ifeq ($(ARCH),tilegx) + SRCARCH := tile +endif + # Where to locate arch specific headers hdr-arch := $(SRCARCH) @@ -1009,7 +1017,8 @@ include/generated/utsrelease.h: include/config/kernel.release FORCE PHONY += headerdep headerdep: - $(Q)find include/ -name '*.h' | xargs --max-args 1 scripts/headerdep.pl + $(Q)find $(srctree)/include/ -name '*.h' | xargs --max-args 1 \ + $(srctree)/scripts/headerdep.pl -I$(srctree)/include # --------------------------------------------------------------------------- @@ -1417,13 +1426,15 @@ tags TAGS cscope gtags: FORCE # Scripts to check various things for consistency # --------------------------------------------------------------------------- +PHONY += includecheck versioncheck coccicheck namespacecheck export_report + includecheck: - find * $(RCS_FIND_IGNORE) \ + find $(srctree)/* $(RCS_FIND_IGNORE) \ -name '*.[hcS]' -type f -print | sort \ | xargs $(PERL) -w $(srctree)/scripts/checkincludes.pl versioncheck: - find * $(RCS_FIND_IGNORE) \ + find $(srctree)/* $(RCS_FIND_IGNORE) \ -name '*.[hcS]' -type f -print | sort \ | xargs $(PERL) -w $(srctree)/scripts/checkversion.pl diff --git a/trunk/arch/Kconfig b/trunk/arch/Kconfig index 8d24bacaa61e..26b0e2397a57 100644 --- a/trunk/arch/Kconfig +++ b/trunk/arch/Kconfig @@ -175,4 +175,7 @@ config HAVE_ARCH_JUMP_LABEL config HAVE_ARCH_MUTEX_CPU_RELAX bool +config HAVE_RCU_TABLE_FREE + bool + source "kernel/gcov/Kconfig" diff --git a/trunk/arch/alpha/Kconfig b/trunk/arch/alpha/Kconfig index 9808998cc073..e3a82775f9da 100644 --- a/trunk/arch/alpha/Kconfig +++ b/trunk/arch/alpha/Kconfig @@ -12,6 +12,7 @@ config ALPHA select GENERIC_IRQ_PROBE select AUTO_IRQ_AFFINITY if SMP select GENERIC_IRQ_SHOW + 
select ARCH_WANT_OPTIONAL_GPIOLIB help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, @@ -51,6 +52,9 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_CMOS_UPDATE def_bool y +config GENERIC_GPIO + def_bool y + config ZONE_DMA bool default y diff --git a/trunk/arch/alpha/include/asm/gpio.h b/trunk/arch/alpha/include/asm/gpio.h new file mode 100644 index 000000000000..7dc6a6343c06 --- /dev/null +++ b/trunk/arch/alpha/include/asm/gpio.h @@ -0,0 +1,55 @@ +/* + * Generic GPIO API implementation for Alpha. + * + * A straight copy of that for PowerPC which was: + * + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ASM_ALPHA_GPIO_H +#define _ASM_ALPHA_GPIO_H + +#include +#include + +#ifdef CONFIG_GPIOLIB + +/* + * We don't (yet) implement inlined/rapid versions for on-chip gpios. + * Just call gpiolib.
+ */ +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + +static inline int gpio_to_irq(unsigned int gpio) +{ + return __gpio_to_irq(gpio); +} + +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + +#endif /* CONFIG_GPIOLIB */ + +#endif /* _ASM_ALPHA_GPIO_H */ diff --git a/trunk/arch/alpha/include/asm/smp.h b/trunk/arch/alpha/include/asm/smp.h index 3f390e8cc0b3..c46e714aa3e0 100644 --- a/trunk/arch/alpha/include/asm/smp.h +++ b/trunk/arch/alpha/include/asm/smp.h @@ -39,8 +39,6 @@ struct cpuinfo_alpha { extern struct cpuinfo_alpha cpu_data[NR_CPUS]; -#define PROC_CHANGE_PENALTY 20 - #define hard_smp_processor_id() __hard_smp_processor_id() #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/trunk/arch/alpha/kernel/process.c b/trunk/arch/alpha/kernel/process.c index 3ec35066f1dc..838eac128409 100644 --- a/trunk/arch/alpha/kernel/process.c +++ b/trunk/arch/alpha/kernel/process.c @@ -121,7 +121,7 @@ common_shutdown_1(void *generic_ptr) /* Wait for the secondaries to halt. 
*/ set_cpu_present(boot_cpuid, false); set_cpu_possible(boot_cpuid, false); - while (cpus_weight(cpu_present_map)) + while (cpumask_weight(cpu_present_mask)) barrier(); #endif diff --git a/trunk/arch/alpha/kernel/setup.c b/trunk/arch/alpha/kernel/setup.c index edbddcbd5bc6..cc0fd862cf26 100644 --- a/trunk/arch/alpha/kernel/setup.c +++ b/trunk/arch/alpha/kernel/setup.c @@ -1257,7 +1257,7 @@ show_cpuinfo(struct seq_file *f, void *slot) #ifdef CONFIG_SMP seq_printf(f, "cpus active\t\t: %u\n" "cpu active mask\t\t: %016lx\n", - num_online_cpus(), cpus_addr(cpu_possible_map)[0]); + num_online_cpus(), cpumask_bits(cpu_possible_mask)[0]); #endif show_cache_size (f, "L1 Icache", alpha_l1i_cacheshape); diff --git a/trunk/arch/alpha/kernel/smp.c b/trunk/arch/alpha/kernel/smp.c index 5a621c6d22ab..d739703608fc 100644 --- a/trunk/arch/alpha/kernel/smp.c +++ b/trunk/arch/alpha/kernel/smp.c @@ -451,7 +451,7 @@ setup_smp(void) } printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_map = %lx\n", - smp_num_probed, cpu_present_map.bits[0]); + smp_num_probed, cpumask_bits(cpu_present_mask)[0]); } /* @@ -629,8 +629,9 @@ smp_send_reschedule(int cpu) void smp_send_stop(void) { - cpumask_t to_whom = cpu_possible_map; - cpu_clear(smp_processor_id(), to_whom); + cpumask_t to_whom; + cpumask_copy(&to_whom, cpu_possible_mask); + cpumask_clear_cpu(smp_processor_id(), &to_whom); #ifdef DEBUG_IPI_MSG if (hard_smp_processor_id() != boot_cpu_id) printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n"); diff --git a/trunk/arch/alpha/kernel/sys_dp264.c b/trunk/arch/alpha/kernel/sys_dp264.c index 5ac00fd4cd0c..f8856829c22a 100644 --- a/trunk/arch/alpha/kernel/sys_dp264.c +++ b/trunk/arch/alpha/kernel/sys_dp264.c @@ -140,7 +140,7 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) for (cpu = 0; cpu < 4; cpu++) { unsigned long aff = cpu_irq_affinity[cpu]; - if (cpu_isset(cpu, affinity)) + if (cpumask_test_cpu(cpu, &affinity)) aff |= 1UL << irq; else aff &= ~(1UL << irq); diff --git 
a/trunk/arch/alpha/kernel/sys_titan.c b/trunk/arch/alpha/kernel/sys_titan.c index fea0e4620994..6994407e242a 100644 --- a/trunk/arch/alpha/kernel/sys_titan.c +++ b/trunk/arch/alpha/kernel/sys_titan.c @@ -65,10 +65,11 @@ titan_update_irq_hw(unsigned long mask) register int bcpu = boot_cpuid; #ifdef CONFIG_SMP - cpumask_t cpm = cpu_present_map; + cpumask_t cpm; volatile unsigned long *dim0, *dim1, *dim2, *dim3; unsigned long mask0, mask1, mask2, mask3, dummy; + cpumask_copy(&cpm, cpu_present_mask); mask &= ~isa_enable; mask0 = mask & titan_cpu_irq_affinity[0]; mask1 = mask & titan_cpu_irq_affinity[1]; @@ -84,10 +85,10 @@ titan_update_irq_hw(unsigned long mask) dim1 = &cchip->dim1.csr; dim2 = &cchip->dim2.csr; dim3 = &cchip->dim3.csr; - if (!cpu_isset(0, cpm)) dim0 = &dummy; - if (!cpu_isset(1, cpm)) dim1 = &dummy; - if (!cpu_isset(2, cpm)) dim2 = &dummy; - if (!cpu_isset(3, cpm)) dim3 = &dummy; + if (!cpumask_test_cpu(0, &cpm)) dim0 = &dummy; + if (!cpumask_test_cpu(1, &cpm)) dim1 = &dummy; + if (!cpumask_test_cpu(2, &cpm)) dim2 = &dummy; + if (!cpumask_test_cpu(3, &cpm)) dim3 = &dummy; *dim0 = mask0; *dim1 = mask1; @@ -137,7 +138,7 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) int cpu; for (cpu = 0; cpu < 4; cpu++) { - if (cpu_isset(cpu, affinity)) + if (cpumask_test_cpu(cpu, &affinity)) titan_cpu_irq_affinity[cpu] |= 1UL << irq; else titan_cpu_irq_affinity[cpu] &= ~(1UL << irq); diff --git a/trunk/arch/alpha/mm/init.c b/trunk/arch/alpha/mm/init.c index 86425ab53bf5..69d0c5761e2f 100644 --- a/trunk/arch/alpha/mm/init.c +++ b/trunk/arch/alpha/mm/init.c @@ -32,8 +32,6 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - extern void die_if_kernel(char *,struct pt_regs *,long); static struct pcb_struct original_pcb; diff --git a/trunk/arch/alpha/mm/numa.c b/trunk/arch/alpha/mm/numa.c index 7b2c56d8f930..3973ae395772 100644 --- a/trunk/arch/alpha/mm/numa.c +++ b/trunk/arch/alpha/mm/numa.c @@ -313,6 +313,7 @@ void __init 
paging_init(void) zones_size[ZONE_DMA] = dma_local_pfn; zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn; } + node_set_state(nid, N_NORMAL_MEMORY); free_area_init_node(nid, zones_size, start_pfn, NULL); } diff --git a/trunk/arch/arm/Kconfig.debug b/trunk/arch/arm/Kconfig.debug index 03d01d783e3b..81cbe40c159c 100644 --- a/trunk/arch/arm/Kconfig.debug +++ b/trunk/arch/arm/Kconfig.debug @@ -63,13 +63,6 @@ config DEBUG_USER 8 - SIGSEGV faults 16 - SIGBUS faults -config DEBUG_STACK_USAGE - bool "Enable stack utilization instrumentation" - depends on DEBUG_KERNEL - help - Enables the display of the minimum amount of free stack which each - task has ever had available in the sysrq-T output. - # These options are only for real kernel hackers who want to get their hands dirty. config DEBUG_LL bool "Kernel low-level debugging functions" diff --git a/trunk/arch/arm/include/asm/smp.h b/trunk/arch/arm/include/asm/smp.h index a87664f54f93..d2b514fd76f4 100644 --- a/trunk/arch/arm/include/asm/smp.h +++ b/trunk/arch/arm/include/asm/smp.h @@ -20,12 +20,6 @@ #define raw_smp_processor_id() (current_thread_info()->cpu) -/* - * at the moment, there's not a big penalty for changing CPUs - * (the >big< penalty is running SMP in the first place) - */ -#define PROC_CHANGE_PENALTY 15 - struct seq_file; /* diff --git a/trunk/arch/arm/include/asm/tlb.h b/trunk/arch/arm/include/asm/tlb.h index 82dfe5d0c41e..265f908c4a6e 100644 --- a/trunk/arch/arm/include/asm/tlb.h +++ b/trunk/arch/arm/include/asm/tlb.h @@ -41,12 +41,12 @@ */ #if defined(CONFIG_SMP) || defined(CONFIG_CPU_32v7) #define tlb_fast_mode(tlb) 0 -#define FREE_PTE_NR 500 #else #define tlb_fast_mode(tlb) 1 -#define FREE_PTE_NR 0 #endif +#define MMU_GATHER_BUNDLE 8 + /* * TLB handling. This allows us to remove pages from the page * tables, and efficiently handle the TLB issues. 
@@ -58,7 +58,9 @@ struct mmu_gather { unsigned long range_start; unsigned long range_end; unsigned int nr; - struct page *pages[FREE_PTE_NR]; + unsigned int max; + struct page **pages; + struct page *local[MMU_GATHER_BUNDLE]; }; DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -97,26 +99,37 @@ static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr) } } +static inline void __tlb_alloc_page(struct mmu_gather *tlb) +{ + unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + + if (addr) { + tlb->pages = (void *)addr; + tlb->max = PAGE_SIZE / sizeof(struct page *); + } +} + static inline void tlb_flush_mmu(struct mmu_gather *tlb) { tlb_flush(tlb); if (!tlb_fast_mode(tlb)) { free_pages_and_swap_cache(tlb->pages, tlb->nr); tlb->nr = 0; + if (tlb->pages == tlb->local) + __tlb_alloc_page(tlb); } } -static inline struct mmu_gather * -tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) +static inline void +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm) { - struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); - tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->fullmm = fullmm; tlb->vma = NULL; + tlb->max = ARRAY_SIZE(tlb->local); + tlb->pages = tlb->local; tlb->nr = 0; - - return tlb; + __tlb_alloc_page(tlb); } static inline void @@ -127,7 +140,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) /* keep the page table cache within bounds */ check_pgt_cache(); - put_cpu_var(mmu_gathers); + if (tlb->pages != tlb->local) + free_pages((unsigned long)tlb->pages, 0); } /* @@ -162,15 +176,22 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) tlb_flush(tlb); } -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { if (tlb_fast_mode(tlb)) { free_page_and_swap_cache(page); - } else { - tlb->pages[tlb->nr++] = page; - if (tlb->nr >= FREE_PTE_NR) - 
tlb_flush_mmu(tlb); + return 1; /* avoid calling tlb_flush_mmu */ } + + tlb->pages[tlb->nr++] = page; + VM_BUG_ON(tlb->nr > tlb->max); + return tlb->max - tlb->nr; +} + +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + if (!__tlb_remove_page(tlb, page)) + tlb_flush_mmu(tlb); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/trunk/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h b/trunk/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h new file mode 100644 index 000000000000..292d55ed2113 --- /dev/null +++ b/trunk/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h @@ -0,0 +1,78 @@ +/* + * PTP 1588 clock using the IXP46X + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _IXP46X_TS_H_ +#define _IXP46X_TS_H_ + +#define DEFAULT_ADDEND 0xF0000029 +#define TICKS_NS_SHIFT 4 + +struct ixp46x_channel_ctl { + u32 ch_control; /* 0x40 Time Synchronization Channel Control */ + u32 ch_event; /* 0x44 Time Synchronization Channel Event */ + u32 tx_snap_lo; /* 0x48 Transmit Snapshot Low Register */ + u32 tx_snap_hi; /* 0x4C Transmit Snapshot High Register */ + u32 rx_snap_lo; /* 0x50 Receive Snapshot Low Register */ + u32 rx_snap_hi; /* 0x54 Receive Snapshot High Register */ + u32 src_uuid_lo; /* 0x58 Source UUID0 Low Register */ + u32 src_uuid_hi; /* 0x5C Sequence Identifier/Source UUID0 High */ +}; + +struct ixp46x_ts_regs { + u32 control; /* 0x00 Time Sync Control Register */ + u32 event; /* 0x04 Time Sync Event Register */ + u32 addend; /* 0x08 Time Sync Addend Register */ + u32 accum; /* 0x0C Time Sync Accumulator Register */ + u32 test; /* 0x10 Time Sync Test Register */ + u32 unused; /* 0x14 */ + u32 rsystime_lo; /* 0x18 RawSystemTime_Low Register */ + u32 rsystime_hi; /* 0x1C RawSystemTime_High Register */ + u32 systime_lo; /* 0x20 SystemTime_Low Register */ + u32 systime_hi; /* 0x24 SystemTime_High Register */ + u32 trgt_lo; /* 0x28 TargetTime_Low Register */ + u32 trgt_hi; /* 0x2C TargetTime_High Register */ + u32 asms_lo; /* 0x30 Auxiliary Slave Mode Snapshot Low */ + u32 asms_hi; /* 0x34 Auxiliary Slave Mode Snapshot High */ + u32 amms_lo; /* 0x38 Auxiliary Master Mode Snapshot Low */ + u32 amms_hi; /* 0x3C Auxiliary Master Mode Snapshot High */ + + struct ixp46x_channel_ctl channel[3]; +}; + +/* 0x00 Time Sync Control Register Bits */ +#define TSCR_AMM (1<<3) +#define TSCR_ASM (1<<2) +#define TSCR_TTM (1<<1) +#define TSCR_RST (1<<0) + +/* 0x04 Time Sync Event Register Bits */ +#define TSER_SNM (1<<3) +#define TSER_SNS (1<<2) +#define TTIPEND (1<<1) + +/* 0x40 Time Synchronization Channel Control Register Bits */ +#define MASTER_MODE (1<<0) +#define TIMESTAMP_ALL (1<<1) + +/* 0x44 Time Synchronization Channel Event 
Register Bits */ +#define TX_SNAPSHOT_LOCKED (1<<0) +#define RX_SNAPSHOT_LOCKED (1<<1) + +#endif diff --git a/trunk/arch/arm/mach-omap2/board-3430sdp.c b/trunk/arch/arm/mach-omap2/board-3430sdp.c index 9afd087cc29c..23244cd0a5b6 100644 --- a/trunk/arch/arm/mach-omap2/board-3430sdp.c +++ b/trunk/arch/arm/mach-omap2/board-3430sdp.c @@ -37,8 +37,8 @@ #include #include #include -#include -#include +#include