diff --git a/[refs] b/[refs] index 315349a027f2..d11dc2402c50 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: a98889f3d8882995b5aa2255b931cf0202325cc0 +refs/heads/master: 2c4aabcca847ac4c92aa5e960c3f6053e1051b62 diff --git a/trunk/.gitignore b/trunk/.gitignore index 3016ed30526d..090b293b8779 100644 --- a/trunk/.gitignore +++ b/trunk/.gitignore @@ -41,6 +41,7 @@ include/linux/autoconf.h include/linux/compile.h include/linux/version.h include/linux/utsrelease.h +include/linux/bounds.h # stgit generated dirs patches-* diff --git a/trunk/.mailmap b/trunk/.mailmap index ebf9bf84da0a..726084286d33 100644 --- a/trunk/.mailmap +++ b/trunk/.mailmap @@ -88,6 +88,7 @@ Rudolf Marek Rui Saraiva Sachin P Sant Sam Ravnborg +S.Çağlar Onur Simon Kelley Stéphane Witzmann Stephen Hemminger diff --git a/trunk/Documentation/ABI/testing/sysfs-class-bdi b/trunk/Documentation/ABI/testing/sysfs-class-bdi new file mode 100644 index 000000000000..5ac1e01bbd48 --- /dev/null +++ b/trunk/Documentation/ABI/testing/sysfs-class-bdi @@ -0,0 +1,46 @@ +What: /sys/class/bdi// +Date: January 2008 +Contact: Peter Zijlstra +Description: + +Provide a place in sysfs for the backing_dev_info object. This allows +setting and retrieving various BDI specific variables. + +The identifier can be either of the following: + +MAJOR:MINOR + + Device number for block devices, or value of st_dev on + non-block filesystems which provide their own BDI, such as NFS + and FUSE. + +default + + The default backing dev, used for non-block device backed + filesystems which do not provide their own BDI. + +Files under /sys/class/bdi// +--------------------------------- + +read_ahead_kb (read-write) + + Size of the read-ahead window in kilobytes + +min_ratio (read-write) + + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_ratio' parameter allows assigning a minimum + percentage of the write-back cache to a particular device. + For example, this is useful for providing a minimum QoS. + +max_ratio (read-write) + + Allows limiting a particular device to use not more than the + given percentage of the write-back cache. This is useful in + situations where we want to avoid one device taking all or + most of the write-back cache. For example in case of an NFS + mount that is prone to get stuck, or a FUSE mount which cannot + be trusted to play fair. diff --git a/trunk/Documentation/DMA-API.txt b/trunk/Documentation/DMA-API.txt index b939ebb62871..80d150458c80 100644 --- a/trunk/Documentation/DMA-API.txt +++ b/trunk/Documentation/DMA-API.txt @@ -145,7 +145,7 @@ Part Ic - DMA addressing limitations int dma_supported(struct device *dev, u64 mask) int -pci_dma_supported(struct device *dev, u64 mask) +pci_dma_supported(struct pci_dev *hwdev, u64 mask) Checks to see if the device can support DMA to the memory described by mask. @@ -189,7 +189,7 @@ dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) dma_addr_t -pci_map_single(struct device *dev, void *cpu_addr, size_t size, +pci_map_single(struct pci_dev *hwdev, void *cpu_addr, size_t size, int direction) Maps a piece of processor virtual memory so it can be accessed by the @@ -395,6 +395,71 @@ Notes: You must do this: See also dma_map_single(). +dma_addr_t +dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) + +void +dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) + +int +dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) + +void +dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) + +The four functions above are just like the counterpart functions +without the _attrs suffixes, except that they pass an optional +struct dma_attrs*. + +struct dma_attrs encapsulates a set of "dma attributes". For the +definition of struct dma_attrs see linux/dma-attrs.h. + +The interpretation of dma attributes is architecture-specific, and +each attribute should be documented in Documentation/DMA-attributes.txt. + +If struct dma_attrs* is NULL, the semantics of each of these +functions is identical to those of the corresponding function +without the _attrs suffix. As a result dma_map_single_attrs() +can generally replace dma_map_single(), etc. + +As an example of the use of the *_attrs functions, here's how +you could pass an attribute DMA_ATTR_FOO when mapping memory +for DMA: + +#include +/* DMA_ATTR_FOO should be defined in linux/dma-attrs.h and + * documented in Documentation/DMA-attributes.txt */ +... + + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_FOO, &attrs); + .... + n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, &attr); + .... + +Architectures that care about DMA_ATTR_FOO would check for its +presence in their implementations of the mapping and unmapping +routines, e.g.: + +void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + .... + int foo = dma_get_attr(DMA_ATTR_FOO, attrs); + .... + if (foo) + /* twizzle the frobnozzle */ + .... + Part II - Advanced dma_ usage ----------------------------- diff --git a/trunk/Documentation/DMA-attributes.txt b/trunk/Documentation/DMA-attributes.txt new file mode 100644 index 000000000000..6d772f84b477 --- /dev/null +++ b/trunk/Documentation/DMA-attributes.txt @@ -0,0 +1,24 @@ + DMA attributes + ============== + +This document describes the semantics of the DMA attributes that are +defined in linux/dma-attrs.h. + +DMA_ATTR_WRITE_BARRIER +---------------------- + +DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA +to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces +all pending DMA writes to complete, and thus provides a mechanism to +strictly order DMA from a device across all intervening busses and +bridges. This barrier is not specific to a particular type of +interconnect, it applies to the system as a whole, and so its +implementation must account for the idiosyncracies of the system all +the way from the DMA device to memory. + +As an example of a situation where DMA_ATTR_WRITE_BARRIER would be +useful, suppose that a device does a DMA write to indicate that data is +ready and available in memory. The DMA of the "completion indication" +could race with data DMA. Mapping the memory used for completion +indications with DMA_ATTR_WRITE_BARRIER would prevent the race. + diff --git a/trunk/Documentation/DMA-mapping.txt b/trunk/Documentation/DMA-mapping.txt index d84f89dbf921..b463ecd0c7ce 100644 --- a/trunk/Documentation/DMA-mapping.txt +++ b/trunk/Documentation/DMA-mapping.txt @@ -315,11 +315,11 @@ you should do: dma_addr_t dma_handle; - cpu_addr = pci_alloc_consistent(dev, size, &dma_handle); + cpu_addr = pci_alloc_consistent(pdev, size, &dma_handle); -where dev is a struct pci_dev *. You should pass NULL for PCI like buses -where devices don't have struct pci_dev (like ISA, EISA). This may be -called in interrupt context. +where pdev is a struct pci_dev *. This may be called in interrupt context. +You should use dma_alloc_coherent (see DMA-API.txt) for buses +where devices don't have struct pci_dev (like ISA, EISA). This argument is needed because the DMA translations may be bus specific (and often is private to the bus which the device is attached @@ -332,7 +332,7 @@ __get_free_pages (but takes size instead of a page order). If your driver needs regions sized smaller than a page, you may prefer using the pci_pool interface, described below. -The consistent DMA mapping interfaces, for non-NULL dev, will by +The consistent DMA mapping interfaces, for non-NULL pdev, will by default return a DMA address which is SAC (Single Address Cycle) addressable. Even if the device indicates (via PCI dma mask) that it may address the upper 32-bits and thus perform DAC cycles, consistent @@ -354,9 +354,9 @@ buffer you receive will not cross a 64K boundary. To unmap and free such a DMA region, you call: - pci_free_consistent(dev, size, cpu_addr, dma_handle); + pci_free_consistent(pdev, size, cpu_addr, dma_handle); -where dev, size are the same as in the above call and cpu_addr and +where pdev, size are the same as in the above call and cpu_addr and dma_handle are the values pci_alloc_consistent returned to you. This function may not be called in interrupt context. @@ -371,9 +371,9 @@ Create a pci_pool like this: struct pci_pool *pool; - pool = pci_pool_create(name, dev, size, align, alloc); + pool = pci_pool_create(name, pdev, size, align, alloc); -The "name" is for diagnostics (like a kmem_cache name); dev and size +The "name" is for diagnostics (like a kmem_cache name); pdev and size are as above. The device's hardware alignment requirement for this type of data is "align" (which is expressed in bytes, and must be a power of two). If your device has no boundary crossing restrictions, @@ -472,11 +472,11 @@ To map a single region, you do: void *addr = buffer->ptr; size_t size = buffer->len; - dma_handle = pci_map_single(dev, addr, size, direction); + dma_handle = pci_map_single(pdev, addr, size, direction); and to unmap it: - pci_unmap_single(dev, dma_handle, size, direction); + pci_unmap_single(pdev, dma_handle, size, direction); You should call pci_unmap_single when the DMA activity is finished, e.g. from the interrupt which told you that the DMA transfer is done. @@ -493,17 +493,17 @@ Specifically: unsigned long offset = buffer->offset; size_t size = buffer->len; - dma_handle = pci_map_page(dev, page, offset, size, direction); + dma_handle = pci_map_page(pdev, page, offset, size, direction); ... - pci_unmap_page(dev, dma_handle, size, direction); + pci_unmap_page(pdev, dma_handle, size, direction); Here, "offset" means byte offset within the given page. With scatterlists, you map a region gathered from several regions by: - int i, count = pci_map_sg(dev, sglist, nents, direction); + int i, count = pci_map_sg(pdev, sglist, nents, direction); struct scatterlist *sg; for_each_sg(sglist, sg, count, i) { @@ -527,7 +527,7 @@ accessed sg->address and sg->length as shown above. To unmap a scatterlist, just call: - pci_unmap_sg(dev, sglist, nents, direction); + pci_unmap_sg(pdev, sglist, nents, direction); Again, make sure DMA activity has already finished. @@ -550,11 +550,11 @@ correct copy of the DMA buffer. So, firstly, just map it with pci_map_{single,sg}, and after each DMA transfer call either: - pci_dma_sync_single_for_cpu(dev, dma_handle, size, direction); + pci_dma_sync_single_for_cpu(pdev, dma_handle, size, direction); or: - pci_dma_sync_sg_for_cpu(dev, sglist, nents, direction); + pci_dma_sync_sg_for_cpu(pdev, sglist, nents, direction); as appropriate. @@ -562,7 +562,7 @@ Then, if you wish to let the device get at the DMA area again, finish accessing the data with the cpu, and then before actually giving the buffer to the hardware call either: - pci_dma_sync_single_for_device(dev, dma_handle, size, direction); + pci_dma_sync_single_for_device(pdev, dma_handle, size, direction); or: @@ -739,7 +739,7 @@ failure can be determined by: dma_addr_t dma_handle; - dma_handle = pci_map_single(dev, addr, size, direction); + dma_handle = pci_map_single(pdev, addr, size, direction); if (pci_dma_mapping_error(dma_handle)) { /* * reduce current DMA mapping usage, diff --git a/trunk/Documentation/DocBook/Makefile b/trunk/Documentation/DocBook/Makefile index 83966e94cc32..0eb0d027eb32 100644 --- a/trunk/Documentation/DocBook/Makefile +++ b/trunk/Documentation/DocBook/Makefile @@ -12,7 +12,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ - mac80211.xml + mac80211.xml debugobjects.xml ### # The build process is as follows (targets): diff --git a/trunk/Documentation/DocBook/debugobjects.tmpl b/trunk/Documentation/DocBook/debugobjects.tmpl new file mode 100644 index 000000000000..7f5f218015fe --- /dev/null +++ b/trunk/Documentation/DocBook/debugobjects.tmpl @@ -0,0 +1,391 @@ + + + + + + Debug objects life time + + + + Thomas + Gleixner + +
+ tglx@linutronix.de +
+
+
+
+ + + 2008 + Thomas Gleixner + + + + + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License version 2 as published by the Free Software Foundation. + + + + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + + + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + + + + For more details see the file COPYING in the source + distribution of Linux. + + +
+ + + + + Introduction + + debugobjects is a generic infrastructure to track the life time + of kernel objects and validate the operations on those. + + + debugobjects is useful to check for the following error patterns: + + Activation of uninitialized objects + Initialization of active objects + Usage of freed/destroyed objects + + + + debugobjects is not changing the data structure of the real + object so it can be compiled in with a minimal runtime impact + and enabled on demand with a kernel command line option. + + + + + Howto use debugobjects + + A kernel subsystem needs to provide a data structure which + describes the object type and add calls into the debug code at + appropriate places. The data structure to describe the object + type needs at minimum the name of the object type. Optional + functions can and should be provided to fixup detected problems + so the kernel can continue to work and the debug information can + be retrieved from a live system instead of hard core debugging + with serial consoles and stack trace transcripts from the + monitor. + + + The debug calls provided by debugobjects are: + + debug_object_init + debug_object_init_on_stack + debug_object_activate + debug_object_deactivate + debug_object_destroy + debug_object_free + + Each of these functions takes the address of the real object and + a pointer to the object type specific debug description + structure. + + + Each detected error is reported in the statistics and a limited + number of errors are printk'ed including a full stack trace. + + + The statistics are available via debugfs/debug_objects/stats. + They provide information about the number of warnings and the + number of successful fixups along with information about the + usage of the internal tracking objects and the state of the + internal tracking objects pool. + + + + Debug functions + + Debug object function reference +!Elib/debugobjects.c + + + debug_object_init + + This function is called whenever the initialization function + of a real object is called. + + + When the real object is already tracked by debugobjects it is + checked, whether the object can be initialized. Initializing + is not allowed for active and destroyed objects. When + debugobjects detects an error, then it calls the fixup_init + function of the object type description structure if provided + by the caller. The fixup function can correct the problem + before the real initialization of the object happens. E.g. it + can deactivate an active object in order to prevent damage to + the subsystem. + + + When the real object is not yet tracked by debugobjects, + debugobjects allocates a tracker object for the real object + and sets the tracker object state to ODEBUG_STATE_INIT. It + verifies that the object is not on the callers stack. If it is + on the callers stack then a limited number of warnings + including a full stack trace is printk'ed. The calling code + must use debug_object_init_on_stack() and remove the object + before leaving the function which allocated it. See next + section. + + + + + debug_object_init_on_stack + + This function is called whenever the initialization function + of a real object which resides on the stack is called. + + + When the real object is already tracked by debugobjects it is + checked, whether the object can be initialized. Initializing + is not allowed for active and destroyed objects. When + debugobjects detects an error, then it calls the fixup_init + function of the object type description structure if provided + by the caller. The fixup function can correct the problem + before the real initialization of the object happens. E.g. it + can deactivate an active object in order to prevent damage to + the subsystem. + + + When the real object is not yet tracked by debugobjects + debugobjects allocates a tracker object for the real object + and sets the tracker object state to ODEBUG_STATE_INIT. It + verifies that the object is on the callers stack. + + + An object which is on the stack must be removed from the + tracker by calling debug_object_free() before the function + which allocates the object returns. Otherwise we keep track of + stale objects. + + + + + debug_object_activate + + This function is called whenever the activation function of a + real object is called. + + + When the real object is already tracked by debugobjects it is + checked, whether the object can be activated. Activating is + not allowed for active and destroyed objects. When + debugobjects detects an error, then it calls the + fixup_activate function of the object type description + structure if provided by the caller. The fixup function can + correct the problem before the real activation of the object + happens. E.g. it can deactivate an active object in order to + prevent damage to the subsystem. + + + When the real object is not yet tracked by debugobjects then + the fixup_activate function is called if available. This is + necessary to allow the legitimate activation of statically + allocated and initialized objects. The fixup function checks + whether the object is valid and calls the debug_objects_init() + function to initialize the tracking of this object. + + + When the activation is legitimate, then the state of the + associated tracker object is set to ODEBUG_STATE_ACTIVE. + + + + + debug_object_deactivate + + This function is called whenever the deactivation function of + a real object is called. + + + When the real object is tracked by debugobjects it is checked, + whether the object can be deactivated. Deactivating is not + allowed for untracked or destroyed objects. + + + When the deactivation is legitimate, then the state of the + associated tracker object is set to ODEBUG_STATE_INACTIVE. + + + + + debug_object_destroy + + This function is called to mark an object destroyed. This is + useful to prevent the usage of invalid objects, which are + still available in memory: either statically allocated objects + or objects which are freed later. + + + When the real object is tracked by debugobjects it is checked, + whether the object can be destroyed. Destruction is not + allowed for active and destroyed objects. When debugobjects + detects an error, then it calls the fixup_destroy function of + the object type description structure if provided by the + caller. The fixup function can correct the problem before the + real destruction of the object happens. E.g. it can deactivate + an active object in order to prevent damage to the subsystem. + + + When the destruction is legitimate, then the state of the + associated tracker object is set to ODEBUG_STATE_DESTROYED. + + + + + debug_object_free + + This function is called before an object is freed. + + + When the real object is tracked by debugobjects it is checked, + whether the object can be freed. Free is not allowed for + active objects. When debugobjects detects an error, then it + calls the fixup_free function of the object type description + structure if provided by the caller. The fixup function can + correct the problem before the real free of the object + happens. E.g. it can deactivate an active object in order to + prevent damage to the subsystem. + + + Note that debug_object_free removes the object from the + tracker. Later usage of the object is detected by the other + debug checks. + + + + + Fixup functions + + Debug object type description structure +!Iinclude/linux/debugobjects.h + + + fixup_init + + This function is called from the debug code whenever a problem + in debug_object_init is detected. The function takes the + address of the object and the state which is currently + recorded in the tracker. + + + Called from debug_object_init when the object state is: + + ODEBUG_STATE_ACTIVE + + + + The function returns 1 when the fixup was successful, + otherwise 0. The return value is used to update the + statistics. + + + Note, that the function needs to call the debug_object_init() + function again, after the damage has been repaired in order to + keep the state consistent. + + + + + fixup_activate + + This function is called from the debug code whenever a problem + in debug_object_activate is detected. + + + Called from debug_object_activate when the object state is: + + ODEBUG_STATE_NOTAVAILABLE + ODEBUG_STATE_ACTIVE + + + + The function returns 1 when the fixup was successful, + otherwise 0. The return value is used to update the + statistics. + + + Note that the function needs to call the debug_object_activate() + function again after the damage has been repaired in order to + keep the state consistent. + + + The activation of statically initialized objects is a special + case. When debug_object_activate() has no tracked object for + this object address then fixup_activate() is called with + object state ODEBUG_STATE_NOTAVAILABLE. The fixup function + needs to check whether this is a legitimate case of a + statically initialized object or not. In case it is it calls + debug_object_init() and debug_object_activate() to make the + object known to the tracker and marked active. In this case + the function should return 0 because this is not a real fixup. + + + + + fixup_destroy + + This function is called from the debug code whenever a problem + in debug_object_destroy is detected. + + + Called from debug_object_destroy when the object state is: + + ODEBUG_STATE_ACTIVE + + + + The function returns 1 when the fixup was successful, + otherwise 0. The return value is used to update the + statistics. + + + + fixup_free + + This function is called from the debug code whenever a problem + in debug_object_free is detected. Further it can be called + from the debug checks in kfree/vfree, when an active object is + detected from the debug_check_no_obj_freed() sanity checks. + + + Called from debug_object_free() or debug_check_no_obj_freed() + when the object state is: + + ODEBUG_STATE_ACTIVE + + + + The function returns 1 when the fixup was successful, + otherwise 0. The return value is used to update the + statistics. + + + + + Known Bugs And Assumptions + + None (knock on wood). + + +
diff --git a/trunk/Documentation/DocBook/kernel-api.tmpl b/trunk/Documentation/DocBook/kernel-api.tmpl index 488dd4a4945b..b7b1482f6e04 100644 --- a/trunk/Documentation/DocBook/kernel-api.tmpl +++ b/trunk/Documentation/DocBook/kernel-api.tmpl @@ -119,7 +119,7 @@ X!Ilib/string.c !Elib/string.c Bit Operations -!Iinclude/asm-x86/bitops_32.h +!Iinclude/asm-x86/bitops.h @@ -645,4 +645,58 @@ X!Idrivers/video/console/fonts.c !Edrivers/i2c/i2c-core.c + + Clock Framework + + + The clock framework defines programming interfaces to support + software management of the system clock tree. + This framework is widely used with System-On-Chip (SOC) platforms + to support power management and various devices which may need + custom clock rates. + Note that these "clocks" don't relate to timekeeping or real + time clocks (RTCs), each of which have separate frameworks. + These struct clk instances may be used + to manage for example a 96 MHz signal that is used to shift bits + into and out of peripherals or busses, or otherwise trigger + synchronous state machine transitions in system hardware. + + + + Power management is supported by explicit software clock gating: + unused clocks are disabled, so the system doesn't waste power + changing the state of transistors that aren't in active use. + On some systems this may be backed by hardware clock gating, + where clocks are gated without being disabled in software. + Sections of chips that are powered but not clocked may be able + to retain their last state. + This low power state is often called a retention + mode. + This mode still incurs leakage currents, especially with finer + circuit geometries, but for CMOS circuits power is mostly used + by clocked state changes. + + + + Power-aware drivers only enable their clocks when the device + they manage is in active use. Also, system sleep states often + differ according to which clock domains are active: while a + "standby" state may allow wakeup from several active domains, a + "mem" (suspend-to-RAM) state may require a more wholesale shutdown + of clocks derived from higher speed PLLs and oscillators, limiting + the number of possible wakeup event sources. A driver's suspend + method may need to be aware of system-specific clock constraints + on the target sleep state. + + + + Some platforms support programmable clock generators. These + can be used by external chips of various kinds, such as other + CPUs, multimedia codecs, and devices with strict requirements + for interface clocking. + + +!Iinclude/linux/clk.h + + diff --git a/trunk/Documentation/DocBook/rapidio.tmpl b/trunk/Documentation/DocBook/rapidio.tmpl index b9e143e28c64..54eb26b57372 100644 --- a/trunk/Documentation/DocBook/rapidio.tmpl +++ b/trunk/Documentation/DocBook/rapidio.tmpl @@ -133,7 +133,6 @@ !Idrivers/rapidio/rio-sysfs.c PPC32 support -!Iarch/powerpc/kernel/rio.c !Earch/powerpc/sysdev/fsl_rio.c !Iarch/powerpc/sysdev/fsl_rio.c diff --git a/trunk/Documentation/braille-console.txt b/trunk/Documentation/braille-console.txt new file mode 100644 index 000000000000..000b0fbdc105 --- /dev/null +++ b/trunk/Documentation/braille-console.txt @@ -0,0 +1,34 @@ + Linux Braille Console + +To get early boot messages on a braille device (before userspace screen +readers can start), you first need to compile the support for the usual serial +console (see serial-console.txt), and for braille device (in Device Drivers - +Accessibility). + +Then you need to specify a console=brl, option on the kernel command line, the +format is: + + console=brl,serial_options... + +where serial_options... are the same as described in serial-console.txt + +So for instance you can use console=brl,ttyS0 if the braille device is connected +to the first serial port, and console=brl,ttyS0,115200 to override the baud rate +to 115200, etc. + +By default, the braille device will just show the last kernel message (console +mode). To review previous messages, press the Insert key to switch to the VT +review mode. In review mode, the arrow keys permit to browse in the VT content, +page up/down keys go at the top/bottom of the screen, and the home key goes back +to the cursor, hence providing very basic screen reviewing facility. + +Sound feedback can be obtained by adding the braille_console.sound=1 kernel +parameter. + +For simplicity, only one braille console can be enabled, other uses of +console=brl,... will be discarded. Also note that it does not interfere with +the console selection mecanism described in serial-console.txt + +For now, only the VisioBraille device is supported. + +Samuel Thibault diff --git a/trunk/Documentation/cgroups.txt b/trunk/Documentation/cgroups.txt index 31d12e21ff8a..c298a6690e0d 100644 --- a/trunk/Documentation/cgroups.txt +++ b/trunk/Documentation/cgroups.txt @@ -500,8 +500,7 @@ post-attachment activity that requires memory allocations or blocking. void fork(struct cgroup_subsy *ss, struct task_struct *task) -Called when a task is forked into a cgroup. Also called during -registration for all existing tasks. +Called when a task is forked into a cgroup. void exit(struct cgroup_subsys *ss, struct task_struct *task) diff --git a/trunk/Documentation/controllers/devices.txt b/trunk/Documentation/controllers/devices.txt new file mode 100644 index 000000000000..4dcea42432c2 --- /dev/null +++ b/trunk/Documentation/controllers/devices.txt @@ -0,0 +1,48 @@ +Device Whitelist Controller + +1. Description: + +Implement a cgroup to track and enforce open and mknod restrictions +on device files. A device cgroup associates a device access +whitelist with each cgroup. A whitelist entry has 4 fields. +'type' is a (all), c (char), or b (block). 'all' means it applies +to all types and all major and minor numbers. Major and minor are +either an integer or * for all. Access is a composition of r +(read), w (write), and m (mknod). + +The root device cgroup starts with rwm to 'all'. A child device +cgroup gets a copy of the parent. Administrators can then remove +devices from the whitelist or add new entries. A child cgroup can +never receive a device access which is denied its parent. However +when a device access is removed from a parent it will not also be +removed from the child(ren). + +2. User Interface + +An entry is added using devices.allow, and removed using +devices.deny. For instance + + echo 'c 1:3 mr' > /cgroups/1/devices.allow + +allows cgroup 1 to read and mknod the device usually known as +/dev/null. Doing + + echo a > /cgroups/1/devices.deny + +will remove the default 'a *:* mrw' entry. + +3. Security + +Any task can move itself between cgroups. This clearly won't +suffice, but we can decide the best way to adequately restrict +movement as people get some experience with this. We may just want +to require CAP_SYS_ADMIN, which at least is a separate bit from +CAP_MKNOD. We may want to just refuse moving to a cgroup which +isn't a descendent of the current one. Or we may want to use +CAP_MAC_ADMIN, since we really are trying to lock down root. + +CAP_SYS_ADMIN is needed to modify the whitelist or move another +task to a new cgroup. (Again we'll probably want to change that). + +A cgroup may not be granted more permissions than the cgroup's +parent has. diff --git a/trunk/Documentation/controllers/resource_counter.txt b/trunk/Documentation/controllers/resource_counter.txt new file mode 100644 index 000000000000..f196ac1d7d25 --- /dev/null +++ b/trunk/Documentation/controllers/resource_counter.txt @@ -0,0 +1,181 @@ + + The Resource Counter + +The resource counter, declared at include/linux/res_counter.h, +is supposed to facilitate the resource management by controllers +by providing common stuff for accounting. + +This "stuff" includes the res_counter structure and routines +to work with it. + + + +1. Crucial parts of the res_counter structure + + a. unsigned long long usage + + The usage value shows the amount of a resource that is consumed + by a group at a given time. The units of measurement should be + determined by the controller that uses this counter. E.g. it can + be bytes, items or any other unit the controller operates on. + + b. unsigned long long max_usage + + The maximal value of the usage over time. + + This value is useful when gathering statistical information about + the particular group, as it shows the actual resource requirements + for a particular group, not just some usage snapshot. + + c. unsigned long long limit + + The maximal allowed amount of resource to consume by the group. In + case the group requests for more resources, so that the usage value + would exceed the limit, the resource allocation is rejected (see + the next section). + + d. unsigned long long failcnt + + The failcnt stands for "failures counter". This is the number of + resource allocation attempts that failed. + + c. spinlock_t lock + + Protects changes of the above values. + + + +2. Basic accounting routines + + a. void res_counter_init(struct res_counter *rc) + + Initializes the resource counter. As usual, should be the first + routine called for a new counter. + + b. int res_counter_charge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is about to be allocated it has to be accounted + with the appropriate resource counter (controller should determine + which one to use on its own). This operation is called "charging". + + This is not very important which operation - resource allocation + or charging - is performed first, but + * if the allocation is performed first, this may create a + temporary resource over-usage by the time resource counter is + charged; + * if the charging is performed first, then it should be uncharged + on error path (if the one is called). + + c. void res_counter_uncharge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is released (freed) it should be de-accounted + from the resource counter it was accounted to. This is called + "uncharging". + + The _locked routines imply that the res_counter->lock is taken. + + + 2.1 Other accounting routines + + There are more routines that may help you with common needs, like + checking whether the limit is reached or resetting the max_usage + value. They are all declared in include/linux/res_counter.h. + + + +3. Analyzing the resource counter registrations + + a. If the failcnt value constantly grows, this means that the counter's + limit is too tight. Either the group is misbehaving and consumes too + many resources, or the configuration is not suitable for the group + and the limit should be increased. + + b. The max_usage value can be used to quickly tune the group. One may + set the limits to maximal values and either load the container with + a common pattern or leave one for a while. After this the max_usage + value shows the amount of memory the container would require during + its common activity. + + Setting the limit a bit above this value gives a pretty good + configuration that works in most of the cases. + + c. If the max_usage is much less than the limit, but the failcnt value + is growing, then the group tries to allocate a big chunk of resource + at once. + + d. If the max_usage is much less than the limit, but the failcnt value + is 0, then this group is given too high limit, that it does not + require. It is better to lower the limit a bit leaving more resource + for other groups. + + + +4. Communication with the control groups subsystem (cgroups) + +All the resource controllers that are using cgroups and resource counters +should provide files (in the cgroup filesystem) to work with the resource +counter fields. They are recommended to adhere to the following rules: + + a. File names + + Field name File name + --------------------------------------------------- + usage usage_in_ + max_usage max_usage_in_ + limit limit_in_ + failcnt failcnt + lock no file :) + + b. Reading from file should show the corresponding field value in the + appropriate format. + + c. Writing to file + + Field Expected behavior + ---------------------------------- + usage prohibited + max_usage reset to usage + limit set the limit + failcnt reset to zero + + + +5. Usage example + + a. Declare a task group (take a look at cgroups subsystem for this) and + fold a res_counter into it + + struct my_group { + struct res_counter res; + + + } + + b. Put hooks in resource allocation/release paths + + int alloc_something(...) + { + if (res_counter_charge(res_counter_ptr, amount) < 0) + return -ENOMEM; + + + } + + void release_something(...) + { + res_counter_uncharge(res_counter_ptr, amount); + + + } + + In order to keep the usage value self-consistent, both the + "res_counter_ptr" and the "amount" in release_something() should be + the same as they were in the alloc_something() when the releasing + resource was allocated. + + c. Provide the way to read res_counter values and set them (the cgroups + still can help with it). + + c. Compile and run :) diff --git a/trunk/Documentation/cpu-freq/user-guide.txt b/trunk/Documentation/cpu-freq/user-guide.txt index af3b925ece08..6c442d8426b5 100644 --- a/trunk/Documentation/cpu-freq/user-guide.txt +++ b/trunk/Documentation/cpu-freq/user-guide.txt @@ -154,6 +154,11 @@ scaling_governor, and by "echoing" the name of another that some governors won't load - they only work on some specific architectures or processors. + +cpuinfo_cur_freq : Current speed of the CPU, in KHz. + +scaling_available_frequencies : List of available frequencies, in KHz. + scaling_min_freq and scaling_max_freq show the current "policy limits" (in kHz). By echoing new values into these @@ -162,6 +167,15 @@ scaling_max_freq show the current "policy limits" (in first set scaling_max_freq, then scaling_min_freq. +affected_cpus : List of CPUs that require software coordination + of frequency. + +related_cpus : List of CPUs that need some sort of frequency + coordination, whether software or hardware. + +scaling_driver : Hardware driver for cpufreq. + +scaling_cur_freq : Current frequency of the CPU, in KHz. If you have selected the "userspace" governor which allows you to set the CPU operating frequency to a specific value, you can read out diff --git a/trunk/Documentation/cpusets.txt b/trunk/Documentation/cpusets.txt index aa854b9b18cd..fb7b361e6eea 100644 --- a/trunk/Documentation/cpusets.txt +++ b/trunk/Documentation/cpusets.txt @@ -171,6 +171,7 @@ files describing that cpuset: - memory_migrate flag: if set, move pages to cpusets nodes - cpu_exclusive flag: is cpu placement exclusive? - mem_exclusive flag: is memory placement exclusive? + - mem_hardwall flag: is memory allocation hardwalled - memory_pressure: measure of how much paging pressure in cpuset In addition, the root cpuset only has the following file: @@ -222,17 +223,18 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct ancestor or descendent, may share any of the same CPUs or Memory Nodes. -A cpuset that is mem_exclusive restricts kernel allocations for -page, buffer and other data commonly shared by the kernel across -multiple users. All cpusets, whether mem_exclusive or not, restrict -allocations of memory for user space. This enables configuring a -system so that several independent jobs can share common kernel data, -such as file system pages, while isolating each jobs user allocation in -its own cpuset. To do this, construct a large mem_exclusive cpuset to -hold all the jobs, and construct child, non-mem_exclusive cpusets for -each individual job. Only a small amount of typical kernel memory, -such as requests from interrupt handlers, is allowed to be taken -outside even a mem_exclusive cpuset. +A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", +i.e. it restricts kernel allocations for page, buffer and other data +commonly shared by the kernel across multiple users. All cpusets, +whether hardwalled or not, restrict allocations of memory for user +space. This enables configuring a system so that several independent +jobs can share common kernel data, such as file system pages, while +isolating each job's user allocation in its own cpuset. To do this, +construct a large mem_exclusive cpuset to hold all the jobs, and +construct child, non-mem_exclusive cpusets for each individual job. +Only a small amount of typical kernel memory, such as requests from +interrupt handlers, is allowed to be taken outside even a +mem_exclusive cpuset. 1.5 What is memory_pressure ? @@ -707,7 +709,7 @@ Now you want to do something with this cpuset. In this directory you can find several files: # ls -cpus cpu_exclusive mems mem_exclusive tasks +cpus cpu_exclusive mems mem_exclusive mem_hardwall tasks Reading them will give you information about the state of this cpuset: the CPUs and Memory Nodes it can use, the processes that are using diff --git a/trunk/Documentation/dontdiff b/trunk/Documentation/dontdiff index 354aec047c0e..881e6dd03aea 100644 --- a/trunk/Documentation/dontdiff +++ b/trunk/Documentation/dontdiff @@ -141,6 +141,7 @@ mkprep mktables mktree modpost +modules.order modversions.h* offset.h offsets.h @@ -171,6 +172,7 @@ sm_tbl* split-include tags tftpboot.img +timeconst.h times.h* tkparse trix_boot.h diff --git a/trunk/Documentation/fb/gxfb.txt b/trunk/Documentation/fb/gxfb.txt new file mode 100644 index 000000000000..2f640903bbb2 --- /dev/null +++ b/trunk/Documentation/fb/gxfb.txt @@ -0,0 +1,52 @@ +[This file is cloned from VesaFB/aty128fb] + +What is gxfb? +================= + +This is a graphics framebuffer driver for AMD Geode GX2 based processors. + +Advantages: + + * No need to use AMD's VSA code (or other VESA emulation layer) in the + BIOS. + * It provides a nice large console (128 cols + 48 lines with 1024x768) + without using tiny, unreadable fonts. + * You can run XF68_FBDev on top of /dev/fb0 + * Most important: boot logo :-) + +Disadvantages: + + * graphic mode is slower than text mode... + + +How to use it? +============== + +Switching modes is done using gxfb.mode_option=... boot +parameter or using `fbset' program. + +See Documentation/fb/modedb.txt for more information on modedb +resolutions. + + +X11 +=== + +XF68_FBDev should generally work fine, but it is non-accelerated. + + +Configuration +============= + +You can pass kernel command line options to gxfb with gxfb.