From 40ce65d90b6394fd4156abc2e9214ffca183fd0c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 13 Apr 2009 11:04:19 +0900 Subject: [PATCH] --- yaml --- r: 143252 b: refs/heads/master c: 17a7b7b39056a82c5012539311850f202e6c3cd4 h: refs/heads/master v: v3 --- [refs] | 2 +- .../Documentation/ABI/testing/debugfs-pktcdvd | 6 +- trunk/Documentation/cgroups/memory.txt | 55 +- .../cgroups/resource_counter.txt | 27 +- trunk/Documentation/sysctl/net.txt | 2 +- trunk/Documentation/tomoyo.txt | 55 + trunk/Documentation/vm/00-INDEX | 2 - trunk/Documentation/vm/active_mm.txt | 83 -- trunk/Documentation/vm/unevictable-lru.txt | 1041 ++++++++--------- trunk/MAINTAINERS | 30 +- trunk/arch/arm/mach-omap2/usb-musb.c | 8 +- trunk/arch/ia64/kernel/pci-swiotlb.c | 2 +- .../arch/x86/include/asm/required-features.h | 2 +- trunk/arch/x86/include/asm/xen/page.h | 3 +- .../x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 2 +- trunk/arch/x86/xen/enlighten.c | 89 +- trunk/arch/x86/xen/mmu.c | 116 +- trunk/arch/x86/xen/mmu.h | 3 - trunk/arch/x86/xen/smp.c | 4 +- trunk/arch/x86/xen/xen-ops.h | 2 + trunk/drivers/atm/solos-pci.c | 2 +- trunk/drivers/block/cciss.c | 2 +- trunk/drivers/char/agp/intel-agp.c | 3 - trunk/drivers/char/sysrq.c | 1 + trunk/drivers/edac/edac_core.h | 12 +- trunk/drivers/edac/edac_device.c | 2 +- trunk/drivers/edac/edac_mc.c | 2 +- trunk/drivers/edac/edac_pci.c | 2 +- trunk/drivers/hwmon/Kconfig | 10 - trunk/drivers/hwmon/Makefile | 1 - trunk/drivers/hwmon/hp_accel.c | 1 + trunk/drivers/hwmon/sht15.c | 692 ----------- trunk/drivers/misc/eeprom/at24.c | 8 +- trunk/drivers/misc/eeprom/at25.c | 5 +- trunk/drivers/misc/sgi-xp/xpc.h | 254 ++-- trunk/drivers/misc/sgi-xp/xpc_channel.c | 138 +-- trunk/drivers/misc/sgi-xp/xpc_main.c | 128 +- trunk/drivers/misc/sgi-xp/xpc_partition.c | 20 +- trunk/drivers/misc/sgi-xp/xpc_sn2.c | 164 +-- trunk/drivers/misc/sgi-xp/xpc_uv.c | 257 ++-- trunk/drivers/net/atl1c/atl1c_main.c | 4 +- trunk/drivers/net/benet/be_main.c | 4 +- trunk/drivers/net/jme.c | 8 +- trunk/drivers/net/wireless/ath9k/pci.c | 4 +- trunk/drivers/net/wireless/p54/p54pci.c | 4 +- trunk/drivers/scsi/3w-9xxx.c | 8 +- trunk/drivers/scsi/aacraid/aachba.c | 2 +- trunk/drivers/scsi/mpt2sas/mpt2sas_base.c | 10 +- trunk/drivers/spi/spi.c | 22 +- trunk/drivers/staging/b3dfg/b3dfg.c | 2 +- trunk/drivers/usb/otg/nop-usb-xceiv.c | 4 +- trunk/drivers/video/aty/radeon_base.c | 4 +- trunk/drivers/video/backlight/backlight.c | 3 + trunk/drivers/video/backlight/lcd.c | 3 + trunk/drivers/video/cirrusfb.c | 4 +- trunk/drivers/video/console/fbcon.c | 55 +- trunk/drivers/video/efifb.c | 7 +- trunk/drivers/video/fbmem.c | 19 - trunk/drivers/video/intelfb/intelfb.h | 2 - trunk/drivers/video/intelfb/intelfb_i2c.c | 1 - trunk/drivers/video/intelfb/intelfbdrv.c | 1 - trunk/drivers/video/intelfb/intelfbhw.c | 5 - trunk/drivers/video/s3fb.c | 6 +- trunk/drivers/video/sa1100fb.c | 15 +- trunk/drivers/video/sa1100fb.h | 7 +- trunk/drivers/video/sis/sis_main.c | 2 +- trunk/drivers/video/skeletonfb.c | 8 +- trunk/drivers/video/uvesafb.c | 35 +- trunk/drivers/video/vfb.c | 11 +- trunk/drivers/xen/cpu_hotplug.c | 40 +- trunk/drivers/xen/manage.c | 5 +- trunk/fs/ext2/inode.c | 44 +- trunk/fs/hfs/inode.c | 4 - trunk/fs/hfs/mdb.c | 1 - trunk/fs/jbd/revoke.c | 24 +- trunk/fs/xfs/linux-2.6/xfs_aops.c | 38 +- trunk/fs/xfs/linux-2.6/xfs_aops.h | 1 - trunk/fs/xfs/linux-2.6/xfs_buf.c | 9 - trunk/fs/xfs/linux-2.6/xfs_fs_subr.c | 14 +- trunk/fs/xfs/linux-2.6/xfs_lrw.c | 18 +- trunk/fs/xfs/linux-2.6/xfs_sync.c | 78 +- 
trunk/fs/xfs/linux-2.6/xfs_sync.h | 9 +- trunk/fs/xfs/xfs_iget.c | 23 +- trunk/fs/xfs/xfs_iomap.c | 61 +- trunk/fs/xfs/xfs_iomap.h | 3 +- trunk/fs/xfs/xfs_log.c | 78 +- trunk/fs/xfs/xfs_mount.h | 2 +- trunk/fs/xfs/xfs_vnodeops.c | 7 - trunk/include/asm-generic/siginfo.h | 2 +- trunk/include/drm/drm_pciids.h | 2 - trunk/include/linux/fb.h | 8 +- trunk/include/linux/fiemap.h | 2 - trunk/include/linux/init_task.h | 13 + trunk/include/linux/pci_ids.h | 2 - trunk/include/linux/sht15.h | 24 - trunk/include/video/cyblafb.h | 175 +++ trunk/init/initramfs.c | 5 +- trunk/ipc/mq_sysctl.c | 2 +- trunk/kernel/ptrace.c | 7 +- trunk/kernel/sys.c | 24 +- trunk/kernel/sysctl.c | 20 +- trunk/mm/Kconfig | 2 - trunk/mm/filemap.c | 4 +- trunk/mm/memcontrol.c | 2 +- trunk/mm/shmem.c | 27 +- trunk/mm/util.c | 16 - trunk/sound/pci/hda/hda_intel.c | 8 +- 107 files changed, 1702 insertions(+), 2600 deletions(-) create mode 100644 trunk/Documentation/tomoyo.txt delete mode 100644 trunk/Documentation/vm/active_mm.txt delete mode 100644 trunk/drivers/hwmon/sht15.c delete mode 100644 trunk/include/linux/sht15.h create mode 100644 trunk/include/video/cyblafb.h diff --git a/[refs] b/[refs] index fab9f645f473..8f3d59627216 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 1c98aa7424ff163637d8321674ec58dee28152d4 +refs/heads/master: 17a7b7b39056a82c5012539311850f202e6c3cd4 diff --git a/trunk/Documentation/ABI/testing/debugfs-pktcdvd b/trunk/Documentation/ABI/testing/debugfs-pktcdvd index cf11736acb76..bf9c16b64c34 100644 --- a/trunk/Documentation/ABI/testing/debugfs-pktcdvd +++ b/trunk/Documentation/ABI/testing/debugfs-pktcdvd @@ -1,4 +1,4 @@ -What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7] +What: /debug/pktcdvd/pktcdvd[0-7] Date: Oct. 2006 KernelVersion: 2.6.20 Contact: Thomas Maier @@ -10,10 +10,10 @@ debugfs interface The pktcdvd module (packet writing driver) creates these files in debugfs: -/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/ +/debug/pktcdvd/pktcdvd[0-7]/ info (0444) Lots of driver statistics and infos. Example: ------- -cat /sys/kernel/debug/pktcdvd/pktcdvd0/info +cat /debug/pktcdvd/pktcdvd0/info diff --git a/trunk/Documentation/cgroups/memory.txt b/trunk/Documentation/cgroups/memory.txt index 1a608877b14e..a98a7fe7aabb 100644 --- a/trunk/Documentation/cgroups/memory.txt +++ b/trunk/Documentation/cgroups/memory.txt @@ -6,14 +6,15 @@ used here with the memory controller that is used in hardware. Salient features -a. Enable control of Anonymous, Page Cache (mapped and unmapped) and - Swap Cache memory pages. +a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages b. The infrastructure allows easy addition of other types of memory to control c. Provides *zero overhead* for non memory controller users d. Provides a double LRU: global memory pressure causes reclaim from the global LRU; a cgroup on hitting a limit, reclaims from the per cgroup LRU +NOTE: Swap Cache (unmapped) is not accounted now. + Benefits and Purpose of the memory controller The memory controller isolates the memory behaviour of a group of tasks @@ -289,44 +290,34 @@ will be charged as a new owner of it. moved to the parent. If you want to avoid that, force_empty will be useful. 5.2 stat file - -memory.stat file includes following statistics - -cache - # of bytes of page cache memory. -rss - # of bytes of anonymous and swap cache memory. -pgpgin - # of pages paged in (equivalent to # of charging events). -pgpgout - # of pages paged out (equivalent to # of uncharging events). 
-active_anon - # of bytes of anonymous and swap cache memory on active
- lru list.
-inactive_anon - # of bytes of anonymous memory and swap cache memory on
- inactive lru list.
-active_file - # of bytes of file-backed memory on active lru list.
-inactive_file - # of bytes of file-backed memory on inactive lru list.
-unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc).
-
-The following additional stats are dependent on CONFIG_DEBUG_VM.
-
-inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
-recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
-recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
-recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
-recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
-
-Memo:
+ The memory.stat file currently includes the following statistics:
+ cache - # of pages from page-cache and shmem.
+ rss - # of pages from anonymous memory.
+ pgpgin - # of charging events.
+ pgpgout - # of uncharging events.
+ active_anon - # of pages on the active lru of anon, shmem.
+ inactive_anon - # of pages on the inactive lru of anon, shmem.
+ active_file - # of pages on the active lru of file cache.
+ inactive_file - # of pages on the inactive lru of file cache.
+ unevictable - # of pages that cannot be reclaimed (mlocked etc.).
+
+ The following depend on CONFIG_DEBUG_VM.
+ inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
+ recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
+ recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
+ recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
+ recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
+
+ Memo:
recent_rotated means the recent frequency of lru rotation.
recent_scanned means the recent # of scans to the lru.
These are shown to aid debugging; please see the code for their precise meanings.
-Note:
- Only anonymous and swap cache memory is listed as part of 'rss' stat.
- This should not be confused with the true 'resident set size' or the
- amount of physical memory used by the cgroup. Per-cgroup rss
- accounting is not done yet.
5.3 swappiness
Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
- Following cgroups' swapiness can't be changed.
+ The following cgroups' swappiness can't be changed.
- root cgroup (uses /proc/sys/vm/swappiness).
- a cgroup which uses hierarchy and has a child cgroup.
- a cgroup which uses hierarchy and is not the root of the hierarchy.
diff --git a/trunk/Documentation/cgroups/resource_counter.txt b/trunk/Documentation/cgroups/resource_counter.txt index 95b24d766eab..f196ac1d7d25 100644 --- a/trunk/Documentation/cgroups/resource_counter.txt +++ b/trunk/Documentation/cgroups/resource_counter.txt @@ -47,18 +47,13 @@ to work with it.
2. Basic accounting routines
- a. void res_counter_init(struct res_counter *rc,
- struct res_counter *rc_parent)
+ a. void res_counter_init(struct res_counter *rc)
Initializes the resource counter. As usual, should be the first routine called for a new counter.
- The struct res_counter *parent can be used to define a hierarchical
- child -> parent relationship directly in the res_counter structure,
- NULL can be used to define no relationship.
-
- c. int res_counter_charge(struct res_counter *rc, unsigned long val,
- struct res_counter **limit_fail_at)
+ b. int res_counter_charge[_locked]
+ (struct res_counter *rc, unsigned long val)
When a resource is about to be allocated it has to be accounted with the appropriate resource counter (controller should determine
* if the charging is performed first, then it should be uncharged on the error path (if one is taken).
- If the charging fails and a hierarchical dependency exists, the
- limit_fail_at parameter is set to the particular res_counter element
- where the charging failed.
-
- d. int res_counter_charge_locked
- (struct res_counter *rc, unsigned long val)
-
- The same as res_counter_charge(), but it must not acquire/release the
- res_counter->lock internally (it must be called with res_counter->lock
- held).
-
- e. void res_counter_uncharge[_locked]
+ c. void res_counter_uncharge[_locked]
(struct res_counter *rc, unsigned long val)
When a resource is released (freed) it should be de-accounted from the resource counter it was accounted to. This is called "uncharging".
- The _locked routines imply that the res_counter->lock is taken.
+ The _locked routines imply that the res_counter->lock is taken.
+
2.1 Other accounting routines
diff --git a/trunk/Documentation/sysctl/net.txt b/trunk/Documentation/sysctl/net.txt index df38ef046f8d..a34d55b65441 100644 --- a/trunk/Documentation/sysctl/net.txt +++ b/trunk/Documentation/sysctl/net.txt @@ -95,7 +95,7 @@ of struct cmsghdr structures with appended data. There is only one file in this directory.
unix_dgram_qlen limits the max number of datagrams queued in Unix domain
-socket's buffer. It will not take effect unless PF_UNIX flag is specified.
+socket's buffer. It will not take effect unless the PF_UNIX flag is specified.
3. /proc/sys/net/ipv4 - IPV4 settings
diff --git a/trunk/Documentation/tomoyo.txt b/trunk/Documentation/tomoyo.txt new file mode 100644 index 000000000000..b3a232cae7f8 --- /dev/null +++ b/trunk/Documentation/tomoyo.txt @@ -0,0 +1,55 @@
+--- What is TOMOYO? ---
+
+TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel.
+
+LiveCD-based tutorials are available at
+http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/ubuntu8.04-live/
+http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/centos5-live/ .
+Though these tutorials use the non-LSM version of TOMOYO, they are useful for
+understanding what TOMOYO is.
+
+--- How to enable TOMOYO? ---
+
+Build the kernel with CONFIG_SECURITY_TOMOYO=y and pass "security=tomoyo" on
+the kernel command line.
+
+Please see http://tomoyo.sourceforge.jp/en/2.2.x/ for details.
+
+--- Where is documentation? ---
+
+User <-> Kernel interface documentation is available at
+http://tomoyo.sourceforge.jp/en/2.2.x/policy-reference.html .
+
+Materials we prepared for seminars and symposiums are available at
+http://sourceforge.jp/projects/tomoyo/docs/?category_id=532&language_id=1 .
+The lists below are grouped by three aspects.
+
+What is TOMOYO?
+ TOMOYO Linux Overview
+ http://sourceforge.jp/projects/tomoyo/docs/lca2009-takeda.pdf
+ TOMOYO Linux: pragmatic and manageable security for Linux
+ http://sourceforge.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf
+ TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box
+ http://sourceforge.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf
+
+What can TOMOYO do?
+ Deep inside TOMOYO Linux
+ http://sourceforge.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf
+ The role of "pathname based access control" in security.
+ http://sourceforge.jp/projects/tomoyo/docs/lfj2008-bof.pdf
+
+History of TOMOYO?
+ Realities of Mainlining
+ http://sourceforge.jp/projects/tomoyo/docs/lfj2008.pdf
+
+--- What is the future plan? ---
+
+We believe that inode based security and name based security are complementary
+and both should be used together.
But unfortunately, so far, we cannot enable
+multiple LSM modules at the same time. We feel sorry that you have to give up
+SELinux/SMACK/AppArmor etc. when you want to use TOMOYO.
+
+We hope that LSM becomes stackable in the future. Meanwhile, you can use the
+non-LSM version of TOMOYO, available at http://tomoyo.sourceforge.jp/en/1.6.x/ .
+The LSM version of TOMOYO is a subset of the non-LSM version. We are planning
+to port the non-LSM version's functionality to the LSM version.
diff --git a/trunk/Documentation/vm/00-INDEX b/trunk/Documentation/vm/00-INDEX index 2f77ced35df7..2131b00b63f6 100644 --- a/trunk/Documentation/vm/00-INDEX +++ b/trunk/Documentation/vm/00-INDEX @@ -1,7 +1,5 @@
00-INDEX
- this file.
-active_mm.txt
- - An explanation from Linus about tsk->active_mm vs tsk->mm.
balance
- various information on memory balancing.
hugetlbpage.txt
diff --git a/trunk/Documentation/vm/active_mm.txt b/trunk/Documentation/vm/active_mm.txt deleted file mode 100644 index 4ee1f643d897..000000000000 --- a/trunk/Documentation/vm/active_mm.txt +++ /dev/null @@ -1,83 +0,0 @@
-List: linux-kernel
-Subject: Re: active_mm
-From: Linus Torvalds
-Date: 1999-07-30 21:36:24
-
-Cc'd to linux-kernel, because I don't write explanations all that often,
-and when I do I feel better about more people reading them.
-
-On Fri, 30 Jul 1999, David Mosberger wrote:
->
-> Is there a brief description someplace on how "mm" vs. "active_mm" in
-> the task_struct are supposed to be used? (My apologies if this was
-> discussed on the mailing lists---I just returned from vacation and
-> wasn't able to follow linux-kernel for a while).
-
-Basically, the new setup is:
-
- - we have "real address spaces" and "anonymous address spaces". The
- difference is that an anonymous address space doesn't care about the
- user-level page tables at all, so when we do a context switch into an
- anonymous address space we just leave the previous address space
- active.
-
- The obvious use for a "anonymous address space" is any thread that
- doesn't need any user mappings - all kernel threads basically fall into
- this category, but even "real" threads can temporarily say that for
- some amount of time they are not going to be interested in user space,
- and that the scheduler might as well try to avoid wasting time on
- switching the VM state around. Currently only the old-style bdflush
- sync does that.
-
- - "tsk->mm" points to the "real address space". For an anonymous process,
- tsk->mm will be NULL, for the logical reason that an anonymous process
- really doesn't _have_ a real address space at all.
-
- - however, we obviously need to keep track of which address space we
- "stole" for such an anonymous user. For that, we have "tsk->active_mm",
- which shows what the currently active address space is.
-
- The rule is that for a process with a real address space (ie tsk->mm is
- non-NULL) the active_mm obviously always has to be the same as the real
- one.
-
- For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the
- "borrowed" mm while the anonymous process is running. When the
- anonymous process gets scheduled away, the borrowed address space is
- returned and cleared.
-
-To support all that, the "struct mm_struct" now has two counters: a
-"mm_users" counter that is how many "real address space users" there are,
-and a "mm_count" counter that is the number of "lazy" users (ie anonymous
-users) plus one if there are any real users.
- -Usually there is at least one real user, but it could be that the real -user exited on another CPU while a lazy user was still active, so you do -actually get cases where you have a address space that is _only_ used by -lazy users. That is often a short-lived state, because once that thread -gets scheduled away in favour of a real thread, the "zombie" mm gets -released because "mm_users" becomes zero. - -Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any -more. "init_mm" should be considered just a "lazy context when no other -context is available", and in fact it is mainly used just at bootup when -no real VM has yet been created. So code that used to check - - if (current->mm == &init_mm) - -should generally just do - - if (!current->mm) - -instead (which makes more sense anyway - the test is basically one of "do -we have a user context", and is generally done by the page fault handler -and things like that). - -Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago, -because it slightly changes the interfaces to accomodate the alpha (who -would have thought it, but the alpha actually ends up having one of the -ugliest context switch codes - unlike the other architectures where the MM -and register state is separate, the alpha PALcode joins the two, and you -need to switch both together). - -(From http://marc.info/?l=linux-kernel&m=93337278602211&w=2) diff --git a/trunk/Documentation/vm/unevictable-lru.txt b/trunk/Documentation/vm/unevictable-lru.txt index 2d70d0d95108..0706a7282a8c 100644 --- a/trunk/Documentation/vm/unevictable-lru.txt +++ b/trunk/Documentation/vm/unevictable-lru.txt @@ -1,691 +1,588 @@ - ============================== - UNEVICTABLE LRU INFRASTRUCTURE - ============================== - -======== -CONTENTS -======== - - (*) The Unevictable LRU - - - The unevictable page list. - - Memory control group interaction. - - Marking address spaces unevictable. - - Detecting Unevictable Pages. - - vmscan's handling of unevictable pages. - - (*) mlock()'d pages. - - - History. - - Basic management. - - mlock()/mlockall() system call handling. - - Filtering special vmas. - - munlock()/munlockall() system call handling. - - Migrating mlocked pages. - - mmap(MAP_LOCKED) system call handling. - - munmap()/exit()/exec() system call handling. - - try_to_unmap(). - - try_to_munlock() reverse map scan. - - Page reclaim in shrink_*_list(). - - -============ -INTRODUCTION -============ - -This document describes the Linux memory manager's "Unevictable LRU" -infrastructure and the use of this to manage several types of "unevictable" -pages. - -The document attempts to provide the overall rationale behind this mechanism -and the rationale for some of the design decisions that drove the -implementation. The latter design rationale is discussed in the context of an -implementation description. Admittedly, one can obtain the implementation -details - the "what does it do?" - by reading the code. One hopes that the -descriptions below add value by provide the answer to "why does it do that?". - - -=================== -THE UNEVICTABLE LRU -=================== - -The Unevictable LRU facility adds an additional LRU list to track unevictable -pages and to hide these pages from vmscan. This mechanism is based on a patch -by Larry Woodman of Red Hat to address several scalability problems with page -reclaim in Linux. The problems have been observed at customer sites on large -memory x86_64 systems. 
-
-To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of
-main memory will have over 32 million 4k pages in a single zone. When a large
-fraction of these pages are not evictable for any reason [see below], vmscan
-will spend a lot of time scanning the LRU lists looking for the small fraction
-of pages that are evictable. This can result in a situation where all CPUs are
-spending 100% of their time in vmscan for hours or days on end, with the system
-completely unresponsive.
-
-The unevictable list addresses the following classes of unevictable pages:
-
- (*) Those owned by ramfs.
-
- (*) Those mapped into SHM_LOCK'd shared memory regions.
- (*) Those mapped into VM_LOCKED [mlock()ed] VMAs.
-
-The infrastructure may also be able to handle other conditions that make pages
+This document describes the Linux memory management "Unevictable LRU"
+infrastructure and the use of this infrastructure to manage several types
+of "unevictable" pages. The document attempts to provide the overall
+rationale behind this mechanism and the rationale for some of the design
+decisions that drove the implementation. The latter design rationale is
+discussed in the context of an implementation description. Admittedly, one
+can obtain the implementation details--the "what does it do?"--by reading the
+code. One hopes that the descriptions below add value by providing the answer
+to "why does it do that?".
+
+Unevictable LRU Infrastructure:
+
+The Unevictable LRU adds an additional LRU list to track unevictable pages
+and to hide these pages from vmscan. This mechanism is based on a patch by
+Larry Woodman of Red Hat to address several scalability problems with page
+reclaim in Linux. The problems have been observed at customer sites on large
+memory x86_64 systems. For example, a non-NUMA x86_64 platform with 128GB
+of main memory will have over 32 million 4k pages in a single zone. When a
+large fraction of these pages are not evictable for any reason [see below],
+vmscan will spend a lot of time scanning the LRU lists looking for the small
+fraction of pages that are evictable. This can result in a situation where
+all cpus are spending 100% of their time in vmscan for hours or days on end,
+with the system completely unresponsive.
+
+The Unevictable LRU infrastructure addresses the following classes of
+unevictable pages:
+
++ page owned by ramfs
++ page mapped into SHM_LOCKed shared memory regions
++ page mapped into VM_LOCKED [mlock()ed] vmas
+
+The infrastructure might be able to handle other conditions that make pages
unevictable, either by definition or by circumstance, in the future.
-THE UNEVICTABLE PAGE LIST
-------------------------
+The Unevictable LRU List
The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list called the "unevictable" list and an associated page flag, PG_unevictable, to
-indicate that the page is being managed on the unevictable list.
-
-The PG_unevictable flag is analogous to, and mutually exclusive with, the
-PG_active flag in that it indicates on which LRU list a page resides when
-PG_lru is set. The unevictable list is compile-time configurable based on the
-UNEVICTABLE_LRU Kconfig option.
+indicate that the page is being managed on the unevictable list. The
+PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active
+flag in that it indicates on which LRU list a page resides when PG_lru is set.
+The unevictable LRU list is compile-time configurable via the UNEVICTABLE_LRU
+Kconfig option.
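To make the flag invariant concrete, here is a minimal sketch. It assumes the page-flag test wrappers PageLRU(), PageActive() and PageUnevictable() from <linux/page-flags.h>; which_lru_list() is a hypothetical helper written only for illustration, not a kernel function:

	#include <linux/mm_types.h>
	#include <linux/page-flags.h>

	/*
	 * Hypothetical sketch: PG_active and PG_unevictable are mutually
	 * exclusive and only meaningful while PG_lru is set, so together
	 * they identify which LRU list a page currently resides on.
	 */
	enum on_lru { ON_NO_LRU, ON_ACTIVE_LRU, ON_INACTIVE_LRU, ON_UNEVICTABLE_LRU };

	static enum on_lru which_lru_list(struct page *page)
	{
		if (!PageLRU(page))
			return ON_NO_LRU;	/* e.g. isolated for migration */
		if (PageUnevictable(page))
			return ON_UNEVICTABLE_LRU;
		return PageActive(page) ? ON_ACTIVE_LRU : ON_INACTIVE_LRU;
	}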
The Unevictable LRU infrastructure maintains unevictable pages on an additional LRU list for a few reasons:
- (1) We get to "treat unevictable pages just like we treat other pages in the
- system - which means we get to use the same code to manipulate them, the
- same code to isolate them (for migrate, etc.), the same code to keep track
- of the statistics, etc..." [Rik van Riel]
-
- (2) We want to be able to migrate unevictable pages between nodes for memory
- defragmentation, workload management and memory hotplug. The linux kernel
- can only migrate pages that it can successfully isolate from the LRU
- lists. If we were to maintain pages elsewhere than on an LRU-like list,
- where they can be found by isolate_lru_page(), we would prevent their
- migration, unless we reworked migration code to find the unevictable pages
- itself.
+1) We get to "treat unevictable pages just like we treat other pages in the
+ system, which means we get to use the same code to manipulate them, the
+ same code to isolate them (for migrate, etc.), the same code to keep track
+ of the statistics, etc..." [Rik van Riel]
+2) We want to be able to migrate unevictable pages between nodes--for memory
+ defragmentation, workload management and memory hotplug. The linux kernel
+ can only migrate pages that it can successfully isolate from the lru lists.
+ If we were to maintain pages elsewhere than on an lru-like list, where they
+ can be found by isolate_lru_page(), we would prevent their migration, unless
+ we reworked migration code to find the unevictable pages.
-The unevictable list does not differentiate between file-backed and anonymous,
-swap-backed pages. This differentiation is only important while the pages are,
-in fact, evictable.
-The unevictable list benefits from the "arrayification" of the per-zone LRU
-lists and statistics originally proposed and posted by Christoph Lameter.
+The unevictable LRU list does not differentiate between file backed and swap
+backed [anon] pages. This differentiation is only important while the pages
+are, in fact, evictable.
-The unevictable list does not use the LRU pagevec mechanism. Rather,
-unevictable pages are placed directly on the page's zone's unevictable list
-under the zone lru_lock. This allows us to prevent the stranding of pages on
-the unevictable list when one task has the page isolated from the LRU and other
-tasks are changing the "evictability" state of the page.
+The unevictable LRU list benefits from the "arrayification" of the per-zone
+LRU lists and statistics originally proposed and posted by Christoph Lameter.
+The unevictable list does not use the lru pagevec mechanism. Rather,
+unevictable pages are placed directly on the page's zone's unevictable
+list under the zone lru_lock. The reason for this is to prevent stranding
+of pages on the unevictable list when one task has the page isolated from the
+lru and other tasks are changing the "evictability" state of the page.
-MEMORY CONTROL GROUP INTERACTION
--------------------------------
-The unevictable LRU facility interacts with the memory control group [aka
-memory controller; see Documentation/cgroups/memory.txt] by extending the
-lru_list enum.
-
-The memory controller data structure automatically gets a per-zone unevictable
-list as a result of the "arrayification" of the per-zone LRU lists (one per
-lru_list enum element). The memory controller tracks the movement of pages to
-and from the unevictable list.
+Unevictable LRU and Memory Controller Interaction +The memory controller data structure automatically gets a per zone unevictable +lru list as a result of the "arrayification" of the per-zone LRU lists. The +memory controller tracks the movement of pages to and from the unevictable list. When a memory control group comes under memory pressure, the controller will not attempt to reclaim pages on the unevictable list. This has a couple of -effects: - - (1) Because the pages are "hidden" from reclaim on the unevictable list, the - reclaim process can be more efficient, dealing only with pages that have a - chance of being reclaimed. - - (2) On the other hand, if too many of the pages charged to the control group - are unevictable, the evictable portion of the working set of the tasks in - the control group may not fit into the available memory. This can cause - the control group to thrash or to OOM-kill tasks. - - -MARKING ADDRESS SPACES UNEVICTABLE ----------------------------------- - -For facilities such as ramfs none of the pages attached to the address space -may be evicted. To prevent eviction of any such pages, the AS_UNEVICTABLE -address space flag is provided, and this can be manipulated by a filesystem -using a number of wrapper functions: - - (*) void mapping_set_unevictable(struct address_space *mapping); - - Mark the address space as being completely unevictable. - - (*) void mapping_clear_unevictable(struct address_space *mapping); - - Mark the address space as being evictable. - - (*) int mapping_unevictable(struct address_space *mapping); - - Query the address space, and return true if it is completely - unevictable. - -These are currently used in two places in the kernel: - - (1) By ramfs to mark the address spaces of its inodes when they are created, - and this mark remains for the life of the inode. - - (2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called. - - Note that SHM_LOCK is not required to page in the locked pages if they're - swapped out; the application must touch the pages manually if it wants to - ensure they're in memory. - - -DETECTING UNEVICTABLE PAGES ---------------------------- - -The function page_evictable() in vmscan.c determines whether a page is -evictable or not using the query function outlined above [see section "Marking -address spaces unevictable"] to check the AS_UNEVICTABLE flag. - -For address spaces that are so marked after being populated (as SHM regions -might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate -the page tables for the region as does, for example, mlock(), nor need it make -any special effort to push any pages in the SHM_LOCK'd area to the unevictable -list. Instead, vmscan will do this if and when it encounters the pages during -a reclamation scan. - -On an unlock action (such as SHM_UNLOCK), the unlocker (eg: shmctl()) must scan -the pages in the region and "rescue" them from the unevictable list if no other -condition is keeping them unevictable. If an unevictable region is destroyed, -the pages are also "rescued" from the unevictable list in the process of -freeing them. - -page_evictable() also checks for mlocked pages by testing an additional page -flag, PG_mlocked (as wrapped by PageMlocked()). If the page is NOT mlocked, -and a non-NULL VMA is supplied, page_evictable() will check whether the VMA is +effects. 
Because the pages are "hidden" from reclaim on the unevictable list,
+the reclaim process can be more efficient, dealing only with pages that have
+a chance of being reclaimed. On the other hand, if too many of the pages
+charged to the control group are unevictable, the evictable portion of the
+working set of the tasks in the control group may not fit into the available
+memory. This can cause the control group to thrash or to oom-kill tasks.
+
+
+Unevictable LRU: Detecting Unevictable Pages
+
+The function page_evictable(page, vma) in vmscan.c determines whether a
+page is evictable or not. For ramfs pages and pages in SHM_LOCKed regions,
+page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's
+address space using a wrapper function. Wrapper functions are used to set,
+clear and test the flag to reduce the requirement for #ifdef's throughout the
+source code. AS_UNEVICTABLE is set on the ramfs inode/mapping when it is created.
+This flag remains for the life of the inode.
+
+For shared memory regions, AS_UNEVICTABLE is set when an application
+successfully SHM_LOCKs the region and is removed when the region is
+SHM_UNLOCKed. Note that shmctl(SHM_LOCK, ...) does not populate the page
+tables for the region as does, for example, mlock(). So, we make no special
+effort to push any pages in the SHM_LOCKed region to the unevictable list.
+Vmscan will do this when/if it encounters the pages during reclaim. On
+SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the
+unevictable list if no other condition keeps them unevictable. If a SHM_LOCKed
+region is destroyed, the pages are also "rescued" from the unevictable list in
+the process of freeing them.
+
+page_evictable() detects mlock()ed pages by testing an additional page flag,
+PG_mlocked, via the PageMlocked() wrapper. If the page is NOT mlocked, and a
+non-NULL vma is supplied, page_evictable() will check whether the vma is
VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and update the appropriate statistics if the vma is VM_LOCKED. This method allows efficient "culling" of pages in the fault path that are being faulted in to
-VM_LOCKED VMAs.
+VM_LOCKED vmas.
-VMSCAN'S HANDLING OF UNEVICTABLE PAGES
--------------------------------------
+Unevictable Pages and Vmscan [shrink_*_list()]
If unevictable pages are culled in the fault path, or moved to the unevictable
-list at mlock() or mmap() time, vmscan will not encounter the pages until they
-have become evictable again (via munlock() for example) and have been "rescued"
-from the unevictable list. However, there may be situations where we decide,
-for the sake of expediency, to leave a unevictable page on one of the regular
-active/inactive LRU lists for vmscan to deal with. vmscan checks for such
-pages in all of the shrink_{active|inactive|page}_list() functions and will
-"cull" such pages that it encounters: that is, it diverts those pages to the
-unevictable list for the zone being scanned.
-
-There may be situations where a page is mapped into a VM_LOCKED VMA, but the
-page is not marked as PG_mlocked. Such pages will make it all the way to
+list at mlock() or mmap() time, vmscan will never encounter the pages until
+they have become evictable again, for example, via munlock() and have been
+"rescued" from the unevictable list. However, there may be situations where we
+decide, for the sake of expediency, to leave an unevictable page on one of the
+regular active/inactive LRU lists for vmscan to deal with.
Vmscan checks for
+such pages in all of the shrink_{active|inactive|page}_list() functions and
+will "cull" such pages that it encounters--that is, it diverts those pages to
+the unevictable list for the zone being scanned.
+
+There may be situations where a page is mapped into a VM_LOCKED vma, but the
+page is not marked as PageMlocked. Such pages will make it all the way to
shrink_page_list() where they will be detected when vmscan walks the reverse
-map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK,
-shrink_page_list() will cull the page at that point.
+map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
+will cull the page at that point.
-To "cull" an unevictable page, vmscan simply puts the page back on the LRU list
-using putback_lru_page() - the inverse operation to isolate_lru_page() - after
-dropping the page lock. Because the condition which makes the page unevictable
-may change once the page is unlocked, putback_lru_page() will recheck the
-unevictable state of a page that it places on the unevictable list. If the
-page has become unevictable, putback_lru_page() removes it from the list and
-retries, including the page_unevictable() test. Because such a race is a rare
-event and movement of pages onto the unevictable list should be rare, these
-extra evictabilty checks should not occur in the majority of calls to
-putback_lru_page().
+To "cull" an unevictable page, vmscan simply puts the page back on the lru
+list using putback_lru_page()--the inverse operation to isolate_lru_page()--
+after dropping the page lock. Because the condition which makes the page
+unevictable may change once the page is unlocked, putback_lru_page() will
+recheck the unevictable state of a page that it places on the unevictable lru
+list. If the page has become unevictable, putback_lru_page() removes it from
+the list and retries, including the page_unevictable() test. Because such a
+race is a rare event and movement of pages onto the unevictable list should be
+rare, these extra evictability checks should not occur in the majority of calls
+to putback_lru_page().
-=============
-MLOCKED PAGES
-=============
+Mlocked Pages: Prior Work
-The unevictable page list is also useful for mlock(), in addition to ramfs and
-SYSV SHM. Note that mlock() is only available in CONFIG_MMU=y situations; in
-NOMMU situations, all mappings are effectively mlocked.
-
-
-HISTORY
--------
-
-The "Unevictable mlocked Pages" infrastructure is based on work originally
+The "Unevictable Mlocked Pages" infrastructure is based on work originally
posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
-Nick posted his patch as an alternative to a patch posted by Christoph Lameter
-to achieve the same objective: hiding mlocked pages from vmscan.
-
-In Nick's patch, he used one of the struct page LRU list link fields as a count
-of VM_LOCKED VMAs that map the page. This use of the link field for a count
-prevented the management of the pages on an LRU list, and thus mlocked pages
-were not migratable as isolate_lru_page() could not find them, and the LRU list
-link field was not available to the migration subsystem.
-
-Nick resolved this by putting mlocked pages back on the lru list before
-attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs. When
-Nick's patch was integrated with the Unevictable LRU work, the count was
-replaced by walking the reverse map to determine whether any VM_LOCKED VMAs
-mapped the page. More on this below.
- - -BASIC MANAGEMENT ----------------- - -mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable -pages. When such a page has been "noticed" by the memory management subsystem, -the page is marked with the PG_mlocked flag. This can be manipulated using the -PageMlocked() functions. - -A PG_mlocked page will be placed on the unevictable list when it is added to -the LRU. Such pages can be "noticed" by memory management in several places: - - (1) in the mlock()/mlockall() system call handlers; - - (2) in the mmap() system call handler when mmapping a region with the - MAP_LOCKED flag; - - (3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE - flag - - (4) in the fault path, if mlocked pages are "culled" in the fault path, - and when a VM_LOCKED stack segment is expanded; or - - (5) as mentioned above, in vmscan:shrink_page_list() when attempting to - reclaim a page in a VM_LOCKED VMA via try_to_unmap() - -all of which result in the VM_LOCKED flag being set for the VMA if it doesn't -already have it set. - -mlocked pages become unlocked and rescued from the unevictable list when: - - (1) mapped in a range unlocked via the munlock()/munlockall() system calls; - - (2) munmap()'d out of the last VM_LOCKED VMA that maps the page, including - unmapping at task exit; - - (3) when the page is truncated from the last VM_LOCKED VMA of an mmapped file; - or - - (4) before a page is COW'd in a VM_LOCKED VMA. - - -mlock()/mlockall() SYSTEM CALL HANDLING ---------------------------------------- +Nick posted his patch as an alternative to a patch posted by Christoph +Lameter to achieve the same objective--hiding mlocked pages from vmscan. +In Nick's patch, he used one of the struct page lru list link fields as a count +of VM_LOCKED vmas that map the page. This use of the link field for a count +prevented the management of the pages on an LRU list. Thus, mlocked pages were +not migratable as isolate_lru_page() could not find them and the lru list link +field was not available to the migration subsystem. Nick resolved this by +putting mlocked pages back on the lru list before attempting to isolate them, +thus abandoning the count of VM_LOCKED vmas. When Nick's patch was integrated +with the Unevictable LRU work, the count was replaced by walking the reverse +map to determine whether any VM_LOCKED vmas mapped the page. More on this +below. + + +Mlocked Pages: Basic Management + +Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of +unevictable pages. When such a page has been "noticed" by the memory +management subsystem, the page is marked with the PG_mlocked [PageMlocked()] +flag. A PageMlocked() page will be placed on the unevictable LRU list when +it is added to the LRU. Pages can be "noticed" by memory management in +several places: + +1) in the mlock()/mlockall() system call handlers. +2) in the mmap() system call handler when mmap()ing a region with the + MAP_LOCKED flag, or mmap()ing a region in a task that has called + mlockall() with the MCL_FUTURE flag. Both of these conditions result + in the VM_LOCKED flag being set for the vma. +3) in the fault path, if mlocked pages are "culled" in the fault path, + and when a VM_LOCKED stack segment is expanded. +4) as mentioned above, in vmscan:shrink_page_list() when attempting to + reclaim a page in a VM_LOCKED vma via try_to_unmap(). + +Mlocked pages become unlocked and rescued from the unevictable list when: + +1) mapped in a range unlocked via the munlock()/munlockall() system calls. 
+2) munmap()ed out of the last VM_LOCKED vma that maps the page, including
+ unmapping at task exit.
+3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file.
+4) before a page is COWed in a VM_LOCKED vma.
+
+
+Mlocked Pages: mlock()/mlockall() System Call Handling
Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
-for each VMA in the range specified by the call. In the case of mlockall(),
+for each vma in the range specified by the call. In the case of mlockall(),
this is the entire active address space of the task. Note that mlock_fixup()
-is used for both mlocking and munlocking a range of memory. A call to mlock()
-an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is
-treated as a no-op, and mlock_fixup() simply returns.
-
-If the VMA passes some filtering as described in "Filtering Special Vmas"
-below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
-off a subset of the VMA if the range does not cover the entire VMA. Once the
-VMA has been merged or split or neither, mlock_fixup() will call
-__mlock_vma_pages_range() to fault in the pages via get_user_pages() and to
-mark the pages as mlocked via mlock_vma_page().
-
-Note that the VMA being mlocked might be mapped with PROT_NONE. In this case,
-get_user_pages() will be unable to fault in the pages. That's okay. If pages
-do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the
+is used for both mlock()ing and munlock()ing a range of memory. A call to
+mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED
+is treated as a no-op--mlock_fixup() simply returns.
+
+If the vma passes some filtering as described in "Mlocked Pages: Filtering
+Special Vmas" below, mlock_fixup() will attempt to merge the vma with its
+neighbors or split off a subset of the vma if the range does not cover the
+entire vma. Once the vma has been merged or split or neither, mlock_fixup()
+will call __mlock_vma_pages_range() to fault in the pages via get_user_pages()
+and to mark the pages as mlocked via mlock_vma_page().
+
+Note that the vma being mlocked might be mapped with PROT_NONE. In this case,
+get_user_pages() will be unable to fault in the pages. That's OK. If pages
+do end up getting faulted into this VM_LOCKED vma, we'll handle them in the
fault path or in vmscan.
Also note that a page returned by get_user_pages() could be truncated or
-migrated out from under us, while we're trying to mlock it. To detect this,
-__mlock_vma_pages_range() checks page_mapping() after acquiring the page lock.
-If the page is still associated with its mapping, we'll go ahead and call
-mlock_vma_page(). If the mapping is gone, we just unlock the page and move on.
-In the worst case, this will result in a page mapped in a VM_LOCKED VMA
-remaining on a normal LRU list without being PageMlocked(). Again, vmscan will
-detect and cull such pages.
-
-mlock_vma_page() will call TestSetPageMlocked() for each page returned by
-get_user_pages(). We use TestSetPageMlocked() because the page might already
-be mlocked by another task/VMA and we don't want to do extra work. We
-especially do not want to count an mlocked page more than once in the
-statistics. If the page was already mlocked, mlock_vma_page() need do nothing
-more.
+migrated out from under us while we're trying to mlock it. To detect
+this, __mlock_vma_pages_range() tests the page_mapping after acquiring
+the page lock.
If the page is still associated with its mapping, we'll
+go ahead and call mlock_vma_page(). If the mapping is gone, we just
+unlock the page and move on. Worst case, this results in a page mapped
+in a VM_LOCKED vma remaining on a normal LRU list without being
+PageMlocked(). Again, vmscan will detect and cull such pages.
+
+mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will
+TestSetPageMlocked() for each page returned by get_user_pages(). We use
+TestSetPageMlocked() because the page might already be mlocked by another
+task/vma and we don't want to do extra work. We especially do not want to
+count an mlocked page more than once in the statistics. If the page was
+already mlocked, mlock_vma_page() is done.
If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the page from the LRU, as it is likely on the appropriate active or inactive list
-at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will put
-back the page - by calling putback_lru_page() - which will notice that the page
-is now mlocked and divert the page to the zone's unevictable list. If
+at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will
+putback the page--putback_lru_page()--which will notice that the page is now
+mlocked and divert the page to the zone's unevictable LRU list. If
mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
-it later if and when it attempts to reclaim the page.
+it later if/when it attempts to reclaim the page.
-FILTERING SPECIAL VMAS
----------------------
+Mlocked Pages: Filtering Special Vmas
-mlock_fixup() filters several classes of "special" VMAs:
+mlock_fixup() filters several classes of "special" vmas:
-1) VMAs with VM_IO or VM_PFNMAP set are skipped entirely. The pages behind
+1) vmas with VM_IO|VM_PFNMAP set are skipped entirely. The pages behind
these mappings are inherently pinned, so we don't need to mark them as
- mlocked. In any case, most of the pages have no struct page in which to so
- mark the page. Because of this, get_user_pages() will fail for these VMAs,
- so there is no sense in attempting to visit them.
-
-2) VMAs mapping hugetlbfs page are already effectively pinned into memory. We
- neither need nor want to mlock() these pages. However, to preserve the
- prior behavior of mlock() - before the unevictable/mlock changes -
- mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
- allocate the huge pages and populate the ptes.
-
-3) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of
- kernel pages, such as the VDSO page, relay channel pages, etc. These pages
+ mlocked. In any case, most of the pages have no struct page in which to
+ so mark the page. Because of this, get_user_pages() will fail for these
+ vmas, so there is no sense in attempting to visit them.
+
+2) vmas mapping hugetlbfs pages are already effectively pinned into memory.
+ We neither need nor want to mlock() these pages. However, to preserve the
+ prior behavior of mlock()--before the unevictable/mlock changes--
+ mlock_fixup() will call make_pages_present() in the hugetlbfs vma range
+ to allocate the huge pages and populate the ptes.
+
+3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
+ kernel pages, such as the vdso page, relay channel pages, etc. These pages
are inherently unevictable and are not managed on the LRU lists.
- mlock_fixup() treats these VMAs the same as hugetlbfs VMAs.
It calls
+ make_pages_present() to populate the ptes.
-Note that for all of these special VMAs, mlock_fixup() does not set the
+Note that for all of these special vmas, mlock_fixup() does not set the
VM_LOCKED flag. Therefore, we won't have to deal with them later during
-munlock(), munmap() or task exit. Neither does mlock_fixup() account these
-VMAs against the task's "locked_vm".
-
-
-munlock()/munlockall() SYSTEM CALL HANDLING
--------------------------------------------
-
-The munlock() and munlockall() system calls are handled by the same functions -
-do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs
-lock operation indicated by an argument. So, these system calls are also
-handled by mlock_fixup(). Again, if called for an already munlocked VMA,
-mlock_fixup() simply returns. Because of the VMA filtering discussed above,
-VM_LOCKED will not be set in any "special" VMAs. So, these VMAs will be
+munlock() or munmap()--for example, at task exit. Neither does mlock_fixup()
+account these vmas against the task's "locked_vm".
+
+Mlocked Pages: Downgrading the Mmap Semaphore
+
+mlock_fixup() must be called with the mmap semaphore held for write, because
+it may have to merge or split vmas. However, mlocking a large region of
+memory can take a long time--especially if vmscan must reclaim pages to
+satisfy the region's requirements. Faulting in a large region with the mmap
+semaphore held for write can hold off other faults on the address space, in
+the case of a multi-threaded task. It can also hold off scans of the task's
+address space via /proc. While testing under heavy load, it was observed that
+the ps(1) command could be held off for many minutes while a large segment was
+mlock()ed down.
+
+To address this issue, and to make the system more responsive during mlock()ing
+of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
+during the call to __mlock_vma_pages_range(). This works fine. However, the
+callers of mlock_fixup() expect the semaphore to be returned in write mode.
+So, mlock_fixup() "upgrades" the semaphore to write mode. Linux does not
+support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
+and reacquire it in write mode. In a multi-threaded task, it is possible for
+the task memory map to change while the semaphore is dropped. Therefore,
+mlock_fixup() looks up the vma at the range start address after reacquiring
+the semaphore in write mode and verifies that it still covers the original
+range. If not, mlock_fixup() returns an error [-EAGAIN]. All callers of
+mlock_fixup() have been changed to deal with this new error condition.
+
+Note: when munlocking a region, all of the pages should already be resident--
+unless we have racing threads mlocking() and munlocking() regions. So,
+unlocking should not have to wait for page allocations or faults of any kind.
+Therefore mlock_fixup() does not downgrade the semaphore for munlock().
+
+
+Mlocked Pages: munlock()/munlockall() System Call Handling
+
+The munlock() and munlockall() system calls are handled by the same functions--
+do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
+vs lock operation indicated by an argument. So, these system calls are also
+handled by mlock_fixup(). Again, if called for an already munlock()ed vma,
+mlock_fixup() simply returns. Because of the vma filtering discussed above,
+VM_LOCKED will not be set in any "special" vmas.
So, these vmas will be
ignored for munlock.
-If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
-specified range. The range is then munlocked via the function
-__mlock_vma_pages_range() - the same function used to mlock a VMA range -
+If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off
+the specified range. The range is then munlocked via the function
+__mlock_vma_pages_range()--the same function used to mlock a vma range--
passing a flag to indicate that munlock() is being performed.
-Because the VMA access protections could have been changed to PROT_NONE after
+Because the vma access protections could have been changed to PROT_NONE after
faulting in and mlocking pages, get_user_pages() was unreliable for visiting
-these pages for munlocking. Because we don't want to leave pages mlocked,
+these pages for munlocking. Because we don't want to leave pages mlocked,
get_user_pages() was enhanced to accept a flag to ignore the permissions when
-fetching the pages - all of which should be resident as a result of previous
-mlocking.
+fetching the pages--all of which should be resident as a result of previous
+mlock()ing.
For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
-flag using TestClearPageMlocked(). As with mlock_vma_page(),
-munlock_vma_page() use the Test*PageMlocked() function to handle the case where
-the page might have already been unlocked by another task. If the page was
-mlocked, munlock_vma_page() updates that zone statistics for the number of
-mlocked pages. Note, however, that at this point we haven't checked whether
-the page is mapped by other VM_LOCKED VMAs.
-
-We can't call try_to_munlock(), the function that walks the reverse map to
-check for other VM_LOCKED VMAs, without first isolating the page from the LRU.
+flag using TestClearPageMlocked(). As with mlock_vma_page(), munlock_vma_page()
+uses the Test*PageMlocked() function to handle the case where the page might
+have already been unlocked by another task. If the page was mlocked,
+munlock_vma_page() updates the zone statistics for the number of mlocked
+pages. Note, however, that at this point we haven't checked whether the page
+is mapped by other VM_LOCKED vmas.
+
+We can't call try_to_munlock(), the function that walks the reverse map to check
+for other VM_LOCKED vmas, without first isolating the page from the LRU.
try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
-not be on an LRU list [more on these below]. However, the call to
-isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). So,
-we go ahead and clear PG_mlocked up front, as this might be the only chance we
-have. If we can successfully isolate the page, we go ahead and
+not be on an lru list. [More on these below.] However, the call to
+isolate_lru_page() could fail, in which case we couldn't try_to_munlock().
+So, we go ahead and clear PG_mlocked up front, as this might be the only chance
+we have. If we can successfully isolate the page, we go ahead and
try_to_munlock(), which will restore the PG_mlocked flag and update the zone
-page statistics if it finds another VMA holding the page mlocked. If we fail
+page statistics if it finds another vma holding the page mlocked. If we fail
to isolate the page, we'll have left a potentially mlocked page on the LRU.
-This is fine, because we'll catch it later if and if vmscan tries to reclaim
-the page.
This should be relatively rare.
-
-
-MIGRATING MLOCKED PAGES
------------------------
-
-A page that is being migrated has been isolated from the LRU lists and is held
-locked across unmapping of the page, updating the page's address space entry
-and copying the contents and state, until the page table entry has been
-replaced with an entry that refers to the new page. Linux supports migration
-of mlocked pages and other unevictable pages. This involves simply moving the
-PG_mlocked and PG_unevictable states from the old page to the new page.
-
-Note that page migration can race with mlocking or munlocking of the same page.
-This has been discussed from the mlock/munlock perspective in the respective
-sections above. Both processes (migration and m[un]locking) hold the page
-locked. This provides the first level of synchronization. Page migration
-zeros out the page_mapping of the old page before unlocking it, so m[un]lock
-can skip these pages by testing the page mapping under page lock.
-
-To complete page migration, we place the new and old pages back onto the LRU
-after dropping the page lock. The "unneeded" page - old page on success, new
-page on failure - will be freed when the reference count held by the migration
-process is released. To ensure that we don't strand pages on the unevictable
-list because of a race between munlock and migration, page migration uses the
-putback_lru_page() function to add migrated pages back to the LRU.
-
-
-mmap(MAP_LOCKED) SYSTEM CALL HANDLING
--------------------------------------
+This is fine, because we'll catch it later when/if vmscan tries to reclaim the
+page. This should be relatively rare.
+
+Mlocked Pages: Migrating Them...
+
+A page that is being migrated has been isolated from the lru lists and is
+held locked across unmapping of the page, updating the page's mapping
+[address_space] entry and copying the contents and state, until the
+page table entry has been replaced with an entry that refers to the new
+page. Linux supports migration of mlocked pages and other unevictable
+pages. This involves simply moving the PageMlocked and PageUnevictable states
+from the old page to the new page.
+
+Note that page migration can race with mlocking or munlocking of the same
+page. This has been discussed from the mlock/munlock perspective in the
+respective sections above. Both processes [migration, m[un]locking] hold
+the page locked. This provides the first level of synchronization. Page
+migration zeros out the page_mapping of the old page before unlocking it,
+so m[un]lock can skip these pages by testing the page mapping under page
+lock.
+
+When completing page migration, we place the new and old pages back onto the
+lru after dropping the page lock. The "unneeded" page--old page on success,
+new page on failure--will be freed when the reference count held by the
+migration process is released. To ensure that we don't strand pages on the
+unevictable list because of a race between munlock and migration, page
+migration uses the putback_lru_page() function to add migrated pages back to
+the lru.
+
+
+Mlocked Pages: mmap(MAP_LOCKED) System Call Handling
In addition to the mlock()/mlockall() system calls, an application can request
-that a region of memory be mlocked supplying the MAP_LOCKED flag to the mmap()
+that a region of memory be mlocked using the MAP_LOCKED flag with the mmap()
call.
Furthermore, any mmap() call or brk() call that expands the heap by a task that has previously called mlockall() with the MCL_FUTURE flag will result -in the newly mapped memory being mlocked.  Before the unevictable/mlock -changes, the kernel simply called make_pages_present() to allocate pages and -populate the page table. +in the newly mapped memory being mlocked.  Before the unevictable/mlock changes, +the kernel simply called make_pages_present() to allocate pages and populate +the page table. To mlock a range of memory under the unevictable/mlock infrastructure, the mmap() handler and task address space expansion functions call mlock_vma_pages_range() specifying the vma and the address range to mlock. -mlock_vma_pages_range() filters VMAs like mlock_fixup(), as described above in -"Filtering Special VMAs".  It will clear the VM_LOCKED flag, which will have -already been set by the caller, in filtered VMAs.  Thus these VMA's need not be -visited for munlock when the region is unmapped. +mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in +"Mlocked Pages:  Filtering Vmas".  It will clear the VM_LOCKED flag, which will +have already been set by the caller, in filtered vmas.  Thus these vmas need +not be visited for munlock when the region is unmapped. -For "normal" VMAs, mlock_vma_pages_range() calls __mlock_vma_pages_range() to +For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to fault/allocate the pages and mlock them.  Again, like mlock_fixup(), mlock_vma_pages_range() downgrades the mmap semaphore to read mode before -attempting to fault/allocate and mlock the pages and "upgrades" the semaphore +attempting to fault/allocate and mlock the pages, and "upgrades" the semaphore back to write mode before returning. -The callers of mlock_vma_pages_range() will have already added the memory range -to be mlocked to the task's "locked_vm".  To account for filtered VMAs, +The callers of mlock_vma_pages_range() will have already added the memory +range to be mlocked to the task's "locked_vm".  To account for filtered vmas, mlock_vma_pages_range() returns the number of pages NOT mlocked.  All of the -callers then subtract a non-negative return value from the task's locked_vm.  A -negative return value represent an error - for example, from get_user_pages() -attempting to fault in a VMA with PROT_NONE access.  In this case, we leave the -memory range accounted as locked_vm, as the protections could be changed later -and pages allocated into that region. +callers then subtract a non-negative return value from the task's locked_vm. +A negative return value represents an error--for example, from get_user_pages() +attempting to fault in a vma with PROT_NONE access.  In this case, we leave +the memory range accounted as locked_vm, as the protections could be changed +later and pages allocated into that region. -munmap()/exit()/exec() SYSTEM CALL HANDLING ------------------------------------------- +Mlocked Pages:  munmap()/exit()/exec() System Call Handling When unmapping an mlocked region of memory, whether by an explicit call to munmap() or via an internal unmap from exit() or exec() processing, we must -munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages. +munlock the pages if we're removing the last VM_LOCKED vma that maps the pages. Before the unevictable/mlock changes, mlocking did not mark the pages in any way, so unmapping them required no processing.
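For orientation, here is a minimal userspace sketch of the operations these two sections describe--an mmap(MAP_LOCKED) mapping, an mlockall(MCL_FUTURE) request, and the munmap() that triggers the munlock processing.  All calls are standard Linux/POSIX; error handling is omitted for brevity.

	#define _GNU_SOURCE	/* MAP_LOCKED/MAP_ANONYMOUS on older glibc */
	#include <stddef.h>
	#include <sys/mman.h>

	int main(void)
	{
		/* Map and mlock a region in a single call. */
		void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED,
				 -1, 0);

		/* Every mapping created from here on--including brk()/heap
		   growth--will be mlocked as it is created. */
		mlockall(MCL_FUTURE);

		/* Unmapping the last VM_LOCKED vma covering these pages is
		   what invokes the munlock processing described below. */
		munmap(buf, 4096);
		return 0;
	}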
To munlock a range of memory under the unevictable/mlock infrastructure, the -munmap() handler and task address space call tear down function +munmap() handler and task address space tear down function call munlock_vma_pages_all().  The name reflects the observation that one always -specifies the entire VMA range when munlock()ing during unmap of a region. -Because of the VMA filtering when mlocking() regions, only "normal" VMAs that +specifies the entire vma range when munlock()ing during unmap of a region. +Because of the vma filtering when mlocking() regions, only "normal" vmas that actually contain mlocked pages will be passed to munlock_vma_pages_all(). -munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup() +munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup() for the munlock case, calls __munlock_vma_pages_range() to walk the page table -for the VMA's memory range and munlock_vma_page() each resident page mapped by -the VMA.  This effectively munlocks the page, only if this is the last -VM_LOCKED VMA that maps the page. +for the vma's memory range and munlock_vma_page() each resident page mapped by +the vma.  This effectively munlocks the page, only if this is the last +VM_LOCKED vma that maps the page. + +Mlocked Pages:  try_to_unmap() -try_to_unmap() -------------- +[Note: the code changes represented by this section are really quite small +compared to the text needed to describe what is happening and why, and to +discuss the implications.] -Pages can, of course, be mapped into multiple VMAs.  Some of these VMAs may +Pages can, of course, be mapped into multiple vmas.  Some of these vmas may have VM_LOCKED flag set.  It is possible for a page mapped into one or more -VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one -of the active or inactive LRU lists.  This could happen if, for example, a task -in the process of munlocking the page could not isolate the page from the LRU. -As a result, vmscan/shrink_page_list() might encounter such a page as described -in section "vmscan's handling of unevictable pages".  To handle this situation, -try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse -map. +VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one +of the active or inactive LRU lists.  This could happen if, for example, a +task in the process of munlock()ing the page could not isolate the page from +the LRU.  As a result, vmscan/shrink_page_list() might encounter such a page +as described in "Unevictable Pages and Vmscan [shrink_*_list()]".  To +handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED +vmas while it is walking a page's reverse map. try_to_unmap() is always called, by either vmscan for reclaim or for page -migration, with the argument page locked and isolated from the LRU.  Separate -functions handle anonymous and mapped file pages, as these types of pages have -different reverse map mechanisms. - - (*) try_to_unmap_anon() - - To unmap anonymous pages, each VMA in the list anchored in the anon_vma - must be visited - at least until a VM_LOCKED VMA is encountered.  If the - page is being unmapped for migration, VM_LOCKED VMAs do not stop the - process because mlocked pages are migratable.  However, for reclaim, if - the page is mapped into a VM_LOCKED VMA, the scan stops. - - try_to_unmap_anon() attempts to acquire in read mode the mmap semphore of - the mm_struct to which the VMA belongs.
If this is successful, it will - mlock the page via mlock_vma_page() - we wouldn't have gotten to - try_to_unmap_anon() if the page were already mlocked - and will return - SWAP_MLOCK, indicating that the page is unevictable. - - If the mmap semaphore cannot be acquired, we are not sure whether the page - is really unevictable or not. In this case, try_to_unmap_anon() will - return SWAP_AGAIN. - - (*) try_to_unmap_file() - linear mappings - - Unmapping of a mapped file page works the same as for anonymous mappings, - except that the scan visits all VMAs that map the page's index/page offset - in the page's mapping's reverse map priority search tree. It also visits - each VMA in the page's mapping's non-linear list, if the list is - non-empty. - - As for anonymous pages, on encountering a VM_LOCKED VMA for a mapped file - page, try_to_unmap_file() will attempt to acquire the associated - mm_struct's mmap semaphore to mlock the page, returning SWAP_MLOCK if this - is successful, and SWAP_AGAIN, if not. - - (*) try_to_unmap_file() - non-linear mappings - - If a page's mapping contains a non-empty non-linear mapping VMA list, then - try_to_un{map|lock}() must also visit each VMA in that list to determine - whether the page is mapped in a VM_LOCKED VMA. Again, the scan must visit - all VMAs in the non-linear list to ensure that the pages is not/should not - be mlocked. - - If a VM_LOCKED VMA is found in the list, the scan could terminate. - However, there is no easy way to determine whether the page is actually - mapped in a given VMA - either for unmapping or testing whether the - VM_LOCKED VMA actually pins the page. - - try_to_unmap_file() handles non-linear mappings by scanning a certain - number of pages - a "cluster" - in each non-linear VMA associated with the - page's mapping, for each file mapped page that vmscan tries to unmap. If - this happens to unmap the page we're trying to unmap, try_to_unmap() will - notice this on return (page_mapcount(page) will be 0) and return - SWAP_SUCCESS. Otherwise, it will return SWAP_AGAIN, causing vmscan to - recirculate this page. We take advantage of the cluster scan in - try_to_unmap_cluster() as follows: - - For each non-linear VMA, try_to_unmap_cluster() attempts to acquire the - mmap semaphore of the associated mm_struct for read without blocking. - - If this attempt is successful and the VMA is VM_LOCKED, - try_to_unmap_cluster() will retain the mmap semaphore for the scan; - otherwise it drops it here. - - Then, for each page in the cluster, if we're holding the mmap semaphore - for a locked VMA, try_to_unmap_cluster() calls mlock_vma_page() to - mlock the page. This call is a no-op if the page is already locked, - but will mlock any pages in the non-linear mapping that happen to be - unlocked. - - If one of the pages so mlocked is the page passed in to try_to_unmap(), - try_to_unmap_cluster() will return SWAP_MLOCK, rather than the default - SWAP_AGAIN. This will allow vmscan to cull the page, rather than - recirculating it on the inactive list. - - Again, if try_to_unmap_cluster() cannot acquire the VMA's mmap sem, it - returns SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED - VMA, but couldn't be mlocked. - - -try_to_munlock() REVERSE MAP SCAN ---------------------------------- - - [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the - page_referenced() reverse map walker. 
- -When munlock_vma_page() [see section "munlock()/munlockall() System Call -Handling" above] tries to munlock a page, it needs to determine whether or not -the page is mapped by any VM_LOCKED VMA without actually attempting to unmap -all PTEs from the page.  For this purpose, the unevictable/mlock infrastructure -introduced a variant of try_to_unmap() called try_to_munlock(). +migration, with the argument page locked and isolated from the LRU.  BUG_ON() +assertions enforce this requirement.  Separate functions handle anonymous and +mapped file pages, as these types of pages have different reverse map +mechanisms. + + try_to_unmap_anon() + +To unmap anonymous pages, each vma in the list anchored in the anon_vma must be +visited--at least until a VM_LOCKED vma is encountered.  If the page is being +unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked +pages are migratable.  However, for reclaim, if the page is mapped into a +VM_LOCKED vma, the scan stops.  try_to_unmap() attempts to acquire the mmap +semaphore of the mm_struct to which the vma belongs in read mode.  If this is +successful, try_to_unmap() will mlock the page via mlock_vma_page()--we +wouldn't have gotten to try_to_unmap() if the page were already mlocked--and +will return SWAP_MLOCK, indicating that the page is unevictable.  If the +mmap semaphore cannot be acquired, we are not sure whether the page is really +unevictable or not.  In this case, try_to_unmap() will return SWAP_AGAIN. + + try_to_unmap_file() -- linear mappings + +Unmapping of a mapped file page works the same, except that the scan visits +all vmas that map the page's index/page offset in the page's mapping's +reverse map priority search tree.  It must also visit each vma in the page's +mapping's non-linear list, if the list is non-empty.  As for anonymous pages, +on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will +attempt to acquire the associated mm_struct's mmap semaphore to mlock the page, +returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not. + + try_to_unmap_file() -- non-linear mappings + +If a page's mapping contains a non-empty non-linear mapping vma list, then +try_to_un{map|lock}() must also visit each vma in that list to determine +whether the page is mapped in a VM_LOCKED vma.  Again, the scan must visit +all vmas in the non-linear list to ensure that the page is not/should not be +mlocked.  If a VM_LOCKED vma is found in the list, the scan could terminate. +However, there is no easy way to determine whether the page is actually mapped +in a given vma--either for unmapping or testing whether the VM_LOCKED vma +actually pins the page. + +So, try_to_unmap_file() handles non-linear mappings by scanning a certain +number of pages--a "cluster"--in each non-linear vma associated with the page's +mapping, for each file mapped page that vmscan tries to unmap.  If this happens +to unmap the page we're trying to unmap, try_to_unmap() will notice this on +return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS.  Otherwise, it +will return SWAP_AGAIN, causing vmscan to recirculate this page.  We take +advantage of the cluster scan in try_to_unmap_cluster() as follows: + +For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap +semaphore of the associated mm_struct for read without blocking.  If this +attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will +retain the mmap semaphore for the scan; otherwise it drops it here.
Then, +for each page in the cluster, if we're holding the mmap semaphore for a locked +vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page.  This +call is a no-op if the page is already locked, but will mlock any pages in +the non-linear mapping that happen to be unlocked.  If one of the pages so +mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will +return SWAP_MLOCK, rather than the default SWAP_AGAIN.  This will allow vmscan +to cull the page, rather than recirculating it on the inactive list.  Again, +if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns +SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but +couldn't be mlocked. + + +Mlocked Pages:  try_to_munlock() Reverse Map Scan + +TODO/FIXME: a better name might be page_mlocked()--analogous to the +page_referenced() reverse map walker. + +When munlock_vma_page()--see "Mlocked Pages:  munlock()/munlockall() +System Call Handling" above--tries to munlock a page, it needs to +determine whether or not the page is mapped by any VM_LOCKED vma, without +actually attempting to unmap all ptes from the page.  For this purpose, the +unevictable/mlock infrastructure introduced a variant of try_to_unmap() called +try_to_munlock(). try_to_munlock() calls the same functions as try_to_unmap() for anonymous and mapped file pages with an additional argument specifying unlock versus unmap processing.  Again, these functions walk the respective reverse maps looking -for VM_LOCKED VMAs.  When such a VMA is found for anonymous pages and file +for VM_LOCKED vmas.  When such a vma is found for anonymous pages and file pages mapped in linear VMAs, as in the try_to_unmap() case, the functions attempt to acquire the associated mmap semaphore, mlock the page via mlock_vma_page() and return SWAP_MLOCK.  This effectively undoes the pre-clearing of the page's PG_mlocked done by munlock_vma_page().
+If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap +semaphore, it will return SWAP_AGAIN.  This will allow shrink_page_list() +to recycle the page on the inactive list and hope that it has better luck +with the page next time. + +For file pages mapped into non-linear vmas, the try_to_munlock() logic works +slightly differently.  On encountering a VM_LOCKED non-linear vma that might +map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking +the page.  munlock_vma_page() will just leave the page unlocked and let +vmscan deal with it--the usual fallback position. + +Note that try_to_munlock()'s reverse map walk must visit every vma in a page's +reverse map to determine that a page is NOT mapped into any VM_LOCKED vma. +However, the scan can terminate when it encounters a VM_LOCKED vma and can +successfully acquire the vma's mmap semaphore for read and mlock the page. +Although try_to_munlock() can be called many [very many!] times when +munlock()ing a large region or tearing down a large address space that has been +mlocked via mlockall(), overall this is a fairly rare event. + +Mlocked Pages:  Page Reclaim in shrink_*_list() + +shrink_active_list() culls any obviously unevictable pages--i.e., +!page_evictable(page, NULL)--diverting these to the unevictable lru +list.  However, shrink_active_list() only sees unevictable pages that +made it onto the active/inactive lru lists.  Note that these pages do not +have PageUnevictable set--otherwise, they would be on the unevictable list and +shrink_active_list would never see them. Some examples of these unevictable pages on the LRU lists are: - (1) ramfs pages that have been placed on the LRU lists when first allocated. - - (2) SHM_LOCK'd shared memory pages.  shmctl(SHM_LOCK) does not attempt to - allocate or fault in the pages in the shared memory region.  This happens - when an application accesses the page the first time after SHM_LOCK'ing - the segment. +1) ramfs pages that have been placed on the lru lists when first allocated. - (3) mlocked pages that could not be isolated from the LRU and moved to the - unevictable list in mlock_vma_page(). +2) SHM_LOCKed shared memory pages.  shmctl(SHM_LOCK) does not attempt to + allocate or fault in the pages in the shared memory region.  This happens + when an application accesses the page the first time after SHM_LOCKing + the segment. - (4) Pages mapped into multiple VM_LOCKED VMAs, but try_to_munlock() couldn't - acquire the VMA's mmap semaphore to test the flags and set PageMlocked. - munlock_vma_page() was forced to let the page back on to the normal LRU - list for vmscan to handle. +3) Mlocked pages that could not be isolated from the lru and moved to the + unevictable list in mlock_vma_page(). -shrink_inactive_list() also diverts any unevictable pages that it finds on the -inactive lists to the appropriate zone's unevictable list. +4) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't + acquire the vma's mmap semaphore to test the flags and set PageMlocked. + munlock_vma_page() was forced to let the page back on to the normal + LRU list for vmscan to handle. -shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd -after shrink_active_list() had moved them to the inactive list, or pages mapped -into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to -recheck via try_to_munlock().  shrink_inactive_list() won't notice the latter, -but will pass on to shrink_page_list().
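+As an illustrative sketch only--not the kernel's exact code--the culling
+test described above amounts to the following inside the lru scan loops:
+
+	if (!page_evictable(page, NULL)) {
+		/* putback_lru_page() recognizes an unevictable page and
+		 * places it on the zone's unevictable list rather than
+		 * back on an active/inactive list. */
+		putback_lru_page(page);
+		continue;	/* on to the next page in the scan */
+	}
+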
+shrink_inactive_list() also culls any unevictable pages that it finds on +the inactive lists, again diverting them to the appropriate zone's unevictable +lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became +SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or +pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from +the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice +the latter, but will pass on to shrink_page_list(). shrink_page_list() again culls obviously unevictable pages that it could encounter for similar reason to shrink_inactive_list(). Pages mapped into -VM_LOCKED VMAs but without PG_mlocked set will make it all the way to +VM_LOCKED vmas but without PG_mlocked set will make it all the way to try_to_unmap(). shrink_page_list() will divert them to the unevictable list when try_to_unmap() returns SWAP_MLOCK, as discussed above. diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 29d74f47ba86..ca2997a45766 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -461,7 +461,7 @@ F: arch/x86/include/asm/amd_iommu*.h AMD MICROCODE UPDATE SUPPORT P: Andreas Herrmann -M: andreas.herrmann3@amd.com +M: andeas.herrmann3@amd.com L: amd64-microcode@amd64.org S: Supported F: arch/x86/kernel/microcode_amd.c @@ -1894,7 +1894,7 @@ F: fs/ecryptfs/ EDAC-CORE P: Doug Thompson M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Supported F: Documentation/edac.txt @@ -1906,7 +1906,7 @@ P: Mark Gross P: Doug Thompson M: mark.gross@intel.com M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e752x_edac.c @@ -1914,7 +1914,7 @@ F: drivers/edac/e752x_edac.c EDAC-E7XXX P: Doug Thompson M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e7xxx_edac.c @@ -1922,7 +1922,7 @@ F: drivers/edac/e7xxx_edac.c EDAC-I82443BXGX P: Tim Small M: tim@buttersideup.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82443bxgx_edac.c @@ -1930,7 +1930,7 @@ F: drivers/edac/i82443bxgx_edac.c EDAC-I3000 P: Jason Uhlenkott M: juhlenko@akamai.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i3000_edac.c @@ -1938,7 +1938,7 @@ F: drivers/edac/i3000_edac.c EDAC-I5000 P: Doug Thompson M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5000_edac.c @@ -1946,7 +1946,7 @@ F: drivers/edac/i5000_edac.c EDAC-I5400 P: Mauro Carvalho Chehab M: mchehab@redhat.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5400_edac.c @@ -1956,7 +1956,7 @@ P: Ranganathan Desikan P: Arvind R. 
M: rdesikan@jetzbroadband.com M: arvind@acarlab.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82975x_edac.c @@ -1964,7 +1964,7 @@ F: drivers/edac/i82975x_edac.c EDAC-PASEMI P: Egor Martovetsky M: egor@pasemi.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/pasemi_edac.c @@ -1972,7 +1972,7 @@ F: drivers/edac/pasemi_edac.c EDAC-R82600 P: Tim Small M: tim@buttersideup.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/r82600_edac.c @@ -2592,8 +2592,8 @@ S: Maintained F: fs/hpfs/ HSO 3G MODEM DRIVER -P: Jan Dumon -M: j.dumon@option.com +P: Denis Joseph Barrow +M: d.barow@option.com W: http://www.pharscape.org S: Maintained F: drivers/net/usb/hso.c @@ -4978,8 +4978,8 @@ S: Maintained for 2.6. F: Documentation/sgi-visws.txt SGI XP/XPC/XPNET DRIVER -P: Robin Holt -M: holt@sgi.com +P: Dean Nelson +M: dcn@sgi.com S: Maintained F: drivers/misc/sgi-xp/ diff --git a/trunk/arch/arm/mach-omap2/usb-musb.c b/trunk/arch/arm/mach-omap2/usb-musb.c index 34a56a136efd..fc74e913c415 100644 --- a/trunk/arch/arm/mach-omap2/usb-musb.c +++ b/trunk/arch/arm/mach-omap2/usb-musb.c @@ -131,14 +131,14 @@ static struct musb_hdrc_platform_data musb_plat = { .power = 50, /* up to 100 mA */ }; -static u64 musb_dmamask = DMA_BIT_MASK(32); +static u64 musb_dmamask = DMA_32BIT_MASK; static struct platform_device musb_device = { .name = "musb_hdrc", .id = -1, .dev = { .dma_mask = &musb_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), + .coherent_dma_mask = DMA_32BIT_MASK, .platform_data = &musb_plat, }, .num_resources = ARRAY_SIZE(musb_resources), @@ -146,14 +146,14 @@ static struct platform_device musb_device = { }; #ifdef CONFIG_NOP_USB_XCEIV -static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32); +static u64 nop_xceiv_dmamask = DMA_32BIT_MASK; static struct platform_device nop_xceiv_device = { .name = "nop_usb_xceiv", .id = -1, .dev = { .dma_mask = &nop_xceiv_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), + .coherent_dma_mask = DMA_32BIT_MASK, .platform_data = NULL, }, }; diff --git a/trunk/arch/ia64/kernel/pci-swiotlb.c b/trunk/arch/ia64/kernel/pci-swiotlb.c index 285aae8431c6..573f02c39a00 100644 --- a/trunk/arch/ia64/kernel/pci-swiotlb.c +++ b/trunk/arch/ia64/kernel/pci-swiotlb.c @@ -16,7 +16,7 @@ EXPORT_SYMBOL(swiotlb); static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - if (dev->coherent_dma_mask != DMA_BIT_MASK(64)) + if (dev->coherent_dma_mask != DMA_64BIT_MASK) gfp |= GFP_DMA; return swiotlb_alloc_coherent(dev, size, dma_handle, gfp); } diff --git a/trunk/arch/x86/include/asm/required-features.h b/trunk/arch/x86/include/asm/required-features.h index a4737dddfd58..d5cd6c586881 100644 --- a/trunk/arch/x86/include/asm/required-features.h +++ b/trunk/arch/x86/include/asm/required-features.h @@ -50,7 +50,7 @@ #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_PGE 0 +#define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) diff --git a/trunk/arch/x86/include/asm/xen/page.h 
b/trunk/arch/x86/include/asm/xen/page.h index 018a0a400799..1a918dde46b5 100644 --- a/trunk/arch/x86/include/asm/xen/page.h +++ b/trunk/arch/x86/include/asm/xen/page.h @@ -124,8 +124,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) -#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) +#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) static inline unsigned long pte_mfn(pte_t pte) diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 837c2c4cc203..3e3cd3db7a0c 100644 --- a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -277,7 +277,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int perf_percent; unsigned int retval; - if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) + if (smp_call_function_single(cpu, read_measured_perf_ctrs, &cur, 1)) return 0; cur.aperf.whole = readin.aperf.whole - diff --git a/trunk/arch/x86/xen/enlighten.c b/trunk/arch/x86/xen/enlighten.c index f09e8c36ee80..82cd39a6cbd3 100644 --- a/trunk/arch/x86/xen/enlighten.c +++ b/trunk/arch/x86/xen/enlighten.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -169,23 +168,21 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } -static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; -static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; - static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { - unsigned maskecx = ~0; unsigned maskedx = ~0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. 
*/ - if (*ax == 1) { - maskecx = cpuid_leaf1_ecx_mask; - maskedx = cpuid_leaf1_edx_mask; - } + if (*ax == 1) + maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ + (1 << X86_FEATURE_ACPI) | /* disable ACPI */ + (1 << X86_FEATURE_MCE) | /* disable MCE */ + (1 << X86_FEATURE_MCA) | /* disable MCA */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ asm(XEN_EMULATE_PREFIX "cpuid" : "=a" (*ax), @@ -193,43 +190,9 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=c" (*cx), "=d" (*dx) : "0" (*ax), "2" (*cx)); - - *cx &= maskecx; *dx &= maskedx; } -static __init void xen_init_cpuid_mask(void) -{ - unsigned int ax, bx, cx, dx; - - cpuid_leaf1_edx_mask = - ~((1 << X86_FEATURE_MCE) | /* disable MCE */ - (1 << X86_FEATURE_MCA) | /* disable MCA */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ - - if (!xen_initial_domain()) - cpuid_leaf1_edx_mask &= - ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ - (1 << X86_FEATURE_ACPI)); /* disable ACPI */ - - ax = 1; - xen_cpuid(&ax, &bx, &cx, &dx); - - /* cpuid claims we support xsave; try enabling it to see what happens */ - if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { - unsigned long cr4; - - set_in_cr4(X86_CR4_OSXSAVE); - - cr4 = read_cr4(); - - if ((cr4 & X86_CR4_OSXSAVE) == 0) - cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); - - clear_in_cr4(X86_CR4_OSXSAVE); - } -} - static void xen_set_debugreg(int reg, unsigned long val) { HYPERVISOR_set_debugreg(reg, val); @@ -321,11 +284,12 @@ static void xen_set_ldt(const void *addr, unsigned entries) static void xen_load_gdt(const struct desc_ptr *dtr) { + unsigned long *frames; unsigned long va = dtr->address; unsigned int size = dtr->size + 1; unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - unsigned long frames[pages]; int f; + struct multicall_space mcs; /* A GDT can be up to 64k in size, which corresponds to 8192 8-byte entries, or 16 4k pages.. 
*/ @@ -333,26 +297,19 @@ static void xen_load_gdt(const struct desc_ptr *dtr) BUG_ON(size > 65536); BUG_ON(va & ~PAGE_MASK); - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - int level; - pte_t *ptep = lookup_address(va, &level); - unsigned long pfn, mfn; - void *virt; - - BUG_ON(ptep == NULL); + mcs = xen_mc_entry(sizeof(*frames) * pages); + frames = mcs.args; - pfn = pte_pfn(*ptep); - mfn = pfn_to_mfn(pfn); - virt = __va(PFN_PHYS(pfn)); - - frames[f] = mfn; + for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { + frames[f] = arbitrary_virt_to_mfn((void *)va); make_lowmem_page_readonly((void *)va); - make_lowmem_page_readonly(virt); + make_lowmem_page_readonly(mfn_to_virt(frames[f])); } - if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) - BUG(); + MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); + + xen_mc_issue(PARAVIRT_LAZY_CPU); } static void load_TLS_descriptor(struct thread_struct *t, @@ -428,7 +385,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { - if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) + if (val->type != 0xf && val->type != 0xe) return 0; info->vector = vector; @@ -436,8 +393,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, info->cs = gate_segment(*val); info->flags = val->dpl; /* interrupt gates clear IF */ - if (val->type == GATE_INTERRUPT) - info->flags |= 1 << 2; + if (val->type == 0xe) + info->flags |= 4; return 1; } @@ -915,6 +872,7 @@ static const struct machine_ops __initdata xen_machine_ops = { .emergency_restart = xen_emergency_restart, }; + /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -939,8 +897,6 @@ asmlinkage void __init xen_start_kernel(void) xen_init_irq_ops(); - xen_init_cpuid_mask(); - #ifdef CONFIG_X86_LOCAL_APIC /* * set up the basic apic ops. @@ -982,11 +938,6 @@ asmlinkage void __init xen_start_kernel(void) if (!xen_initial_domain()) __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); -#ifdef CONFIG_X86_64 - /* Work out if we support NX */ - check_efer(); -#endif - /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. 
*/ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; diff --git a/trunk/arch/x86/xen/mmu.c b/trunk/arch/x86/xen/mmu.c index 9842b1212407..2a81838a9ab7 100644 --- a/trunk/arch/x86/xen/mmu.c +++ b/trunk/arch/x86/xen/mmu.c @@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -static void __init xen_build_mfn_list_list(void) +void xen_setup_mfn_list_list(void) { unsigned pfn, idx; @@ -198,10 +198,7 @@ static void __init xen_build_mfn_list_list(void) unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); } -} -void xen_setup_mfn_list_list(void) -{ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = @@ -221,8 +218,6 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_top[topidx] = &mfn_list[pfn]; } - - xen_build_mfn_list_list(); } unsigned long get_phys_to_machine(unsigned long pfn) @@ -238,74 +233,47 @@ unsigned long get_phys_to_machine(unsigned long pfn) } EXPORT_SYMBOL_GPL(get_phys_to_machine); -/* install a new p2m_top page */ -bool install_p2mtop_page(unsigned long pfn, unsigned long *p) +static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) { - unsigned topidx = p2m_top_index(pfn); - unsigned long **pfnp, *mfnp; + unsigned long *p; unsigned i; - pfnp = &p2m_top[topidx]; - mfnp = &p2m_top_mfn[topidx]; + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) p[i] = INVALID_P2M_ENTRY; - if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { - *mfnp = virt_to_mfn(p); - return true; - } - - return false; -} - -static void alloc_p2m(unsigned long pfn) -{ - unsigned long *p; - - p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); - BUG_ON(p == NULL); - - if (!install_p2mtop_page(pfn, p)) + if (cmpxchg(pp, p2m_missing, p) != p2m_missing) free_page((unsigned long)p); + else + *mfnp = virt_to_mfn(p); } -/* Try to install p2m mapping; fail if intermediate bits missing */ -bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) { unsigned topidx, idx; + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { BUG_ON(mfn != INVALID_P2M_ENTRY); - return true; + return; } topidx = p2m_top_index(pfn); if (p2m_top[topidx] == p2m_missing) { + /* no need to allocate a page to store an invalid entry */ if (mfn == INVALID_P2M_ENTRY) - return true; - return false; + return; + alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); } idx = p2m_index(pfn); p2m_top[topidx][idx] = mfn; - - return true; -} - -void set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } - - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { - alloc_p2m(pfn); - - if (!__set_phys_to_machine(pfn, mfn)) - BUG(); - } } unsigned long arbitrary_virt_to_mfn(void *vaddr) @@ -1019,7 +987,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, return 0; } -static void __init xen_mark_init_mm_pinned(void) +void __init xen_mark_init_mm_pinned(void) { xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1302,8 +1270,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, } *args; struct multicall_space mcs; - if 
(cpumask_empty(cpus)) - return; /* nothing to do */ + BUG_ON(cpumask_empty(cpus)); + BUG_ON(!mm); mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; @@ -1470,29 +1438,10 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) } #endif -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) -{ - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); -} - /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) { -#ifdef CONFIG_FLATMEM - BUG_ON(mem_map); /* should only be used early */ -#endif - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); -} - -/* Used for pmd and pud */ -static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) -{ #ifdef CONFIG_FLATMEM BUG_ON(mem_map); /* should only be used early */ #endif @@ -1501,15 +1450,18 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) /* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static __init void xen_release_pte_init(unsigned long pfn) +static void xen_release_pte_init(unsigned long pfn) { - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static __init void xen_release_pmd_init(unsigned long pfn) +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) { - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); } /* This needs to make sure the new pte page is pinned iff its being @@ -1821,9 +1773,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #ifdef CONFIG_X86_LOCAL_APIC case FIX_APIC_BASE: /* maps dummy local APIC */ #endif - case FIX_TEXT_POKE0: - case FIX_TEXT_POKE1: - /* All local page mappings */ pte = pfn_pte(phys, prot); break; @@ -1870,6 +1819,7 @@ __init void xen_post_allocator_init(void) xen_mark_init_mm_pinned(); } + const struct pv_mmu_ops xen_mmu_ops __initdata = { .pagetable_setup_start = xen_pagetable_setup_start, .pagetable_setup_done = xen_pagetable_setup_done, @@ -1893,9 +1843,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .alloc_pte = xen_alloc_pte_init, .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pmd_init, + .alloc_pmd = xen_alloc_pte_init, .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pmd_init, + .release_pmd = xen_release_pte_init, #ifdef CONFIG_HIGHPTE .kmap_atomic_pte = xen_kmap_atomic_pte, @@ -1933,8 +1883,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, - .alloc_pud = xen_alloc_pmd_init, - .release_pud = xen_release_pmd_init, + .alloc_pud = xen_alloc_pte_init, + .release_pud = xen_release_pte_init, #endif /* PAGETABLE_LEVELS == 4 */ .activate_mm = xen_activate_mm, diff --git a/trunk/arch/x86/xen/mmu.h b/trunk/arch/x86/xen/mmu.h index da7302624897..24d1b44a337d 100644 --- a/trunk/arch/x86/xen/mmu.h +++ b/trunk/arch/x86/xen/mmu.h @@ -11,9 +11,6 @@ enum pt_level { }; -bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); -bool install_p2mtop_page(unsigned long pfn, unsigned long *p); - void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); diff --git a/trunk/arch/x86/xen/smp.c 
b/trunk/arch/x86/xen/smp.c index 429834ec1687..585a6e330837 100644 --- a/trunk/arch/x86/xen/smp.c +++ b/trunk/arch/x86/xen/smp.c @@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) BUG_ON(rc); while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { - HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + HYPERVISOR_sched_op(SCHEDOP_yield, 0); barrier(); } @@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) /* Make sure other vcpus get a chance to run if they need to. */ for_each_cpu(cpu, mask) { if (xen_vcpu_stolen(cpu)) { - HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; } } diff --git a/trunk/arch/x86/xen/xen-ops.h b/trunk/arch/x86/xen/xen-ops.h index 20139464943c..2f5ef2632ea2 100644 --- a/trunk/arch/x86/xen/xen-ops.h +++ b/trunk/arch/x86/xen/xen-ops.h @@ -57,6 +57,8 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); +void xen_mark_init_mm_pinned(void); + void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP diff --git a/trunk/drivers/atm/solos-pci.c b/trunk/drivers/atm/solos-pci.c index 9359613addc5..be204308cc1b 100644 --- a/trunk/drivers/atm/solos-pci.c +++ b/trunk/drivers/atm/solos-pci.c @@ -1059,7 +1059,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) goto out; } - err = pci_set_dma_mask(dev, DMA_BIT_MASK(32)); + err = pci_set_dma_mask(dev, DMA_32BIT_MASK); if (err) { dev_warn(&dev->dev, "Failed to set 32-bit DMA mask\n"); goto out; diff --git a/trunk/drivers/block/cciss.c b/trunk/drivers/block/cciss.c index 4d4d5e0d3fa6..0ef6f08aa6ea 100644 --- a/trunk/drivers/block/cciss.c +++ b/trunk/drivers/block/cciss.c @@ -3505,7 +3505,7 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u /* The Inbound Post Queue only accepts 32-bit physical addresses for the CCISS commands, so they must be allocated from the lower 4GiB of memory. 
*/ - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); if (err) { iounmap(vaddr); return -ENOMEM; diff --git a/trunk/drivers/char/agp/intel-agp.c b/trunk/drivers/char/agp/intel-agp.c index 3686912427ba..9d9490e22e07 100644 --- a/trunk/drivers/char/agp/intel-agp.c +++ b/trunk/drivers/char/agp/intel-agp.c @@ -2131,8 +2131,6 @@ static const struct intel_driver_description { { PCI_DEVICE_ID_INTEL_82845G_HB, PCI_DEVICE_ID_INTEL_82845G_IG, 0, "830M", &intel_845_driver, &intel_830_driver }, { PCI_DEVICE_ID_INTEL_82850_HB, 0, 0, "i850", &intel_850_driver, NULL }, - { PCI_DEVICE_ID_INTEL_82854_HB, PCI_DEVICE_ID_INTEL_82854_IG, 0, "854", - &intel_845_driver, &intel_830_driver }, { PCI_DEVICE_ID_INTEL_82855PM_HB, 0, 0, "855PM", &intel_845_driver, NULL }, { PCI_DEVICE_ID_INTEL_82855GM_HB, PCI_DEVICE_ID_INTEL_82855GM_IG, 0, "855GM", &intel_845_driver, &intel_830_driver }, @@ -2357,7 +2355,6 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_82845_HB), ID(PCI_DEVICE_ID_INTEL_82845G_HB), ID(PCI_DEVICE_ID_INTEL_82850_HB), - ID(PCI_DEVICE_ID_INTEL_82854_HB), ID(PCI_DEVICE_ID_INTEL_82855PM_HB), ID(PCI_DEVICE_ID_INTEL_82855GM_HB), ID(PCI_DEVICE_ID_INTEL_82860_HB), diff --git a/trunk/drivers/char/sysrq.c b/trunk/drivers/char/sysrq.c index b0a6a3e51924..6de020d078e1 100644 --- a/trunk/drivers/char/sysrq.c +++ b/trunk/drivers/char/sysrq.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/trunk/drivers/edac/edac_core.h b/trunk/drivers/edac/edac_core.h index 6ad95c8d6363..28f2c3f959b5 100644 --- a/trunk/drivers/edac/edac_core.h +++ b/trunk/drivers/edac/edac_core.h @@ -767,19 +767,11 @@ static inline void pci_write_bits16(struct pci_dev *pdev, int offset, pci_write_config_word(pdev, offset, value); } -/* - * pci_write_bits32 - * - * edac local routine to do pci_write_config_dword, but adds - * a mask parameter. If mask is all ones, ignore the mask. 
- * Otherwise utilize the mask to isolate specified bits - * - * write all or some bits in a dword-register - */ +/* write all or some bits in a dword-register*/ static inline void pci_write_bits32(struct pci_dev *pdev, int offset, u32 value, u32 mask) { - if (mask != 0xffffffff) { + if (mask != 0xffff) { u32 buf; pci_read_config_dword(pdev, offset, &buf); diff --git a/trunk/drivers/edac/edac_device.c b/trunk/drivers/edac/edac_device.c index a7d2c717d033..ca9113e1c106 100644 --- a/trunk/drivers/edac/edac_device.c +++ b/trunk/drivers/edac/edac_device.c @@ -389,7 +389,7 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info */ static void edac_device_workq_function(struct work_struct *work_req) { - struct delayed_work *d_work = to_delayed_work(work_req); + struct delayed_work *d_work = (struct delayed_work *)work_req; struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work); mutex_lock(&device_ctls_mutex); diff --git a/trunk/drivers/edac/edac_mc.c b/trunk/drivers/edac/edac_mc.c index 335b7ebdb11c..25d66940b4fa 100644 --- a/trunk/drivers/edac/edac_mc.c +++ b/trunk/drivers/edac/edac_mc.c @@ -260,7 +260,7 @@ static int edac_mc_assert_error_check_and_clear(void) */ static void edac_mc_workq_function(struct work_struct *work_req) { - struct delayed_work *d_work = to_delayed_work(work_req); + struct delayed_work *d_work = (struct delayed_work *)work_req; struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); mutex_lock(&mem_ctls_mutex); diff --git a/trunk/drivers/edac/edac_pci.c b/trunk/drivers/edac/edac_pci.c index 30b585b1d60b..5b150aea703a 100644 --- a/trunk/drivers/edac/edac_pci.c +++ b/trunk/drivers/edac/edac_pci.c @@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(edac_pci_find); */ static void edac_pci_workq_function(struct work_struct *work_req) { - struct delayed_work *d_work = to_delayed_work(work_req); + struct delayed_work *d_work = (struct delayed_work *)work_req; struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work); int msec; unsigned long delay; diff --git a/trunk/drivers/hwmon/Kconfig b/trunk/drivers/hwmon/Kconfig index d73f5f473e38..0e8a9185f676 100644 --- a/trunk/drivers/hwmon/Kconfig +++ b/trunk/drivers/hwmon/Kconfig @@ -692,16 +692,6 @@ config SENSORS_PCF8591 These devices are hard to detect and rarely found on mainstream hardware. If unsure, say N. -config SENSORS_SHT15 - tristate "Sensiron humidity and temperature sensors. SHT15 and compat." - depends on GENERIC_GPIO - help - If you say yes here you get support for the Sensiron SHT10, SHT11, - SHT15, SHT71, SHT75 humidity and temperature sensors. - - This driver can also be built as a module. If so, the module - will be called sht15. - config SENSORS_SIS5595 tristate "Silicon Integrated Systems Corp. 
SiS5595" depends on PCI diff --git a/trunk/drivers/hwmon/Makefile b/trunk/drivers/hwmon/Makefile index 0ae26984ba45..1d3757837b4f 100644 --- a/trunk/drivers/hwmon/Makefile +++ b/trunk/drivers/hwmon/Makefile @@ -76,7 +76,6 @@ obj-$(CONFIG_SENSORS_MAX6650) += max6650.o obj-$(CONFIG_SENSORS_PC87360) += pc87360.o obj-$(CONFIG_SENSORS_PC87427) += pc87427.o obj-$(CONFIG_SENSORS_PCF8591) += pcf8591.o -obj-$(CONFIG_SENSORS_SHT15) += sht15.o obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o diff --git a/trunk/drivers/hwmon/hp_accel.c b/trunk/drivers/hwmon/hp_accel.c index abca7e9f953b..55d3dc565be6 100644 --- a/trunk/drivers/hwmon/hp_accel.c +++ b/trunk/drivers/hwmon/hp_accel.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/trunk/drivers/hwmon/sht15.c b/trunk/drivers/hwmon/sht15.c deleted file mode 100644 index 6cbdc2fea734..000000000000 --- a/trunk/drivers/hwmon/sht15.c +++ /dev/null @@ -1,692 +0,0 @@ -/* - * sht15.c - support for the SHT15 Temperature and Humidity Sensor - * - * Copyright (c) 2009 Jonathan Cameron - * - * Copyright (c) 2007 Wouter Horre - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Currently ignoring checksum on readings. - * Default resolution only (14bit temp, 12bit humidity) - * Ignoring battery status. - * Heater not enabled. - * Timings are all conservative. - * - * Data sheet available (1/2009) at - * http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf - * - * Regulator supply name = vcc - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define SHT15_MEASURE_TEMP 3 -#define SHT15_MEASURE_RH 5 - -#define SHT15_READING_NOTHING 0 -#define SHT15_READING_TEMP 1 -#define SHT15_READING_HUMID 2 - -/* Min timings in nsecs */ -#define SHT15_TSCKL 100 /* clock low */ -#define SHT15_TSCKH 100 /* clock high */ -#define SHT15_TSU 150 /* data setup time */ - -/** - * struct sht15_temppair - elements of voltage dependant temp calc - * @vdd: supply voltage in microvolts - * @d1: see data sheet - */ -struct sht15_temppair { - int vdd; /* microvolts */ - int d1; -}; - -/* Table 9 from data sheet - relates temperature calculation - * to supply voltage. - */ -static const struct sht15_temppair temppoints[] = { - { 2500000, -39400 }, - { 3000000, -39600 }, - { 3500000, -39700 }, - { 4000000, -39800 }, - { 5000000, -40100 }, -}; - -/** - * struct sht15_data - device instance specific data - * @pdata: platform data (gpio's etc) - * @read_work: bh of interrupt handler - * @wait_queue: wait queue for getting values from device - * @val_temp: last temperature value read from device - * @val_humid: last humidity value read from device - * @flag: status flag used to identify what the last request was - * @valid: are the current stored values valid (start condition) - * @last_updat: time of last update - * @read_lock: mutex to ensure only one read in progress - * at a time. 
- * @dev: associate device structure - * @hwmon_dev: device associated with hwmon subsystem - * @reg: associated regulator (if specified) - * @nb: notifier block to handle notifications of voltage changes - * @supply_uV: local copy of supply voltage used to allow - * use of regulator consumer if available - * @supply_uV_valid: indicates that an updated value has not yet - * been obtained from the regulator and so any calculations - * based upon it will be invalid. - * @update_supply_work: work struct that is used to update the supply_uV - * @interrupt_handled: flag used to indicate a hander has been scheduled - */ -struct sht15_data { - struct sht15_platform_data *pdata; - struct work_struct read_work; - wait_queue_head_t wait_queue; - uint16_t val_temp; - uint16_t val_humid; - u8 flag; - u8 valid; - unsigned long last_updat; - struct mutex read_lock; - struct device *dev; - struct device *hwmon_dev; - struct regulator *reg; - struct notifier_block nb; - int supply_uV; - int supply_uV_valid; - struct work_struct update_supply_work; - atomic_t interrupt_handled; -}; - -/** - * sht15_connection_reset() - reset the comms interface - * @data: sht15 specific data - * - * This implements section 3.4 of the data sheet - */ -static void sht15_connection_reset(struct sht15_data *data) -{ - int i; - gpio_direction_output(data->pdata->gpio_data, 1); - ndelay(SHT15_TSCKL); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - for (i = 0; i < 9; ++i) { - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - } -} -/** - * sht15_send_bit() - send an individual bit to the device - * @data: device state data - * @val: value of bit to be sent - **/ -static inline void sht15_send_bit(struct sht15_data *data, int val) -{ - - gpio_set_value(data->pdata->gpio_data, val); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); /* clock low time */ -} - -/** - * sht15_transmission_start() - specific sequence for new transmission - * - * @data: device state data - * Timings for this are not documented on the data sheet, so very - * conservative ones used in implementation. This implements - * figure 12 on the data sheet. 
- **/ -static void sht15_transmission_start(struct sht15_data *data) -{ - /* ensure data is high and output */ - gpio_direction_output(data->pdata->gpio_data, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_data, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_data, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); -} -/** - * sht15_send_byte() - send a single byte to the device - * @data: device state - * @byte: value to be sent - **/ -static void sht15_send_byte(struct sht15_data *data, u8 byte) -{ - int i; - for (i = 0; i < 8; i++) { - sht15_send_bit(data, !!(byte & 0x80)); - byte <<= 1; - } -} -/** - * sht15_wait_for_response() - checks for ack from device - * @data: device state - **/ -static int sht15_wait_for_response(struct sht15_data *data) -{ - gpio_direction_input(data->pdata->gpio_data); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - if (gpio_get_value(data->pdata->gpio_data)) { - gpio_set_value(data->pdata->gpio_sck, 0); - dev_err(data->dev, "Command not acknowledged\n"); - sht15_connection_reset(data); - return -EIO; - } - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - return 0; -} - -/** - * sht15_send_cmd() - Sends a command to the device. - * @data: device state - * @cmd: command byte to be sent - * - * On entry, sck is output low, data is output pull high - * and the interrupt disabled. - **/ -static int sht15_send_cmd(struct sht15_data *data, u8 cmd) -{ - int ret = 0; - sht15_transmission_start(data); - sht15_send_byte(data, cmd); - ret = sht15_wait_for_response(data); - return ret; -} -/** - * sht15_update_single_val() - get a new value from device - * @data: device instance specific data - * @command: command sent to request value - * @timeout_msecs: timeout after which comms are assumed - * to have failed are reset. - **/ -static inline int sht15_update_single_val(struct sht15_data *data, - int command, - int timeout_msecs) -{ - int ret; - ret = sht15_send_cmd(data, command); - if (ret) - return ret; - - gpio_direction_input(data->pdata->gpio_data); - atomic_set(&data->interrupt_handled, 0); - - enable_irq(gpio_to_irq(data->pdata->gpio_data)); - if (gpio_get_value(data->pdata->gpio_data) == 0) { - disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); - /* Only relevant if the interrupt hasn't occured. 
*/ - if (!atomic_read(&data->interrupt_handled)) - schedule_work(&data->read_work); - } - ret = wait_event_timeout(data->wait_queue, - (data->flag == SHT15_READING_NOTHING), - msecs_to_jiffies(timeout_msecs)); - if (ret == 0) {/* timeout occurred */ - disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data));; - sht15_connection_reset(data); - return -ETIME; - } - return 0; -} - -/** - * sht15_update_vals() - get updated readings from device if too old - * @data: device state - **/ -static int sht15_update_vals(struct sht15_data *data) -{ - int ret = 0; - int timeout = HZ; - - mutex_lock(&data->read_lock); - if (time_after(jiffies, data->last_updat + timeout) - || !data->valid) { - data->flag = SHT15_READING_HUMID; - ret = sht15_update_single_val(data, SHT15_MEASURE_RH, 160); - if (ret) - goto error_ret; - data->flag = SHT15_READING_TEMP; - ret = sht15_update_single_val(data, SHT15_MEASURE_TEMP, 400); - if (ret) - goto error_ret; - data->valid = 1; - data->last_updat = jiffies; - } -error_ret: - mutex_unlock(&data->read_lock); - - return ret; -} - -/** - * sht15_calc_temp() - convert the raw reading to a temperature - * @data: device state - * - * As per section 4.3 of the data sheet. - **/ -static inline int sht15_calc_temp(struct sht15_data *data) -{ - int d1 = 0; - int i; - - for (i = 1; i < ARRAY_SIZE(temppoints) - 1; i++) - /* Find pointer to interpolate */ - if (data->supply_uV > temppoints[i - 1].vdd) { - d1 = (data->supply_uV/1000 - temppoints[i - 1].vdd) - * (temppoints[i].d1 - temppoints[i - 1].d1) - / (temppoints[i].vdd - temppoints[i - 1].vdd) - + temppoints[i - 1].d1; - break; - } - - return data->val_temp*10 + d1; -} - -/** - * sht15_calc_humid() - using last temperature convert raw to humid - * @data: device state - * - * This is the temperature compensated version as per section 4.2 of - * the data sheet. - **/ -static inline int sht15_calc_humid(struct sht15_data *data) -{ - int RHlinear; /* milli percent */ - int temp = sht15_calc_temp(data); - - const int c1 = -4; - const int c2 = 40500; /* x 10 ^ -6 */ - const int c3 = 2800; /* x10 ^ -9 */ - - RHlinear = c1*1000 - + c2 * data->val_humid/1000 - + (data->val_humid * data->val_humid * c3)/1000000; - return (temp - 25000) * (10000 + 800 * data->val_humid) - / 1000000 + RHlinear; -} - -static ssize_t sht15_show_temp(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - int ret; - struct sht15_data *data = dev_get_drvdata(dev); - - /* Technically no need to read humidity as well */ - ret = sht15_update_vals(data); - - return ret ? ret : sprintf(buf, "%d\n", - sht15_calc_temp(data)); -} - -static ssize_t sht15_show_humidity(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - int ret; - struct sht15_data *data = dev_get_drvdata(dev); - - ret = sht15_update_vals(data); - - return ret ? 
ret : sprintf(buf, "%d\n", sht15_calc_humid(data)); - -} -static ssize_t show_name(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct platform_device *pdev = to_platform_device(dev); - return sprintf(buf, "%s\n", pdev->name); -} - -static SENSOR_DEVICE_ATTR(temp1_input, - S_IRUGO, sht15_show_temp, - NULL, 0); -static SENSOR_DEVICE_ATTR(humidity1_input, - S_IRUGO, sht15_show_humidity, - NULL, 0); -static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); -static struct attribute *sht15_attrs[] = { - &sensor_dev_attr_temp1_input.dev_attr.attr, - &sensor_dev_attr_humidity1_input.dev_attr.attr, - &dev_attr_name.attr, - NULL, -}; - -static const struct attribute_group sht15_attr_group = { - .attrs = sht15_attrs, -}; - -static irqreturn_t sht15_interrupt_fired(int irq, void *d) -{ - struct sht15_data *data = d; - /* First disable the interrupt */ - disable_irq_nosync(irq); - atomic_inc(&data->interrupt_handled); - /* Then schedule a reading work struct */ - if (data->flag != SHT15_READING_NOTHING) - schedule_work(&data->read_work); - return IRQ_HANDLED; -} - -/* Each byte of data is acknowledged by pulling the data line - * low for one clock pulse. - */ -static void sht15_ack(struct sht15_data *data) -{ - gpio_direction_output(data->pdata->gpio_data, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_data, 1); - - gpio_direction_input(data->pdata->gpio_data); -} -/** - * sht15_end_transmission() - notify device of end of transmission - * @data: device state - * - * This is basically a NAK. (single clock pulse, data high) - **/ -static void sht15_end_transmission(struct sht15_data *data) -{ - gpio_direction_output(data->pdata->gpio_data, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); -} - -static void sht15_bh_read_data(struct work_struct *work_s) -{ - int i; - uint16_t val = 0; - struct sht15_data *data - = container_of(work_s, struct sht15_data, - read_work); - /* Firstly, verify the line is low */ - if (gpio_get_value(data->pdata->gpio_data)) { - /* If not, then start the interrupt again - take care - here as it could have gone low in the meantime, so verify - it hasn't!
- */ - atomic_set(&data->interrupt_handled, 0); - enable_irq(gpio_to_irq(data->pdata->gpio_data)); - /* If still not occurred or another handler has been scheduled */ - if (gpio_get_value(data->pdata->gpio_data) - || atomic_read(&data->interrupt_handled)) - return; - } - /* Read the data back from the device */ - for (i = 0; i < 16; ++i) { - val <<= 1; - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - val |= !!gpio_get_value(data->pdata->gpio_data); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - if (i == 7) - sht15_ack(data); - } - /* Tell the device we are done */ - sht15_end_transmission(data); - - switch (data->flag) { - case SHT15_READING_TEMP: - data->val_temp = val; - break; - case SHT15_READING_HUMID: - data->val_humid = val; - break; - } - - data->flag = SHT15_READING_NOTHING; - wake_up(&data->wait_queue); -} - -static void sht15_update_voltage(struct work_struct *work_s) -{ - struct sht15_data *data - = container_of(work_s, struct sht15_data, - update_supply_work); - data->supply_uV = regulator_get_voltage(data->reg); -} - -/** - * sht15_invalidate_voltage() - mark supply voltage invalid when notified by reg - * @nb: associated notification structure - * @event: voltage regulator state change event code - * @ignored: function parameter - ignored here - * - * Note that as the notification code holds the regulator lock, we have - * to schedule an update of the supply voltage rather than getting it directly. - **/ -static int sht15_invalidate_voltage(struct notifier_block *nb, - unsigned long event, - void *ignored) -{ - struct sht15_data *data = container_of(nb, struct sht15_data, nb); - - if (event == REGULATOR_EVENT_VOLTAGE_CHANGE) - data->supply_uV_valid = false; - schedule_work(&data->update_supply_work); - - return NOTIFY_OK; -} - -static int __devinit sht15_probe(struct platform_device *pdev) -{ - int ret = 0; - struct sht15_data *data = kzalloc(sizeof(*data), GFP_KERNEL); - - if (!data) { - ret = -ENOMEM; - dev_err(&pdev->dev, "kzalloc failed"); - goto error_ret; - } - - INIT_WORK(&data->read_work, sht15_bh_read_data); - INIT_WORK(&data->update_supply_work, sht15_update_voltage); - platform_set_drvdata(pdev, data); - mutex_init(&data->read_lock); - data->dev = &pdev->dev; - init_waitqueue_head(&data->wait_queue); - - if (pdev->dev.platform_data == NULL) { - dev_err(&pdev->dev, "no platform data supplied"); - goto err_free_data; - } - data->pdata = pdev->dev.platform_data; - data->supply_uV = data->pdata->supply_mv*1000; - -/* If a regulator is available, query what the supply voltage actually is! */ - data->reg = regulator_get(data->dev, "vcc"); - if (!IS_ERR(data->reg)) { - data->supply_uV = regulator_get_voltage(data->reg); - regulator_enable(data->reg); - /* setup a notifier block to update this if another device - * causes the voltage to change */ - data->nb.notifier_call = &sht15_invalidate_voltage; - ret = regulator_register_notifier(data->reg, &data->nb); - } -/* Try requesting the GPIOs */ - ret = gpio_request(data->pdata->gpio_sck, "SHT15 sck"); - if (ret) { - dev_err(&pdev->dev, "gpio request failed"); - goto err_free_data; - } - gpio_direction_output(data->pdata->gpio_sck, 0); - ret = gpio_request(data->pdata->gpio_data, "SHT15 data"); - if (ret) { - dev_err(&pdev->dev, "gpio request failed"); - goto err_release_gpio_sck; - } - ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group); - if (ret) { - dev_err(&pdev->dev, "sysfs create failed"); - goto err_free_data; - } - - ret =
request_irq(gpio_to_irq(data->pdata->gpio_data), - sht15_interrupt_fired, - IRQF_TRIGGER_FALLING, - "sht15 data", - data); - if (ret) { - dev_err(&pdev->dev, "failed to get irq for data line"); - goto err_release_gpio_data; - } - disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); - sht15_connection_reset(data); - sht15_send_cmd(data, 0x1E); - - data->hwmon_dev = hwmon_device_register(data->dev); - if (IS_ERR(data->hwmon_dev)) { - ret = PTR_ERR(data->hwmon_dev); - goto err_release_gpio_data; - } - return 0; - -err_release_gpio_data: - gpio_free(data->pdata->gpio_data); -err_release_gpio_sck: - gpio_free(data->pdata->gpio_sck); -err_free_data: - kfree(data); -error_ret: - - return ret; -} - -static int __devexit sht15_remove(struct platform_device *pdev) -{ - struct sht15_data *data = platform_get_drvdata(pdev); - - /* Make sure any reads from the device are done and - * prevent new ones beginning */ - mutex_lock(&data->read_lock); - hwmon_device_unregister(data->hwmon_dev); - sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group); - if (!IS_ERR(data->reg)) { - regulator_unregister_notifier(data->reg, &data->nb); - regulator_disable(data->reg); - regulator_put(data->reg); - } - - free_irq(gpio_to_irq(data->pdata->gpio_data), data); - gpio_free(data->pdata->gpio_data); - gpio_free(data->pdata->gpio_sck); - mutex_unlock(&data->read_lock); - kfree(data); - return 0; -} - - -static struct platform_driver sht_drivers[] = { - { - .driver = { - .name = "sht10", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, { - .driver = { - .name = "sht11", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, { - .driver = { - .name = "sht15", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, { - .driver = { - .name = "sht71", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, { - .driver = { - .name = "sht75", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, -}; - - -static int __init sht15_init(void) -{ - int ret; - int i; - - for (i = 0; i < ARRAY_SIZE(sht_drivers); i++) { - ret = platform_driver_register(&sht_drivers[i]); - if (ret) - goto error_unreg; - } - - return 0; - -error_unreg: - while (--i >= 0) - platform_driver_unregister(&sht_drivers[i]); - - return ret; -} -module_init(sht15_init); - -static void __exit sht15_exit(void) -{ - int i; - for (i = ARRAY_SIZE(sht_drivers) - 1; i >= 0; i--) - platform_driver_unregister(&sht_drivers[i]); -} -module_exit(sht15_exit); - -MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/misc/eeprom/at24.c b/trunk/drivers/misc/eeprom/at24.c index db39f4a52f53..d184dfab9631 100644 --- a/trunk/drivers/misc/eeprom/at24.c +++ b/trunk/drivers/misc/eeprom/at24.c @@ -278,7 +278,7 @@ static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr, * We only use page mode writes; the alternative is sloooow. This routine * writes at most one page.
*/ -static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf, +static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf, unsigned offset, size_t count) { struct i2c_client *client; @@ -347,8 +347,8 @@ static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf, return -ETIMEDOUT; } -static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off, - size_t count) +static ssize_t at24_write(struct at24_data *at24, + char *buf, loff_t off, size_t count) { ssize_t retval = 0; @@ -406,7 +406,7 @@ static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf, return at24_read(at24, buf, offset, count); } -static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf, +static ssize_t at24_macc_write(struct memory_accessor *macc, char *buf, off_t offset, size_t count) { struct at24_data *at24 = container_of(macc, struct at24_data, macc); diff --git a/trunk/drivers/misc/eeprom/at25.c b/trunk/drivers/misc/eeprom/at25.c index b34cb5f79eea..6bc0dac5c1e8 100644 --- a/trunk/drivers/misc/eeprom/at25.c +++ b/trunk/drivers/misc/eeprom/at25.c @@ -140,8 +140,7 @@ at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr, static ssize_t -at25_ee_write(struct at25_data *at25, const char *buf, loff_t off, - size_t count) +at25_ee_write(struct at25_data *at25, char *buf, loff_t off, size_t count) { ssize_t status = 0; unsigned written = 0; @@ -277,7 +276,7 @@ static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf, return at25_ee_read(at25, buf, offset, count); } -static ssize_t at25_mem_write(struct memory_accessor *mem, const char *buf, +static ssize_t at25_mem_write(struct memory_accessor *mem, char *buf, off_t offset, size_t count) { struct at25_data *at25 = container_of(mem, struct at25_data, mem); diff --git a/trunk/drivers/misc/sgi-xp/xpc.h b/trunk/drivers/misc/sgi-xp/xpc.h index b94d5f767703..114444cfd496 100644 --- a/trunk/drivers/misc/sgi-xp/xpc.h +++ b/trunk/drivers/misc/sgi-xp/xpc.h @@ -90,21 +90,18 @@ struct xpc_rsvd_page { short max_npartitions; /* value of XPC_MAX_PARTITIONS */ u8 version; u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ - unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ union { - struct { - unsigned long vars_pa; /* phys addr */ - } sn2; - struct { - unsigned long heartbeat_gpa; /* phys addr */ - unsigned long activate_gru_mq_desc_gpa; /* phys addr */ - } uv; + unsigned long vars_pa; /* phys address of struct xpc_vars */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr of */ + /* activate mq's */ + /* gru mq descriptor */ } sn; - u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */ + unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ + u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */ u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ }; -#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */ +#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */ /* * Define the structures by which XPC variables can be exported to other @@ -185,17 +182,6 @@ struct xpc_vars_part_sn2 { (XPC_RP_MACH_NASIDS(_rp) + \ xpc_nasid_mask_nlongs)) - -/* - * The following structure describes the partition's heartbeat info which - * will be periodically read by other partitions to determine whether this - * XPC is still 'alive'. 
- */ -struct xpc_heartbeat_uv { - unsigned long value; - unsigned long offline; /* if 0, heartbeat should be changing */ -}; - /* * Info pertinent to a GRU message queue using a watch list for irq generation. */ @@ -212,7 +198,7 @@ struct xpc_gru_mq_uv { /* * The activate_mq is used to send/receive GRU messages that affect XPC's - * partition active state and channel state. This is uv only. + * heartbeat, partition active state, and channel state. This is UV only. */ struct xpc_activate_mq_msghdr_uv { unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ @@ -224,27 +210,33 @@ struct xpc_activate_mq_msghdr_uv { /* activate_mq defined message types */ #define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0 +#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1 +#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2 +#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3 -#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1 -#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2 +#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4 +#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV 7 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9 -#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 8 -#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 9 +#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10 +#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11 struct xpc_activate_mq_msg_uv { struct xpc_activate_mq_msghdr_uv hdr; }; +struct xpc_activate_mq_msg_heartbeat_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + u64 heartbeat; +}; + struct xpc_activate_mq_msg_activate_req_uv { struct xpc_activate_mq_msghdr_uv hdr; unsigned long rp_gpa; - unsigned long heartbeat_gpa; unsigned long activate_gru_mq_desc_gpa; }; @@ -279,11 +271,6 @@ struct xpc_activate_mq_msg_chctl_openreply_uv { unsigned long notify_gru_mq_desc_gpa; }; -struct xpc_activate_mq_msg_chctl_opencomplete_uv { - struct xpc_activate_mq_msghdr_uv hdr; - short ch_number; -}; - /* * Functions registered by add_timer() or called by kernel_thread() only * allow for a single 64-bit argument. 
The following macros can be used to @@ -589,32 +576,30 @@ struct xpc_channel { #define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ -#define XPC_C_ROPENCOMPLETE 0x00000002 /* remote open channel complete */ -#define XPC_C_OPENCOMPLETE 0x00000004 /* local open channel complete */ -#define XPC_C_ROPENREPLY 0x00000008 /* remote open channel reply */ -#define XPC_C_OPENREPLY 0x00000010 /* local open channel reply */ -#define XPC_C_ROPENREQUEST 0x00000020 /* remote open channel request */ -#define XPC_C_OPENREQUEST 0x00000040 /* local open channel request */ +#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */ +#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */ +#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */ +#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ -#define XPC_C_SETUP 0x00000080 /* channel's msgqueues are alloc'd */ -#define XPC_C_CONNECTEDCALLOUT 0x00000100 /* connected callout initiated */ +#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ +#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */ #define XPC_C_CONNECTEDCALLOUT_MADE \ - 0x00000200 /* connected callout completed */ -#define XPC_C_CONNECTED 0x00000400 /* local channel is connected */ -#define XPC_C_CONNECTING 0x00000800 /* channel is being connected */ + 0x00000080 /* connected callout completed */ +#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */ -#define XPC_C_RCLOSEREPLY 0x00001000 /* remote close channel reply */ -#define XPC_C_CLOSEREPLY 0x00002000 /* local close channel reply */ -#define XPC_C_RCLOSEREQUEST 0x00004000 /* remote close channel request */ -#define XPC_C_CLOSEREQUEST 0x00008000 /* local close channel request */ +#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */ -#define XPC_C_DISCONNECTED 0x00010000 /* channel is disconnected */ -#define XPC_C_DISCONNECTING 0x00020000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */ #define XPC_C_DISCONNECTINGCALLOUT \ - 0x00040000 /* disconnecting callout initiated */ + 0x00010000 /* disconnecting callout initiated */ #define XPC_C_DISCONNECTINGCALLOUT_MADE \ - 0x00080000 /* disconnecting callout completed */ -#define XPC_C_WDISCONNECT 0x00100000 /* waiting for channel disconnect */ + 0x00020000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */ /* * The channel control flags (chctl) union consists of a 64-bit variable which @@ -633,13 +618,11 @@ union xpc_channel_ctl_flags { #define XPC_CHCTL_CLOSEREPLY 0x02 #define XPC_CHCTL_OPENREQUEST 0x04 #define XPC_CHCTL_OPENREPLY 0x08 -#define XPC_CHCTL_OPENCOMPLETE 0x10 -#define XPC_CHCTL_MSGREQUEST 0x20 +#define XPC_CHCTL_MSGREQUEST 0x10 #define XPC_OPENCLOSE_CHCTL_FLAGS \ (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \ - XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \ - XPC_CHCTL_OPENCOMPLETE) + XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY) #define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST static inline int @@ -704,9 +687,6 @@ struct xpc_partition_sn2 { }; struct xpc_partition_uv { - 
unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */ - struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */ - /* partition's heartbeat */ unsigned long activate_gru_mq_desc_gpa; /* phys addr of partition's */ /* activate mq's gru mq */ /* descriptor */ @@ -718,12 +698,14 @@ struct xpc_partition_uv { u8 remote_act_state; /* remote partition's act_state */ u8 act_state_req; /* act_state request from remote partition */ enum xp_retval reason; /* reason for deactivate act_state request */ + u64 heartbeat; /* incremented by remote partition */ }; /* struct xpc_partition_uv flags */ -#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001 +#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001 #define XPC_P_ENGAGED_UV 0x00000002 +#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000004 /* struct xpc_partition_uv act_state change requests */ @@ -780,62 +762,6 @@ struct xpc_partition { } ____cacheline_aligned; -struct xpc_arch_operations { - int (*setup_partitions) (void); - void (*teardown_partitions) (void); - void (*process_activate_IRQ_rcvd) (void); - enum xp_retval (*get_partition_rsvd_page_pa) - (void *, u64 *, unsigned long *, size_t *); - int (*setup_rsvd_page) (struct xpc_rsvd_page *); - - void (*allow_hb) (short); - void (*disallow_hb) (short); - void (*disallow_all_hbs) (void); - void (*increment_heartbeat) (void); - void (*offline_heartbeat) (void); - void (*online_heartbeat) (void); - void (*heartbeat_init) (void); - void (*heartbeat_exit) (void); - enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *); - - void (*request_partition_activation) (struct xpc_rsvd_page *, - unsigned long, int); - void (*request_partition_reactivation) (struct xpc_partition *); - void (*request_partition_deactivation) (struct xpc_partition *); - void (*cancel_partition_deactivation_request) (struct xpc_partition *); - enum xp_retval (*setup_ch_structures) (struct xpc_partition *); - void (*teardown_ch_structures) (struct xpc_partition *); - - enum xp_retval (*make_first_contact) (struct xpc_partition *); - - u64 (*get_chctl_all_flags) (struct xpc_partition *); - void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *); - void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *); - void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *); - void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *); - void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *); - void (*process_msg_chctl_flags) (struct xpc_partition *, int); - - enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *, - unsigned long); - - enum xp_retval (*setup_msg_structures) (struct xpc_channel *); - void (*teardown_msg_structures) (struct xpc_channel *); - - void (*indicate_partition_engaged) (struct xpc_partition *); - void (*indicate_partition_disengaged) (struct xpc_partition *); - void (*assume_partition_disengaged) (short); - int (*partition_engaged) (short); - int (*any_partition_engaged) (void); - - int (*n_of_deliverable_payloads) (struct xpc_channel *); - enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *, - u16, u8, xpc_notify_func, void *); - void *(*get_deliverable_payload) (struct xpc_channel *); - void (*received_payload) (struct xpc_channel *, void *); - void (*notify_senders_of_disconnect) (struct xpc_channel *); -}; - /* struct xpc_partition act_state values (for XPC HB) */ #define XPC_P_AS_INACTIVE 0x00 /* partition is not active */ @@ -876,17 +802,67 @@ extern struct xpc_registration xpc_registrations[]; /*
found in xpc_main.c */ extern struct device *xpc_part; extern struct device *xpc_chan; -extern struct xpc_arch_operations xpc_arch_ops; extern int xpc_disengage_timelimit; extern int xpc_disengage_timedout; extern int xpc_activate_IRQ_rcvd; extern spinlock_t xpc_activate_IRQ_rcvd_lock; extern wait_queue_head_t xpc_activate_IRQ_wq; +extern void *xpc_heartbeating_to_mask; extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); extern void xpc_activate_partition(struct xpc_partition *); extern void xpc_activate_kthreads(struct xpc_channel *, int); extern void xpc_create_kthreads(struct xpc_channel *, int, int); extern void xpc_disconnect_wait(int); +extern int (*xpc_setup_partitions_sn) (void); +extern void (*xpc_teardown_partitions_sn) (void); +extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *, + unsigned long *, + size_t *); +extern int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *); +extern void (*xpc_heartbeat_init) (void); +extern void (*xpc_heartbeat_exit) (void); +extern void (*xpc_increment_heartbeat) (void); +extern void (*xpc_offline_heartbeat) (void); +extern void (*xpc_online_heartbeat) (void); +extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *); +extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *); +extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *); +extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *); +extern void (*xpc_teardown_msg_structures) (struct xpc_channel *); +extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *); +extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int); +extern int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *); +extern void *(*xpc_get_deliverable_payload) (struct xpc_channel *); +extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *, + unsigned long, int); +extern void (*xpc_request_partition_reactivation) (struct xpc_partition *); +extern void (*xpc_request_partition_deactivation) (struct xpc_partition *); +extern void (*xpc_cancel_partition_deactivation_request) ( + struct xpc_partition *); +extern void (*xpc_process_activate_IRQ_rcvd) (void); +extern enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *); +extern void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *); + +extern void (*xpc_indicate_partition_engaged) (struct xpc_partition *); +extern int (*xpc_partition_engaged) (short); +extern int (*xpc_any_partition_engaged) (void); +extern void (*xpc_indicate_partition_disengaged) (struct xpc_partition *); +extern void (*xpc_assume_partition_disengaged) (short); + +extern void (*xpc_send_chctl_closerequest) (struct xpc_channel *, + unsigned long *); +extern void (*xpc_send_chctl_closereply) (struct xpc_channel *, + unsigned long *); +extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *, + unsigned long *); +extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *); + +extern enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *, + unsigned long); + +extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *, + u16, u8, xpc_notify_func, void *); +extern void (*xpc_received_payload) (struct xpc_channel *, void *); /* found in xpc_sn2.c */ extern int xpc_init_sn2(void); @@ -933,6 +909,40 @@ extern void xpc_disconnect_channel(const int, struct xpc_channel *, extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); extern void xpc_partition_going_down(struct xpc_partition 
*, enum xp_retval); +static inline int +xpc_hb_allowed(short partid, void *heartbeating_to_mask) +{ + return test_bit(partid, heartbeating_to_mask); +} + +static inline int +xpc_any_hbs_allowed(void) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions); +} + +static inline void +xpc_allow_hb(short partid) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + set_bit(partid, xpc_heartbeating_to_mask); +} + +static inline void +xpc_disallow_hb(short partid) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + clear_bit(partid, xpc_heartbeating_to_mask); +} + +static inline void +xpc_disallow_all_hbs(void) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions); +} + static inline void xpc_wakeup_channel_mgr(struct xpc_partition *part) { diff --git a/trunk/drivers/misc/sgi-xp/xpc_channel.c b/trunk/drivers/misc/sgi-xp/xpc_channel.c index 652593fc486d..99a2534c38a1 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_channel.c +++ b/trunk/drivers/misc/sgi-xp/xpc_channel.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. */ /* @@ -39,38 +39,34 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) if (!(ch->flags & XPC_C_SETUP)) { spin_unlock_irqrestore(&ch->lock, *irq_flags); - ret = xpc_arch_ops.setup_msg_structures(ch); + ret = xpc_setup_msg_structures(ch); spin_lock_irqsave(&ch->lock, *irq_flags); if (ret != xpSuccess) XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); - else - ch->flags |= XPC_C_SETUP; - if (ch->flags & XPC_C_DISCONNECTING) + ch->flags |= XPC_C_SETUP; + + if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) return; } if (!(ch->flags & XPC_C_OPENREPLY)) { ch->flags |= XPC_C_OPENREPLY; - xpc_arch_ops.send_chctl_openreply(ch, irq_flags); + xpc_send_chctl_openreply(ch, irq_flags); } if (!(ch->flags & XPC_C_ROPENREPLY)) return; - if (!(ch->flags & XPC_C_OPENCOMPLETE)) { - ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED); - xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags); - } - - if (!(ch->flags & XPC_C_ROPENCOMPLETE)) - return; + ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ dev_info(xpc_chan, "channel %d to partition %d connected\n", ch->number, ch->partid); - ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ + spin_unlock_irqrestore(&ch->lock, *irq_flags); + xpc_create_kthreads(ch, 1, 0); + spin_lock_irqsave(&ch->lock, *irq_flags); } /* @@ -100,7 +96,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) if (part->act_state == XPC_P_AS_DEACTIVATING) { /* can't proceed until the other side disengages from us */ - if (xpc_arch_ops.partition_engaged(ch->partid)) + if (xpc_partition_engaged(ch->partid)) return; } else { @@ -112,7 +108,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) if (!(ch->flags & XPC_C_CLOSEREPLY)) { ch->flags |= XPC_C_CLOSEREPLY; - xpc_arch_ops.send_chctl_closereply(ch, irq_flags); + xpc_send_chctl_closereply(ch, irq_flags); } if (!(ch->flags & XPC_C_RCLOSEREPLY)) @@ -122,7 +118,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* wake those waiting for notify completion */ if (atomic_read(&ch->n_to_notify) > 0) { /* we do callout while holding ch->lock, callout can't block */ - xpc_arch_ops.notify_senders_of_disconnect(ch); 
+ xpc_notify_senders_of_disconnect(ch); } /* both sides are disconnected now */ @@ -136,7 +132,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) DBUG_ON(atomic_read(&ch->n_to_notify) != 0); /* it's now safe to free the channel's message queues */ - xpc_arch_ops.teardown_msg_structures(ch); + xpc_teardown_msg_structures(ch); ch->func = NULL; ch->key = NULL; @@ -148,9 +144,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* * Mark the channel disconnected and clear all other flags, including - * XPC_C_SETUP (because of call to - * xpc_arch_ops.teardown_msg_structures()) but not including - * XPC_C_WDISCONNECT (if it was set). + * XPC_C_SETUP (because of call to xpc_teardown_msg_structures()) but + * not including XPC_C_WDISCONNECT (if it was set). */ ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); @@ -189,7 +184,6 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, struct xpc_channel *ch = &part->channels[ch_number]; enum xp_retval reason; enum xp_retval ret; - int create_kthread = 0; spin_lock_irqsave(&ch->lock, irq_flags); @@ -202,7 +196,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, * has had a chance to see that the channel is disconnected. */ ch->delayed_chctl_flags |= chctl_flags; - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) { @@ -244,7 +239,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, XPC_CHCTL_CLOSEREQUEST; spin_unlock(&part->chctl_lock); } - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } XPC_SET_REASON(ch, 0, 0); @@ -254,8 +250,7 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); } - chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | - XPC_CHCTL_OPENCOMPLETE); + chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY); /* * The meaningful CLOSEREQUEST connection state fields are: @@ -274,7 +269,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } xpc_process_disconnect(ch, &irq_flags); @@ -287,7 +283,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, if (ch->flags & XPC_C_DISCONNECTED) { DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); @@ -302,7 +299,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, XPC_CHCTL_CLOSEREPLY; spin_unlock(&part->chctl_lock); } - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } ch->flags |= XPC_C_RCLOSEREPLY; @@ -322,12 +320,14 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, if (part->act_state == XPC_P_AS_DEACTIVATING || (ch->flags & XPC_C_ROPENREQUEST)) { - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST; - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | XPC_C_OPENREQUEST))); @@ -341,7 +341,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, */ if 
(args->entry_size == 0 || args->local_nentries == 0) { /* assume OPENREQUEST was delayed by mistake */ - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); @@ -351,7 +352,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, if (args->entry_size != ch->entry_size) { XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, &irq_flags); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } } else { ch->entry_size = args->entry_size; @@ -373,13 +375,15 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, args->local_msgqueue_pa, args->local_nentries, args->remote_nentries, ch->partid, ch->number); - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) - goto out; - + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } if (!(ch->flags & XPC_C_OPENREQUEST)) { XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, &irq_flags); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); @@ -396,11 +400,11 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, DBUG_ON(args->local_nentries == 0); DBUG_ON(args->remote_nentries == 0); - ret = xpc_arch_ops.save_remote_msgqueue_pa(ch, - args->local_msgqueue_pa); + ret = xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa); if (ret != xpSuccess) { XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } ch->flags |= XPC_C_ROPENREPLY; @@ -426,36 +430,7 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, xpc_process_connect(ch, &irq_flags); } - if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) { - - dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from " - "partid=%d, channel=%d\n", ch->partid, ch->number); - - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) - goto out; - - if (!(ch->flags & XPC_C_OPENREQUEST) || - !(ch->flags & XPC_C_OPENREPLY)) { - XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, - &irq_flags); - goto out; - } - - DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); - DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY)); - DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); - - ch->flags |= XPC_C_ROPENCOMPLETE; - - xpc_process_connect(ch, &irq_flags); - create_kthread = 1; - } - -out: spin_unlock_irqrestore(&ch->lock, irq_flags); - - if (create_kthread) - xpc_create_kthreads(ch, 1, 0); } /* @@ -533,7 +508,7 @@ xpc_connect_channel(struct xpc_channel *ch) /* initiate the connection */ ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); - xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags); + xpc_send_chctl_openrequest(ch, &irq_flags); xpc_process_connect(ch, &irq_flags); @@ -551,7 +526,7 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part) int ch_number; u32 ch_flags; - chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part); + chctl.all_flags = xpc_get_chctl_all_flags(part); /* * Initiate channel connections for registered channels. 
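[Editor's note] The chctl handling touched throughout the hunks above packs one 8-bit flag field per channel into a single 64-bit word, so the channel manager can fetch all pending work with one read and then dispatch per channel. The stand-alone C sketch below illustrates only that packing idea; the demo_* names are hypothetical and none of this is the driver's code.

#include <stdint.h>
#include <stdio.h>

#define DEMO_CHCTL_CLOSEREQUEST 0x01	/* mirrors XPC_CHCTL_CLOSEREQUEST */
#define DEMO_CHCTL_OPENREQUEST	0x04	/* mirrors XPC_CHCTL_OPENREQUEST */

/* a 64-bit word overlaid with one flag byte per channel */
union demo_chctl {
	uint64_t all_flags;
	uint8_t flags[8];
};

int main(void)
{
	union demo_chctl chctl = { .all_flags = 0 };
	int ch;

	chctl.flags[2] |= DEMO_CHCTL_OPENREQUEST;
	chctl.flags[5] |= DEMO_CHCTL_CLOSEREQUEST;

	if (chctl.all_flags == 0)	/* one test covers every channel */
		return 0;

	for (ch = 0; ch < 8; ch++) {
		if (chctl.flags[ch] & DEMO_CHCTL_OPENREQUEST)
			printf("channel %d: open requested\n", ch);
		if (chctl.flags[ch] & DEMO_CHCTL_CLOSEREQUEST)
			printf("channel %d: close requested\n", ch);
	}
	return 0;
}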
@@ -589,6 +564,10 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part) if (!(ch_flags & XPC_C_OPENREQUEST)) { DBUG_ON(ch_flags & XPC_C_SETUP); (void)xpc_connect_channel(ch); + } else { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_connect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); } continue; } @@ -600,7 +579,7 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part) */ if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS) - xpc_arch_ops.process_msg_chctl_flags(part, ch_number); + xpc_process_msg_chctl_flags(part, ch_number); } } @@ -776,7 +755,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch, XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | XPC_C_CONNECTING | XPC_C_CONNECTED); - xpc_arch_ops.send_chctl_closerequest(ch, irq_flags); + xpc_send_chctl_closerequest(ch, irq_flags); if (channel_was_connected) ch->flags |= XPC_C_WASCONNECTED; @@ -883,8 +862,8 @@ xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload, DBUG_ON(payload == NULL); if (xpc_part_ref(part)) { - ret = xpc_arch_ops.send_payload(&part->channels[ch_number], - flags, payload, payload_size, 0, NULL, NULL); + ret = xpc_send_payload(&part->channels[ch_number], flags, + payload, payload_size, 0, NULL, NULL); xpc_part_deref(part); } @@ -935,8 +914,9 @@ xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload, DBUG_ON(func == NULL); if (xpc_part_ref(part)) { - ret = xpc_arch_ops.send_payload(&part->channels[ch_number], - flags, payload, payload_size, XPC_N_CALL, func, key); + ret = xpc_send_payload(&part->channels[ch_number], flags, + payload, payload_size, XPC_N_CALL, func, + key); xpc_part_deref(part); } return ret; @@ -950,7 +930,7 @@ xpc_deliver_payload(struct xpc_channel *ch) { void *payload; - payload = xpc_arch_ops.get_deliverable_payload(ch); + payload = xpc_get_deliverable_payload(ch); if (payload != NULL) { /* @@ -1004,7 +984,7 @@ xpc_initiate_received(short partid, int ch_number, void *payload) DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); ch = &part->channels[ch_number]; - xpc_arch_ops.received_payload(ch, payload); + xpc_received_payload(ch, payload); /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */ xpc_msgqueue_deref(ch); diff --git a/trunk/drivers/misc/sgi-xp/xpc_main.c b/trunk/drivers/misc/sgi-xp/xpc_main.c index fd3688a3e23f..1ab9fda87fab 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_main.c +++ b/trunk/drivers/misc/sgi-xp/xpc_main.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. 
*/ /* @@ -150,6 +150,7 @@ DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq); static unsigned long xpc_hb_check_timeout; static struct timer_list xpc_hb_timer; +void *xpc_heartbeating_to_mask; /* notification that the xpc_hb_checker thread has exited */ static DECLARE_COMPLETION(xpc_hb_checker_exited); @@ -169,7 +170,62 @@ static struct notifier_block xpc_die_notifier = { .notifier_call = xpc_system_die, }; -struct xpc_arch_operations xpc_arch_ops; +int (*xpc_setup_partitions_sn) (void); +void (*xpc_teardown_partitions_sn) (void); +enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie, + unsigned long *rp_pa, + size_t *len); +int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp); +void (*xpc_heartbeat_init) (void); +void (*xpc_heartbeat_exit) (void); +void (*xpc_increment_heartbeat) (void); +void (*xpc_offline_heartbeat) (void); +void (*xpc_online_heartbeat) (void); +enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part); + +enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part); +void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch); +u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part); +enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch); +void (*xpc_teardown_msg_structures) (struct xpc_channel *ch); +void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number); +int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch); +void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch); + +void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_pa, + int nasid); +void (*xpc_request_partition_reactivation) (struct xpc_partition *part); +void (*xpc_request_partition_deactivation) (struct xpc_partition *part); +void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part); + +void (*xpc_process_activate_IRQ_rcvd) (void); +enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part); +void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part); + +void (*xpc_indicate_partition_engaged) (struct xpc_partition *part); +int (*xpc_partition_engaged) (short partid); +int (*xpc_any_partition_engaged) (void); +void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part); +void (*xpc_assume_partition_disengaged) (short partid); + +void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_closereply) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_openreply) (struct xpc_channel *ch, + unsigned long *irq_flags); + +enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch, + unsigned long msgqueue_pa); + +enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags, + void *payload, u16 payload_size, + u8 notify_type, xpc_notify_func func, + void *key); +void (*xpc_received_payload) (struct xpc_channel *ch, void *payload); /* * Timer function to enforce the timelimit on the partition disengage. 
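[Editor's note] The hunks that follow replace calls through the xpc_arch_ops structure with the per-operation function pointers declared above, each assigned once at init time by xpc_init_sn2() (or its UV counterpart). A minimal sketch of that dispatch pattern, using hypothetical demo_* names rather than the driver's:

#include <stdio.h>

static void demo_increment_heartbeat_sn2(void) { puts("sn2: bump counter"); }
static void demo_increment_heartbeat_uv(void) { puts("uv: send heartbeat msg"); }

/* one global pointer per operation, as in the declarations above */
static void (*demo_increment_heartbeat)(void);

static void demo_init(int is_uv)
{
	/* the architecture-specific init routine wires up every pointer */
	demo_increment_heartbeat = is_uv ? demo_increment_heartbeat_uv
					 : demo_increment_heartbeat_sn2;
}

int main(void)
{
	demo_init(0);
	demo_increment_heartbeat();	/* dispatches to the sn2 variant */
	return 0;
}

Compared with routing every call through a single ops struct, this style needs one assignment per operation at init time but keeps each call site a plain indirect call with no extra structure dereference.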
@@ -184,7 +240,7 @@ xpc_timeout_partition_disengage(unsigned long data) (void)xpc_partition_disengaged(part); DBUG_ON(part->disengage_timeout != 0); - DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part))); + DBUG_ON(xpc_partition_engaged(XPC_PARTID(part))); } /* @@ -195,7 +251,7 @@ xpc_timeout_partition_disengage(unsigned long data) static void xpc_hb_beater(unsigned long dummy) { - xpc_arch_ops.increment_heartbeat(); + xpc_increment_heartbeat(); if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) wake_up_interruptible(&xpc_activate_IRQ_wq); @@ -207,7 +263,7 @@ xpc_hb_beater(unsigned long dummy) static void xpc_start_hb_beater(void) { - xpc_arch_ops.heartbeat_init(); + xpc_heartbeat_init(); init_timer(&xpc_hb_timer); xpc_hb_timer.function = xpc_hb_beater; xpc_hb_beater(0); @@ -217,7 +273,7 @@ static void xpc_stop_hb_beater(void) { del_timer_sync(&xpc_hb_timer); - xpc_arch_ops.heartbeat_exit(); + xpc_heartbeat_exit(); } /* @@ -246,7 +302,7 @@ xpc_check_remote_hb(void) continue; } - ret = xpc_arch_ops.get_remote_heartbeat(part); + ret = xpc_get_remote_heartbeat(part); if (ret != xpSuccess) XPC_DEACTIVATE_PARTITION(part, ret); } @@ -297,7 +353,7 @@ xpc_hb_checker(void *ignore) force_IRQ = 0; dev_dbg(xpc_part, "processing activate IRQs " "received\n"); - xpc_arch_ops.process_activate_IRQ_rcvd(); + xpc_process_activate_IRQ_rcvd(); } /* wait for IRQ or timeout */ @@ -472,7 +528,7 @@ xpc_setup_ch_structures(struct xpc_partition *part) init_waitqueue_head(&ch->idle_wq); } - ret = xpc_arch_ops.setup_ch_structures(part); + ret = xpc_setup_ch_structures_sn(part); if (ret != xpSuccess) goto out_2; @@ -516,7 +572,7 @@ xpc_teardown_ch_structures(struct xpc_partition *part) /* now we can begin tearing down the infrastructure */ - xpc_arch_ops.teardown_ch_structures(part); + xpc_teardown_ch_structures_sn(part); kfree(part->remote_openclose_args_base); part->remote_openclose_args = NULL; @@ -564,12 +620,12 @@ xpc_activating(void *__partid) dev_dbg(xpc_part, "activating partition %d\n", partid); - xpc_arch_ops.allow_hb(partid); + xpc_allow_hb(partid); if (xpc_setup_ch_structures(part) == xpSuccess) { (void)xpc_part_ref(part); /* this will always succeed */ - if (xpc_arch_ops.make_first_contact(part) == xpSuccess) { + if (xpc_make_first_contact(part) == xpSuccess) { xpc_mark_partition_active(part); xpc_channel_mgr(part); /* won't return until partition is deactivating */ @@ -579,12 +635,12 @@ xpc_activating(void *__partid) xpc_teardown_ch_structures(part); } - xpc_arch_ops.disallow_hb(partid); + xpc_disallow_hb(partid); xpc_mark_partition_inactive(part); if (part->reason == xpReactivating) { /* interrupting ourselves results in activating partition */ - xpc_arch_ops.request_partition_reactivation(part); + xpc_request_partition_reactivation(part); } return 0; @@ -657,13 +713,10 @@ xpc_activate_kthreads(struct xpc_channel *ch, int needed) static void xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) { - int (*n_of_deliverable_payloads) (struct xpc_channel *) = - xpc_arch_ops.n_of_deliverable_payloads; - do { /* deliver messages to their intended recipients */ - while (n_of_deliverable_payloads(ch) > 0 && + while (xpc_n_of_deliverable_payloads(ch) > 0 && !(ch->flags & XPC_C_DISCONNECTING)) { xpc_deliver_payload(ch); } @@ -679,7 +732,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) "wait_event_interruptible_exclusive()\n"); (void)wait_event_interruptible_exclusive(ch->idle_wq, - (n_of_deliverable_payloads(ch) > 0 || + (xpc_n_of_deliverable_payloads(ch) > 0 
|| (ch->flags & XPC_C_DISCONNECTING))); atomic_dec(&ch->kthreads_idle); @@ -696,8 +749,6 @@ xpc_kthread_start(void *args) struct xpc_channel *ch; int n_needed; unsigned long irq_flags; - int (*n_of_deliverable_payloads) (struct xpc_channel *) = - xpc_arch_ops.n_of_deliverable_payloads; dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", partid, ch_number); @@ -726,7 +777,7 @@ xpc_kthread_start(void *args) * additional kthreads to help deliver them. We only * need one less than total #of messages to deliver. */ - n_needed = n_of_deliverable_payloads(ch) - 1; + n_needed = xpc_n_of_deliverable_payloads(ch) - 1; if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) xpc_activate_kthreads(ch, n_needed); @@ -754,7 +805,7 @@ xpc_kthread_start(void *args) if (atomic_dec_return(&ch->kthreads_assigned) == 0 && atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_arch_ops.indicate_partition_disengaged(part); + xpc_indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); @@ -786,8 +837,6 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, u64 args = XPC_PACK_ARGS(ch->partid, ch->number); struct xpc_partition *part = &xpc_partitions[ch->partid]; struct task_struct *kthread; - void (*indicate_partition_disengaged) (struct xpc_partition *) = - xpc_arch_ops.indicate_partition_disengaged; while (needed-- > 0) { @@ -809,7 +858,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && atomic_inc_return(&part->nchannels_engaged) == 1) { - xpc_arch_ops.indicate_partition_engaged(part); + xpc_indicate_partition_engaged(part); } (void)xpc_part_ref(part); xpc_msgqueue_ref(ch); @@ -831,7 +880,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, if (atomic_dec_return(&ch->kthreads_assigned) == 0 && atomic_dec_return(&part->nchannels_engaged) == 0) { - indicate_partition_disengaged(part); + xpc_indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); xpc_part_deref(part); @@ -944,13 +993,13 @@ xpc_setup_partitions(void) atomic_set(&part->references, 0); } - return xpc_arch_ops.setup_partitions(); + return xpc_setup_partitions_sn(); } static void xpc_teardown_partitions(void) { - xpc_arch_ops.teardown_partitions(); + xpc_teardown_partitions_sn(); kfree(xpc_partitions); } @@ -1006,7 +1055,7 @@ xpc_do_exit(enum xp_retval reason) disengage_timeout = part->disengage_timeout; } - if (xpc_arch_ops.any_partition_engaged()) { + if (xpc_any_partition_engaged()) { if (time_is_before_jiffies(printmsg_time)) { dev_info(xpc_part, "waiting for remote " "partitions to deactivate, timeout in " @@ -1037,7 +1086,8 @@ xpc_do_exit(enum xp_retval reason) } while (1); - DBUG_ON(xpc_arch_ops.any_partition_engaged()); + DBUG_ON(xpc_any_partition_engaged()); + DBUG_ON(xpc_any_hbs_allowed() != 0); xpc_teardown_rsvd_page(); @@ -1102,15 +1152,15 @@ xpc_die_deactivate(void) /* keep xpc_hb_checker thread from doing anything (just in case) */ xpc_exiting = 1; - xpc_arch_ops.disallow_all_hbs(); /*indicate we're deactivated */ + xpc_disallow_all_hbs(); /*indicate we're deactivated */ for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; - if (xpc_arch_ops.partition_engaged(partid) || + if (xpc_partition_engaged(partid) || part->act_state != XPC_P_AS_INACTIVE) { - xpc_arch_ops.request_partition_deactivation(part); - xpc_arch_ops.indicate_partition_disengaged(part); + xpc_request_partition_deactivation(part); + xpc_indicate_partition_disengaged(part); } } @@ -1127,7 +1177,7 @@ xpc_die_deactivate(void) 
wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; while (1) { - any_engaged = xpc_arch_ops.any_partition_engaged(); + any_engaged = xpc_any_partition_engaged(); if (!any_engaged) { dev_info(xpc_part, "all partitions have deactivated\n"); break; @@ -1136,7 +1186,7 @@ xpc_die_deactivate(void) if (!keep_waiting--) { for (partid = 0; partid < xp_max_npartitions; partid++) { - if (xpc_arch_ops.partition_engaged(partid)) { + if (xpc_partition_engaged(partid)) { dev_info(xpc_part, "deactivate from " "remote partition %d timed " "out\n", partid); @@ -1183,7 +1233,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_ENTER: case DIE_INIT_MONARCH_ENTER: - xpc_arch_ops.offline_heartbeat(); + xpc_offline_heartbeat(); break; case DIE_KDEBUG_LEAVE: @@ -1194,7 +1244,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_LEAVE: case DIE_INIT_MONARCH_LEAVE: - xpc_arch_ops.online_heartbeat(); + xpc_online_heartbeat(); break; } #else diff --git a/trunk/drivers/misc/sgi-xp/xpc_partition.c b/trunk/drivers/misc/sgi-xp/xpc_partition.c index 65877bc5edaa..6722f6fe4dc7 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_partition.c +++ b/trunk/drivers/misc/sgi-xp/xpc_partition.c @@ -70,9 +70,6 @@ xpc_get_rsvd_page_pa(int nasid) size_t buf_len = 0; void *buf = buf; void *buf_base = NULL; - enum xp_retval (*get_partition_rsvd_page_pa) - (void *, u64 *, unsigned long *, size_t *) = - xpc_arch_ops.get_partition_rsvd_page_pa; while (1) { @@ -82,7 +79,8 @@ xpc_get_rsvd_page_pa(int nasid) * ??? function or have two versions? Rename rp_pa for UV to * ??? rp_gpa? */ - ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len); + ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, + &len); dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " "address=0x%016lx, len=0x%016lx\n", ret, @@ -174,7 +172,7 @@ xpc_setup_rsvd_page(void) xpc_part_nasids = XPC_RP_PART_NASIDS(rp); xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); - ret = xpc_arch_ops.setup_rsvd_page(rp); + ret = xpc_setup_rsvd_page_sn(rp); if (ret != 0) return ret; @@ -266,7 +264,7 @@ xpc_partition_disengaged(struct xpc_partition *part) short partid = XPC_PARTID(part); int disengaged; - disengaged = !xpc_arch_ops.partition_engaged(partid); + disengaged = !xpc_partition_engaged(partid); if (part->disengage_timeout) { if (!disengaged) { if (time_is_after_jiffies(part->disengage_timeout)) { @@ -282,7 +280,7 @@ xpc_partition_disengaged(struct xpc_partition *part) dev_info(xpc_part, "deactivate request to remote " "partition %d timed out\n", partid); xpc_disengage_timedout = 1; - xpc_arch_ops.assume_partition_disengaged(partid); + xpc_assume_partition_disengaged(partid); disengaged = 1; } part->disengage_timeout = 0; @@ -296,7 +294,7 @@ xpc_partition_disengaged(struct xpc_partition *part) if (part->act_state != XPC_P_AS_INACTIVE) xpc_wakeup_channel_mgr(part); - xpc_arch_ops.cancel_partition_deactivation_request(part); + xpc_cancel_partition_deactivation_request(part); } return disengaged; } @@ -341,7 +339,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, spin_unlock_irqrestore(&part->act_lock, irq_flags); if (reason == xpReactivating) { /* we interrupt ourselves to reactivate partition */ - xpc_arch_ops.request_partition_reactivation(part); + xpc_request_partition_reactivation(part); } return; } @@ -360,7 +358,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, 
spin_unlock_irqrestore(&part->act_lock, irq_flags); /* ask remote partition to deactivate with regard to us */ - xpc_arch_ops.request_partition_deactivation(part); + xpc_request_partition_deactivation(part); /* set a timelimit on the disengage phase of the deactivation request */ part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); @@ -498,7 +496,7 @@ xpc_discovery(void) continue; } - xpc_arch_ops.request_partition_activation(remote_rp, + xpc_request_partition_activation(remote_rp, remote_rp_pa, nasid); } } diff --git a/trunk/drivers/misc/sgi-xp/xpc_sn2.c b/trunk/drivers/misc/sgi-xp/xpc_sn2.c index 915a3b495da5..eaaa964942de 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_sn2.c +++ b/trunk/drivers/misc/sgi-xp/xpc_sn2.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. */ /* @@ -60,14 +60,14 @@ static struct xpc_vars_sn2 *xpc_vars_sn2; static struct xpc_vars_part_sn2 *xpc_vars_part_sn2; static int -xpc_setup_partitions_sn2(void) +xpc_setup_partitions_sn_sn2(void) { /* nothing needs to be done */ return 0; } static void -xpc_teardown_partitions_sn2(void) +xpc_teardown_partitions_sn_sn2(void) { /* nothing needs to be done */ } @@ -430,13 +430,6 @@ xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags); } -static void -xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch, - unsigned long *irq_flags) -{ - XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags); -} - static void xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch) { @@ -628,7 +621,7 @@ xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa, static int -xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) +xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp) { struct amo *amos_page; int i; @@ -636,7 +629,7 @@ xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) xpc_vars_sn2 = XPC_RP_VARS(rp); - rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2); + rp->sn.vars_pa = xp_pa(xpc_vars_sn2); /* vars_part array follows immediately after vars */ xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) + @@ -700,33 +693,6 @@ xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) return 0; } -static int -xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask) -{ - return test_bit(partid, heartbeating_to_mask); -} - -static void -xpc_allow_hb_sn2(short partid) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - set_bit(partid, xpc_vars_sn2->heartbeating_to_mask); -} - -static void -xpc_disallow_hb_sn2(short partid) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask); -} - -static void -xpc_disallow_all_hbs_sn2(void) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions); -} - static void xpc_increment_heartbeat_sn2(void) { @@ -753,6 +719,7 @@ xpc_heartbeat_init_sn2(void) DBUG_ON(xpc_vars_sn2 == NULL); bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); + xpc_heartbeating_to_mask = &xpc_vars_sn2->heartbeating_to_mask[0]; xpc_online_heartbeat_sn2(); } @@ -784,9 +751,9 @@ xpc_get_remote_heartbeat_sn2(struct xpc_partition *part) remote_vars->heartbeating_to_mask[0]); if ((remote_vars->heartbeat == part->last_heartbeat && - !remote_vars->heartbeat_offline) || - !xpc_hb_allowed_sn2(sn_partition_id, - 
remote_vars->heartbeating_to_mask)) { + remote_vars->heartbeat_offline == 0) || + !xpc_hb_allowed(sn_partition_id, + &remote_vars->heartbeating_to_mask)) { ret = xpNoHeartbeat; } else { part->last_heartbeat = remote_vars->heartbeat; @@ -1005,7 +972,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid) return; } - remote_vars_pa = remote_rp->sn.sn2.vars_pa; + remote_vars_pa = remote_rp->sn.vars_pa; remote_rp_version = remote_rp->version; remote_rp_ts_jiffies = remote_rp->ts_jiffies; @@ -1162,7 +1129,7 @@ xpc_process_activate_IRQ_rcvd_sn2(void) * Setup the channel structures that are sn2 specific. */ static enum xp_retval -xpc_setup_ch_structures_sn2(struct xpc_partition *part) +xpc_setup_ch_structures_sn_sn2(struct xpc_partition *part) { struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; struct xpc_channel_sn2 *ch_sn2; @@ -1284,7 +1251,7 @@ xpc_setup_ch_structures_sn2(struct xpc_partition *part) * Teardown the channel structures that are sn2 specific. */ static void -xpc_teardown_ch_structures_sn2(struct xpc_partition *part) +xpc_teardown_ch_structures_sn_sn2(struct xpc_partition *part) { struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; short partid = XPC_PARTID(part); @@ -2348,70 +2315,61 @@ xpc_received_payload_sn2(struct xpc_channel *ch, void *payload) xpc_acknowledge_msgs_sn2(ch, get, msg->flags); } -static struct xpc_arch_operations xpc_arch_ops_sn2 = { - .setup_partitions = xpc_setup_partitions_sn2, - .teardown_partitions = xpc_teardown_partitions_sn2, - .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2, - .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2, - .setup_rsvd_page = xpc_setup_rsvd_page_sn2, - - .allow_hb = xpc_allow_hb_sn2, - .disallow_hb = xpc_disallow_hb_sn2, - .disallow_all_hbs = xpc_disallow_all_hbs_sn2, - .increment_heartbeat = xpc_increment_heartbeat_sn2, - .offline_heartbeat = xpc_offline_heartbeat_sn2, - .online_heartbeat = xpc_online_heartbeat_sn2, - .heartbeat_init = xpc_heartbeat_init_sn2, - .heartbeat_exit = xpc_heartbeat_exit_sn2, - .get_remote_heartbeat = xpc_get_remote_heartbeat_sn2, - - .request_partition_activation = - xpc_request_partition_activation_sn2, - .request_partition_reactivation = - xpc_request_partition_reactivation_sn2, - .request_partition_deactivation = - xpc_request_partition_deactivation_sn2, - .cancel_partition_deactivation_request = - xpc_cancel_partition_deactivation_request_sn2, - - .setup_ch_structures = xpc_setup_ch_structures_sn2, - .teardown_ch_structures = xpc_teardown_ch_structures_sn2, - - .make_first_contact = xpc_make_first_contact_sn2, - - .get_chctl_all_flags = xpc_get_chctl_all_flags_sn2, - .send_chctl_closerequest = xpc_send_chctl_closerequest_sn2, - .send_chctl_closereply = xpc_send_chctl_closereply_sn2, - .send_chctl_openrequest = xpc_send_chctl_openrequest_sn2, - .send_chctl_openreply = xpc_send_chctl_openreply_sn2, - .send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2, - .process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2, - - .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2, - - .setup_msg_structures = xpc_setup_msg_structures_sn2, - .teardown_msg_structures = xpc_teardown_msg_structures_sn2, - - .indicate_partition_engaged = xpc_indicate_partition_engaged_sn2, - .indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2, - .partition_engaged = xpc_partition_engaged_sn2, - .any_partition_engaged = xpc_any_partition_engaged_sn2, - .assume_partition_disengaged = xpc_assume_partition_disengaged_sn2, - - .n_of_deliverable_payloads = 
xpc_n_of_deliverable_payloads_sn2, - .send_payload = xpc_send_payload_sn2, - .get_deliverable_payload = xpc_get_deliverable_payload_sn2, - .received_payload = xpc_received_payload_sn2, - .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2, -}; - int xpc_init_sn2(void) { int ret; size_t buf_size; - xpc_arch_ops = xpc_arch_ops_sn2; + xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2; + xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_sn2; + xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2; + xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2; + xpc_increment_heartbeat = xpc_increment_heartbeat_sn2; + xpc_offline_heartbeat = xpc_offline_heartbeat_sn2; + xpc_online_heartbeat = xpc_online_heartbeat_sn2; + xpc_heartbeat_init = xpc_heartbeat_init_sn2; + xpc_heartbeat_exit = xpc_heartbeat_exit_sn2; + xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_sn2; + + xpc_request_partition_activation = xpc_request_partition_activation_sn2; + xpc_request_partition_reactivation = + xpc_request_partition_reactivation_sn2; + xpc_request_partition_deactivation = + xpc_request_partition_deactivation_sn2; + xpc_cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_sn2; + + xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2; + xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_sn2; + xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_sn2; + xpc_make_first_contact = xpc_make_first_contact_sn2; + + xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2; + xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2; + xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2; + xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2; + xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2; + + xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2; + + xpc_setup_msg_structures = xpc_setup_msg_structures_sn2; + xpc_teardown_msg_structures = xpc_teardown_msg_structures_sn2; + + xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2; + xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2; + xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2; + xpc_get_deliverable_payload = xpc_get_deliverable_payload_sn2; + + xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2; + xpc_indicate_partition_disengaged = + xpc_indicate_partition_disengaged_sn2; + xpc_partition_engaged = xpc_partition_engaged_sn2; + xpc_any_partition_engaged = xpc_any_partition_engaged_sn2; + xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2; + + xpc_send_payload = xpc_send_payload_sn2; + xpc_received_payload = xpc_received_payload_sn2; if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) { dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is " diff --git a/trunk/drivers/misc/sgi-xp/xpc_uv.c b/trunk/drivers/misc/sgi-xp/xpc_uv.c index 9172fcdee4e2..f7fff4727edb 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_uv.c +++ b/trunk/drivers/misc/sgi-xp/xpc_uv.c @@ -46,7 +46,8 @@ struct uv_IO_APIC_route_entry { }; #endif -static struct xpc_heartbeat_uv *xpc_heartbeat_uv; +static atomic64_t xpc_heartbeat_uv; +static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); #define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) #define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ @@ -62,7 +63,7 @@ static struct xpc_gru_mq_uv *xpc_activate_mq_uv; static struct xpc_gru_mq_uv *xpc_notify_mq_uv; static int -xpc_setup_partitions_uv(void) 
+xpc_setup_partitions_sn_uv(void) { short partid; struct xpc_partition_uv *part_uv; @@ -78,7 +79,7 @@ xpc_setup_partitions_uv(void) } static void -xpc_teardown_partitions_uv(void) +xpc_teardown_partitions_sn_uv(void) { short partid; struct xpc_partition_uv *part_uv; @@ -422,6 +423,41 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, /* syncing of remote_act_state was just done above */ break; + case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: { + struct xpc_activate_mq_msg_heartbeat_req_uv *msg; + + msg = container_of(msg_hdr, + struct xpc_activate_mq_msg_heartbeat_req_uv, + hdr); + part_uv->heartbeat = msg->heartbeat; + break; + } + case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: { + struct xpc_activate_mq_msg_heartbeat_req_uv *msg; + + msg = container_of(msg_hdr, + struct xpc_activate_mq_msg_heartbeat_req_uv, + hdr); + part_uv->heartbeat = msg->heartbeat; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + } + case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: { + struct xpc_activate_mq_msg_heartbeat_req_uv *msg; + + msg = container_of(msg_hdr, + struct xpc_activate_mq_msg_heartbeat_req_uv, + hdr); + part_uv->heartbeat = msg->heartbeat; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + } case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { struct xpc_activate_mq_msg_activate_req_uv *msg; @@ -439,7 +475,6 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */ part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; - part_uv->heartbeat_gpa = msg->heartbeat_gpa; if (msg->activate_gru_mq_desc_gpa != part_uv->activate_gru_mq_desc_gpa) { @@ -534,17 +569,6 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, xpc_wakeup_channel_mgr(part); break; } - case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: { - struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg; - - msg = container_of(msg_hdr, struct - xpc_activate_mq_msg_chctl_opencomplete_uv, hdr); - spin_lock_irqsave(&part->chctl_lock, irq_flags); - part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE; - spin_unlock_irqrestore(&part->chctl_lock, irq_flags); - - xpc_wakeup_channel_mgr(part); - } case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV: spin_lock_irqsave(&part_uv->flags_lock, irq_flags); part_uv->flags |= XPC_P_ENGAGED_UV; @@ -735,7 +759,7 @@ xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req) /* * !!! Make our side think that the remote partition sent an activate - * !!! mq message our way by doing what the activate IRQ handler would + * !!! message our way by doing what the activate IRQ handler would * !!! do had one really been sent. 
*/ @@ -782,82 +806,90 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, } static int -xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp) +xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) { - xpc_heartbeat_uv = - &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat; - rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv); - rp->sn.uv.activate_gru_mq_desc_gpa = + rp->sn.activate_gru_mq_desc_gpa = uv_gpa(xpc_activate_mq_uv->gru_mq_desc); return 0; } static void -xpc_allow_hb_uv(short partid) +xpc_send_heartbeat_uv(int msg_type) { -} + short partid; + struct xpc_partition *part; + struct xpc_activate_mq_msg_heartbeat_req_uv msg; -static void -xpc_disallow_hb_uv(short partid) -{ -} + /* + * !!! On uv we're broadcasting a heartbeat message every 5 seconds. + * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20 + * !!! seconds. This is an increase in numalink traffic. + * ??? Is this good? + */ -static void -xpc_disallow_all_hbs_uv(void) -{ + msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv); + + partid = find_first_bit(xpc_heartbeating_to_mask_uv, + XP_MAX_NPARTITIONS_UV); + + while (partid < XP_MAX_NPARTITIONS_UV) { + part = &xpc_partitions[partid]; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + msg_type); + + partid = find_next_bit(xpc_heartbeating_to_mask_uv, + XP_MAX_NPARTITIONS_UV, partid + 1); + } } static void xpc_increment_heartbeat_uv(void) { - xpc_heartbeat_uv->value++; + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV); } static void xpc_offline_heartbeat_uv(void) { - xpc_increment_heartbeat_uv(); - xpc_heartbeat_uv->offline = 1; + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); } static void xpc_online_heartbeat_uv(void) { - xpc_increment_heartbeat_uv(); - xpc_heartbeat_uv->offline = 0; + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV); } static void xpc_heartbeat_init_uv(void) { - xpc_heartbeat_uv->value = 1; - xpc_heartbeat_uv->offline = 0; + atomic64_set(&xpc_heartbeat_uv, 0); + bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); + xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0]; } static void xpc_heartbeat_exit_uv(void) { - xpc_offline_heartbeat_uv(); + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); } static enum xp_retval xpc_get_remote_heartbeat_uv(struct xpc_partition *part) { struct xpc_partition_uv *part_uv = &part->sn.uv; - enum xp_retval ret; + enum xp_retval ret = xpNoHeartbeat; - ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat), - part_uv->heartbeat_gpa, - sizeof(struct xpc_heartbeat_uv)); - if (ret != xpSuccess) - return ret; + if (part_uv->remote_act_state != XPC_P_AS_INACTIVE && + part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) { - if (part_uv->cached_heartbeat.value == part->last_heartbeat && - !part_uv->cached_heartbeat.offline) { + if (part_uv->heartbeat != part->last_heartbeat || + (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) { - ret = xpNoHeartbeat; - } else { - part->last_heartbeat = part_uv->cached_heartbeat.value; + part->last_heartbeat = part_uv->heartbeat; + ret = xpSuccess; + } } return ret; } @@ -872,9 +904,8 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; - part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa; part->sn.uv.activate_gru_mq_desc_gpa = - remote_rp->sn.uv.activate_gru_mq_desc_gpa; + remote_rp->sn.activate_gru_mq_desc_gpa; /* * ??? 
Is it a good idea to make this conditional on what is @@ -882,9 +913,8 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, */ if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { msg.rp_gpa = uv_gpa(xpc_rsvd_page); - msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa; msg.activate_gru_mq_desc_gpa = - xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa; + xpc_rsvd_page->sn.activate_gru_mq_desc_gpa; xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); } @@ -980,7 +1010,7 @@ xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head) * Setup the channel structures that are uv specific. */ static enum xp_retval -xpc_setup_ch_structures_uv(struct xpc_partition *part) +xpc_setup_ch_structures_sn_uv(struct xpc_partition *part) { struct xpc_channel_uv *ch_uv; int ch_number; @@ -999,7 +1029,7 @@ xpc_setup_ch_structures_uv(struct xpc_partition *part) * Teardown the channel structures that are uv specific. */ static void -xpc_teardown_ch_structures_uv(struct xpc_partition *part) +xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part) { /* nothing needs to be done */ return; @@ -1212,16 +1242,6 @@ xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags) XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV); } -static void -xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_activate_mq_msg_chctl_opencomplete_uv msg; - - msg.ch_number = ch->number; - xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), - XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV); -} - static void xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number) { @@ -1649,67 +1669,58 @@ xpc_received_payload_uv(struct xpc_channel *ch, void *payload) msg->hdr.msg_slot_number += ch->remote_nentries; } -static struct xpc_arch_operations xpc_arch_ops_uv = { - .setup_partitions = xpc_setup_partitions_uv, - .teardown_partitions = xpc_teardown_partitions_uv, - .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv, - .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv, - .setup_rsvd_page = xpc_setup_rsvd_page_uv, - - .allow_hb = xpc_allow_hb_uv, - .disallow_hb = xpc_disallow_hb_uv, - .disallow_all_hbs = xpc_disallow_all_hbs_uv, - .increment_heartbeat = xpc_increment_heartbeat_uv, - .offline_heartbeat = xpc_offline_heartbeat_uv, - .online_heartbeat = xpc_online_heartbeat_uv, - .heartbeat_init = xpc_heartbeat_init_uv, - .heartbeat_exit = xpc_heartbeat_exit_uv, - .get_remote_heartbeat = xpc_get_remote_heartbeat_uv, - - .request_partition_activation = - xpc_request_partition_activation_uv, - .request_partition_reactivation = - xpc_request_partition_reactivation_uv, - .request_partition_deactivation = - xpc_request_partition_deactivation_uv, - .cancel_partition_deactivation_request = - xpc_cancel_partition_deactivation_request_uv, - - .setup_ch_structures = xpc_setup_ch_structures_uv, - .teardown_ch_structures = xpc_teardown_ch_structures_uv, - - .make_first_contact = xpc_make_first_contact_uv, - - .get_chctl_all_flags = xpc_get_chctl_all_flags_uv, - .send_chctl_closerequest = xpc_send_chctl_closerequest_uv, - .send_chctl_closereply = xpc_send_chctl_closereply_uv, - .send_chctl_openrequest = xpc_send_chctl_openrequest_uv, - .send_chctl_openreply = xpc_send_chctl_openreply_uv, - .send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv, - .process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv, - - .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv, - - .setup_msg_structures = 
xpc_setup_msg_structures_uv, - .teardown_msg_structures = xpc_teardown_msg_structures_uv, - - .indicate_partition_engaged = xpc_indicate_partition_engaged_uv, - .indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv, - .assume_partition_disengaged = xpc_assume_partition_disengaged_uv, - .partition_engaged = xpc_partition_engaged_uv, - .any_partition_engaged = xpc_any_partition_engaged_uv, - - .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv, - .send_payload = xpc_send_payload_uv, - .get_deliverable_payload = xpc_get_deliverable_payload_uv, - .received_payload = xpc_received_payload_uv, - .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, -}; - int xpc_init_uv(void) { - xpc_arch_ops = xpc_arch_ops_uv; + xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv; + xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_uv; + xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv; + xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv; + xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv; + xpc_increment_heartbeat = xpc_increment_heartbeat_uv; + xpc_offline_heartbeat = xpc_offline_heartbeat_uv; + xpc_online_heartbeat = xpc_online_heartbeat_uv; + xpc_heartbeat_init = xpc_heartbeat_init_uv; + xpc_heartbeat_exit = xpc_heartbeat_exit_uv; + xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv; + + xpc_request_partition_activation = xpc_request_partition_activation_uv; + xpc_request_partition_reactivation = + xpc_request_partition_reactivation_uv; + xpc_request_partition_deactivation = + xpc_request_partition_deactivation_uv; + xpc_cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_uv; + + xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv; + xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv; + + xpc_make_first_contact = xpc_make_first_contact_uv; + + xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv; + xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv; + xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv; + xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv; + xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv; + + xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv; + + xpc_setup_msg_structures = xpc_setup_msg_structures_uv; + xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv; + + xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv; + xpc_indicate_partition_disengaged = + xpc_indicate_partition_disengaged_uv; + xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv; + xpc_partition_engaged = xpc_partition_engaged_uv; + xpc_any_partition_engaged = xpc_any_partition_engaged_uv; + + xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv; + xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv; + xpc_send_payload = xpc_send_payload_uv; + xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv; + xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv; + xpc_received_payload = xpc_received_payload_uv; if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n", diff --git a/trunk/drivers/net/atl1c/atl1c_main.c b/trunk/drivers/net/atl1c/atl1c_main.c index 83a12125b94e..deb7b53167ee 100644 --- a/trunk/drivers/net/atl1c/atl1c_main.c +++ b/trunk/drivers/net/atl1c/atl1c_main.c @@ -2532,8 +2532,8 @@ static int __devinit atl1c_probe(struct pci_dev *pdev, * various kernel subsystems to support the 
mechanics required by a * fixed-high-32-bit system. */ - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) || - (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) { + if ((pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) || + (pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK) != 0)) { dev_err(&pdev->dev, "No usable DMA configuration,aborting\n"); goto err_dma; } diff --git a/trunk/drivers/net/benet/be_main.c b/trunk/drivers/net/benet/be_main.c index 30d0c81c989e..9b75aa630062 100644 --- a/trunk/drivers/net/benet/be_main.c +++ b/trunk/drivers/net/benet/be_main.c @@ -1821,11 +1821,11 @@ static int __devinit be_probe(struct pci_dev *pdev, be_msix_enable(adapter); - status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + status = pci_set_dma_mask(pdev, DMA_64BIT_MASK); if (!status) { netdev->features |= NETIF_F_HIGHDMA; } else { - status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + status = pci_set_dma_mask(pdev, DMA_32BIT_MASK); if (status) { dev_err(&pdev->dev, "Could not set PCI DMA Mask\n"); goto free_netdev; diff --git a/trunk/drivers/net/jme.c b/trunk/drivers/net/jme.c index 621a7c0c46ba..ece35040288c 100644 --- a/trunk/drivers/net/jme.c +++ b/trunk/drivers/net/jme.c @@ -2591,13 +2591,13 @@ static int jme_pci_dma64(struct pci_dev *pdev) { if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 && - !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) - if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) + !pci_set_dma_mask(pdev, DMA_64BIT_MASK)) + if (!pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK)) return 1; if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 && - !pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) - if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40))) + !pci_set_dma_mask(pdev, DMA_40BIT_MASK)) + if (!pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK)) return 1; if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) diff --git a/trunk/drivers/net/wireless/ath9k/pci.c b/trunk/drivers/net/wireless/ath9k/pci.c index 168411d322a2..6dbc58580abb 100644 --- a/trunk/drivers/net/wireless/ath9k/pci.c +++ b/trunk/drivers/net/wireless/ath9k/pci.c @@ -93,14 +93,14 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (pci_enable_device(pdev)) return -EIO; - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK); if (ret) { printk(KERN_ERR "ath9k: 32-bit DMA not available\n"); goto bad; } - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); if (ret) { printk(KERN_ERR "ath9k: 32-bit DMA consistent " diff --git a/trunk/drivers/net/wireless/p54/p54pci.c b/trunk/drivers/net/wireless/p54/p54pci.c index b1610ea4bb3d..e3569a0a952d 100644 --- a/trunk/drivers/net/wireless/p54/p54pci.c +++ b/trunk/drivers/net/wireless/p54/p54pci.c @@ -492,8 +492,8 @@ static int __devinit p54p_probe(struct pci_dev *pdev, goto err_disable_dev; } - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) || - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) || + pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { dev_err(&pdev->dev, "No suitable DMA available\n"); goto err_free_reg; } diff --git a/trunk/drivers/scsi/3w-9xxx.c b/trunk/drivers/scsi/3w-9xxx.c index 8b7983aba8f7..fdb14ec4fd47 100644 --- a/trunk/drivers/scsi/3w-9xxx.c +++ b/trunk/drivers/scsi/3w-9xxx.c @@ -2234,10 +2234,10 @@ static int twa_resume(struct pci_dev *pdev) pci_set_master(pdev); pci_try_set_mwi(pdev); - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) - || 
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) - || pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) + || pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK)) + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) + || pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { TW_PRINTK(host, TW_DRIVER, 0x40, "Failed to set dma mask during resume"); retval = -ENODEV; goto out_disable_device; diff --git a/trunk/drivers/scsi/aacraid/aachba.c b/trunk/drivers/scsi/aacraid/aachba.c index 2a889853a106..280261c451d6 100644 --- a/trunk/drivers/scsi/aacraid/aachba.c +++ b/trunk/drivers/scsi/aacraid/aachba.c @@ -1378,7 +1378,7 @@ int aac_get_adapter_info(struct aac_dev* dev) if (dev->nondasd_support && !dev->in_reset) printk(KERN_INFO "%s%d: Non-DASD support enabled.\n",dev->name, dev->id); - if (dma_get_required_mask(&dev->pdev->dev) > DMA_BIT_MASK(32)) + if (dma_get_required_mask(&dev->pdev->dev) > DMA_32BIT_MASK) dev->needs_dac = 1; dev->dac_support = 0; if ((sizeof(dma_addr_t) > 4) && dev->needs_dac && diff --git a/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c b/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c index a91f5143ceac..52427a8324f5 100644 --- a/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -855,9 +855,9 @@ _base_config_dma_addressing(struct MPT2SAS_ADAPTER *ioc, struct pci_dev *pdev) if (sizeof(dma_addr_t) > 4) { const uint64_t required_mask = dma_get_required_mask(&pdev->dev); - if ((required_mask > DMA_BIT_MASK(32)) && !pci_set_dma_mask(pdev, - DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(pdev, - DMA_BIT_MASK(64))) { + if ((required_mask > DMA_32BIT_MASK) && !pci_set_dma_mask(pdev, + DMA_64BIT_MASK) && !pci_set_consistent_dma_mask(pdev, + DMA_64BIT_MASK)) { ioc->base_add_sg_single = &_base_add_sg_single_64; ioc->sge_size = sizeof(Mpi2SGESimple64_t); desc = "64"; @@ -865,8 +865,8 @@ _base_config_dma_addressing(struct MPT2SAS_ADAPTER *ioc, struct pci_dev *pdev) } } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) - && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK) + && !pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { ioc->base_add_sg_single = &_base_add_sg_single_32; ioc->sge_size = sizeof(Mpi2SGESimple32_t); desc = "32"; diff --git a/trunk/drivers/spi/spi.c b/trunk/drivers/spi/spi.c index 8eba98c8ed1e..643908b74bc0 100644 --- a/trunk/drivers/spi/spi.c +++ b/trunk/drivers/spi/spi.c @@ -658,7 +658,7 @@ int spi_write_then_read(struct spi_device *spi, int status; struct spi_message message; - struct spi_transfer x[2]; + struct spi_transfer x; u8 *local_buf; /* Use preallocated DMA-safe buffer. We can't avoid copying here, @@ -669,15 +669,9 @@ int spi_write_then_read(struct spi_device *spi, return -EINVAL; spi_message_init(&message); - memset(x, 0, sizeof x); - if (n_tx) { - x[0].len = n_tx; - spi_message_add_tail(&x[0], &message); - } - if (n_rx) { - x[1].len = n_rx; - spi_message_add_tail(&x[1], &message); - } + memset(&x, 0, sizeof x); + x.len = n_tx + n_rx; + spi_message_add_tail(&x, &message); /* ... 
unless someone else is using the pre-allocated buffer */ if (!mutex_trylock(&lock)) { @@ -688,15 +682,15 @@ int spi_write_then_read(struct spi_device *spi, local_buf = buf; memcpy(local_buf, txbuf, n_tx); - x[0].tx_buf = local_buf; - x[1].rx_buf = local_buf + n_tx; + x.tx_buf = local_buf; + x.rx_buf = local_buf; /* do the i/o */ status = spi_sync(spi, &message); if (status == 0) - memcpy(rxbuf, x[1].rx_buf, n_rx); + memcpy(rxbuf, x.rx_buf + n_tx, n_rx); - if (x[0].tx_buf == buf) + if (x.tx_buf == buf) mutex_unlock(&lock); else kfree(local_buf); diff --git a/trunk/drivers/staging/b3dfg/b3dfg.c b/trunk/drivers/staging/b3dfg/b3dfg.c index 75ebe338c6f2..0348072b3ab5 100644 --- a/trunk/drivers/staging/b3dfg/b3dfg.c +++ b/trunk/drivers/staging/b3dfg/b3dfg.c @@ -1000,7 +1000,7 @@ static int __devinit b3dfg_probe(struct pci_dev *pdev, pci_set_master(pdev); - r = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + r = pci_set_dma_mask(pdev, DMA_32BIT_MASK); if (r) { dev_err(&pdev->dev, "no usable DMA configuration\n"); goto err_free_res; } diff --git a/trunk/drivers/usb/otg/nop-usb-xceiv.c b/trunk/drivers/usb/otg/nop-usb-xceiv.c index c567168f89af..4b933f646f2e 100644 --- a/trunk/drivers/usb/otg/nop-usb-xceiv.c +++ b/trunk/drivers/usb/otg/nop-usb-xceiv.c @@ -36,14 +36,14 @@ struct nop_usb_xceiv { struct device *dev; }; -static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32); +static u64 nop_xceiv_dmamask = DMA_32BIT_MASK; static struct platform_device nop_xceiv_device = { .name = "nop_usb_xceiv", .id = -1, .dev = { .dma_mask = &nop_xceiv_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), + .coherent_dma_mask = DMA_32BIT_MASK, .platform_data = NULL, }, }; diff --git a/trunk/drivers/video/aty/radeon_base.c b/trunk/drivers/video/aty/radeon_base.c index 6c37e8ee5efe..16bb7e3c0310 100644 --- a/trunk/drivers/video/aty/radeon_base.c +++ b/trunk/drivers/video/aty/radeon_base.c @@ -698,8 +698,8 @@ static void __devinit radeon_get_pllinfo(struct radeonfb_info *rinfo) found: /* * Some methods fail to retrieve SCLK and MCLK values, we apply default - * settings in this case (200Mhz). If that really happens often, we - * could fetch from registers instead... + * settings in this case (200Mhz). If that really happens often, we could + * fetch from registers instead...
*/ if (rinfo->pll.mclk == 0) rinfo->pll.mclk = 20000; diff --git a/trunk/drivers/video/backlight/backlight.c b/trunk/drivers/video/backlight/backlight.c index 157057c79ca3..dd37cbcaf8ce 100644 --- a/trunk/drivers/video/backlight/backlight.c +++ b/trunk/drivers/video/backlight/backlight.c @@ -35,6 +35,8 @@ static int fb_notifier_callback(struct notifier_block *self, return 0; bd = container_of(self, struct backlight_device, fb_notif); + if (!lock_fb_info(evdata->info)) + return -ENODEV; mutex_lock(&bd->ops_lock); if (bd->ops) if (!bd->ops->check_fb || @@ -47,6 +49,7 @@ static int fb_notifier_callback(struct notifier_block *self, backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); + unlock_fb_info(evdata->info); return 0; } diff --git a/trunk/drivers/video/backlight/lcd.c b/trunk/drivers/video/backlight/lcd.c index b6449470106c..0bb13df0fa89 100644 --- a/trunk/drivers/video/backlight/lcd.c +++ b/trunk/drivers/video/backlight/lcd.c @@ -40,6 +40,8 @@ static int fb_notifier_callback(struct notifier_block *self, if (!ld->ops) return 0; + if (!lock_fb_info(evdata->info)) + return -ENODEV; mutex_lock(&ld->ops_lock); if (!ld->ops->check_fb || ld->ops->check_fb(ld, evdata->info)) { if (event == FB_EVENT_BLANK) { @@ -51,6 +53,7 @@ static int fb_notifier_callback(struct notifier_block *self, } } mutex_unlock(&ld->ops_lock); + unlock_fb_info(evdata->info); return 0; } diff --git a/trunk/drivers/video/cirrusfb.c b/trunk/drivers/video/cirrusfb.c index 4c2bf923418c..d42e385f091c 100644 --- a/trunk/drivers/video/cirrusfb.c +++ b/trunk/drivers/video/cirrusfb.c @@ -567,7 +567,9 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var, default: dev_dbg(info->device, "Unsupported bpp size: %d\n", var->bits_per_pixel); - return -EINVAL; + assert(false); + /* should never occur */ + break; } if (var->xres_virtual < var->xres) diff --git a/trunk/drivers/video/console/fbcon.c b/trunk/drivers/video/console/fbcon.c index 471a9a60376a..2cd500a304f2 100644 --- a/trunk/drivers/video/console/fbcon.c +++ b/trunk/drivers/video/console/fbcon.c @@ -2263,12 +2263,9 @@ static void fbcon_generic_blank(struct vc_data *vc, struct fb_info *info, } - if (!lock_fb_info(info)) - return; event.info = info; event.data = &blank; fb_notifier_call_chain(FB_EVENT_CONBLANK, &event); - unlock_fb_info(info); } static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch) @@ -2959,6 +2956,8 @@ static int fbcon_fb_unregistered(struct fb_info *info) { int i, idx; + if (!lock_fb_info(info)) + return -ENODEV; idx = info->node; for (i = first_fb_vc; i <= last_fb_vc; i++) { if (con2fb_map[i] == idx) @@ -2986,6 +2985,8 @@ static int fbcon_fb_unregistered(struct fb_info *info) if (primary_device == idx) primary_device = -1; + unlock_fb_info(info); + if (!num_registered_fb) unregister_con_driver(&fb_con); @@ -3026,8 +3027,11 @@ static int fbcon_fb_registered(struct fb_info *info) { int ret = 0, i, idx; + if (!lock_fb_info(info)) + return -ENODEV; idx = info->node; fbcon_select_primary(info); + unlock_fb_info(info); if (info_idx == -1) { for (i = first_fb_vc; i <= last_fb_vc; i++) { @@ -3148,23 +3152,53 @@ static int fbcon_event_notify(struct notifier_block *self, switch(action) { case FB_EVENT_SUSPEND: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_suspended(info); + unlock_fb_info(info); break; case FB_EVENT_RESUME: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_resumed(info); + unlock_fb_info(info); break; case FB_EVENT_MODE_CHANGE: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto
done; + } fbcon_modechanged(info); + unlock_fb_info(info); break; case FB_EVENT_MODE_CHANGE_ALL: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_set_all_vcs(info); + unlock_fb_info(info); break; case FB_EVENT_MODE_DELETE: mode = event->data; + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } ret = fbcon_mode_deleted(info, mode); + unlock_fb_info(info); break; case FB_EVENT_FB_UNBIND: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } idx = info->node; + unlock_fb_info(info); ret = fbcon_fb_unbind(idx); break; case FB_EVENT_FB_REGISTERED: @@ -3183,14 +3217,29 @@ static int fbcon_event_notify(struct notifier_block *self, con2fb->framebuffer = con2fb_map[con2fb->console - 1]; break; case FB_EVENT_BLANK: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_fb_blanked(info, *(int *)event->data); + unlock_fb_info(info); break; case FB_EVENT_NEW_MODELIST: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_new_modelist(info); + unlock_fb_info(info); break; case FB_EVENT_GET_REQ: caps = event->data; + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_get_requirement(info, caps); + unlock_fb_info(info); break; } done: diff --git a/trunk/drivers/video/efifb.c b/trunk/drivers/video/efifb.c index 8dea2bc92705..0c5b9a9fd56f 100644 --- a/trunk/drivers/video/efifb.c +++ b/trunk/drivers/video/efifb.c @@ -210,15 +210,12 @@ static int __init efifb_probe(struct platform_device *dev) unsigned int size_total; int request_succeeded = 0; + printk(KERN_INFO "efifb: probing for efifb\n"); + if (!screen_info.lfb_depth) screen_info.lfb_depth = 32; if (!screen_info.pages) screen_info.pages = 1; - if (!screen_info.lfb_base) { - printk(KERN_DEBUG "efifb: invalid framebuffer address\n"); - return -ENODEV; - } - printk(KERN_INFO "efifb: probing for efifb\n"); /* just assume they're all unset if any are */ if (!screen_info.blue_size) { diff --git a/trunk/drivers/video/fbmem.c b/trunk/drivers/video/fbmem.c index d412a1ddc12f..2ac32e6b5953 100644 --- a/trunk/drivers/video/fbmem.c +++ b/trunk/drivers/video/fbmem.c @@ -1097,11 +1097,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EINVAL; con2fb.framebuffer = -1; event.data = &con2fb; - if (!lock_fb_info(info)) - return -ENODEV; event.info = info; fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event); - unlock_fb_info(info); ret = copy_to_user(argp, &con2fb, sizeof(con2fb)) ? 
-EFAULT : 0; break; case FBIOPUT_CON2FBMAP: @@ -1118,11 +1115,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, break; } event.data = &con2fb; - if (!lock_fb_info(info)) - return -ENODEV; event.info = info; ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, &event); - unlock_fb_info(info); break; case FBIOBLANK: if (!lock_fb_info(info)) @@ -1527,10 +1521,7 @@ register_framebuffer(struct fb_info *fb_info) registered_fb[i] = fb_info; event.info = fb_info; - if (!lock_fb_info(fb_info)) - return -ENODEV; fb_notifier_call_chain(FB_EVENT_FB_REGISTERED, &event); - unlock_fb_info(fb_info); return 0; } @@ -1564,12 +1555,8 @@ unregister_framebuffer(struct fb_info *fb_info) goto done; } - - if (!lock_fb_info(fb_info)) - return -ENODEV; event.info = fb_info; ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event); - unlock_fb_info(fb_info); if (ret) { ret = -EINVAL; @@ -1603,8 +1590,6 @@ void fb_set_suspend(struct fb_info *info, int state) { struct fb_event event; - if (!lock_fb_info(info)) - return; event.info = info; if (state) { fb_notifier_call_chain(FB_EVENT_SUSPEND, &event); @@ -1613,7 +1598,6 @@ void fb_set_suspend(struct fb_info *info, int state) info->state = FBINFO_STATE_RUNNING; fb_notifier_call_chain(FB_EVENT_RESUME, &event); } - unlock_fb_info(info); } /** @@ -1683,11 +1667,8 @@ int fb_new_modelist(struct fb_info *info) err = 1; if (!list_empty(&info->modelist)) { - if (!lock_fb_info(info)) - return -ENODEV; event.info = info; err = fb_notifier_call_chain(FB_EVENT_NEW_MODELIST, &event); - unlock_fb_info(info); } return err; diff --git a/trunk/drivers/video/intelfb/intelfb.h b/trunk/drivers/video/intelfb/intelfb.h index 40984551c927..a50bea614804 100644 --- a/trunk/drivers/video/intelfb/intelfb.h +++ b/trunk/drivers/video/intelfb/intelfb.h @@ -53,7 +53,6 @@ #define PCI_DEVICE_ID_INTEL_830M 0x3577 #define PCI_DEVICE_ID_INTEL_845G 0x2562 #define PCI_DEVICE_ID_INTEL_85XGM 0x3582 -#define PCI_DEVICE_ID_INTEL_854 0x358E #define PCI_DEVICE_ID_INTEL_865G 0x2572 #define PCI_DEVICE_ID_INTEL_915G 0x2582 #define PCI_DEVICE_ID_INTEL_915GM 0x2592 @@ -155,7 +154,6 @@ enum intel_chips { INTEL_85XGM, INTEL_852GM, INTEL_852GME, - INTEL_854, INTEL_855GM, INTEL_855GME, INTEL_865G, diff --git a/trunk/drivers/video/intelfb/intelfb_i2c.c b/trunk/drivers/video/intelfb/intelfb_i2c.c index 487f2be47460..b3065492bb20 100644 --- a/trunk/drivers/video/intelfb/intelfb_i2c.c +++ b/trunk/drivers/video/intelfb/intelfb_i2c.c @@ -156,7 +156,6 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo) switch(dinfo->chipset) { case INTEL_830M: case INTEL_845G: - case INTEL_854: case INTEL_855GM: case INTEL_865G: dinfo->output[i].type = INTELFB_OUTPUT_DVO; diff --git a/trunk/drivers/video/intelfb/intelfbdrv.c b/trunk/drivers/video/intelfb/intelfbdrv.c index ace14fe02fc4..6d8e5415c809 100644 --- a/trunk/drivers/video/intelfb/intelfbdrv.c +++ b/trunk/drivers/video/intelfb/intelfbdrv.c @@ -182,7 +182,6 @@ static struct pci_device_id intelfb_pci_table[] __devinitdata = { { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_845G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_845G }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_85XGM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_85XGM }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_865G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_865G }, - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_854, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_854 }, 
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915G }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915GM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915GM }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_945G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_945G }, diff --git a/trunk/drivers/video/intelfb/intelfbhw.c b/trunk/drivers/video/intelfb/intelfbhw.c index 0689f97c5238..8b26b27c2db6 100644 --- a/trunk/drivers/video/intelfb/intelfbhw.c +++ b/trunk/drivers/video/intelfb/intelfbhw.c @@ -84,11 +84,6 @@ int intelfbhw_get_chipset(struct pci_dev *pdev, struct intelfb_info *dinfo) dinfo->mobile = 0; dinfo->pll_index = PLLS_I8xx; return 0; - case PCI_DEVICE_ID_INTEL_854: - dinfo->mobile = 1; - dinfo->name = "Intel(R) 854"; - dinfo->chipset = INTEL_854; - return 0; case PCI_DEVICE_ID_INTEL_85XGM: tmp = 0; dinfo->mobile = 1; diff --git a/trunk/drivers/video/s3fb.c b/trunk/drivers/video/s3fb.c index c3fad34309ed..4dcec48a1d78 100644 --- a/trunk/drivers/video/s3fb.c +++ b/trunk/drivers/video/s3fb.c @@ -45,11 +45,11 @@ struct s3fb_info { static const struct svga_fb_format s3fb_formats[] = { { 0, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, FB_TYPE_TEXT, FB_AUX_TEXT_SVGA_STEP4, FB_VISUAL_PSEUDOCOLOR, 8, 16}, - { 4, {0, 4, 0}, {0, 4, 0}, {0, 4, 0}, {0, 0, 0}, 0, + { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 8, 16}, - { 4, {0, 4, 0}, {0, 4, 0}, {0, 4, 0}, {0, 0, 0}, 1, + { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 1, FB_TYPE_INTERLEAVED_PLANES, 1, FB_VISUAL_PSEUDOCOLOR, 8, 16}, - { 8, {0, 8, 0}, {0, 8, 0}, {0, 8, 0}, {0, 0, 0}, 0, + { 8, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 4, 8}, {16, {10, 5, 0}, {5, 5, 0}, {0, 5, 0}, {0, 0, 0}, 0, FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 2, 4}, diff --git a/trunk/drivers/video/sa1100fb.c b/trunk/drivers/video/sa1100fb.c index 10ddad8e17d6..fad58cf9ef73 100644 --- a/trunk/drivers/video/sa1100fb.c +++ b/trunk/drivers/video/sa1100fb.c @@ -199,20 +199,16 @@ extern void (*sa1100fb_backlight_power)(int on); extern void (*sa1100fb_lcd_power)(int on); -static struct sa1100fb_rgb rgb_4 = { +/* + * IMHO this looks wrong. In 8BPP, length should be 8. 
+ */ +static struct sa1100fb_rgb rgb_8 = { .red = { .offset = 0, .length = 4, }, .green = { .offset = 0, .length = 4, }, .blue = { .offset = 0, .length = 4, }, .transp = { .offset = 0, .length = 0, }, }; -static struct sa1100fb_rgb rgb_8 = { - .red = { .offset = 0, .length = 8, }, - .green = { .offset = 0, .length = 8, }, - .blue = { .offset = 0, .length = 8, }, - .transp = { .offset = 0, .length = 0, }, -}; - static struct sa1100fb_rgb def_rgb_16 = { .red = { .offset = 11, .length = 5, }, .green = { .offset = 5, .length = 6, }, @@ -617,7 +613,7 @@ sa1100fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) DPRINTK("var->bits_per_pixel=%d\n", var->bits_per_pixel); switch (var->bits_per_pixel) { case 4: - rgbidx = RGB_4; + rgbidx = RGB_8; break; case 8: rgbidx = RGB_8; @@ -1386,7 +1382,6 @@ static struct sa1100fb_info * __init sa1100fb_init_fbinfo(struct device *dev) fbi->fb.monspecs = monspecs; fbi->fb.pseudo_palette = (fbi + 1); - fbi->rgb[RGB_4] = &rgb_4; fbi->rgb[RGB_8] = &rgb_8; fbi->rgb[RGB_16] = &def_rgb_16; diff --git a/trunk/drivers/video/sa1100fb.h b/trunk/drivers/video/sa1100fb.h index 1c3b459865d8..86831db9a042 100644 --- a/trunk/drivers/video/sa1100fb.h +++ b/trunk/drivers/video/sa1100fb.h @@ -57,10 +57,9 @@ struct sa1100fb_lcd_reg { unsigned long lccr3; }; -#define RGB_4 (0) -#define RGB_8 (1) -#define RGB_16 (2) -#define NR_RGB 3 +#define RGB_8 (0) +#define RGB_16 (1) +#define NR_RGB 2 struct sa1100fb_info { struct fb_info fb; diff --git a/trunk/drivers/video/sis/sis_main.c b/trunk/drivers/video/sis/sis_main.c index 7e17ee95a97a..346d6458cf76 100644 --- a/trunk/drivers/video/sis/sis_main.c +++ b/trunk/drivers/video/sis/sis_main.c @@ -1129,7 +1129,7 @@ sisfb_bpp_to_var(struct sis_video_info *ivideo, struct fb_var_screeninfo *var) switch(var->bits_per_pixel) { case 8: var->red.offset = var->green.offset = var->blue.offset = 0; - var->red.length = var->green.length = var->blue.length = 8; + var->red.length = var->green.length = var->blue.length = 6; break; case 16: var->red.offset = 11; diff --git a/trunk/drivers/video/skeletonfb.c b/trunk/drivers/video/skeletonfb.c index 89158bc71da2..a439159204a8 100644 --- a/trunk/drivers/video/skeletonfb.c +++ b/trunk/drivers/video/skeletonfb.c @@ -308,11 +308,9 @@ static int xxxfb_setcolreg(unsigned regno, unsigned red, unsigned green, * color depth = SUM(var->{color}.length) * * Pseudocolor: - * var->{color}.offset is 0 unless the palette index takes less than - * bits_per_pixel bits and is stored in the upper - * bits of the pixel value - * var->{color}.length is set so that 1 << length is the number of - * available palette entries + * var->{color}.offset is 0 + * var->{color}.length contains width of DAC or the number of unique + * colors available (color depth) * pseudo_palette is not used * RAMDAC[X] is programmed to (red, green, blue) * color depth = var->{color}.length diff --git a/trunk/drivers/video/uvesafb.c b/trunk/drivers/video/uvesafb.c index 421770b5e6ab..0b370aebdbfd 100644 --- a/trunk/drivers/video/uvesafb.c +++ b/trunk/drivers/video/uvesafb.c @@ -55,7 +55,6 @@ static u16 maxvf __devinitdata; /* maximum vertical frequency */ static u16 maxhf __devinitdata; /* maximum horizontal frequency */ static u16 vbemode __devinitdata; /* force use of a specific VBE mode */ static char *mode_option __devinitdata; -static u8 dac_width = 6; static struct uvesafb_ktask *uvfb_tasks[UVESAFB_TASKS_MAX]; static DEFINE_MUTEX(uvfb_lock); @@ -304,10 +303,22 @@ static void uvesafb_setup_var(struct fb_var_screeninfo *var, 
var->blue.offset = 0; var->transp.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; + /* + * We're assuming that we can switch the DAC to 8 bits. If + * this proves to be incorrect, we'll update the fields + * later in set_par(). + */ + if (par->vbe_ib.capabilities & VBE_CAP_CAN_SWITCH_DAC) { + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + } else { + var->red.length = 6; + var->green.length = 6; + var->blue.length = 6; + var->transp.length = 0; + } } } @@ -995,7 +1006,7 @@ static int uvesafb_setcolreg(unsigned regno, unsigned red, unsigned green, struct fb_info *info) { struct uvesafb_pal_entry entry; - int shift = 16 - dac_width; + int shift = 16 - info->var.green.length; int err = 0; if (regno >= info->cmap.len) @@ -1044,7 +1055,7 @@ static int uvesafb_setcolreg(unsigned regno, unsigned red, unsigned green, static int uvesafb_setcmap(struct fb_cmap *cmap, struct fb_info *info) { struct uvesafb_pal_entry *entries; - int shift = 16 - dac_width; + int shift = 16 - info->var.green.length; int i, err = 0; if (info->var.bits_per_pixel == 8) { @@ -1306,9 +1317,13 @@ static int uvesafb_set_par(struct fb_info *info) err = uvesafb_exec(task); if (err || (task->t.regs.eax & 0xffff) != 0x004f || ((task->t.regs.ebx & 0xff00) >> 8) != 8) { - dac_width = 6; - } else { - dac_width = 8; + /* + * We've failed to set the DAC palette format - + * time to correct var. + */ + info->var.red.length = 6; + info->var.green.length = 6; + info->var.blue.length = 6; } } diff --git a/trunk/drivers/video/vfb.c b/trunk/drivers/video/vfb.c index 050d432c7d95..cc919ae46571 100644 --- a/trunk/drivers/video/vfb.c +++ b/trunk/drivers/video/vfb.c @@ -318,16 +318,13 @@ static int vfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, * {hardwarespecific} contains width of RAMDAC * cmap[X] is programmed to (X << red.offset) | (X << green.offset) | (X << blue.offset) * RAMDAC[X] is programmed to (red, green, blue) - * + * Pseudocolor: - * var->{color}.offset is 0 unless the palette index takes less than - * bits_per_pixel bits and is stored in the upper - * bits of the pixel value - * var->{color}.length is set so that 1 << length is the number of available - * palette entries + * uses offset = 0 && length = RAMDAC register width. + * var->{color}.offset is 0 + * var->{color}.length contains width of DAC * cmap is not used * RAMDAC[X] is programmed to (red, green, blue) - * + * Truecolor: * does not use DAC. Usually 3 are present.
* var->{color}.offset contains start of bitfield diff --git a/trunk/drivers/xen/cpu_hotplug.c b/trunk/drivers/xen/cpu_hotplug.c index bdfd584ad853..5f54c01c1568 100644 --- a/trunk/drivers/xen/cpu_hotplug.c +++ b/trunk/drivers/xen/cpu_hotplug.c @@ -21,41 +21,29 @@ static void disable_hotplug_cpu(int cpu) set_cpu_present(cpu, false); } -static int vcpu_online(unsigned int cpu) +static void vcpu_hotplug(unsigned int cpu) { int err; char dir[32], state[32]; + if (!cpu_possible(cpu)) + return; + sprintf(dir, "cpu/%u", cpu); err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); if (err != 1) { printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); - return err; - } - - if (strcmp(state, "online") == 0) - return 1; - else if (strcmp(state, "offline") == 0) - return 0; - - printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu); - return -EINVAL; -} -static void vcpu_hotplug(unsigned int cpu) -{ - if (!cpu_possible(cpu)) return; + } - switch (vcpu_online(cpu)) { - case 1: + if (strcmp(state, "online") == 0) { enable_hotplug_cpu(cpu); - break; - case 0: + } else if (strcmp(state, "offline") == 0) { (void)cpu_down(cpu); disable_hotplug_cpu(cpu); - break; - default: - break; + } else { + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", + state, cpu); } } @@ -76,20 +64,12 @@ static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, static int setup_cpu_watcher(struct notifier_block *notifier, unsigned long event, void *data) { - int cpu; static struct xenbus_watch cpu_watch = { .node = "cpu", .callback = handle_vcpu_hotplug_event}; (void)register_xenbus_watch(&cpu_watch); - for_each_possible_cpu(cpu) { - if (vcpu_online(cpu) == 0) { - (void)cpu_down(cpu); - cpu_clear(cpu, cpu_present_map); - } - } - return NOTIFY_DONE; } diff --git a/trunk/drivers/xen/manage.c b/trunk/drivers/xen/manage.c index 4b5b84837ee1..0d61db1e7b49 100644 --- a/trunk/drivers/xen/manage.c +++ b/trunk/drivers/xen/manage.c @@ -62,15 +62,14 @@ static int xen_suspend(void *data) gnttab_resume(); xen_mm_unpin_all(); + sysdev_resume(); + if (!*cancelled) { xen_irq_resume(); xen_console_resume(); xen_timer_resume(); } - sysdev_resume(); - device_power_up(PMSG_RESUME); - return 0; } diff --git a/trunk/fs/ext2/inode.c b/trunk/fs/ext2/inode.c index acf678831103..b43b95563663 100644 --- a/trunk/fs/ext2/inode.c +++ b/trunk/fs/ext2/inode.c @@ -590,8 +590,9 @@ static int ext2_get_blocks(struct inode *inode, if (depth == 0) return (err); - +reread: partial = ext2_get_branch(inode, depth, offsets, chain, &err); + /* Simplest case - block found, no allocation needed */ if (!partial) { first_block = le32_to_cpu(chain[depth - 1].key); @@ -601,16 +602,15 @@ static int ext2_get_blocks(struct inode *inode, while (count < maxblocks && count <= blocks_to_boundary) { ext2_fsblk_t blk; - if (!verify_chain(chain, chain + depth - 1)) { + if (!verify_chain(chain, partial)) { /* * Indirect block might be removed by * truncate while we were reading it. * Handling of that case: forget what we've * got now, go to reread. 
*/ - err = -EAGAIN; count = 0; - break; + goto changed; } blk = le32_to_cpu(*(chain[depth-1].p + count)); if (blk == first_block + count) @@ -618,8 +618,7 @@ static int ext2_get_blocks(struct inode *inode, else break; } - if (err != -EAGAIN) - goto got_it; + goto got_it; } /* Next simple case - plain lookup or failed read of indirect block */ @@ -627,33 +626,6 @@ static int ext2_get_blocks(struct inode *inode, goto cleanup; mutex_lock(&ei->truncate_mutex); - /* - * If the indirect block is missing while we are reading - * the chain(ext3_get_branch() returns -EAGAIN err), or - * if the chain has been changed after we grab the semaphore, - * (either because another process truncated this branch, or - * another get_block allocated this branch) re-grab the chain to see if - * the request block has been allocated or not. - * - * Since we already block the truncate/other get_block - * at this point, we will have the current copy of the chain when we - * splice the branch into the tree. - */ - if (err == -EAGAIN || !verify_chain(chain, partial)) { - while (partial > chain) { - brelse(partial->bh); - partial--; - } - partial = ext2_get_branch(inode, depth, offsets, chain, &err); - if (!partial) { - count++; - mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; - clear_buffer_new(bh_result); - goto got_it; - } - } /* * Okay, we need to do block allocation. Lazily initialize the block @@ -711,6 +683,12 @@ static int ext2_get_blocks(struct inode *inode, partial--; } return err; +changed: + while (partial > chain) { + brelse(partial->bh); + partial--; + } + goto reread; } int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) diff --git a/trunk/fs/hfs/inode.c b/trunk/fs/hfs/inode.c index a1cbff2b4d99..9435dda8f1e0 100644 --- a/trunk/fs/hfs/inode.c +++ b/trunk/fs/hfs/inode.c @@ -70,10 +70,6 @@ static int hfs_releasepage(struct page *page, gfp_t mask) BUG(); return 0; } - - if (!tree) - return 0; - if (tree->node_size >= PAGE_CACHE_SIZE) { nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); spin_lock(&tree->hash_lock); diff --git a/trunk/fs/hfs/mdb.c b/trunk/fs/hfs/mdb.c index 7b6165f25fbe..36ca2e1a4fa3 100644 --- a/trunk/fs/hfs/mdb.c +++ b/trunk/fs/hfs/mdb.c @@ -349,7 +349,6 @@ void hfs_mdb_put(struct super_block *sb) if (HFS_SB(sb)->nls_disk) unload_nls(HFS_SB(sb)->nls_disk); - free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); kfree(HFS_SB(sb)); sb->s_fs_info = NULL; } diff --git a/trunk/fs/jbd/revoke.c b/trunk/fs/jbd/revoke.c index 3e9afc2a91d2..c7bd649bbbdc 100644 --- a/trunk/fs/jbd/revoke.c +++ b/trunk/fs/jbd/revoke.c @@ -55,25 +55,6 @@ * need do nothing. * RevokeValid set, Revoked set: * buffer has been revoked. - * - * Locking rules: - * We keep two hash tables of revoke records. One hashtable belongs to the - * running transaction (is pointed to by journal->j_revoke), the other one - * belongs to the committing transaction. Accesses to the second hash table - * happen only from the kjournald and no other thread touches this table. Also - * journal_switch_revoke_table() which switches which hashtable belongs to the - * running and which to the committing transaction is called only from - * kjournald. Therefore we need no locks when accessing the hashtable belonging - * to the committing transaction. - * - * All users operating on the hash table belonging to the running transaction - * have a handle to the transaction. Therefore they are safe from kjournald - * switching hash tables under them. 
For operations on the lists of entries in - * the hash table j_revoke_lock is used. - * - * Finally, also replay code uses the hash tables but at this moment noone else - * can touch them (filesystem isn't mounted yet) and hence no locking is - * needed. */ #ifndef __KERNEL__ @@ -421,6 +402,8 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, * the second time we would still have a pending revoke to cancel. So, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. + * + * The caller must have the journal locked. */ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { @@ -498,7 +481,10 @@ void journal_switch_revoke_table(journal_t *journal) /* * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. + * + * Called with the journal lock held. */ + void journal_write_revoke_records(journal_t *journal, transaction_t *transaction) { diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.c b/trunk/fs/xfs/linux-2.6/xfs_aops.c index 7ec89fc05b2b..c13f67300fe7 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_aops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_aops.c @@ -152,6 +152,23 @@ xfs_find_bdev_for_inode( return mp->m_ddev_targp->bt_bdev; } +/* + * Schedule IO completion handling on a xfsdatad if this was + * the final hold on this ioend. If we are asked to wait, + * flush the workqueue. + */ +STATIC void +xfs_finish_ioend( + xfs_ioend_t *ioend, + int wait) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) { + queue_work(xfsdatad_workqueue, &ioend->io_work); + if (wait) + flush_workqueue(xfsdatad_workqueue); + } +} + /* * We're now finished for good with this ioend structure. * Update the page state via the associated buffer_heads, @@ -292,27 +309,6 @@ xfs_end_bio_read( xfs_destroy_ioend(ioend); } -/* - * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. If we are asked to wait, - * flush the workqueue. - */ -STATIC void -xfs_finish_ioend( - xfs_ioend_t *ioend, - int wait) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) { - struct workqueue_struct *wq = xfsdatad_workqueue; - if (ioend->io_work.func == xfs_end_bio_unwritten) - wq = xfsconvertd_workqueue; - - queue_work(wq, &ioend->io_work); - if (wait) - flush_workqueue(wq); - } -} - /* * Allocate and initialise an IO completion structure. * We need to track unwritten extent write completion here initially. 
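The xfs_aops.c hunk above restores the earlier placement of xfs_finish_ioend(): an ioend carries a count of outstanding holds in io_remaining, each completing bio drops one hold, and whichever caller drops the last hold queues the completion handler on the xfsdatad workqueue (flushing the queue when the caller asked to wait). A rough userspace sketch of that last-reference-schedules-the-work pattern follows; it is only an illustration under C11 atomics and pthreads, and every name in it (ioend_t, finish_ioend, worker, end_io_handler) is an invented stand-in, not the kernel API.

/*
 * Illustrative stand-in for the kernel pattern only: C11 atomics
 * replace atomic_dec_and_test(), a worker thread replaces
 * queue_work(), and pthread_join() replaces flush_workqueue().
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

typedef struct ioend {
	atomic_int io_remaining;          /* outstanding holds on this I/O */
	void (*io_work)(struct ioend *);  /* completion handler */
} ioend_t;

static void end_io_handler(ioend_t *io)
{
	(void)io;
	printf("last hold dropped; running completion handler\n");
}

/* Worker thread standing in for the xfsdatad workqueue thread. */
static void *worker(void *arg)
{
	ioend_t *io = arg;

	io->io_work(io);
	return NULL;
}

/*
 * Drop one hold on the ioend.  Only the caller that drops the final
 * hold schedules the completion work, mirroring the
 * atomic_dec_and_test() check in xfs_finish_ioend().
 */
static void finish_ioend(ioend_t *io, pthread_t *thr)
{
	/* atomic_fetch_sub() returns the value before the decrement */
	if (atomic_fetch_sub(&io->io_remaining, 1) == 1)
		pthread_create(thr, NULL, worker, io);
}

int main(void)
{
	pthread_t thr;
	ioend_t io = { .io_work = end_io_handler };

	atomic_init(&io.io_remaining, 2); /* pretend two bios are in flight */

	finish_ioend(&io, &thr);  /* first bio completes: nothing happens */
	finish_ioend(&io, &thr);  /* last bio completes: work is "queued" */

	pthread_join(thr, NULL);  /* analogue of the wait/flush_workqueue case */
	return 0;
}

Here pthread_join() plays the role that flush_workqueue() plays in the hunk above when a synchronous caller passes wait; the essential point in both is that the decrement and the zero test are a single atomic operation, so exactly one completion path schedules the handler.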
diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.h b/trunk/fs/xfs/linux-2.6/xfs_aops.h index 221b3e66ceef..1dd528849755 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_aops.h +++ b/trunk/fs/xfs/linux-2.6/xfs_aops.h @@ -19,7 +19,6 @@ #define __XFS_AOPS_H__ extern struct workqueue_struct *xfsdatad_workqueue; -extern struct workqueue_struct *xfsconvertd_workqueue; extern mempool_t *xfs_ioend_pool; /* diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c index e28800a9f2b5..aa1016bb9134 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.c +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c @@ -51,7 +51,6 @@ static struct shrinker xfs_buf_shake = { static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; -struct workqueue_struct *xfsconvertd_workqueue; #ifdef XFS_BUF_TRACE void @@ -1776,7 +1775,6 @@ xfs_flush_buftarg( xfs_buf_t *bp, *n; int pincount = 0; - xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfslogd_workqueue); @@ -1833,15 +1831,9 @@ xfs_buf_init(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - xfsconvertd_workqueue = create_workqueue("xfsconvertd"); - if (!xfsconvertd_workqueue) - goto out_destroy_xfsdatad_workqueue; - register_shrinker(&xfs_buf_shake); return 0; - out_destroy_xfsdatad_workqueue: - destroy_workqueue(xfsdatad_workqueue); out_destroy_xfslogd_workqueue: destroy_workqueue(xfslogd_workqueue); out_free_buf_zone: @@ -1857,7 +1849,6 @@ void xfs_buf_terminate(void) { unregister_shrinker(&xfs_buf_shake); - destroy_workqueue(xfsconvertd_workqueue); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); diff --git a/trunk/fs/xfs/linux-2.6/xfs_fs_subr.c b/trunk/fs/xfs/linux-2.6/xfs_fs_subr.c index 08be36d7326c..5aeb77776961 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/trunk/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -74,14 +74,14 @@ xfs_flush_pages( if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = -filemap_fdatawrite(mapping); + ret = filemap_fdatawrite(mapping); + if (flags & XFS_B_ASYNC) + return -ret; + ret2 = filemap_fdatawait(mapping); + if (!ret) + ret = ret2; } - if (flags & XFS_B_ASYNC) - return ret; - ret2 = xfs_wait_on_pages(ip, first, last); - if (!ret) - ret = ret2; - return ret; + return -ret; } int diff --git a/trunk/fs/xfs/linux-2.6/xfs_lrw.c b/trunk/fs/xfs/linux-2.6/xfs_lrw.c index 9142192ccbe6..7e90daa0d1d1 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_lrw.c +++ b/trunk/fs/xfs/linux-2.6/xfs_lrw.c @@ -751,26 +751,10 @@ xfs_write( goto relock; } } else { - int enospc = 0; - ssize_t ret2 = 0; - -write_retry: xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, *offset, ioflags); - ret2 = generic_file_buffered_write(iocb, iovp, segs, + ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); - /* - * if we just got an ENOSPC, flush the inode now we - * aren't holding any page locks and retry *once* - */ - if (ret2 == -ENOSPC && !enospc) { - error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE); - if (error) - goto out_unlock_internal; - enospc = 1; - goto write_retry; - } - ret = ret2; } current->backing_dev_info = NULL; diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index f7ba76633c29..a608e72fa405 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -62,6 +62,12 @@ xfs_sync_inodes_ag( uint32_t first_index = 0; int error = 0; int last_error = 0; + int fflag 
= XFS_B_ASYNC; + + if (flags & SYNC_DELWRI) + fflag = XFS_B_DELWRI; + if (flags & SYNC_WAIT) + fflag = 0; /* synchronous overrides all */ do { struct inode *inode; @@ -122,23 +128,11 @@ xfs_sync_inodes_ag( * If we have to flush data or wait for I/O completion * we need to hold the iolock. */ - if (flags & SYNC_DELWRI) { - if (VN_DIRTY(inode)) { - if (flags & SYNC_TRYLOCK) { - if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) - lock_flags |= XFS_IOLOCK_SHARED; - } else { - xfs_ilock(ip, XFS_IOLOCK_SHARED); - lock_flags |= XFS_IOLOCK_SHARED; - } - if (lock_flags & XFS_IOLOCK_SHARED) { - error = xfs_flush_pages(ip, 0, -1, - (flags & SYNC_WAIT) ? 0 - : XFS_B_ASYNC, - FI_NONE); - } - } - if (VN_CACHED(inode) && (flags & SYNC_IOWAIT)) + if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock_flags |= XFS_IOLOCK_SHARED; + error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); + if (flags & SYNC_IOWAIT) xfs_ioend_wait(ip); } xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -404,17 +398,15 @@ STATIC void xfs_syncd_queue_work( struct xfs_mount *mp, void *data, - void (*syncer)(struct xfs_mount *, void *), - struct completion *completion) + void (*syncer)(struct xfs_mount *, void *)) { - struct xfs_sync_work *work; + struct bhv_vfs_sync_work *work; - work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); + work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); INIT_LIST_HEAD(&work->w_list); work->w_syncer = syncer; work->w_data = data; work->w_mount = mp; - work->w_completion = completion; spin_lock(&mp->m_sync_lock); list_add_tail(&work->w_list, &mp->m_sync_list); spin_unlock(&mp->m_sync_lock); @@ -428,26 +420,49 @@ xfs_syncd_queue_work( * heads, looking about for more room... */ STATIC void -xfs_flush_inodes_work( +xfs_flush_inode_work( + struct xfs_mount *mp, + void *arg) +{ + struct inode *inode = arg; + filemap_flush(inode->i_mapping); + iput(inode); +} + +void +xfs_flush_inode( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + igrab(inode); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); + delay(msecs_to_jiffies(500)); +} + +/* + * This is the "bigger hammer" version of xfs_flush_inode_work... + * (IOW, "If at first you don't succeed, use a Bigger Hammer"). 
+ */ +STATIC void +xfs_flush_device_work( struct xfs_mount *mp, void *arg) { struct inode *inode = arg; - xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); - xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); + sync_blockdev(mp->m_super->s_bdev); iput(inode); } void -xfs_flush_inodes( +xfs_flush_device( xfs_inode_t *ip) { struct inode *inode = VFS_I(ip); - DECLARE_COMPLETION_ONSTACK(completion); igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); - wait_for_completion(&completion); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); + delay(msecs_to_jiffies(500)); xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); } @@ -482,7 +497,7 @@ xfssyncd( { struct xfs_mount *mp = arg; long timeleft; - xfs_sync_work_t *work, *n; + bhv_vfs_sync_work_t *work, *n; LIST_HEAD (tmp); set_freezable(); @@ -517,8 +532,6 @@ xfssyncd( list_del(&work->w_list); if (work == &mp->m_sync_work) continue; - if (work->w_completion) - complete(work->w_completion); kmem_free(work); } } @@ -532,7 +545,6 @@ xfs_syncd_init( { mp->m_sync_work.w_syncer = xfs_sync_worker; mp->m_sync_work.w_mount = mp; - mp->m_sync_work.w_completion = NULL; mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); if (IS_ERR(mp->m_sync_task)) return -PTR_ERR(mp->m_sync_task); diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.h b/trunk/fs/xfs/linux-2.6/xfs_sync.h index 308d5bf6dfbd..04f058c848ae 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.h +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.h @@ -21,20 +21,18 @@ struct xfs_mount; struct xfs_perag; -typedef struct xfs_sync_work { +typedef struct bhv_vfs_sync_work { struct list_head w_list; struct xfs_mount *w_mount; void *w_data; /* syncer routine argument */ void (*w_syncer)(struct xfs_mount *, void *); - struct completion *w_completion; -} xfs_sync_work_t; +} bhv_vfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ #define SYNC_DELWRI 0x0002 /* look at delayed writes */ #define SYNC_WAIT 0x0004 /* wait for i/o to complete */ #define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ #define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ -#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); @@ -45,7 +43,8 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); -void xfs_flush_inodes(struct xfs_inode *ip); +void xfs_flush_inode(struct xfs_inode *ip); +void xfs_flush_device(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index 89b81eedce6a..478e587087fe 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -69,6 +69,15 @@ xfs_inode_alloc( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); + /* + * initialise the VFS inode here to get failures + * out of the way early. + */ + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } + /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; @@ -104,20 +113,6 @@ xfs_inode_alloc( #ifdef XFS_DIR2_TRACE ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); #endif - /* - * Now initialise the VFS inode. 
We do this after the xfs_inode - * initialisation as internal failures will result in ->destroy_inode - * being called and that will pass down through the reclaim path and - * free the XFS inode. This path requires the XFS inode to already be - * initialised. Hence if this call fails, the xfs_inode has already - * been freed and we should not reference it at all in the error - * handling. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) - return NULL; - - /* prevent anyone from using this yet */ - VFS_I(ip)->i_state = I_NEW|I_LOCK; return ip; } diff --git a/trunk/fs/xfs/xfs_iomap.c b/trunk/fs/xfs/xfs_iomap.c index 5aaa2d7ec155..08ce72316bfe 100644 --- a/trunk/fs/xfs/xfs_iomap.c +++ b/trunk/fs/xfs/xfs_iomap.c @@ -337,6 +337,38 @@ xfs_iomap_eof_align_last_fsb( return 0; } +STATIC int +xfs_flush_space( + xfs_inode_t *ip, + int *fsynced, + int *ioflags) +{ + switch (*fsynced) { + case 0: + if (ip->i_delayed_blks) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_inode(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + *fsynced = 1; + } else { + *ioflags |= BMAPI_SYNC; + *fsynced = 2; + } + return 0; + case 1: + *fsynced = 2; + *ioflags |= BMAPI_SYNC; + return 0; + case 2: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_device(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + *fsynced = 3; + return 0; + } + return 1; +} + STATIC int xfs_cmn_err_fsblock_zero( xfs_inode_t *ip, @@ -506,9 +538,15 @@ xfs_iomap_write_direct( } /* - * If the caller is doing a write at the end of the file, then extend the - * allocation out to the file system's write iosize. We clean up any extra - * space left over when the file is closed in xfs_inactive(). + * If the caller is doing a write at the end of the file, + * then extend the allocation out to the file system's write + * iosize. We clean up any extra space left over when the + * file is closed in xfs_inactive(). + * + * For sync writes, we are flushing delayed allocate space to + * try to make additional space available for allocation near + * the filesystem full boundary - preallocation hurts in that + * situation, of course. */ STATIC int xfs_iomap_eof_want_preallocate( @@ -527,7 +565,7 @@ xfs_iomap_eof_want_preallocate( int n, error, imaps; *prealloc = 0; - if ((offset + count) <= ip->i_size) + if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size) return 0; /* @@ -573,7 +611,7 @@ xfs_iomap_write_delay( xfs_extlen_t extsz; int nimaps; xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; - int prealloc, flushed = 0; + int prealloc, fsynced = 0; int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); @@ -589,12 +627,12 @@ xfs_iomap_write_delay( extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); +retry: error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, ioflag, imap, XFS_WRITE_IMAPS, &prealloc); if (error) return error; -retry: if (prealloc) { aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); ioalign = XFS_B_TO_FSBT(mp, aligned_offset); @@ -621,22 +659,15 @@ xfs_iomap_write_delay( /* * If bmapi returned us nothing, and if we didn't get back EDQUOT, - * then we must have run out of space - flush all other inodes with - * delalloc blocks and retry without EOF preallocation. + * then we must have run out of space - flush delalloc, and retry.. 
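The xfs_flush_space() helper reinstated above implements a staged escalation on ENOSPC: first flush only this inode's delayed allocations, then retry the write synchronously (BMAPI_SYNC), then flush the whole device, and only then report the failure. A simplified, compilable sketch of that state machine (the flush helpers are stand-ins, not the kernel functions):

#include <stdio.h>

/* stand-ins for the real flushes; illustrative only */
static void flush_this_inode(void)   { puts("flush one inode's delalloc"); }
static void flush_whole_device(void) { puts("flush the whole block device"); }

enum flush_stage { STAGE_INODE, STAGE_SYNC, STAGE_DEVICE, STAGE_DONE };

/* returns 0 if the caller should retry the allocation, 1 to give up */
static int flush_space(enum flush_stage *stage)
{
	switch (*stage) {
	case STAGE_INODE:		/* cheapest: just this file */
		flush_this_inode();
		*stage = STAGE_SYNC;
		return 0;
	case STAGE_SYNC:		/* retry as a sync write (BMAPI_SYNC) */
		*stage = STAGE_DEVICE;
		return 0;
	case STAGE_DEVICE:		/* the "bigger hammer" */
		flush_whole_device();
		*stage = STAGE_DONE;
		return 0;
	default:
		return 1;		/* nothing left: really out of space */
	}
}

A caller loops while the allocation fails and flush_space() returns 0, which is exactly what the goto retry in xfs_iomap_write_delay() does with its fsynced counter.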
*/ if (nimaps == 0) { xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, ip, offset, count); - if (flushed) + if (xfs_flush_space(ip, &fsynced, &ioflag)) return XFS_ERROR(ENOSPC); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_inodes(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - - flushed = 1; error = 0; - prealloc = 0; goto retry; } diff --git a/trunk/fs/xfs/xfs_iomap.h b/trunk/fs/xfs/xfs_iomap.h index fdcf7b82747f..a1cc1322fc0f 100644 --- a/trunk/fs/xfs/xfs_iomap.h +++ b/trunk/fs/xfs/xfs_iomap.h @@ -40,7 +40,8 @@ typedef enum { BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ - BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ + BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ + BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */ } bmapi_flags_t; diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index 3750f04ede0b..f76c6d7cea21 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -562,8 +562,9 @@ xfs_log_mount( } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); - if (IS_ERR(mp->m_log)) { - error = -PTR_ERR(mp->m_log); + if (!mp->m_log) { + cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); + error = ENOMEM; goto out; } @@ -1179,13 +1180,10 @@ xlog_alloc_log(xfs_mount_t *mp, xfs_buf_t *bp; int i; int iclogsize; - int error = ENOMEM; log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); - if (!log) { - xlog_warn("XFS: Log allocation failed: No memory!"); - goto out; - } + if (!log) + return NULL; log->l_mp = mp; log->l_targ = log_target; @@ -1203,35 +1201,19 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_grant_reserve_cycle = 1; log->l_grant_write_cycle = 1; - error = EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; - if (log->l_sectbb_log < 0 || - log->l_sectbb_log > mp->m_sectbb_log) { - xlog_warn("XFS: Log sector size (0x%x) out of range.", - log->l_sectbb_log); - goto out_free_log; - } - + ASSERT(log->l_sectbb_log <= mp->m_sectbb_log); /* for larger sector sizes, must have v2 or external log */ - if (log->l_sectbb_log != 0 && - (log->l_logBBstart != 0 && - !xfs_sb_version_haslogv2(&mp->m_sb))) { - xlog_warn("XFS: log sector size (0x%x) invalid " - "for configuration.", log->l_sectbb_log); - goto out_free_log; - } - if (mp->m_sb.sb_logsectlog < BBSHIFT) { - xlog_warn("XFS: Log sector log (0x%x) too small.", - mp->m_sb.sb_logsectlog); - goto out_free_log; - } + ASSERT(log->l_sectbb_log == 0 || + log->l_logBBstart == 0 || + xfs_sb_version_haslogv2(&mp->m_sb)); + ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); } log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; xlog_get_iclog_buffer_size(mp, log); - error = ENOMEM; bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); if (!bp) goto out_free_log; @@ -1331,8 +1313,7 @@ xlog_alloc_log(xfs_mount_t *mp, xfs_buf_free(log->l_xbuf); out_free_log: kmem_free(log); -out: - return ERR_PTR(-error); + return NULL; } /* xlog_alloc_log */ @@ -2560,19 +2541,18 @@ xlog_grant_log_space(xlog_t *log, xlog_ins_ticketq(&log->l_reserve_headq, tic); xlog_trace_loggrant(log, tic, "xlog_grant_log_space: sleep 2"); - spin_unlock(&log->l_grant_lock); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); - XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); - spin_lock(&log->l_grant_lock); - if (XLOG_FORCED_SHUTDOWN(log)) + if (XLOG_FORCED_SHUTDOWN(log)) { + 
spin_lock(&log->l_grant_lock); goto error_return; + } xlog_trace_loggrant(log, tic, "xlog_grant_log_space: wake 2"); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); @@ -2651,7 +2631,7 @@ xlog_regrant_write_log_space(xlog_t *log, * for more free space, otherwise try to get some space for * this transaction. */ - need_bytes = tic->t_unit_res; + if ((ntic = log->l_write_headq)) { free_bytes = xlog_space_left(log, log->l_grant_write_cycle, log->l_grant_write_bytes); @@ -2671,25 +2651,26 @@ xlog_regrant_write_log_space(xlog_t *log, xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: sleep 1"); - spin_unlock(&log->l_grant_lock); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); - XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already * off the queue */ - spin_lock(&log->l_grant_lock); - if (XLOG_FORCED_SHUTDOWN(log)) + if (XLOG_FORCED_SHUTDOWN(log)) { + spin_lock(&log->l_grant_lock); goto error_return; + } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 1"); + xlog_grant_push_ail(log->l_mp, tic->t_unit_res); + spin_lock(&log->l_grant_lock); } } + need_bytes = tic->t_unit_res; + redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; @@ -2699,20 +2680,19 @@ xlog_regrant_write_log_space(xlog_t *log, if (free_bytes < need_bytes) { if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) xlog_ins_ticketq(&log->l_write_headq, tic); - spin_unlock(&log->l_grant_lock); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); - XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already off the queue */ - spin_lock(&log->l_grant_lock); - if (XLOG_FORCED_SHUTDOWN(log)) + if (XLOG_FORCED_SHUTDOWN(log)) { + spin_lock(&log->l_grant_lock); goto error_return; + } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 2"); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_write_headq, tic); diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index d6a64392f983..7af44adffc8f 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -313,7 +313,7 @@ typedef struct xfs_mount { #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct task_struct *m_sync_task; /* generalised sync thread */ - xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ + bhv_vfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ struct list_head m_sync_list; /* sync thread work item list */ spinlock_t m_sync_lock; /* work item list lock */ int m_sync_seq; /* sync thread generation no. 
*/ diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c index 19cf90a9c762..7394c7af5de5 100644 --- a/trunk/fs/xfs/xfs_vnodeops.c +++ b/trunk/fs/xfs/xfs_vnodeops.c @@ -1457,13 +1457,6 @@ xfs_create( error = xfs_trans_reserve(tp, resblks, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); if (error == ENOSPC) { - /* flush outstanding delalloc blocks and retry */ - xfs_flush_inodes(dp); - error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); - } - if (error == ENOSPC) { - /* No space at all so try a "no-allocation" reservation */ resblks = 0; error = xfs_trans_reserve(tp, 0, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); diff --git a/trunk/include/asm-generic/siginfo.h b/trunk/include/asm-generic/siginfo.h index c840719a8c59..35752dadd6df 100644 --- a/trunk/include/asm-generic/siginfo.h +++ b/trunk/include/asm-generic/siginfo.h @@ -201,7 +201,7 @@ typedef struct siginfo { #define TRAP_TRACE (__SI_FAULT|2) /* process trace trap */ #define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */ #define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint/watchpoint */ -#define NSIGTRAP 4 +#define NSIGTRAP 2 /* * SIGCHLD si_codes diff --git a/trunk/include/drm/drm_pciids.h b/trunk/include/drm/drm_pciids.h index 9477af01a639..2df74eb09563 100644 --- a/trunk/include/drm/drm_pciids.h +++ b/trunk/include/drm/drm_pciids.h @@ -472,7 +472,6 @@ {0x8086, 0x2562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x3582, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x2572, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x8086, 0x358e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0, 0, 0} #define gamma_PCI_IDS \ @@ -534,5 +533,4 @@ {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ - {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0, 0, 0} diff --git a/trunk/include/linux/fb.h b/trunk/include/linux/fb.h index 330c4b1bfcaa..f563c5013932 100644 --- a/trunk/include/linux/fb.h +++ b/trunk/include/linux/fb.h @@ -173,12 +173,8 @@ struct fb_fix_screeninfo { /* Interpretation of offset for color fields: All offsets are from the right, * inside a "pixel" value, which is exactly 'bits_per_pixel' wide (means: you * can use the offset as right argument to <<). A pixel afterwards is a bit - * stream and is written to video memory as that unmodified. - * - * For pseudocolor: offset and length should be the same for all color - * components. Offset specifies the position of the least significant bit - * of the pallette index in a pixel value. Length indicates the number - * of available palette entries (i.e. # of entries = 1 << length). + * stream and is written to video memory as that unmodified. This implies + * big-endian byte order if bits_per_pixel is greater than 8. 
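The fb_bitfield comment above says each color component is placed by shifting with its offset, after truncation to the field length. A worked example of that convention, using a local struct that mirrors the offset/length halves of struct fb_bitfield and assuming a hypothetical RGB565 layout:

#include <stdio.h>
#include <stdint.h>

/* mirrors the offset/length members of struct fb_bitfield */
struct bitfield { uint32_t offset, length; };

/* truncate an 8-bit component to the field width, then shift into place */
static uint32_t pack(uint32_t val8, struct bitfield f)
{
	return (val8 >> (8 - f.length)) << f.offset;
}

int main(void)
{
	/* RGB565: red at 11/5, green at 5/6, blue at 0/5 */
	struct bitfield r = {11, 5}, g = {5, 6}, b = {0, 5};
	uint32_t pixel = pack(0xff, r) | pack(0x80, g) | pack(0x40, b);

	printf("0x%04x\n", pixel);	/* prints 0xfc08 */
	return 0;
}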
*/ struct fb_bitfield { __u32 offset; /* beginning of bitfield */ diff --git a/trunk/include/linux/fiemap.h b/trunk/include/linux/fiemap.h index 934e22d65801..671decbd2aeb 100644 --- a/trunk/include/linux/fiemap.h +++ b/trunk/include/linux/fiemap.h @@ -11,8 +11,6 @@ #ifndef _LINUX_FIEMAP_H #define _LINUX_FIEMAP_H -#include - struct fiemap_extent { __u64 fe_logical; /* logical offset in bytes for the start of * the extent from the beginning of the file */ diff --git a/trunk/include/linux/init_task.h b/trunk/include/linux/init_task.h index d87247d2641f..dcfb93337e9a 100644 --- a/trunk/include/linux/init_task.h +++ b/trunk/include/linux/init_task.h @@ -15,6 +15,19 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; +#define INIT_KIOCTX(name, which_mm) \ +{ \ + .users = ATOMIC_INIT(1), \ + .dead = 0, \ + .mm = &which_mm, \ + .user_id = 0, \ + .next = NULL, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \ + .ctx_lock = __SPIN_LOCK_UNLOCKED(name.ctx_lock), \ + .reqs_active = 0U, \ + .max_reqs = ~0U, \ +} + #define INIT_MM(name) \ { \ .mm_rb = RB_ROOT, \ diff --git a/trunk/include/linux/pci_ids.h b/trunk/include/linux/pci_ids.h index 06ba90c211a5..ee98cd570885 100644 --- a/trunk/include/linux/pci_ids.h +++ b/trunk/include/linux/pci_ids.h @@ -2514,8 +2514,6 @@ #define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 -#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c -#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 #define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582 #define PCI_DEVICE_ID_INTEL_E7520_MCH 0x3590 diff --git a/trunk/include/linux/sht15.h b/trunk/include/linux/sht15.h deleted file mode 100644 index 046bce05ecab..000000000000 --- a/trunk/include/linux/sht15.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * sht15.h - support for the SHT15 Temperature and Humidity Sensor - * - * Copyright (c) 2009 Jonathan Cameron - * - * Copyright (c) 2007 Wouter Horre - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/** - * struct sht15_platform_data - sht15 connectivity info - * @gpio_data: no. of gpio to which bidirectional data line is connected - * @gpio_sck: no. of gpio to which the data clock is connected. - * @supply_mv: supply voltage in mv. Overridden by regulator if available. - **/ -struct sht15_platform_data { - int gpio_data; - int gpio_sck; - int supply_mv; -}; - diff --git a/trunk/include/video/cyblafb.h b/trunk/include/video/cyblafb.h new file mode 100644 index 000000000000..d3c1d4e2c8e3 --- /dev/null +++ b/trunk/include/video/cyblafb.h @@ -0,0 +1,175 @@ + +#ifndef CYBLAFB_DEBUG +#define CYBLAFB_DEBUG 0 +#endif + +#if CYBLAFB_DEBUG +#define debug(f,a...) printk("%s:" f, __func__ , ## a); +#else +#define debug(f,a...) +#endif + +#define output(f, a...) 
printk("cyblafb: " f, ## a) + +#define Kb (1024) +#define Mb (Kb*Kb) + +/* PCI IDS of supported cards temporarily here */ + +#define CYBERBLADEi1 0x8500 + +/* these defines are for 'lcd' variable */ +#define LCD_STRETCH 0 +#define LCD_CENTER 1 +#define LCD_BIOS 2 + +/* display types */ +#define DISPLAY_CRT 0 +#define DISPLAY_FP 1 + +#define ROP_S 0xCC + +#define point(x,y) ((y)<<16|(x)) + +// +// Attribute Regs, ARxx, 3c0/3c1 +// +#define AR00 0x00 +#define AR01 0x01 +#define AR02 0x02 +#define AR03 0x03 +#define AR04 0x04 +#define AR05 0x05 +#define AR06 0x06 +#define AR07 0x07 +#define AR08 0x08 +#define AR09 0x09 +#define AR0A 0x0A +#define AR0B 0x0B +#define AR0C 0x0C +#define AR0D 0x0D +#define AR0E 0x0E +#define AR0F 0x0F +#define AR10 0x10 +#define AR12 0x12 +#define AR13 0x13 + +// +// Sequencer Regs, SRxx, 3c4/3c5 +// +#define SR00 0x00 +#define SR01 0x01 +#define SR02 0x02 +#define SR03 0x03 +#define SR04 0x04 +#define SR0D 0x0D +#define SR0E 0x0E +#define SR11 0x11 +#define SR18 0x18 +#define SR19 0x19 + +// +// +// +#define CR00 0x00 +#define CR01 0x01 +#define CR02 0x02 +#define CR03 0x03 +#define CR04 0x04 +#define CR05 0x05 +#define CR06 0x06 +#define CR07 0x07 +#define CR08 0x08 +#define CR09 0x09 +#define CR0A 0x0A +#define CR0B 0x0B +#define CR0C 0x0C +#define CR0D 0x0D +#define CR0E 0x0E +#define CR0F 0x0F +#define CR10 0x10 +#define CR11 0x11 +#define CR12 0x12 +#define CR13 0x13 +#define CR14 0x14 +#define CR15 0x15 +#define CR16 0x16 +#define CR17 0x17 +#define CR18 0x18 +#define CR19 0x19 +#define CR1A 0x1A +#define CR1B 0x1B +#define CR1C 0x1C +#define CR1D 0x1D +#define CR1E 0x1E +#define CR1F 0x1F +#define CR20 0x20 +#define CR21 0x21 +#define CR27 0x27 +#define CR29 0x29 +#define CR2A 0x2A +#define CR2B 0x2B +#define CR2D 0x2D +#define CR2F 0x2F +#define CR36 0x36 +#define CR38 0x38 +#define CR39 0x39 +#define CR3A 0x3A +#define CR55 0x55 +#define CR56 0x56 +#define CR57 0x57 +#define CR58 0x58 + +// +// +// + +#define GR00 0x01 +#define GR01 0x01 +#define GR02 0x02 +#define GR03 0x03 +#define GR04 0x04 +#define GR05 0x05 +#define GR06 0x06 +#define GR07 0x07 +#define GR08 0x08 +#define GR0F 0x0F +#define GR20 0x20 +#define GR23 0x23 +#define GR2F 0x2F +#define GR30 0x30 +#define GR31 0x31 +#define GR33 0x33 +#define GR52 0x52 +#define GR53 0x53 +#define GR5D 0x5d + + +// +// Graphics Engine +// +#define GEBase 0x2100 // could be mapped elsewhere if we like it +#define GE00 (GEBase+0x00) // source 1, p 111 +#define GE04 (GEBase+0x04) // source 2, p 111 +#define GE08 (GEBase+0x08) // destination 1, p 111 +#define GE0C (GEBase+0x0C) // destination 2, p 112 +#define GE10 (GEBase+0x10) // right view base & enable, p 112 +#define GE13 (GEBase+0x13) // left view base & enable, p 112 +#define GE18 (GEBase+0x18) // block write start address, p 112 +#define GE1C (GEBase+0x1C) // block write end address, p 112 +#define GE20 (GEBase+0x20) // engine status, p 113 +#define GE24 (GEBase+0x24) // reset all GE pointers +#define GE44 (GEBase+0x44) // command register, p 126 +#define GE48 (GEBase+0x48) // raster operation, p 127 +#define GE60 (GEBase+0x60) // foreground color, p 128 +#define GE64 (GEBase+0x64) // background color, p 128 +#define GE6C (GEBase+0x6C) // Pattern and Style, p 129, ok +#define GE9C (GEBase+0x9C) // pixel engine data port, p 125 +#define GEB8 (GEBase+0xB8) // Destination Stride / Buffer Base 0, p 133 +#define GEBC (GEBase+0xBC) // Destination Stride / Buffer Base 1, p 133 +#define GEC0 (GEBase+0xC0) // Destination Stride / Buffer Base 2, p 133 +#define 
GEC4 (GEBase+0xC4) // Destination Stride / Buffer Base 3, p 133 +#define GEC8 (GEBase+0xC8) // Source Stride / Buffer Base 0, p 133 +#define GECC (GEBase+0xCC) // Source Stride / Buffer Base 1, p 133 +#define GED0 (GEBase+0xD0) // Source Stride / Buffer Base 2, p 133 +#define GED4 (GEBase+0xD4) // Source Stride / Buffer Base 3, p 133 diff --git a/trunk/init/initramfs.c b/trunk/init/initramfs.c index 9ee7b7810417..80cd713f6cc5 100644 --- a/trunk/init/initramfs.c +++ b/trunk/init/initramfs.c @@ -310,8 +310,7 @@ static int __init do_name(void) if (wfd >= 0) { sys_fchown(wfd, uid, gid); sys_fchmod(wfd, mode); - if (body_len) - sys_ftruncate(wfd, body_len); + sys_ftruncate(wfd, body_len); vcollected = kstrdup(collected, GFP_KERNEL); state = CopyFile; } @@ -516,7 +515,6 @@ static void __init free_initrd(void) initrd_end = 0; } -#ifdef CONFIG_BLK_DEV_RAM #define BUF_SIZE 1024 static void __init clean_rootfs(void) { @@ -563,7 +561,6 @@ static void __init clean_rootfs(void) sys_close(fd); kfree(buf); } -#endif static int __init populate_rootfs(void) { diff --git a/trunk/ipc/mq_sysctl.c b/trunk/ipc/mq_sysctl.c index 24ae46dfe45d..89f60ec8ee54 100644 --- a/trunk/ipc/mq_sysctl.c +++ b/trunk/ipc/mq_sysctl.c @@ -22,7 +22,6 @@ #define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */ #define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */ -#ifdef CONFIG_PROC_SYSCTL static void *get_mq(ctl_table *table) { char *which = table->data; @@ -31,6 +30,7 @@ static void *get_mq(ctl_table *table) return which; } +#ifdef CONFIG_PROC_SYSCTL static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/trunk/kernel/ptrace.c b/trunk/kernel/ptrace.c index dfcd83ceee3b..64191fa09b7e 100644 --- a/trunk/kernel/ptrace.c +++ b/trunk/kernel/ptrace.c @@ -604,11 +604,10 @@ int ptrace_traceme(void) ret = security_ptrace_traceme(current->parent); /* - * Check PF_EXITING to ensure ->real_parent has not passed - * exit_ptrace(). Otherwise we don't report the error but - * pretend ->real_parent untraces us right after return. + * Set the ptrace bit in the process ptrace flags. + * Then link us on our parent's ptraced list. */ - if (!ret && !(current->real_parent->flags & PF_EXITING)) { + if (!ret) { current->ptrace |= PT_PTRACED; __ptrace_link(current, current->real_parent); } diff --git a/trunk/kernel/sys.c b/trunk/kernel/sys.c index e7998cf31498..51dbb55604e8 100644 --- a/trunk/kernel/sys.c +++ b/trunk/kernel/sys.c @@ -360,7 +360,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg) { char buffer[256]; - int ret = 0; /* We only trust the superuser with rebooting the system. 
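The sys_reboot() hunk here drops the single ret variable with one common unlock in favour of unlocking on each early-returning branch, so every exit path must release the kernel lock itself. A sketch of that multi-exit shape; the mutex and command names below are stand-ins for lock_kernel()/unlock_kernel() and the LINUX_REBOOT_CMD_* constants, not the kernel code itself:

#include <pthread.h>
#include <errno.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

enum reboot_cmd { CMD_RESTART, CMD_KEXEC, CMD_BAD };

static int do_reboot_cmd(enum reboot_cmd cmd)
{
	pthread_mutex_lock(&big_lock);
	switch (cmd) {
	case CMD_RESTART:
		/* normal path falls out to the common unlock below */
		break;
	case CMD_KEXEC: {
		int ret = -ENOSYS;		/* e.g. kexec not configured */

		pthread_mutex_unlock(&big_lock);/* early return: unlock here */
		return ret;
	}
	default:
		pthread_mutex_unlock(&big_lock);/* and here */
		return -EINVAL;
	}
	pthread_mutex_unlock(&big_lock);
	return 0;
}

Centralizing the unlock is harder to get wrong when new cases are added; the form restored here trades that safety for not carrying a ret variable through the switch.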
*/ if (!capable(CAP_SYS_BOOT)) @@ -398,7 +397,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, kernel_halt(); unlock_kernel(); do_exit(0); - panic("cannot halt"); + break; case LINUX_REBOOT_CMD_POWER_OFF: kernel_power_off(); @@ -418,22 +417,29 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, #ifdef CONFIG_KEXEC case LINUX_REBOOT_CMD_KEXEC: - ret = kernel_kexec(); - break; + { + int ret; + ret = kernel_kexec(); + unlock_kernel(); + return ret; + } #endif #ifdef CONFIG_HIBERNATION case LINUX_REBOOT_CMD_SW_SUSPEND: - ret = hibernate(); - break; + { + int ret = hibernate(); + unlock_kernel(); + return ret; + } #endif default: - ret = -EINVAL; - break; + unlock_kernel(); + return -EINVAL; } unlock_kernel(); - return ret; + return 0; } static void deferred_cad(struct work_struct *dummy) diff --git a/trunk/kernel/sysctl.c b/trunk/kernel/sysctl.c index e3d2c7dd59b9..4286b62b34a0 100644 --- a/trunk/kernel/sysctl.c +++ b/trunk/kernel/sysctl.c @@ -902,6 +902,16 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_UNEVICTABLE_LRU + { + .ctl_name = CTL_UNNUMBERED, + .procname = "scan_unevictable_pages", + .data = &scan_unevictable_pages, + .maxlen = sizeof(scan_unevictable_pages), + .mode = 0644, + .proc_handler = &scan_unevictable_handler, + }, +#endif #ifdef CONFIG_SLOW_WORK { .ctl_name = CTL_UNNUMBERED, @@ -1292,16 +1302,6 @@ static struct ctl_table vm_table[] = { .extra2 = &one, }, #endif -#ifdef CONFIG_UNEVICTABLE_LRU - { - .ctl_name = CTL_UNNUMBERED, - .procname = "scan_unevictable_pages", - .data = &scan_unevictable_pages, - .maxlen = sizeof(scan_unevictable_pages), - .mode = 0644, - .proc_handler = &scan_unevictable_handler, - }, -#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt diff --git a/trunk/mm/Kconfig b/trunk/mm/Kconfig index 57971d2ab848..b53427ad30a3 100644 --- a/trunk/mm/Kconfig +++ b/trunk/mm/Kconfig @@ -213,8 +213,6 @@ config UNEVICTABLE_LRU will use one page flag and increase the code size a little, say Y unless you know what you are doing. - See Documentation/vm/unevictable-lru.txt for more information. - config HAVE_MLOCK bool default y if MMU=y diff --git a/trunk/mm/filemap.c b/trunk/mm/filemap.c index 8bd498040f32..2e2d38ebda4b 100644 --- a/trunk/mm/filemap.c +++ b/trunk/mm/filemap.c @@ -567,8 +567,8 @@ EXPORT_SYMBOL(wait_on_page_bit); /** * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue - * @page: Page defining the wait queue of interest - * @waiter: Waiter to add to the queue + * @page - Page defining the wait queue of interest + * @waiter - Waiter to add to the queue * * Add an arbitrary @waiter to the wait queue for the nominated @page. 
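For reference, scripts/kernel-doc only recognises parameter lines in the "@name:" form; the "@name -" spelling this filemap.c hunk restores predates that convention and is not picked up by the documentation build. The parseable form looks like:

/**
 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
 * @page: Page defining the wait queue of interest
 * @waiter: Waiter to add to the queue
 *
 * Add an arbitrary @waiter to the wait queue for the nominated @page.
 */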
*/ diff --git a/trunk/mm/memcontrol.c b/trunk/mm/memcontrol.c index e44fb0fbb80e..2fc6d6c48238 100644 --- a/trunk/mm/memcontrol.c +++ b/trunk/mm/memcontrol.c @@ -932,7 +932,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, if (unlikely(!mem)) return 0; - VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem)); + VM_BUG_ON(mem_cgroup_is_obsolete(mem)); while (1) { int ret; diff --git a/trunk/mm/shmem.c b/trunk/mm/shmem.c index f9cb20ebb990..d94d2e9146bc 100644 --- a/trunk/mm/shmem.c +++ b/trunk/mm/shmem.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -44,6 +43,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include #include @@ -65,28 +65,13 @@ static struct vfsmount *shm_mnt; #include #include -/* - * The maximum size of a shmem/tmpfs file is limited by the maximum size of - * its triple-indirect swap vector - see illustration at shmem_swp_entry(). - * - * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel, - * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum - * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, - * MAX_LFS_FILESIZE being then more restrictive than swap vector layout. - * - * We use / and * instead of shifts in the definitions below, so that the swap - * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE. - */ #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) - -#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) -#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT) +#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) +#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) -#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE) -#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT)) +#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) +#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) -#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) /* info->flags needs VM_flags to handle pagein/truncate races efficiently */ @@ -2596,7 +2581,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page) #define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev) #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) -#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE +#define SHMEM_MAX_BYTES LLONG_MAX #endif /* CONFIG_SHMEM */ diff --git a/trunk/mm/util.c b/trunk/mm/util.c index 55bef160b9f1..2599e83eea17 100644 --- a/trunk/mm/util.c +++ b/trunk/mm/util.c @@ -223,22 +223,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm) } #endif -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. 
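The kernel-doc being deleted here still describes how get_user_pages_fast() is meant to be driven: pin a run of user pages without taking mmap_sem, operate on them, and drop each reference with put_page(). A sketch of that calling pattern; the helper name is made up for illustration and is not part of this patch:

#include <linux/mm.h>
#include <linux/slab.h>

/* hypothetical helper: pin nr_pages of user memory, use them, release */
static int pin_and_touch(unsigned long start, int nr_pages)
{
	struct page **pages;
	int i, pinned;

	pages = kmalloc(nr_pages * sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	pinned = get_user_pages_fast(start, nr_pages, 1 /* write */, pages);
	for (i = 0; i < pinned; i++) {
		/* ... operate on pages[i] ... */
		put_page(pages[i]);	/* drop each pin when done */
	}
	kfree(pages);
	return pinned < 0 ? pinned : 0;
}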
If no pages - * were pinned, returns -errno. - */ int __attribute__((weak)) get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { diff --git a/trunk/sound/pci/hda/hda_intel.c b/trunk/sound/pci/hda/hda_intel.c index 7ba8db5d4c42..30829ee920c3 100644 --- a/trunk/sound/pci/hda/hda_intel.c +++ b/trunk/sound/pci/hda/hda_intel.c @@ -2260,11 +2260,11 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, gcap &= ~0x01; /* allow 64bit DMA address if supported by H/W */ - if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64))) - pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64)); + if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_64BIT_MASK)) + pci_set_consistent_dma_mask(pci, DMA_64BIT_MASK); else { - pci_set_dma_mask(pci, DMA_BIT_MASK(32)); - pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32)); + pci_set_dma_mask(pci, DMA_32BIT_MASK); + pci_set_consistent_dma_mask(pci, DMA_32BIT_MASK); } /* read number of streams from GCAP register instead of using