From 877115062b7fb24397a700f50c172ab26043f982 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Jun 2012 17:47:34 -0400 Subject: [PATCH] --- yaml --- r: 310463 b: refs/heads/master c: 03240b279dbbda41f2fc55ff4424acf651e25bef h: refs/heads/master i: 310461: 6aac892d3eb0b187efc43691484d42f3283d33b7 310459: fcf38516c4eb6bf6055a0688632ddedb0b92538e 310455: 2e96f68ff016a042ce730c20467823eeba5e80e7 310447: f940ae21d427e00ed7d1865f82ecc447f6fe37b0 310431: 466e51a8430db59b1aa972712468f691e2377921 310399: 7e050e6230310c9795013f40b5618395177db15b v: v3 --- [refs] | 2 +- trunk/Documentation/vm/frontswap.txt | 278 ----------------- trunk/MAINTAINERS | 7 - trunk/arch/avr32/kernel/signal.c | 2 +- trunk/arch/xtensa/include/asm/syscall.h | 4 +- trunk/arch/xtensa/kernel/signal.c | 2 +- trunk/drivers/staging/ramster/zcache-main.c | 8 +- trunk/drivers/staging/zcache/zcache-main.c | 10 +- trunk/drivers/xen/tmem.c | 8 +- trunk/fs/cifs/cifsglob.h | 7 - trunk/fs/cifs/cifsproto.h | 1 + trunk/fs/cifs/cifssmb.c | 8 +- trunk/fs/cifs/connect.c | 8 +- trunk/fs/cifs/file.c | 106 +++---- trunk/fs/cifs/misc.c | 89 +++++- trunk/fs/cifs/smb1ops.c | 89 ------ trunk/fs/cifs/transport.c | 2 +- trunk/include/linux/frontswap.h | 127 -------- trunk/include/linux/swap.h | 4 - trunk/include/linux/swapfile.h | 13 - trunk/mm/Kconfig | 17 -- trunk/mm/Makefile | 1 - trunk/mm/frontswap.c | 314 -------------------- trunk/mm/page_io.c | 12 - trunk/mm/swapfile.c | 54 +--- 25 files changed, 174 insertions(+), 999 deletions(-) delete mode 100644 trunk/Documentation/vm/frontswap.txt delete mode 100644 trunk/include/linux/frontswap.h delete mode 100644 trunk/include/linux/swapfile.h delete mode 100644 trunk/mm/frontswap.c diff --git a/[refs] b/[refs] index 50b1f77ea774..d138dd513164 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: bf2785a818372603ad3ca3abcab65010f08a1d68 +refs/heads/master: 03240b279dbbda41f2fc55ff4424acf651e25bef diff --git a/trunk/Documentation/vm/frontswap.txt b/trunk/Documentation/vm/frontswap.txt deleted file mode 100644 index 37067cf455f4..000000000000 --- a/trunk/Documentation/vm/frontswap.txt +++ /dev/null @@ -1,278 +0,0 @@ -Frontswap provides a "transcendent memory" interface for swap pages. -In some environments, dramatic performance savings may be obtained because -swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk. - -(Note, frontswap -- and cleancache (merged at 3.0) -- are the "frontends" -and the only necessary changes to the core kernel for transcendent memory; -all other supporting code -- the "backends" -- is implemented as drivers. -See the LWN.net article "Transcendent memory in a nutshell" for a detailed -overview of frontswap and related kernel parts: -https://lwn.net/Articles/454795/ ) - -Frontswap is so named because it can be thought of as the opposite of -a "backing" store for a swap device. The storage is assumed to be -a synchronous concurrency-safe page-oriented "pseudo-RAM device" conforming -to the requirements of transcendent memory (such as Xen's "tmem", or -in-kernel compressed memory, aka "zcache", or future RAM-like devices); -this pseudo-RAM device is not directly accessible or addressable by the -kernel and is of unknown and possibly time-varying size. 
The driver
-links itself to frontswap by calling frontswap_register_ops to set the
-frontswap_ops funcs appropriately and the functions it provides must
-conform to certain policies as follows:
-
-An "init" prepares the device to receive frontswap pages associated
-with the specified swap device number (aka "type"). A "store" will
-copy the page to transcendent memory and associate it with the type and
-offset associated with the page. A "load" will copy the page, if found,
-from transcendent memory into kernel memory, but will NOT remove the page
-from transcendent memory. An "invalidate_page" will remove the page
-from transcendent memory and an "invalidate_area" will remove ALL pages
-associated with the swap type (e.g., like swapoff) and notify the "device"
-to refuse further stores with that swap type.
-
-Once a page is successfully stored, a matching load on the page will normally
-succeed. So when the kernel finds itself in a situation where it needs
-to swap out a page, it first attempts to use frontswap. If the store returns
-success, the data has been successfully saved to transcendent memory and
-a disk write and, if the data is later read back, a disk read are avoided.
-If a store returns failure, transcendent memory has rejected the data, and the
-page can be written to swap as usual.
-
-If a backend chooses, frontswap can be configured as a "writethrough
-cache" by calling frontswap_writethrough(). In this mode, the reduction
-in swap device writes is lost (and also a non-trivial performance advantage)
-in order to allow the backend to arbitrarily "reclaim" space used to
-store frontswap pages to more completely manage its memory usage.
-
-Note that if a page is stored and the page already exists in transcendent memory
-(a "duplicate" store), either the store succeeds and the data is overwritten,
-or the store fails AND the page is invalidated. This ensures stale data may
-never be obtained from frontswap.
-
-If properly configured, monitoring of frontswap is done via debugfs in
-the /sys/kernel/debug/frontswap directory. The effectiveness of
-frontswap can be measured (across all swap devices) with:
-
-failed_stores - how many store attempts have failed
-loads - how many loads were attempted (all should succeed)
-succ_stores - how many store attempts have succeeded
-invalidates - how many invalidates were attempted
-
-A backend implementation may provide additional metrics.
-
-FAQ
-
-1) Where's the value?
-
-When a workload starts swapping, performance falls through the floor.
-Frontswap significantly increases performance in many such workloads by
-providing a clean, dynamic interface to read and write swap pages to
-"transcendent memory" that is otherwise not directly addressable to the kernel.
-This interface is ideal when data is transformed to a different form
-and size (such as with compression) or secretly moved (as might be
-useful for write-balancing for some RAM-like devices). Swap pages (and
-evicted page-cache pages) are a great use for this kind of slower-than-RAM-
-but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and
-cleancache) interface to transcendent memory provides a nice way to read
-and write -- and indirectly "name" -- the pages.
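
To make the init/store/load/invalidate contract described above concrete, here
is a minimal sketch of a backend against this interface; the frontswap_ops
layout matches the include/linux/frontswap.h being deleted later in this
patch, and all toy_* names are hypothetical:

#include <linux/module.h>
#include <linux/frontswap.h>

static void toy_init(unsigned type)
{
	/* prepare to receive pages for this swap device number ("type") */
}

static int toy_store(unsigned type, pgoff_t offset, struct page *page)
{
	/* copy the page, keyed by (type, offset); return 0 on success,
	 * or -1 to reject so the page is written to the swap device */
	return -1;
}

static int toy_load(unsigned type, pgoff_t offset, struct page *page)
{
	/* fill the page from the stored copy, leaving the copy in place;
	 * return 0 if found, -1 if not */
	return -1;
}

static void toy_invalidate_page(unsigned type, pgoff_t offset)
{
	/* drop the copy for (type, offset), if any */
}

static void toy_invalidate_area(unsigned type)
{
	/* drop ALL copies for this swap type, e.g. at swapoff */
}

static struct frontswap_ops toy_frontswap_ops = {
	.init = toy_init,
	.store = toy_store,
	.load = toy_load,
	.invalidate_page = toy_invalidate_page,
	.invalidate_area = toy_invalidate_area,
};

static int __init toy_frontswap_setup(void)
{
	/* the previous ops are returned by value, so a backend can
	 * detect nesting or multiple registrations */
	frontswap_register_ops(&toy_frontswap_ops);
	return 0;
}
module_init(toy_frontswap_setup);
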
-
-Frontswap -- and cleancache -- with a fairly small impact on the kernel,
-provides a huge amount of flexibility for more dynamic, flexible RAM
-utilization in various system configurations:
-
-In the single kernel case, aka "zcache", pages are compressed and
-stored in local memory, thus increasing the total anonymous pages
-that can be safely kept in RAM. Zcache essentially trades off CPU
-cycles used in compression/decompression for better memory utilization.
-Benchmarks have shown little or no impact when memory pressure is
-low while providing a significant performance improvement (25%+)
-on some workloads under high memory pressure.
-
-"RAMster" builds on zcache by adding "peer-to-peer" transcendent memory
-support for clustered systems. Frontswap pages are locally compressed
-as in zcache, but then "remotified" to another system's RAM. This
-allows RAM to be dynamically load-balanced back-and-forth as needed,
-i.e. when system A is overcommitted, it can swap to system B, and
-vice versa. RAMster can also be configured as a memory server so
-many servers in a cluster can swap, dynamically as needed, to a single
-server configured with a large amount of RAM... without pre-configuring
-how much of the RAM is available for each of the clients!
-
-In the virtual case, the whole point of virtualization is to statistically
-multiplex physical resources across the varying demands of multiple
-virtual machines. This is really hard to do with RAM and efforts to do
-it well with no kernel changes have essentially failed (except in some
-well-publicized special-case workloads).
-Specifically, the Xen Transcendent Memory backend allows otherwise
-"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
-virtual machines, but the pages can be compressed and deduplicated to
-optimize RAM utilization. And when guest OS's are induced to surrender
-underutilized RAM (e.g. with "selfballooning"), sudden unexpected
-memory pressure may result in swapping; frontswap allows those pages
-to be swapped to and from hypervisor RAM (if overall host system memory
-conditions allow), thus mitigating the potentially awful performance impact
-of unplanned swapping.
-
-A KVM implementation is underway and has been RFC'ed to lkml. And,
-using frontswap, investigation is also underway on the use of NVM as
-a memory extension technology.
-
-2) Sure there may be performance advantages in some situations, but
-   what's the space/time overhead of frontswap?
-
-If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into
-nothingness and the only overhead is a few extra bytes per swapon'ed
-swap device. If CONFIG_FRONTSWAP is enabled but no frontswap "backend"
-registers, there is one extra global variable compared to zero for
-every swap page read or written. If CONFIG_FRONTSWAP is enabled
-AND a frontswap backend registers AND the backend fails every "store"
-request (i.e. provides no memory despite claiming it might),
-CPU overhead is still negligible -- and since every frontswap fail
-precedes a swap page write-to-disk, the system is highly likely
-to be I/O bound and using a small fraction of a percent of a CPU
-will be irrelevant anyway.
-
-As for space, if CONFIG_FRONTSWAP is enabled AND a frontswap backend
-registers, one bit is allocated for every swap page for every swap
-device that is swapon'd. This is added to the EIGHT bits (which
-was sixteen until about 2.6.34) that the kernel already allocates
-for every swap page for every swap device that is swapon'd.
(Hugh
-Dickins has observed that frontswap could probably steal one of
-the existing eight bits, but let's worry about that minor optimization
-later.) For very large swap disks (which are rare) on a standard
-4K pagesize, this is 1MB per 32GB swap.
-
-When swap pages are stored in transcendent memory instead of written
-out to disk, there is a side effect that this may create more memory
-pressure that can potentially outweigh the other advantages. A
-backend, such as zcache, must implement policies to carefully (but
-dynamically) manage memory limits to ensure this doesn't happen.
-
-3) OK, how about a quick overview of what this frontswap patch does
-   in terms that a kernel hacker can grok?
-
-Let's assume that a frontswap "backend" has registered during
-kernel initialization; this registration indicates that this
-frontswap backend has access to some "memory" that is not directly
-accessible by the kernel. Exactly how much memory it provides is
-entirely dynamic and random.
-
-Whenever a swap-device is swapon'd frontswap_init() is called,
-passing the swap device number (aka "type") as a parameter.
-This notifies frontswap to expect attempts to "store" swap pages
-associated with that number.
-
-Whenever the swap subsystem is readying a page to write to a swap
-device (c.f swap_writepage()), frontswap_store is called. Frontswap
-consults with the frontswap backend and if the backend says it does NOT
-have room, frontswap_store returns -1 and the kernel swaps the page
-to the swap device as normal. Note that the response from the frontswap
-backend is unpredictable to the kernel; it may choose to never accept a
-page, it could accept every ninth page, or it might accept every
-page. But if the backend does accept a page, the data from the page
-has already been copied and associated with the type and offset,
-and the backend guarantees the persistence of the data. In this case,
-frontswap sets a bit in the "frontswap_map" for the swap device
-corresponding to the page offset on the swap device to which it would
-otherwise have written the data.
-
-When the swap subsystem needs to swap-in a page (swap_readpage()),
-it first calls frontswap_load() which checks the frontswap_map to
-see if the page was earlier accepted by the frontswap backend. If
-it was, the page of data is filled from the frontswap backend and
-the swap-in is complete. If not, the normal swap-in code is
-executed to obtain the page of data from the real swap device.
-
-So every time the frontswap backend accepts a page, a swap device read
-and (potentially) a swap device write are replaced by a "frontswap backend
-store" and (possibly) a "frontswap backend load", which are presumably much
-faster.
-
-4) Can't frontswap be configured as a "special" swap device that is
-   just higher priority than any real swap device (e.g. like zswap,
-   or maybe swap-over-nbd/NFS)?
-
-No. First, the existing swap subsystem doesn't allow for any kind of
-swap hierarchy. Perhaps it could be rewritten to accommodate a hierarchy,
-but this would require fairly drastic changes. Even if it were
-rewritten, the existing swap subsystem uses the block I/O layer which
-assumes a swap device is fixed size and any page in it is linearly
-addressable. Frontswap barely touches the existing swap subsystem,
-and works around the constraints of the block I/O subsystem to provide
-a great deal of flexibility and dynamicity.
-
-For example, the acceptance of any swap page by the frontswap backend is
-entirely unpredictable.
This is critical to the definition of frontswap
-backends because it grants completely dynamic discretion to the
-backend. In zcache, one cannot know a priori how compressible a page is.
-"Poorly" compressible pages can be rejected, and "poorly" can itself be
-defined dynamically depending on current memory constraints.
-
-Further, frontswap is entirely synchronous whereas a real swap
-device is, by definition, asynchronous and uses block I/O. The
-block I/O layer is not only unnecessary, but may perform "optimizations"
-that are inappropriate for a RAM-oriented device including delaying
-the write of some pages for a significant amount of time. Synchrony is
-required to ensure the dynamicity of the backend and to avoid thorny race
-conditions that would unnecessarily and greatly complicate frontswap
-and/or the block I/O subsystem. That said, only the initial "store"
-and "load" operations need be synchronous. A separate asynchronous thread
-is free to manipulate the pages stored by frontswap. For example,
-the "remotification" thread in RAMster uses standard asynchronous
-kernel sockets to move compressed frontswap pages to a remote machine.
-Similarly, a KVM guest-side implementation could do in-guest compression
-and use "batched" hypercalls.
-
-In a virtualized environment, the dynamicity allows the hypervisor
-(or host OS) to do "intelligent overcommit". For example, it can
-choose to accept pages only until host-swapping might be imminent,
-then force guests to do their own swapping.
-
-There is a downside to the transcendent memory specifications for
-frontswap: Since any "store" might fail, there must always be a real
-slot on a real swap device to swap the page. Thus frontswap must be
-implemented as a "shadow" to every swapon'd device with the potential
-capability of holding every page that the swap device might have held
-and the possibility that it might hold no pages at all. This means
-that frontswap cannot contain more pages than the total of swapon'd
-swap devices. For example, if NO swap device is configured on some
-installation, frontswap is useless. Swapless portable devices
-can still use frontswap but a backend for such devices must configure
-some kind of "ghost" swap device and ensure that it is never used.
-
-5) Why this weird definition about "duplicate stores"? If a page
-   has been previously successfully stored, can't it always be
-   successfully overwritten?
-
-Nearly always it can, but no, sometimes it cannot. Consider an example
-where data is compressed and the original 4K page has been compressed
-to 1K. Now an attempt is made to overwrite the page with data that
-is non-compressible and so would take the entire 4K. But the backend
-has no more space. In this case, the store must be rejected. Whenever
-frontswap rejects a store that would overwrite, it also must invalidate
-the old data and ensure that it is no longer accessible. Since the
-swap subsystem then writes the new data to the real swap device,
-this is the correct course of action to ensure coherency.
-
-6) What is frontswap_shrink for?
-
-When the (non-frontswap) swap subsystem swaps out a page to a real
-swap device, that page is only taking up low-value pre-allocated disk
-space. But if frontswap has placed a page in transcendent memory, that
-page may be taking up valuable real estate. The frontswap_shrink
-routine allows code outside of the swap subsystem to force pages out
-of the memory managed by frontswap and back into kernel-addressable memory.
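
A minimal sketch of how policy code outside the swap subsystem might drive
this, using frontswap_shrink() and frontswap_curr_pages() from the
mm/frontswap.c removed below (the budget parameter and the policy around it
are hypothetical):

#include <linux/frontswap.h>

/* Shrink frontswap back under a caller-chosen page budget, e.g. once
 * local memory pressure subsides. */
static void toy_enforce_frontswap_budget(unsigned long budget_pages)
{
	unsigned long in_use = frontswap_curr_pages();

	if (in_use > budget_pages)
		/* frontswap_shrink() uses try_to_unuse() under the
		 * covers to pull pages back into kernel memory */
		frontswap_shrink(budget_pages);
}
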
-For example, in RAMster, a "suction driver" thread will attempt -to "repatriate" pages sent to a remote machine back to the local machine; -this is driven using the frontswap_shrink mechanism when memory pressure -subsides. - -7) Why does the frontswap patch create the new include file swapfile.h? - -The frontswap code depends on some swap-subsystem-internal data -structures that have, over the years, moved back and forth between -static and global. This seemed a reasonable compromise: Define -them as global but declare them in a new include file that isn't -included by the large number of source files that include swap.h. - -Dan Magenheimer, last updated April 9, 2012 diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 6a52bb4a4fc7..55f0fda602ec 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -2930,13 +2930,6 @@ F: Documentation/power/freezing-of-tasks.txt F: include/linux/freezer.h F: kernel/freezer.c -FRONTSWAP API -M: Konrad Rzeszutek Wilk -L: linux-kernel@vger.kernel.org -S: Maintained -F: mm/frontswap.c -F: include/linux/frontswap.h - FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS M: David Howells L: linux-cachefs@redhat.com diff --git a/trunk/arch/avr32/kernel/signal.c b/trunk/arch/avr32/kernel/signal.c index c140f9b41dce..d552a854dacc 100644 --- a/trunk/arch/avr32/kernel/signal.c +++ b/trunk/arch/avr32/kernel/signal.c @@ -300,7 +300,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti) if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR) syscall = 1; - if (ti->flags & _TIF_SIGPENDING)) + if (ti->flags & _TIF_SIGPENDING) do_signal(regs, syscall); if (ti->flags & _TIF_NOTIFY_RESUME) { diff --git a/trunk/arch/xtensa/include/asm/syscall.h b/trunk/arch/xtensa/include/asm/syscall.h index 0b9f2e13c781..c1dacca312f3 100644 --- a/trunk/arch/xtensa/include/asm/syscall.h +++ b/trunk/arch/xtensa/include/asm/syscall.h @@ -31,5 +31,5 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize); - - +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, + size_t sigsetsize); diff --git a/trunk/arch/xtensa/kernel/signal.c b/trunk/arch/xtensa/kernel/signal.c index b9f8e5850d3a..efe4e854b3cd 100644 --- a/trunk/arch/xtensa/kernel/signal.c +++ b/trunk/arch/xtensa/kernel/signal.c @@ -493,7 +493,7 @@ static void do_signal(struct pt_regs *regs) if (ret) return; - signal_delivered(signr, info, ka, regs, 0); + signal_delivered(signr, &info, &ka, regs, 0); if (current->ptrace & PT_SINGLESTEP) task_pt_regs(current)->icountlevel = 1; diff --git a/trunk/drivers/staging/ramster/zcache-main.c b/trunk/drivers/staging/ramster/zcache-main.c index d46764b5aaba..4e7ef0e6b79c 100644 --- a/trunk/drivers/staging/ramster/zcache-main.c +++ b/trunk/drivers/staging/ramster/zcache-main.c @@ -3002,7 +3002,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) return oid; } -static int zcache_frontswap_store(unsigned type, pgoff_t offset, +static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -3025,7 +3025,7 @@ static int zcache_frontswap_store(unsigned type, pgoff_t offset, /* returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) 
*/ -static int zcache_frontswap_load(unsigned type, pgoff_t offset, +static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -3080,8 +3080,8 @@ static void zcache_frontswap_init(unsigned ignored) } static struct frontswap_ops zcache_frontswap_ops = { - .store = zcache_frontswap_store, - .load = zcache_frontswap_load, + .put_page = zcache_frontswap_put_page, + .get_page = zcache_frontswap_get_page, .invalidate_page = zcache_frontswap_flush_page, .invalidate_area = zcache_frontswap_flush_area, .init = zcache_frontswap_init diff --git a/trunk/drivers/staging/zcache/zcache-main.c b/trunk/drivers/staging/zcache/zcache-main.c index 784c796b9848..2734dacacbaf 100644 --- a/trunk/drivers/staging/zcache/zcache-main.c +++ b/trunk/drivers/staging/zcache/zcache-main.c @@ -1835,7 +1835,7 @@ static int zcache_frontswap_poolid = -1; * Swizzling increases objects per swaptype, increasing tmem concurrency * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from - * frontswap_load(), but has side-effects. Hence using 8. + * frontswap_get_page(), but has side-effects. Hence using 8. */ #define SWIZ_BITS 8 #define SWIZ_MASK ((1 << SWIZ_BITS) - 1) @@ -1849,7 +1849,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) return oid; } -static int zcache_frontswap_store(unsigned type, pgoff_t offset, +static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -1870,7 +1870,7 @@ static int zcache_frontswap_store(unsigned type, pgoff_t offset, /* returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int zcache_frontswap_load(unsigned type, pgoff_t offset, +static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -1919,8 +1919,8 @@ static void zcache_frontswap_init(unsigned ignored) } static struct frontswap_ops zcache_frontswap_ops = { - .store = zcache_frontswap_store, - .load = zcache_frontswap_load, + .put_page = zcache_frontswap_put_page, + .get_page = zcache_frontswap_get_page, .invalidate_page = zcache_frontswap_flush_page, .invalidate_area = zcache_frontswap_flush_area, .init = zcache_frontswap_init diff --git a/trunk/drivers/xen/tmem.c b/trunk/drivers/xen/tmem.c index 89f264c67420..dcb79521e6c8 100644 --- a/trunk/drivers/xen/tmem.c +++ b/trunk/drivers/xen/tmem.c @@ -269,7 +269,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) } /* returns 0 if the page was successfully put into frontswap, -1 if not */ -static int tmem_frontswap_store(unsigned type, pgoff_t offset, +static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -295,7 +295,7 @@ static int tmem_frontswap_store(unsigned type, pgoff_t offset, * returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) 
*/ -static int tmem_frontswap_load(unsigned type, pgoff_t offset, +static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -362,8 +362,8 @@ static int __init no_frontswap(char *s) __setup("nofrontswap", no_frontswap); static struct frontswap_ops __initdata tmem_frontswap_ops = { - .store = tmem_frontswap_store, - .load = tmem_frontswap_load, + .put_page = tmem_frontswap_put_page, + .get_page = tmem_frontswap_get_page, .invalidate_page = tmem_frontswap_flush_page, .invalidate_area = tmem_frontswap_flush_area, .init = tmem_frontswap_init diff --git a/trunk/fs/cifs/cifsglob.h b/trunk/fs/cifs/cifsglob.h index 6df0cbe1cbc9..20350a93ed99 100644 --- a/trunk/fs/cifs/cifsglob.h +++ b/trunk/fs/cifs/cifsglob.h @@ -174,7 +174,6 @@ struct smb_version_operations { void (*add_credits)(struct TCP_Server_Info *, const unsigned int); void (*set_credits)(struct TCP_Server_Info *, const int); int * (*get_credits_field)(struct TCP_Server_Info *); - __u64 (*get_next_mid)(struct TCP_Server_Info *); /* data offset from read response message */ unsigned int (*read_data_offset)(char *); /* data length from read response message */ @@ -400,12 +399,6 @@ set_credits(struct TCP_Server_Info *server, const int val) server->ops->set_credits(server, val); } -static inline __u64 -get_next_mid(struct TCP_Server_Info *server) -{ - return server->ops->get_next_mid(server); -} - /* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. diff --git a/trunk/fs/cifs/cifsproto.h b/trunk/fs/cifs/cifsproto.h index 0a6cbfe2761e..5ec21ecf7980 100644 --- a/trunk/fs/cifs/cifsproto.h +++ b/trunk/fs/cifs/cifsproto.h @@ -114,6 +114,7 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct, void **request_buf); extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp); +extern __u64 GetNextMid(struct TCP_Server_Info *server); extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); extern u64 cifs_UnixTimeToNT(struct timespec); extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, diff --git a/trunk/fs/cifs/cifssmb.c b/trunk/fs/cifs/cifssmb.c index 5b400730c213..b5ad716b2642 100644 --- a/trunk/fs/cifs/cifssmb.c +++ b/trunk/fs/cifs/cifssmb.c @@ -268,7 +268,7 @@ small_smb_init_no_tc(const int smb_command, const int wct, return rc; buffer = (struct smb_hdr *)*request_buf; - buffer->Mid = get_next_mid(ses->server); + buffer->Mid = GetNextMid(ses->server); if (ses->capabilities & CAP_UNICODE) buffer->Flags2 |= SMBFLG2_UNICODE; if (ses->capabilities & CAP_STATUS32) @@ -402,7 +402,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) cFYI(1, "secFlags 0x%x", secFlags); - pSMB->hdr.Mid = get_next_mid(server); + pSMB->hdr.Mid = GetNextMid(server); pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) @@ -782,7 +782,7 @@ CIFSSMBLogoff(const int xid, struct cifs_ses *ses) return rc; } - pSMB->hdr.Mid = get_next_mid(ses->server); + pSMB->hdr.Mid = GetNextMid(ses->server); if (ses->server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) @@ -4762,7 +4762,7 @@ CIFSGetDFSRefer(const int xid, struct cifs_ses *ses, /* server pointer checked in called function, but should never be null here anyway */ - pSMB->hdr.Mid = get_next_mid(ses->server); + pSMB->hdr.Mid = GetNextMid(ses->server); pSMB->hdr.Tid = ses->ipc_tid; pSMB->hdr.Uid = ses->Suid; if (ses->capabilities & CAP_STATUS32) 
diff --git a/trunk/fs/cifs/connect.c b/trunk/fs/cifs/connect.c index 78db68a5cf44..ccafdedd0dbc 100644 --- a/trunk/fs/cifs/connect.c +++ b/trunk/fs/cifs/connect.c @@ -1058,15 +1058,13 @@ cifs_demultiplex_thread(void *p) if (mid_entry != NULL) { if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); - } else if (!server->ops->is_oplock_break || - !server->ops->is_oplock_break(buf, server)) { + } else if (!server->ops->is_oplock_break(buf, server)) { cERROR(1, "No task to wake, unknown frame received! " "NumMids %d", atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", buf, HEADER_SIZE(server)); #ifdef CONFIG_CIFS_DEBUG2 - if (server->ops->dump_detail) - server->ops->dump_detail(buf); + server->ops->dump_detail(buf); cifs_dump_mids(server); #endif /* CIFS_DEBUG2 */ @@ -3940,7 +3938,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, NULL /*no tid */ , 4 /*wct */ ); - smb_buffer->Mid = get_next_mid(ses->server); + smb_buffer->Mid = GetNextMid(ses->server); smb_buffer->Uid = ses->Suid; pSMB = (TCONX_REQ *) smb_buffer; pSMBr = (TCONX_RSP *) smb_buffer_response; diff --git a/trunk/fs/cifs/file.c b/trunk/fs/cifs/file.c index 513adbc211d7..253170dfa716 100644 --- a/trunk/fs/cifs/file.c +++ b/trunk/fs/cifs/file.c @@ -876,7 +876,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) struct cifsLockInfo *li, *tmp; struct cifs_tcon *tcon; struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); - unsigned int num, max_num, max_buf; + unsigned int num, max_num; LOCKING_ANDX_RANGE *buf, *cur; int types[] = {LOCKING_ANDX_LARGE_FILES, LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; @@ -892,19 +892,8 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return rc; } - /* - * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. - */ - max_buf = tcon->ses->server->maxBuf; - if (!max_buf) { - mutex_unlock(&cinode->lock_mutex); - FreeXid(xid); - return -EINVAL; - } - - max_num = (max_buf - sizeof(struct smb_hdr)) / - sizeof(LOCKING_ANDX_RANGE); + max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / + sizeof(LOCKING_ANDX_RANGE); buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) { mutex_unlock(&cinode->lock_mutex); @@ -1229,7 +1218,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) int types[] = {LOCKING_ANDX_LARGE_FILES, LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; unsigned int i; - unsigned int max_num, num, max_buf; + unsigned int max_num, num; LOCKING_ANDX_RANGE *buf, *cur; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); @@ -1239,16 +1228,8 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) INIT_LIST_HEAD(&tmp_llist); - /* - * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. 
- */ - max_buf = tcon->ses->server->maxBuf; - if (!max_buf) - return -EINVAL; - - max_num = (max_buf - sizeof(struct smb_hdr)) / - sizeof(LOCKING_ANDX_RANGE); + max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / + sizeof(LOCKING_ANDX_RANGE); buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1266,7 +1247,46 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) continue; if (types[i] != li->type) continue; - if (cinode->can_cache_brlcks) { + if (!cinode->can_cache_brlcks) { + cur->Pid = cpu_to_le16(li->pid); + cur->LengthLow = cpu_to_le32((u32)li->length); + cur->LengthHigh = + cpu_to_le32((u32)(li->length>>32)); + cur->OffsetLow = cpu_to_le32((u32)li->offset); + cur->OffsetHigh = + cpu_to_le32((u32)(li->offset>>32)); + /* + * We need to save a lock here to let us add + * it again to the file's list if the unlock + * range request fails on the server. + */ + list_move(&li->llist, &tmp_llist); + if (++num == max_num) { + stored_rc = cifs_lockv(xid, tcon, + cfile->netfid, + li->type, num, + 0, buf); + if (stored_rc) { + /* + * We failed on the unlock range + * request - add all locks from + * the tmp list to the head of + * the file's list. + */ + cifs_move_llist(&tmp_llist, + &cfile->llist); + rc = stored_rc; + } else + /* + * The unlock range request + * succeed - free the tmp list. + */ + cifs_free_llist(&tmp_llist); + cur = buf; + num = 0; + } else + cur++; + } else { /* * We can cache brlock requests - simply remove * a lock from the file's list. @@ -1274,41 +1294,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) list_del(&li->llist); cifs_del_lock_waiters(li); kfree(li); - continue; } - cur->Pid = cpu_to_le16(li->pid); - cur->LengthLow = cpu_to_le32((u32)li->length); - cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); - cur->OffsetLow = cpu_to_le32((u32)li->offset); - cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); - /* - * We need to save a lock here to let us add it again to - * the file's list if the unlock range request fails on - * the server. - */ - list_move(&li->llist, &tmp_llist); - if (++num == max_num) { - stored_rc = cifs_lockv(xid, tcon, cfile->netfid, - li->type, num, 0, buf); - if (stored_rc) { - /* - * We failed on the unlock range - * request - add all locks from the tmp - * list to the head of the file's list. - */ - cifs_move_llist(&tmp_llist, - &cfile->llist); - rc = stored_rc; - } else - /* - * The unlock range request succeed - - * free the tmp list. - */ - cifs_free_llist(&tmp_llist); - cur = buf; - num = 0; - } else - cur++; } if (num) { stored_rc = cifs_lockv(xid, tcon, cfile->netfid, diff --git a/trunk/fs/cifs/misc.c b/trunk/fs/cifs/misc.c index 557506ae1e2a..e2552d2b2e42 100644 --- a/trunk/fs/cifs/misc.c +++ b/trunk/fs/cifs/misc.c @@ -212,6 +212,93 @@ cifs_small_buf_release(void *buf_to_free) return; } +/* + * Find a free multiplex id (SMB mid). Otherwise there could be + * mid collisions which might cause problems, demultiplexing the + * wrong response to this request. Multiplex ids could collide if + * one of a series requests takes much longer than the others, or + * if a very large number of long lived requests (byte range + * locks or FindNotify requests) are pending. No more than + * 64K-1 requests can be outstanding at one time. If no + * mids are available, return zero. A future optimization + * could make the combination of mids and uid the key we use + * to demultiplex on (rather than mid alone). 
+ * In addition to the above check, the cifs demultiplex + * code already used the command code as a secondary + * check of the frame and if signing is negotiated the + * response would be discarded if the mid were the same + * but the signature was wrong. Since the mid is not put in the + * pending queue until later (when it is about to be dispatched) + * we do have to limit the number of outstanding requests + * to somewhat less than 64K-1 although it is hard to imagine + * so many threads being in the vfs at one time. + */ +__u64 GetNextMid(struct TCP_Server_Info *server) +{ + __u64 mid = 0; + __u16 last_mid, cur_mid; + bool collision; + + spin_lock(&GlobalMid_Lock); + + /* mid is 16 bit only for CIFS/SMB */ + cur_mid = (__u16)((server->CurrentMid) & 0xffff); + /* we do not want to loop forever */ + last_mid = cur_mid; + cur_mid++; + + /* + * This nested loop looks more expensive than it is. + * In practice the list of pending requests is short, + * fewer than 50, and the mids are likely to be unique + * on the first pass through the loop unless some request + * takes longer than the 64 thousand requests before it + * (and it would also have to have been a request that + * did not time out). + */ + while (cur_mid != last_mid) { + struct mid_q_entry *mid_entry; + unsigned int num_mids; + + collision = false; + if (cur_mid == 0) + cur_mid++; + + num_mids = 0; + list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { + ++num_mids; + if (mid_entry->mid == cur_mid && + mid_entry->mid_state == MID_REQUEST_SUBMITTED) { + /* This mid is in use, try a different one */ + collision = true; + break; + } + } + + /* + * if we have more than 32k mids in the list, then something + * is very wrong. Possibly a local user is trying to DoS the + * box by issuing long-running calls and SIGKILL'ing them. If + * we get to 2^16 mids then we're in big trouble as this + * function could loop forever. + * + * Go ahead and assign out the mid in this situation, but force + * an eventual reconnect to clean out the pending_mid_q. + */ + if (num_mids > 32768) + server->tcpStatus = CifsNeedReconnect; + + if (!collision) { + mid = (__u64)cur_mid; + server->CurrentMid = mid; + break; + } + cur_mid++; + } + spin_unlock(&GlobalMid_Lock); + return mid; +} + /* NB: MID can not be set if treeCon not passed in, in that case it is responsbility of caller to set the mid */ void @@ -247,7 +334,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , /* Uid is not converted */ buffer->Uid = treeCon->ses->Suid; - buffer->Mid = get_next_mid(treeCon->ses->server); + buffer->Mid = GetNextMid(treeCon->ses->server); } if (treeCon->Flags & SMB_SHARE_IS_IN_DFS) buffer->Flags2 |= SMBFLG2_DFS; diff --git a/trunk/fs/cifs/smb1ops.c b/trunk/fs/cifs/smb1ops.c index 6dec38f5522d..d9d615fbed3f 100644 --- a/trunk/fs/cifs/smb1ops.c +++ b/trunk/fs/cifs/smb1ops.c @@ -125,94 +125,6 @@ cifs_get_credits_field(struct TCP_Server_Info *server) return &server->credits; } -/* - * Find a free multiplex id (SMB mid). Otherwise there could be - * mid collisions which might cause problems, demultiplexing the - * wrong response to this request. Multiplex ids could collide if - * one of a series requests takes much longer than the others, or - * if a very large number of long lived requests (byte range - * locks or FindNotify requests) are pending. No more than - * 64K-1 requests can be outstanding at one time. If no - * mids are available, return zero. 
A future optimization - * could make the combination of mids and uid the key we use - * to demultiplex on (rather than mid alone). - * In addition to the above check, the cifs demultiplex - * code already used the command code as a secondary - * check of the frame and if signing is negotiated the - * response would be discarded if the mid were the same - * but the signature was wrong. Since the mid is not put in the - * pending queue until later (when it is about to be dispatched) - * we do have to limit the number of outstanding requests - * to somewhat less than 64K-1 although it is hard to imagine - * so many threads being in the vfs at one time. - */ -static __u64 -cifs_get_next_mid(struct TCP_Server_Info *server) -{ - __u64 mid = 0; - __u16 last_mid, cur_mid; - bool collision; - - spin_lock(&GlobalMid_Lock); - - /* mid is 16 bit only for CIFS/SMB */ - cur_mid = (__u16)((server->CurrentMid) & 0xffff); - /* we do not want to loop forever */ - last_mid = cur_mid; - cur_mid++; - - /* - * This nested loop looks more expensive than it is. - * In practice the list of pending requests is short, - * fewer than 50, and the mids are likely to be unique - * on the first pass through the loop unless some request - * takes longer than the 64 thousand requests before it - * (and it would also have to have been a request that - * did not time out). - */ - while (cur_mid != last_mid) { - struct mid_q_entry *mid_entry; - unsigned int num_mids; - - collision = false; - if (cur_mid == 0) - cur_mid++; - - num_mids = 0; - list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { - ++num_mids; - if (mid_entry->mid == cur_mid && - mid_entry->mid_state == MID_REQUEST_SUBMITTED) { - /* This mid is in use, try a different one */ - collision = true; - break; - } - } - - /* - * if we have more than 32k mids in the list, then something - * is very wrong. Possibly a local user is trying to DoS the - * box by issuing long-running calls and SIGKILL'ing them. If - * we get to 2^16 mids then we're in big trouble as this - * function could loop forever. - * - * Go ahead and assign out the mid in this situation, but force - * an eventual reconnect to clean out the pending_mid_q. 
- */ - if (num_mids > 32768) - server->tcpStatus = CifsNeedReconnect; - - if (!collision) { - mid = (__u64)cur_mid; - server->CurrentMid = mid; - break; - } - cur_mid++; - } - spin_unlock(&GlobalMid_Lock); - return mid; -} - struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -221,7 +133,6 @@ struct smb_version_operations smb1_operations = { .add_credits = cifs_add_credits, .set_credits = cifs_set_credits, .get_credits_field = cifs_get_credits_field, - .get_next_mid = cifs_get_next_mid, .read_data_offset = cifs_read_data_offset, .read_data_length = cifs_read_data_length, .map_error = map_smb_to_linux_error, diff --git a/trunk/fs/cifs/transport.c b/trunk/fs/cifs/transport.c index 3097ee58fd7d..1b36ffe6a47b 100644 --- a/trunk/fs/cifs/transport.c +++ b/trunk/fs/cifs/transport.c @@ -779,7 +779,7 @@ send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon, pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; pSMB->Timeout = 0; - pSMB->hdr.Mid = get_next_mid(ses->server); + pSMB->hdr.Mid = GetNextMid(ses->server); return SendReceive(xid, ses, in_buf, out_buf, &bytes_returned, 0); diff --git a/trunk/include/linux/frontswap.h b/trunk/include/linux/frontswap.h deleted file mode 100644 index 0e4e2eec5c1d..000000000000 --- a/trunk/include/linux/frontswap.h +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef _LINUX_FRONTSWAP_H -#define _LINUX_FRONTSWAP_H - -#include -#include -#include - -struct frontswap_ops { - void (*init)(unsigned); - int (*store)(unsigned, pgoff_t, struct page *); - int (*load)(unsigned, pgoff_t, struct page *); - void (*invalidate_page)(unsigned, pgoff_t); - void (*invalidate_area)(unsigned); -}; - -extern bool frontswap_enabled; -extern struct frontswap_ops - frontswap_register_ops(struct frontswap_ops *ops); -extern void frontswap_shrink(unsigned long); -extern unsigned long frontswap_curr_pages(void); -extern void frontswap_writethrough(bool); - -extern void __frontswap_init(unsigned type); -extern int __frontswap_store(struct page *page); -extern int __frontswap_load(struct page *page); -extern void __frontswap_invalidate_page(unsigned, pgoff_t); -extern void __frontswap_invalidate_area(unsigned); - -#ifdef CONFIG_FRONTSWAP - -static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) -{ - bool ret = false; - - if (frontswap_enabled && sis->frontswap_map) - ret = test_bit(offset, sis->frontswap_map); - return ret; -} - -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - set_bit(offset, sis->frontswap_map); -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - clear_bit(offset, sis->frontswap_map); -} - -static inline void frontswap_map_set(struct swap_info_struct *p, - unsigned long *map) -{ - p->frontswap_map = map; -} - -static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) -{ - return p->frontswap_map; -} -#else -/* all inline routines become no-ops and all externs are ignored */ - -#define frontswap_enabled (0) - -static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) -{ - return false; -} - -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ -} - -static inline void frontswap_map_set(struct swap_info_struct *p, - unsigned long *map) -{ -} - 
-static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) -{ - return NULL; -} -#endif - -static inline int frontswap_store(struct page *page) -{ - int ret = -1; - - if (frontswap_enabled) - ret = __frontswap_store(page); - return ret; -} - -static inline int frontswap_load(struct page *page) -{ - int ret = -1; - - if (frontswap_enabled) - ret = __frontswap_load(page); - return ret; -} - -static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset) -{ - if (frontswap_enabled) - __frontswap_invalidate_page(type, offset); -} - -static inline void frontswap_invalidate_area(unsigned type) -{ - if (frontswap_enabled) - __frontswap_invalidate_area(type); -} - -static inline void frontswap_init(unsigned type) -{ - if (frontswap_enabled) - __frontswap_init(type); -} - -#endif /* _LINUX_FRONTSWAP_H */ diff --git a/trunk/include/linux/swap.h b/trunk/include/linux/swap.h index c84ec68eaec9..b6661933e252 100644 --- a/trunk/include/linux/swap.h +++ b/trunk/include/linux/swap.h @@ -197,10 +197,6 @@ struct swap_info_struct { struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ -#ifdef CONFIG_FRONTSWAP - unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ - atomic_t frontswap_pages; /* frontswap pages in-use counter */ -#endif }; struct swap_list_t { diff --git a/trunk/include/linux/swapfile.h b/trunk/include/linux/swapfile.h deleted file mode 100644 index e282624e8c10..000000000000 --- a/trunk/include/linux/swapfile.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _LINUX_SWAPFILE_H -#define _LINUX_SWAPFILE_H - -/* - * these were static in swapfile.c but frontswap.c needs them and we don't - * want to expose them to the dozens of source files that include swap.h - */ -extern spinlock_t swap_lock; -extern struct swap_list_t swap_list; -extern struct swap_info_struct *swap_info[]; -extern int try_to_unuse(unsigned int, bool, unsigned long); - -#endif /* _LINUX_SWAPFILE_H */ diff --git a/trunk/mm/Kconfig b/trunk/mm/Kconfig index 82fed4eb2b6f..b2176374b98e 100644 --- a/trunk/mm/Kconfig +++ b/trunk/mm/Kconfig @@ -389,20 +389,3 @@ config CLEANCACHE in a negligible performance hit. If unsure, say Y to enable cleancache - -config FRONTSWAP - bool "Enable frontswap to cache swap pages if tmem is present" - depends on SWAP - default n - help - Frontswap is so named because it can be thought of as the opposite - of a "backing" store for a swap device. The data is stored into - "transcendent memory", memory that is not directly accessible or - addressable by the kernel and is of unknown and possibly - time-varying size. When space in transcendent memory is available, - a significant swap I/O reduction may be achieved. When none is - available, all frontswap calls are reduced to a single pointer- - compare-against-NULL resulting in a negligible performance hit - and swap data is stored as normal on the matching swap device. - - If unsure, say Y to enable frontswap. 
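
The near-zero cost claimed by this help text follows from the header being
deleted above: each hook is a static inline guarded by one global flag. A
condensed illustration of the pattern at the swap_writepage() call site this
patch removes (the toy_* names are hypothetical stand-ins for the real
mm/page_io.c path):

/* When CONFIG_FRONTSWAP=n, frontswap_enabled is the constant (0) and the
 * whole branch compiles away; when built in but no backend has registered,
 * the only cost is a single test of one global variable. */
static int toy_swap_writepage(struct page *page)
{
	if (frontswap_store(page) == 0) {
		/* backend kept the page; skip block I/O entirely */
		return 0;
	}
	/* rejected or no backend: write to the swap device as usual */
	return toy_submit_swap_bio(page);
}

static int toy_submit_swap_bio(struct page *page)
{
	/* stand-in for the real bio submission path in mm/page_io.c */
	return 0;
}
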
diff --git a/trunk/mm/Makefile b/trunk/mm/Makefile index 2e2fbbefb99f..a156285ce88d 100644 --- a/trunk/mm/Makefile +++ b/trunk/mm/Makefile @@ -29,7 +29,6 @@ obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o -obj-$(CONFIG_FRONTSWAP) += frontswap.o obj-$(CONFIG_HAS_DMA) += dmapool.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o diff --git a/trunk/mm/frontswap.c b/trunk/mm/frontswap.c deleted file mode 100644 index e25025574a02..000000000000 --- a/trunk/mm/frontswap.c +++ /dev/null @@ -1,314 +0,0 @@ -/* - * Frontswap frontend - * - * This code provides the generic "frontend" layer to call a matching - * "backend" driver implementation of frontswap. See - * Documentation/vm/frontswap.txt for more information. - * - * Copyright (C) 2009-2012 Oracle Corp. All rights reserved. - * Author: Dan Magenheimer - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * frontswap_ops is set by frontswap_register_ops to contain the pointers - * to the frontswap "backend" implementation functions. - */ -static struct frontswap_ops frontswap_ops __read_mostly; - -/* - * This global enablement flag reduces overhead on systems where frontswap_ops - * has not been registered, so is preferred to the slower alternative: a - * function call that checks a non-global. - */ -bool frontswap_enabled __read_mostly; -EXPORT_SYMBOL(frontswap_enabled); - -/* - * If enabled, frontswap_store will return failure even on success. As - * a result, the swap subsystem will always write the page to swap, in - * effect converting frontswap into a writethrough cache. In this mode, - * there is no direct reduction in swap writes, but a frontswap backend - * can unilaterally "reclaim" any pages in use with no data loss, thus - * providing increases control over maximum memory usage due to frontswap. - */ -static bool frontswap_writethrough_enabled __read_mostly; - -#ifdef CONFIG_DEBUG_FS -/* - * Counters available via /sys/kernel/debug/frontswap (if debugfs is - * properly configured). These are for information only so are not protected - * against increment races. - */ -static u64 frontswap_loads; -static u64 frontswap_succ_stores; -static u64 frontswap_failed_stores; -static u64 frontswap_invalidates; - -static inline void inc_frontswap_loads(void) { - frontswap_loads++; -} -static inline void inc_frontswap_succ_stores(void) { - frontswap_succ_stores++; -} -static inline void inc_frontswap_failed_stores(void) { - frontswap_failed_stores++; -} -static inline void inc_frontswap_invalidates(void) { - frontswap_invalidates++; -} -#else -static inline void inc_frontswap_loads(void) { } -static inline void inc_frontswap_succ_stores(void) { } -static inline void inc_frontswap_failed_stores(void) { } -static inline void inc_frontswap_invalidates(void) { } -#endif -/* - * Register operations for frontswap, returning previous thus allowing - * detection of multiple backends and possible nesting. - */ -struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) -{ - struct frontswap_ops old = frontswap_ops; - - frontswap_ops = *ops; - frontswap_enabled = true; - return old; -} -EXPORT_SYMBOL(frontswap_register_ops); - -/* - * Enable/disable frontswap writethrough (see above). 
- */ -void frontswap_writethrough(bool enable) -{ - frontswap_writethrough_enabled = enable; -} -EXPORT_SYMBOL(frontswap_writethrough); - -/* - * Called when a swap device is swapon'd. - */ -void __frontswap_init(unsigned type) -{ - struct swap_info_struct *sis = swap_info[type]; - - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; - if (frontswap_enabled) - (*frontswap_ops.init)(type); -} -EXPORT_SYMBOL(__frontswap_init); - -/* - * "Store" data from a page to frontswap and associate it with the page's - * swaptype and offset. Page must be locked and in the swap cache. - * If frontswap already contains a page with matching swaptype and - * offset, the frontswap implmentation may either overwrite the data and - * return success or invalidate the page from frontswap and return failure. - */ -int __frontswap_store(struct page *page) -{ - int ret = -1, dup = 0; - swp_entry_t entry = { .val = page_private(page), }; - int type = swp_type(entry); - struct swap_info_struct *sis = swap_info[type]; - pgoff_t offset = swp_offset(entry); - - BUG_ON(!PageLocked(page)); - BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) - dup = 1; - ret = (*frontswap_ops.store)(type, offset, page); - if (ret == 0) { - frontswap_set(sis, offset); - inc_frontswap_succ_stores(); - if (!dup) - atomic_inc(&sis->frontswap_pages); - } else if (dup) { - /* - failed dup always results in automatic invalidate of - the (older) page from frontswap - */ - frontswap_clear(sis, offset); - atomic_dec(&sis->frontswap_pages); - inc_frontswap_failed_stores(); - } else - inc_frontswap_failed_stores(); - if (frontswap_writethrough_enabled) - /* report failure so swap also writes to swap device */ - ret = -1; - return ret; -} -EXPORT_SYMBOL(__frontswap_store); - -/* - * "Get" data from frontswap associated with swaptype and offset that were - * specified when the data was put to frontswap and use it to fill the - * specified page with data. Page must be locked and in the swap cache. - */ -int __frontswap_load(struct page *page) -{ - int ret = -1; - swp_entry_t entry = { .val = page_private(page), }; - int type = swp_type(entry); - struct swap_info_struct *sis = swap_info[type]; - pgoff_t offset = swp_offset(entry); - - BUG_ON(!PageLocked(page)); - BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) - ret = (*frontswap_ops.load)(type, offset, page); - if (ret == 0) - inc_frontswap_loads(); - return ret; -} -EXPORT_SYMBOL(__frontswap_load); - -/* - * Invalidate any data from frontswap associated with the specified swaptype - * and offset so that a subsequent "get" will fail. - */ -void __frontswap_invalidate_page(unsigned type, pgoff_t offset) -{ - struct swap_info_struct *sis = swap_info[type]; - - BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) { - (*frontswap_ops.invalidate_page)(type, offset); - atomic_dec(&sis->frontswap_pages); - frontswap_clear(sis, offset); - inc_frontswap_invalidates(); - } -} -EXPORT_SYMBOL(__frontswap_invalidate_page); - -/* - * Invalidate all data from frontswap associated with all offsets for the - * specified swaptype. 
- */ -void __frontswap_invalidate_area(unsigned type) -{ - struct swap_info_struct *sis = swap_info[type]; - - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; - (*frontswap_ops.invalidate_area)(type); - atomic_set(&sis->frontswap_pages, 0); - memset(sis->frontswap_map, 0, sis->max / sizeof(long)); -} -EXPORT_SYMBOL(__frontswap_invalidate_area); - -/* - * Frontswap, like a true swap device, may unnecessarily retain pages - * under certain circumstances; "shrink" frontswap is essentially a - * "partial swapoff" and works by calling try_to_unuse to attempt to - * unuse enough frontswap pages to attempt to -- subject to memory - * constraints -- reduce the number of pages in frontswap to the - * number given in the parameter target_pages. - */ -void frontswap_shrink(unsigned long target_pages) -{ - struct swap_info_struct *si = NULL; - int si_frontswap_pages; - unsigned long total_pages = 0, total_pages_to_unuse; - unsigned long pages = 0, pages_to_unuse = 0; - int type; - bool locked = false; - - /* - * we don't want to hold swap_lock while doing a very - * lengthy try_to_unuse, but swap_list may change - * so restart scan from swap_list.head each time - */ - spin_lock(&swap_lock); - locked = true; - total_pages = 0; - for (type = swap_list.head; type >= 0; type = si->next) { - si = swap_info[type]; - total_pages += atomic_read(&si->frontswap_pages); - } - if (total_pages <= target_pages) - goto out; - total_pages_to_unuse = total_pages - target_pages; - for (type = swap_list.head; type >= 0; type = si->next) { - si = swap_info[type]; - si_frontswap_pages = atomic_read(&si->frontswap_pages); - if (total_pages_to_unuse < si_frontswap_pages) - pages = pages_to_unuse = total_pages_to_unuse; - else { - pages = si_frontswap_pages; - pages_to_unuse = 0; /* unuse all */ - } - /* ensure there is enough RAM to fetch pages from frontswap */ - if (security_vm_enough_memory_mm(current->mm, pages)) - continue; - vm_unacct_memory(pages); - break; - } - if (type < 0) - goto out; - locked = false; - spin_unlock(&swap_lock); - try_to_unuse(type, true, pages_to_unuse); -out: - if (locked) - spin_unlock(&swap_lock); - return; -} -EXPORT_SYMBOL(frontswap_shrink); - -/* - * Count and return the number of frontswap pages across all - * swap devices. This is exported so that backend drivers can - * determine current usage without reading debugfs. 
- */ -unsigned long frontswap_curr_pages(void) -{ - int type; - unsigned long totalpages = 0; - struct swap_info_struct *si = NULL; - - spin_lock(&swap_lock); - for (type = swap_list.head; type >= 0; type = si->next) { - si = swap_info[type]; - totalpages += atomic_read(&si->frontswap_pages); - } - spin_unlock(&swap_lock); - return totalpages; -} -EXPORT_SYMBOL(frontswap_curr_pages); - -static int __init init_frontswap(void) -{ -#ifdef CONFIG_DEBUG_FS - struct dentry *root = debugfs_create_dir("frontswap", NULL); - if (root == NULL) - return -ENXIO; - debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads); - debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores); - debugfs_create_u64("failed_stores", S_IRUGO, root, - &frontswap_failed_stores); - debugfs_create_u64("invalidates", S_IRUGO, - root, &frontswap_invalidates); -#endif - return 0; -} - -module_init(init_frontswap); diff --git a/trunk/mm/page_io.c b/trunk/mm/page_io.c index 34f02923744c..dc76b4d0611e 100644 --- a/trunk/mm/page_io.c +++ b/trunk/mm/page_io.c @@ -18,7 +18,6 @@ #include #include #include -#include #include static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -99,12 +98,6 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); goto out; } - if (frontswap_store(page) == 0) { - set_page_writeback(page); - unlock_page(page); - end_page_writeback(page); - goto out; - } bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); if (bio == NULL) { set_page_dirty(page); @@ -129,11 +122,6 @@ int swap_readpage(struct page *page) VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageUptodate(page)); - if (frontswap_load(page) == 0) { - SetPageUptodate(page); - unlock_page(page); - goto out; - } bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); if (bio == NULL) { unlock_page(page); diff --git a/trunk/mm/swapfile.c b/trunk/mm/swapfile.c index de5bc51c4a66..457b10baef59 100644 --- a/trunk/mm/swapfile.c +++ b/trunk/mm/swapfile.c @@ -31,8 +31,6 @@ #include #include #include -#include -#include #include #include @@ -44,7 +42,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t, static void free_swap_count_continuations(struct swap_info_struct *); static sector_t map_swap_entry(swp_entry_t, struct block_device**); -DEFINE_SPINLOCK(swap_lock); +static DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; long nr_swap_pages; long total_swap_pages; @@ -55,9 +53,9 @@ static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; static const char Unused_offset[] = "Unused swap offset entry "; -struct swap_list_t swap_list = {-1, -1}; +static struct swap_list_t swap_list = {-1, -1}; -struct swap_info_struct *swap_info[MAX_SWAPFILES]; +static struct swap_info_struct *swap_info[MAX_SWAPFILES]; static DEFINE_MUTEX(swapon_mutex); @@ -558,7 +556,6 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, swap_list.next = p->type; nr_swap_pages++; p->inuse_pages--; - frontswap_invalidate_page(p->type, offset); if ((p->flags & SWP_BLKDEV) && disk->fops->swap_slot_free_notify) disk->fops->swap_slot_free_notify(p->bdev, offset); @@ -988,12 +985,11 @@ static int unuse_mm(struct mm_struct *mm, } /* - * Scan swap_map (or frontswap_map if frontswap parameter is true) - * from current position to next entry still in use. + * Scan swap_map from current position to next entry still in use. * Recycle to start on reaching the end, returning 0 when empty. 
*/ static unsigned int find_next_to_unuse(struct swap_info_struct *si, - unsigned int prev, bool frontswap) + unsigned int prev) { unsigned int max = si->max; unsigned int i = prev; @@ -1019,12 +1015,6 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, prev = 0; i = 1; } - if (frontswap) { - if (frontswap_test(si, i)) - break; - else - continue; - } count = si->swap_map[i]; if (count && swap_count(count) != SWAP_MAP_BAD) break; @@ -1036,12 +1026,8 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, * We completely avoid races by reading each swap page in advance, * and then search for the process using it. All the necessary * page table adjustments can then be made atomically. - * - * if the boolean frontswap is true, only unuse pages_to_unuse pages; - * pages_to_unuse==0 means all pages; ignored if frontswap is false */ -int try_to_unuse(unsigned int type, bool frontswap, - unsigned long pages_to_unuse) +static int try_to_unuse(unsigned int type) { struct swap_info_struct *si = swap_info[type]; struct mm_struct *start_mm; @@ -1074,7 +1060,7 @@ int try_to_unuse(unsigned int type, bool frontswap, * one pass through swap_map is enough, but not necessarily: * there are races when an instance of an entry might be missed. */ - while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { + while ((i = find_next_to_unuse(si, i)) != 0) { if (signal_pending(current)) { retval = -EINTR; break; @@ -1241,10 +1227,6 @@ int try_to_unuse(unsigned int type, bool frontswap, * interactive performance. */ cond_resched(); - if (frontswap && pages_to_unuse > 0) { - if (!--pages_to_unuse) - break; - } } mmput(start_mm); @@ -1504,8 +1486,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) } static void enable_swap_info(struct swap_info_struct *p, int prio, - unsigned char *swap_map, - unsigned long *frontswap_map) + unsigned char *swap_map) { int i, prev; @@ -1515,7 +1496,6 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, else p->prio = --least_priority; p->swap_map = swap_map; - frontswap_map_set(p, frontswap_map); p->flags |= SWP_WRITEOK; nr_swap_pages += p->pages; total_swap_pages += p->pages; @@ -1532,7 +1512,6 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, swap_list.head = swap_list.next = p->type; else swap_info[prev]->next = p->type; - frontswap_init(p->type); spin_unlock(&swap_lock); } @@ -1606,7 +1585,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&swap_lock); oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); - err = try_to_unuse(type, false, 0); /* force all pages to be unused */ + err = try_to_unuse(type); compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); if (err) { @@ -1617,7 +1596,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) * sys_swapoff for this swap_info_struct at this point. 
*/ /* re-insert swap space back into swap_list */ - enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); + enable_swap_info(p, p->prio, p->swap_map); goto out_dput; } @@ -1643,11 +1622,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; - frontswap_invalidate_area(type); spin_unlock(&swap_lock); mutex_unlock(&swapon_mutex); vfree(swap_map); - vfree(frontswap_map_get(p)); /* Destroy swap account informatin */ swap_cgroup_swapoff(type); @@ -2011,7 +1988,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) sector_t span; unsigned long maxpages; unsigned char *swap_map = NULL; - unsigned long *frontswap_map = NULL; struct page *page = NULL; struct inode *inode = NULL; @@ -2095,9 +2071,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = nr_extents; goto bad_swap; } - /* frontswap enabled? set up bit-per-page map for frontswap */ - if (frontswap_enabled) - frontswap_map = vzalloc(maxpages / sizeof(long)); if (p->bdev) { if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { @@ -2113,15 +2086,14 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (swap_flags & SWAP_FLAG_PREFER) prio = (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; - enable_swap_info(p, prio, swap_map, frontswap_map); + enable_swap_info(p, prio, swap_map); printk(KERN_INFO "Adding %uk swap on %s. " - "Priority:%d extents:%d across:%lluk %s%s%s\n", + "Priority:%d extents:%d across:%lluk %s%s\n", p->pages<<(PAGE_SHIFT-10), name, p->prio, nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), (p->flags & SWP_SOLIDSTATE) ? "SS" : "", - (p->flags & SWP_DISCARDABLE) ? "D" : "", - (frontswap_map) ? "FS" : ""); + (p->flags & SWP_DISCARDABLE) ? "D" : ""); mutex_unlock(&swapon_mutex); atomic_inc(&proc_poll_event);