From d96050750b6509c94e89bc011badfc54c1639dcf Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 18 May 2009 08:14:41 -0400
Subject: [PATCH]
--- yaml ---
r: 148939
b: refs/heads/master
c: 9528d1c7a541b481a0e80301dc8d545848104023
h: refs/heads/master
i:
  148937: d7769787e0cf5aafbd6e923dc17bd579de94f9a3
  148935: 6439d0497b2232baba24d303423cadc9d20158ab
v: v3
---
 [refs]                                        | 2 +-
 trunk/Documentation/Changes                   | 15 -
 trunk/Documentation/SubmittingPatches         | 76 +-
 .../development-process/5.Posting             | 31 +-
 .../feature-removal-schedule.txt              | 10 -
 trunk/Documentation/filesystems/debugfs.txt   | 158 --
 .../Documentation/x86/x86_64/boot-options.txt | 44 +-
 trunk/Documentation/x86/x86_64/machinecheck   | 8 +-
 trunk/arch/x86/Kconfig                        | 45 +-
 trunk/arch/x86/crypto/Makefile                | 2 -
 trunk/arch/x86/crypto/aesni-intel_glue.c      | 267 +--
 trunk/arch/x86/crypto/fpu.c                   | 166 --
 trunk/arch/x86/include/asm/entry_arch.h       | 11 +-
 trunk/arch/x86/include/asm/hardirq.h          | 2 +-
 trunk/arch/x86/include/asm/hw_irq.h           | 2 -
 trunk/arch/x86/include/asm/irq_vectors.h      | 17 +-
 trunk/arch/x86/include/asm/mce.h              | 88 +-
 trunk/arch/x86/include/asm/msr-index.h        | 7 -
 trunk/arch/x86/kernel/apic/apic.c             | 4 +-
 trunk/arch/x86/kernel/apic/nmi.c              | 2 +-
 trunk/arch/x86/kernel/cpu/mcheck/Makefile     | 10 +-
 trunk/arch/x86/kernel/cpu/mcheck/k7.c         | 42 +-
 trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c | 127 --
 .../arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 -
 .../arch/x86/kernel/cpu/mcheck/mce-severity.c | 218 --
 trunk/arch/x86/kernel/cpu/mcheck/mce.c        | 1964 -----------------
 trunk/arch/x86/kernel/cpu/mcheck/mce.h        | 26 +-
 trunk/arch/x86/kernel/cpu/mcheck/mce_32.c     | 76 +
 trunk/arch/x86/kernel/cpu/mcheck/mce_64.c     | 1188 ++++++++++
 trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 203 +-
 trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c  | 74 -
 .../arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 65 +-
 trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c  | 57 +-
 trunk/arch/x86/kernel/cpu/mcheck/p4.c         | 86 +-
 trunk/arch/x86/kernel/cpu/mcheck/p5.c         | 48 +-
 trunk/arch/x86/kernel/cpu/mcheck/p6.c         | 26 +-
 .../arch/x86/kernel/cpu/mcheck/therm_throt.c  | 73 +-
 trunk/arch/x86/kernel/cpu/mcheck/threshold.c  | 2 +-
 trunk/arch/x86/kernel/cpu/mcheck/winchip.c    | 17 +-
 trunk/arch/x86/kernel/entry_64.S              | 11 +-
 trunk/arch/x86/kernel/irq.c                   | 19 +-
 trunk/arch/x86/kernel/irqinit.c               | 3 -
 trunk/arch/x86/kernel/signal.c                | 6 +-
 trunk/arch/x86/kernel/smp.c                   | 28 +-
 trunk/arch/x86/kernel/traps.c                 | 6 +-
 trunk/crypto/Kconfig                          | 10 -
 trunk/crypto/algboss.c                        | 18 +-
 trunk/crypto/api.c                            | 14 +-
 trunk/crypto/cryptd.c                         | 14 +-
 trunk/crypto/internal.h                       | 3 +
 trunk/crypto/pcompress.c                      | 1 -
 trunk/crypto/tcrypt.c                         | 183 +-
 trunk/crypto/testmgr.c                        | 470 +---
 trunk/crypto/testmgr.h                        | 645 +-----
 trunk/crypto/zlib.c                           | 24 +-
 trunk/drivers/char/hw_random/Kconfig          | 2 +-
 trunk/drivers/char/hw_random/omap-rng.c       | 2 +-
 trunk/drivers/char/hw_random/timeriomem-rng.c | 26 +-
 trunk/drivers/char/hw_random/via-rng.c        | 15 +-
 trunk/drivers/crypto/Kconfig                  | 2 +-
 trunk/drivers/crypto/hifn_795x.c              | 8 +-
 trunk/drivers/crypto/padlock-aes.c            | 13 -
 trunk/drivers/crypto/talitos.c                | 713 ++----
 trunk/drivers/i2c/busses/Kconfig              | 2 +-
 trunk/drivers/i2c/busses/i2c-bfin-twi.c       | 11 +-
 trunk/fs/xfs/Kconfig                          | 1 -
 trunk/fs/xfs/Makefile                         | 5 +-
 trunk/fs/xfs/linux-2.6/xfs_acl.c              | 523 -----
 trunk/fs/xfs/linux-2.6/xfs_ioctl.c            | 25 +-
 trunk/fs/xfs/linux-2.6/xfs_iops.c             | 53 +-
 trunk/fs/xfs/linux-2.6/xfs_lrw.c              | 1 +
 trunk/fs/xfs/linux-2.6/xfs_quotaops.c         | 4 +-
 trunk/fs/xfs/linux-2.6/xfs_super.c            | 49 +-
 trunk/fs/xfs/linux-2.6/xfs_sync.c             | 479 ++--
 trunk/fs/xfs/linux-2.6/xfs_sync.h             | 19 +-
 trunk/fs/xfs/linux-2.6/xfs_xattr.c            | 67 +-
 trunk/fs/xfs/quota/xfs_dquot.c                | 5 +-
 trunk/fs/xfs/quota/xfs_dquot.h                | 1 +
 trunk/fs/xfs/quota/xfs_dquot_item.c           | 1 +
 trunk/fs/xfs/quota/xfs_qm.c                   | 168 +-
 trunk/fs/xfs/quota/xfs_qm.h                   | 21 +
 trunk/fs/xfs/quota/xfs_qm_bhv.c               | 77 +-
 trunk/fs/xfs/quota/xfs_qm_stats.c             | 1 +
 trunk/fs/xfs/quota/xfs_qm_syscalls.c          | 113 +-
 trunk/fs/xfs/quota/xfs_trans_dquot.c          | 66 +-
 trunk/fs/xfs/xfs_acl.c                        | 874 ++++++++
 trunk/fs/xfs/xfs_acl.h                        | 97 +-
 trunk/fs/xfs/xfs_ag.h                         | 2 -
 trunk/fs/xfs/xfs_arch.h                       | 32 +
 trunk/fs/xfs/xfs_attr.c                       | 13 +-
 trunk/fs/xfs/xfs_bmap.c                       | 34 +-
 trunk/fs/xfs/xfs_bmap_btree.c                 | 4 +-
 trunk/fs/xfs/xfs_filestream.c                 | 6 +-
 trunk/fs/xfs/xfs_fs.h                         | 11 +-
 trunk/fs/xfs/xfs_iget.c                       | 8 +-
 trunk/fs/xfs/xfs_inode.c                      | 1 +
 trunk/fs/xfs/xfs_inode.h                      | 6 -
 trunk/fs/xfs/xfs_iomap.c                      | 13 +-
 trunk/fs/xfs/xfs_log_recover.c                | 38 +-
 trunk/fs/xfs/xfs_mount.c                      | 105 +-
 trunk/fs/xfs/xfs_mount.h                      | 84 +-
 trunk/fs/xfs/xfs_qmops.c                      | 152 ++
 trunk/fs/xfs/xfs_quota.h                      | 129 +-
 trunk/fs/xfs/xfs_rename.c                     | 3 +-
 trunk/fs/xfs/xfs_rw.c                         | 1 +
 trunk/fs/xfs/xfs_trans.c                      | 15 +-
 trunk/fs/xfs/xfs_utils.c                      | 2 +-
 trunk/fs/xfs/xfs_vnodeops.c                   | 114 +-
 trunk/fs/xfs/xfs_vnodeops.h                   | 1 -
 trunk/kernel/timer.c                          | 1 -
 110 files changed, 4237 insertions(+), 6711 deletions(-)
 delete mode 100644 trunk/Documentation/filesystems/debugfs.txt
 delete mode 100644 trunk/arch/x86/crypto/fpu.c
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce-internal.h
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce-severity.c
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce.c
 create mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce_32.c
 create mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce_64.c
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c
 delete mode 100644 trunk/fs/xfs/linux-2.6/xfs_acl.c
 create mode 100644 trunk/fs/xfs/xfs_qmops.c

diff --git a/[refs] b/[refs]
index 121480d33758..0f8bd7197cdc 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: a2ee2981ae2a7046b10980feae9f4ab813877106
+refs/heads/master: 9528d1c7a541b481a0e80301dc8d545848104023
diff --git a/trunk/Documentation/Changes b/trunk/Documentation/Changes
index d21b3b5aa543..b95082be4d5e 100644
--- a/trunk/Documentation/Changes
+++ b/trunk/Documentation/Changes
@@ -48,7 +48,6 @@ o procps 3.2.0 # ps --version
 o oprofile 0.9 # oprofiled --version
 o udev 081 # udevinfo -V
 o grub 0.93 # grub --version
-o mcelog 0.6
 
 Kernel compilation
 ==================
@@ -277,16 +276,6 @@ before running exportfs or mountd. It is recommended that all NFS
 services be protected from the internet-at-large by a firewall where
 that is possible.
 
-mcelog
-------
-
-In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility
-as a regular cronjob similar to the x86-64 kernel to process and log
-machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check
-events are errors reported by the CPU. Processing them is strongly encouraged.
-All x86-64 kernels since 2.6.4 require the mcelog utility to
-process machine checks.
-
 
 Getting updated software
 ========================
 
@@ -376,10 +365,6 @@ FUSE
 ----
 o <http://sourceforge.net/projects/fuse>
 
-mcelog
-------
-o
-
 Networking
 **********
 
diff --git a/trunk/Documentation/SubmittingPatches b/trunk/Documentation/SubmittingPatches
index 6c456835c1fd..f309d3c6221c 100644
--- a/trunk/Documentation/SubmittingPatches
+++ b/trunk/Documentation/SubmittingPatches
@@ -91,10 +91,6 @@ Be as specific as possible. The WORST descriptions possible include
 things like "update driver X", "bug fix for driver X", or "this patch
 includes updates for subsystem X. Please apply."
 
-The maintainer will thank you if you write your patch description in a
-form which can be easily pulled into Linux's source code management
-system, git, as a "commit log". See #15, below.
-
 If your description starts to get long, that's a sign that you probably
 need to split up your patch. See #3, next.
 
@@ -409,14 +405,7 @@ person it names. This tag documents that potentially interested
 parties have been included in the discussion
 
-14) Using Reported-by:, Tested-by: and Reviewed-by:
-
-If this patch fixes a problem reported by somebody else, consider adding a
-Reported-by: tag to credit the reporter for their contribution. Please
-note that this tag should not be added without the reporter's permission,
-especially if the problem was not reported in a public forum. That said,
-if we diligently credit our bug reporters, they will, hopefully, be
-inspired to help us again in the future.
+14) Using Tested-by: and Reviewed-by:
 
 A Tested-by: tag indicates that the patch has been successfully tested (in
 some environment) by the person named. This tag informs maintainers that
@@ -455,7 +444,7 @@ offer a Reviewed-by tag for a patch. This tag serves to give credit to
 reviewers and to inform maintainers of the degree of review which has been
 done on the patch. Reviewed-by: tags, when supplied by reviewers known to
 understand the subject area and to perform thorough reviews, will normally
-increase the likelihood of your patch getting into the kernel.
+increase the liklihood of your patch getting into the kernel.
 
 15) The canonical patch format
 
@@ -496,33 +485,12 @@ phrase" should not be a filename. Do not use the same "summary
 phrase" for every patch in a whole patch series (where a "patch
 series" is an ordered sequence of multiple, related patches).
 
-Bear in mind that the "summary phrase" of your email becomes a
-globally-unique identifier for that patch. It propagates all the way
-into the git changelog. The "summary phrase" may later be used in
-developer discussions which refer to the patch. People will want to
-google for the "summary phrase" to read discussion regarding that
-patch. It will also be the only thing that people may quickly see
-when, two or three months later, they are going through perhaps
-thousands of patches using tools such as "gitk" or "git log
---oneline".
-
-For these reasons, the "summary" must be no more than 70-75
-characters, and it must describe both what the patch changes, as well
-as why the patch might be necessary. It is challenging to be both
-succinct and descriptive, but that is what a well-written summary
-should do.
-
-The "summary phrase" may be prefixed by tags enclosed in square
-brackets: "Subject: [PATCH tag] <summary phrase>". The tags are not
-considered part of the summary phrase, but describe how the patch
-should be treated. Common tags might include a version descriptor if
-the multiple versions of the patch have been sent out in response to
-comments (i.e., "v1, v2, v3"), or "RFC" to indicate a request for
-comments. If there are four patches in a patch series the individual
-patches may be numbered like this: 1/4, 2/4, 3/4, 4/4. This assures
-that developers understand the order in which the patches should be
-applied and that they have reviewed or applied all of the patches in
-the patch series.
+Bear in mind that the "summary phrase" of your email becomes
+a globally-unique identifier for that patch. It propagates
+all the way into the git changelog. The "summary phrase" may
+later be used in developer discussions which refer to the patch.
+People will want to google for the "summary phrase" to read
+discussion regarding that patch.
 
 A couple of example Subjects:
 
@@ -542,31 +510,19 @@ the patch author in the changelog. The explanation
 body will be committed to the permanent source changelog, so should
 make sense to a competent reader who has long since forgotten the
 immediate details of the discussion that might
-have led to this patch. Including symptoms of the failure which the
-patch addresses (kernel log messages, oops messages, etc.) is
-especially useful for people who might be searching the commit logs
-looking for the applicable patch. If a patch fixes a compile failure,
-it may not be necessary to include _all_ of the compile failures; just
-enough that it is likely that someone searching for the patch can find
-it. As in the "summary phrase", it is important to be both succinct as
-well as descriptive.
+have led to this patch.
 
 The "---" marker line serves the essential purpose of marking for patch
 handling tools where the changelog message ends.
 
 One good use for the additional comments after the "---" marker is for
-a diffstat, to show what files have changed, and the number of
-inserted and deleted lines per file. A diffstat is especially useful
-on bigger patches. Other comments relevant only to the moment or the
-maintainer, not suitable for the permanent changelog, should also go
-here. A good example of such comments might be "patch changelogs"
-which describe what has changed between the v1 and v2 version of the
-patch.
-
-If you are going to include a diffstat after the "---" marker, please
-use diffstat options "-p 1 -w 70" so that filenames are listed from
-the top of the kernel source tree and don't use too much horizontal
-space (easily fit in 80 columns, maybe with some indentation).
+a diffstat, to show what files have changed, and the number of inserted
+and deleted lines per file. A diffstat is especially useful on bigger
+patches. Other comments relevant only to the moment or the maintainer,
+not suitable for the permanent changelog, should also go here.
+Use diffstat options "-p 1 -w 70" so that filenames are listed from the
+top of the kernel source tree and don't use too much horizontal space
+(easily fit in 80 columns, maybe with some indentation).
 
 See more details on the proper patch format in the following
 references.
diff --git a/trunk/Documentation/development-process/5.Posting b/trunk/Documentation/development-process/5.Posting
index f622c1e9f0f9..dd48132a74dd 100644
--- a/trunk/Documentation/development-process/5.Posting
+++ b/trunk/Documentation/development-process/5.Posting
@@ -119,7 +119,7 @@ which takes quite a bit of time and thought after the "real work" has
 been done. When done properly, though, it is time well spent.
 
-5.4: PATCH FORMATTING AND CHANGELOGS
+5.4: PATCH FORMATTING
 
 So now you have a perfect series of patches for posting, but the work is
 not done quite yet. Each patch needs to be formatted into a message which
@@ -146,33 +146,8 @@ that end, each patch will be composed of the following:
 
 - One or more tag lines, with, at a minimum, one Signed-off-by: line from
   the author of the patch. Tags will be described in more detail below.
 
-The items above, together, form the changelog for the patch. Writing good
-changelogs is a crucial but often-neglected art; it's worth spending
-another moment discussing this issue. When writing a changelog, you should
-bear in mind that a number of different people will be reading your words.
-These include subsystem maintainers and reviewers who need to decide
-whether the patch should be included, distributors and other maintainers
-trying to decide whether a patch should be backported to other kernels, bug
-hunters wondering whether the patch is responsible for a problem they are
-chasing, users who want to know how the kernel has changed, and more. A
-good changelog conveys the needed information to all of these people in the
-most direct and concise way possible.
-
-To that end, the summary line should describe the effects of and motivation
-for the change as well as possible given the one-line constraint. The
-detailed description can then amplify on those topics and provide any
-needed additional information. If the patch fixes a bug, cite the commit
-which introduced the bug if possible. If a problem is associated with
-specific log or compiler output, include that output to help others
-searching for a solution to the same problem. If the change is meant to
-support other changes coming in later patch, say so. If internal APIs are
-changed, detail those changes and how other developers should respond. In
-general, the more you can put yourself into the shoes of everybody who will
-be reading your changelog, the better that changelog (and the kernel as a
-whole) will be.
-
-Needless to say, the changelog should be the text used when committing the
-change to a revision control system. It will be followed by:
+The above three items should, normally, be the text used when committing
+the change to a revision control system. They are followed by:
 
 - The patch itself, in the unified ("-u") patch format. Using the "-p"
   option to diff will associate function names with changes, making the
diff --git a/trunk/Documentation/feature-removal-schedule.txt b/trunk/Documentation/feature-removal-schedule.txt
index ec9ef5d0d7b3..de491a3e2313 100644
--- a/trunk/Documentation/feature-removal-schedule.txt
+++ b/trunk/Documentation/feature-removal-schedule.txt
@@ -437,13 +437,3 @@ Why:	Superseded by tdfxfb. I2C/DDC support used to live in a separate
 	driver but this caused driver conflicts.
 Who:	Jean Delvare <khali@linux-fr.org>
 	Krzysztof Helt <krzysztof.h1@wp.pl>
-
-----------------------------
-
-What:	CONFIG_X86_OLD_MCE
-When:	2.6.32
-Why:	Remove the old legacy 32bit machine check code. This has been
-	superseded by the newer machine check code from the 64bit port,
-	but the old version has been kept around for easier testing. Note this
-	doesn't impact the old P5 and WinChip machine check handlers.
-Who:	Andi Kleen <andi@firstfloor.org>
diff --git a/trunk/Documentation/filesystems/debugfs.txt b/trunk/Documentation/filesystems/debugfs.txt
deleted file mode 100644
index ed52af60c2d8..000000000000
--- a/trunk/Documentation/filesystems/debugfs.txt
+++ /dev/null
@@ -1,158 +0,0 @@
-Copyright 2009 Jonathan Corbet <corbet@lwn.net>
-
-Debugfs exists as a simple way for kernel developers to make information
-available to user space. Unlike /proc, which is only meant for information
-about a process, or sysfs, which has strict one-value-per-file rules,
-debugfs has no rules at all. Developers can put any information they want
-there. The debugfs filesystem is also intended to not serve as a stable
-ABI to user space; in theory, there are no stability constraints placed on
-files exported there. The real world is not always so simple, though [1];
-even debugfs interfaces are best designed with the idea that they will need
-to be maintained forever.
-
-Debugfs is typically mounted with a command like:
-
-    mount -t debugfs none /sys/kernel/debug
-
-(Or an equivalent /etc/fstab line).
-
-Note that the debugfs API is exported GPL-only to modules.
-
-Code using debugfs should include <linux/debugfs.h>. Then, the first order
-of business will be to create at least one directory to hold a set of
-debugfs files:
-
-    struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
-
-This call, if successful, will make a directory called name underneath the
-indicated parent directory. If parent is NULL, the directory will be
-created in the debugfs root. On success, the return value is a struct
-dentry pointer which can be used to create files in the directory (and to
-clean it up at the end). A NULL return value indicates that something went
-wrong. If ERR_PTR(-ENODEV) is returned, that is an indication that the
-kernel has been built without debugfs support and none of the functions
-described below will work.
-
-The most general way to create a file within a debugfs directory is with:
-
-    struct dentry *debugfs_create_file(const char *name, mode_t mode,
-				       struct dentry *parent, void *data,
-				       const struct file_operations *fops);
-
-Here, name is the name of the file to create, mode describes the access
-permissions the file should have, parent indicates the directory which
-should hold the file, data will be stored in the i_private field of the
-resulting inode structure, and fops is a set of file operations which
-implement the file's behavior. At a minimum, the read() and/or write()
-operations should be provided; others can be included as needed. Again,
-the return value will be a dentry pointer to the created file, NULL for
-error, or ERR_PTR(-ENODEV) if debugfs support is missing.
-
-In a number of cases, the creation of a set of file operations is not
-actually necessary; the debugfs code provides a number of helper functions
-for simple situations. Files containing a single integer value can be
-created with any of:
-
-    struct dentry *debugfs_create_u8(const char *name, mode_t mode,
-				     struct dentry *parent, u8 *value);
-    struct dentry *debugfs_create_u16(const char *name, mode_t mode,
-				      struct dentry *parent, u16 *value);
-    struct dentry *debugfs_create_u32(const char *name, mode_t mode,
-				      struct dentry *parent, u32 *value);
-    struct dentry *debugfs_create_u64(const char *name, mode_t mode,
-				      struct dentry *parent, u64 *value);
-
-These files support both reading and writing the given value; if a specific
-file should not be written to, simply set the mode bits accordingly.
The -values in these files are in decimal; if hexadecimal is more appropriate, -the following functions can be used instead: - - struct dentry *debugfs_create_x8(const char *name, mode_t mode, - struct dentry *parent, u8 *value); - struct dentry *debugfs_create_x16(const char *name, mode_t mode, - struct dentry *parent, u16 *value); - struct dentry *debugfs_create_x32(const char *name, mode_t mode, - struct dentry *parent, u32 *value); - -Note that there is no debugfs_create_x64(). - -These functions are useful as long as the developer knows the size of the -value to be exported. Some types can have different widths on different -architectures, though, complicating the situation somewhat. There is a -function meant to help out in one special case: - - struct dentry *debugfs_create_size_t(const char *name, mode_t mode, - struct dentry *parent, - size_t *value); - -As might be expected, this function will create a debugfs file to represent -a variable of type size_t. - -Boolean values can be placed in debugfs with: - - struct dentry *debugfs_create_bool(const char *name, mode_t mode, - struct dentry *parent, u32 *value); - -A read on the resulting file will yield either Y (for non-zero values) or -N, followed by a newline. If written to, it will accept either upper- or -lower-case values, or 1 or 0. Any other input will be silently ignored. - -Finally, a block of arbitrary binary data can be exported with: - - struct debugfs_blob_wrapper { - void *data; - unsigned long size; - }; - - struct dentry *debugfs_create_blob(const char *name, mode_t mode, - struct dentry *parent, - struct debugfs_blob_wrapper *blob); - -A read of this file will return the data pointed to by the -debugfs_blob_wrapper structure. Some drivers use "blobs" as a simple way -to return several lines of (static) formatted text output. This function -can be used to export binary information, but there does not appear to be -any code which does so in the mainline. Note that all files created with -debugfs_create_blob() are read-only. - -There are a couple of other directory-oriented helper functions: - - struct dentry *debugfs_rename(struct dentry *old_dir, - struct dentry *old_dentry, - struct dentry *new_dir, - const char *new_name); - - struct dentry *debugfs_create_symlink(const char *name, - struct dentry *parent, - const char *target); - -A call to debugfs_rename() will give a new name to an existing debugfs -file, possibly in a different directory. The new_name must not exist prior -to the call; the return value is old_dentry with updated information. -Symbolic links can be created with debugfs_create_symlink(). - -There is one important thing that all debugfs users must take into account: -there is no automatic cleanup of any directories created in debugfs. If a -module is unloaded without explicitly removing debugfs entries, the result -will be a lot of stale pointers and no end of highly antisocial behavior. -So all debugfs users - at least those which can be built as modules - must -be prepared to remove all files and directories they create there. A file -can be removed with: - - void debugfs_remove(struct dentry *dentry); - -The dentry value can be NULL, in which case nothing will be removed. - -Once upon a time, debugfs users were required to remember the dentry -pointer for every debugfs file they created so that all files could be -cleaned up. 
We live in more civilized times now, though, and debugfs users -can call: - - void debugfs_remove_recursive(struct dentry *dentry); - -If this function is passed a pointer for the dentry corresponding to the -top-level directory, the entire hierarchy below that directory will be -removed. - -Notes: - [1] http://lwn.net/Articles/309298/ diff --git a/trunk/Documentation/x86/x86_64/boot-options.txt b/trunk/Documentation/x86/x86_64/boot-options.txt index 29a6ff8bc7d3..2db5893d6c97 100644 --- a/trunk/Documentation/x86/x86_64/boot-options.txt +++ b/trunk/Documentation/x86/x86_64/boot-options.txt @@ -5,51 +5,21 @@ only the AMD64 specific ones are listed here. Machine check - Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables. - - mce=off - Disable machine check - mce=no_cmci - Disable CMCI(Corrected Machine Check Interrupt) that - Intel processor supports. Usually this disablement is - not recommended, but it might be handy if your hardware - is misbehaving. - Note that you'll get more problems without CMCI than with - due to the shared banks, i.e. you might get duplicated - error logs. - mce=dont_log_ce - Don't make logs for corrected errors. All events reported - as corrected are silently cleared by OS. - This option will be useful if you have no interest in any - of corrected errors. - mce=ignore_ce - Disable features for corrected errors, e.g. polling timer - and CMCI. All events reported as corrected are not cleared - by OS and remained in its error banks. - Usually this disablement is not recommended, however if - there is an agent checking/clearing corrected errors - (e.g. BIOS or hardware monitoring applications), conflicting - with OS's error handling, and you cannot deactivate the agent, - then this option will be a help. - mce=bootlog - Enable logging of machine checks left over from booting. - Disabled by default on AMD because some BIOS leave bogus ones. - If your BIOS doesn't do that it's a good idea to enable though - to make sure you log even machine check events that result - in a reboot. On Intel systems it is enabled by default. + mce=off disable machine check + mce=bootlog Enable logging of machine checks left over from booting. + Disabled by default on AMD because some BIOS leave bogus ones. + If your BIOS doesn't do that it's a good idea to enable though + to make sure you log even machine check events that result + in a reboot. On Intel systems it is enabled by default. mce=nobootlog Disable boot machine check logging. - mce=tolerancelevel[,monarchtimeout] (number,number) - tolerance levels: + mce=tolerancelevel (number) 0: always panic on uncorrected errors, log corrected errors 1: panic or SIGBUS on uncorrected errors, log corrected errors 2: SIGBUS or log uncorrected errors, log corrected errors 3: never panic or SIGBUS, log all errors (for testing only) Default is 1 Can be also set using sysfs which is preferable. - monarchtimeout: - Sets the time in us to wait for other CPUs on machine checks. 0 - to disable. nomce (for compatibility with i386): same as mce=off diff --git a/trunk/Documentation/x86/x86_64/machinecheck b/trunk/Documentation/x86/x86_64/machinecheck index b1fb30273286..a05e58e7b159 100644 --- a/trunk/Documentation/x86/x86_64/machinecheck +++ b/trunk/Documentation/x86/x86_64/machinecheck @@ -41,9 +41,7 @@ check_interval the polling interval. When the poller stops finding MCEs, it triggers an exponential backoff (poll less often) on the polling interval. The check_interval variable is both the initial and - maximum polling interval. 
0 means no polling for corrected machine - check errors (but some corrected errors might be still reported - in other ways) + maximum polling interval. tolerant Tolerance level. When a machine check exception occurs for a non @@ -69,10 +67,6 @@ trigger Program to run when a machine check event is detected. This is an alternative to running mcelog regularly from cron and allows to detect events faster. -monarch_timeout - How long to wait for the other CPUs to machine check too on a - exception. 0 to disable waiting for other CPUs. - Unit: us TBD document entries for AMD threshold interrupt configuration diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 356d2ec8e2fb..68f5578fe38e 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -789,26 +789,10 @@ config X86_MCE to disable it. MCE support simply ignores non-MCE processors like the 386 and 486, so nearly everyone can say Y here. -config X86_OLD_MCE - depends on X86_32 && X86_MCE - bool "Use legacy machine check code (will go away)" - default n - select X86_ANCIENT_MCE - ---help--- - Use the old i386 machine check code. This is merely intended for - testing in a transition period. Try this if you run into any machine - check related software problems, but report the problem to - linux-kernel. When in doubt say no. - -config X86_NEW_MCE - depends on X86_MCE - bool - default y if (!X86_OLD_MCE && X86_32) || X86_64 - config X86_MCE_INTEL def_bool y prompt "Intel MCE features" - depends on X86_NEW_MCE && X86_LOCAL_APIC + depends on X86_64 && X86_MCE && X86_LOCAL_APIC ---help--- Additional support for intel specific MCE features such as the thermal monitor. @@ -816,36 +800,19 @@ config X86_MCE_INTEL config X86_MCE_AMD def_bool y prompt "AMD MCE features" - depends on X86_NEW_MCE && X86_LOCAL_APIC + depends on X86_64 && X86_MCE && X86_LOCAL_APIC ---help--- Additional support for AMD specific MCE features such as the DRAM Error Threshold. -config X86_ANCIENT_MCE - def_bool n - depends on X86_32 - prompt "Support for old Pentium 5 / WinChip machine checks" - ---help--- - Include support for machine check handling on old Pentium 5 or WinChip - systems. These typically need to be enabled explicitely on the command - line. - config X86_MCE_THRESHOLD depends on X86_MCE_AMD || X86_MCE_INTEL bool default y -config X86_MCE_INJECT - depends on X86_NEW_MCE - tristate "Machine check injector support" - ---help--- - Provide support for injecting machine checks for testing purposes. - If you don't know what a machine check is and you don't do kernel - QA it is safe to say n. - config X86_MCE_NONFATAL tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" - depends on X86_OLD_MCE + depends on X86_32 && X86_MCE ---help--- Enabling this feature starts a timer that triggers every 5 seconds which will look at the machine check registers to see if anything happened. @@ -858,15 +825,11 @@ config X86_MCE_NONFATAL config X86_MCE_P4THERMAL bool "check for P4 thermal throttling interrupt." - depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP) + depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) ---help--- Enabling this feature will cause a message to be printed when the P4 enters thermal throttling. 
-config X86_THERMAL_VECTOR - def_bool y - depends on X86_MCE_P4THERMAL || X86_MCE_INTEL - config VM86 bool "Enable VM86 support" if EMBEDDED default y diff --git a/trunk/arch/x86/crypto/Makefile b/trunk/arch/x86/crypto/Makefile index cfb0010fa940..ebe7deedd5b4 100644 --- a/trunk/arch/x86/crypto/Makefile +++ b/trunk/arch/x86/crypto/Makefile @@ -2,8 +2,6 @@ # Arch-specific CryptoAPI modules. # -obj-$(CONFIG_CRYPTO_FPU) += fpu.o - obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o diff --git a/trunk/arch/x86/crypto/aesni-intel_glue.c b/trunk/arch/x86/crypto/aesni-intel_glue.c index 4e663398f77f..02af0af65497 100644 --- a/trunk/arch/x86/crypto/aesni-intel_glue.c +++ b/trunk/arch/x86/crypto/aesni-intel_glue.c @@ -21,22 +21,6 @@ #include #include -#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) -#define HAS_CTR -#endif - -#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) -#define HAS_LRW -#endif - -#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) -#define HAS_PCBC -#endif - -#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) -#define HAS_XTS -#endif - struct async_aes_ctx { struct cryptd_ablkcipher *cryptd_tfm; }; @@ -153,41 +137,6 @@ static struct crypto_alg aesni_alg = { } }; -static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - aesni_enc(ctx, dst, src); -} - -static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - aesni_dec(ctx, dst, src); -} - -static struct crypto_alg __aesni_alg = { - .cra_name = "__aes-aesni", - .cra_driver_name = "__driver-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = __aes_encrypt, - .cia_decrypt = __aes_decrypt - } - } -}; - static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -328,16 +277,8 @@ static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int key_len) { struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; - int err; - crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) - & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, key_len); - crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) - & CRYPTO_TFM_RES_MASK); - return err; + return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len); } static int ablk_encrypt(struct ablkcipher_request *req) @@ -470,163 +411,6 @@ static struct crypto_alg ablk_cbc_alg = { }, }; -#ifdef HAS_CTR -static int ablk_ctr_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_ctr_alg = { - .cra_name = "ctr(aes)", - 
.cra_driver_name = "ctr-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), - .cra_init = ablk_ctr_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - .geniv = "chainiv", - }, - }, -}; -#endif - -#ifdef HAS_LRW -static int ablk_lrw_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_lrw_alg = { - .cra_name = "lrw(aes)", - .cra_driver_name = "lrw-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), - .cra_init = ablk_lrw_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; -#endif - -#ifdef HAS_PCBC -static int ablk_pcbc_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_pcbc_alg = { - .cra_name = "pcbc(aes)", - .cra_driver_name = "pcbc-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list), - .cra_init = ablk_pcbc_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; -#endif - -#ifdef HAS_XTS -static int ablk_xts_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_xts_alg = { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), - .cra_init = ablk_xts_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * 
AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; -#endif - static int __init aesni_init(void) { int err; @@ -637,8 +421,6 @@ static int __init aesni_init(void) } if ((err = crypto_register_alg(&aesni_alg))) goto aes_err; - if ((err = crypto_register_alg(&__aesni_alg))) - goto __aes_err; if ((err = crypto_register_alg(&blk_ecb_alg))) goto blk_ecb_err; if ((err = crypto_register_alg(&blk_cbc_alg))) @@ -647,41 +429,9 @@ static int __init aesni_init(void) goto ablk_ecb_err; if ((err = crypto_register_alg(&ablk_cbc_alg))) goto ablk_cbc_err; -#ifdef HAS_CTR - if ((err = crypto_register_alg(&ablk_ctr_alg))) - goto ablk_ctr_err; -#endif -#ifdef HAS_LRW - if ((err = crypto_register_alg(&ablk_lrw_alg))) - goto ablk_lrw_err; -#endif -#ifdef HAS_PCBC - if ((err = crypto_register_alg(&ablk_pcbc_alg))) - goto ablk_pcbc_err; -#endif -#ifdef HAS_XTS - if ((err = crypto_register_alg(&ablk_xts_alg))) - goto ablk_xts_err; -#endif return err; -#ifdef HAS_XTS -ablk_xts_err: -#endif -#ifdef HAS_PCBC - crypto_unregister_alg(&ablk_pcbc_alg); -ablk_pcbc_err: -#endif -#ifdef HAS_LRW - crypto_unregister_alg(&ablk_lrw_alg); -ablk_lrw_err: -#endif -#ifdef HAS_CTR - crypto_unregister_alg(&ablk_ctr_alg); -ablk_ctr_err: -#endif - crypto_unregister_alg(&ablk_cbc_alg); ablk_cbc_err: crypto_unregister_alg(&ablk_ecb_alg); ablk_ecb_err: @@ -689,8 +439,6 @@ static int __init aesni_init(void) blk_cbc_err: crypto_unregister_alg(&blk_ecb_alg); blk_ecb_err: - crypto_unregister_alg(&__aesni_alg); -__aes_err: crypto_unregister_alg(&aesni_alg); aes_err: return err; @@ -698,23 +446,10 @@ static int __init aesni_init(void) static void __exit aesni_exit(void) { -#ifdef HAS_XTS - crypto_unregister_alg(&ablk_xts_alg); -#endif -#ifdef HAS_PCBC - crypto_unregister_alg(&ablk_pcbc_alg); -#endif -#ifdef HAS_LRW - crypto_unregister_alg(&ablk_lrw_alg); -#endif -#ifdef HAS_CTR - crypto_unregister_alg(&ablk_ctr_alg); -#endif crypto_unregister_alg(&ablk_cbc_alg); crypto_unregister_alg(&ablk_ecb_alg); crypto_unregister_alg(&blk_cbc_alg); crypto_unregister_alg(&blk_ecb_alg); - crypto_unregister_alg(&__aesni_alg); crypto_unregister_alg(&aesni_alg); } diff --git a/trunk/arch/x86/crypto/fpu.c b/trunk/arch/x86/crypto/fpu.c deleted file mode 100644 index 5f9781a3815f..000000000000 --- a/trunk/arch/x86/crypto/fpu.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * FPU: Wrapper for blkcipher touching fpu - * - * Copyright (c) Intel Corp. - * Author: Huang Ying - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- *
- */
-
-#include <crypto/algapi.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/i387.h>
-
-struct crypto_fpu_ctx {
-	struct crypto_blkcipher *child;
-};
-
-static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
-			     unsigned int keylen)
-{
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
-	struct crypto_blkcipher *child = ctx->child;
-	int err;
-
-	crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				   CRYPTO_TFM_REQ_MASK);
-	err = crypto_blkcipher_setkey(child, key, keylen);
-	crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
-			     CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
-{
-	int err;
-	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-	struct crypto_blkcipher *child = ctx->child;
-	struct blkcipher_desc desc = {
-		.tfm = child,
-		.info = desc_in->info,
-		.flags = desc_in->flags,
-	};
-
-	kernel_fpu_begin();
-	err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
-	kernel_fpu_end();
-	return err;
-}
-
-static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
-{
-	int err;
-	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-	struct crypto_blkcipher *child = ctx->child;
-	struct blkcipher_desc desc = {
-		.tfm = child,
-		.info = desc_in->info,
-		.flags = desc_in->flags,
-	};
-
-	kernel_fpu_begin();
-	err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
-	kernel_fpu_end();
-	return err;
-}
-
-static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
-{
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_blkcipher *cipher;
-
-	cipher = crypto_spawn_blkcipher(spawn);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
-
-	ctx->child = cipher;
-	return 0;
-}
-
-static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-	crypto_free_blkcipher(ctx->child);
-}
-
-static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
-{
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
-	int err;
-
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
-	if (err)
-		return ERR_PTR(err);
-
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
-
-	inst = crypto_alloc_instance("fpu", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
-
-	inst->alg.cra_flags = alg->cra_flags;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = alg->cra_type;
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
-	inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
-	inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
-	inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
-	inst->alg.cra_init = crypto_fpu_init_tfm;
-	inst->alg.cra_exit = crypto_fpu_exit_tfm;
-	inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
-	inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
-	inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
-
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
-
-static void crypto_fpu_free(struct crypto_instance *inst)
-{
-
crypto_drop_spawn(crypto_instance_ctx(inst)); - kfree(inst); -} - -static struct crypto_template crypto_fpu_tmpl = { - .name = "fpu", - .alloc = crypto_fpu_alloc, - .free = crypto_fpu_free, - .module = THIS_MODULE, -}; - -static int __init crypto_fpu_module_init(void) -{ - return crypto_register_template(&crypto_fpu_tmpl); -} - -static void __exit crypto_fpu_module_exit(void) -{ - crypto_unregister_template(&crypto_fpu_tmpl); -} - -module_init(crypto_fpu_module_init); -module_exit(crypto_fpu_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("FPU block cipher wrapper"); diff --git a/trunk/arch/x86/include/asm/entry_arch.h b/trunk/arch/x86/include/asm/entry_arch.h index ff8cbfa07851..d750a10ccad6 100644 --- a/trunk/arch/x86/include/asm/entry_arch.h +++ b/trunk/arch/x86/include/asm/entry_arch.h @@ -14,7 +14,6 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) -BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, smp_invalidate_interrupt) @@ -53,16 +52,8 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif -#ifdef CONFIG_X86_THERMAL_VECTOR +#ifdef CONFIG_X86_MCE_P4THERMAL BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #endif -#ifdef CONFIG_X86_MCE_THRESHOLD -BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_NEW_MCE -BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR) -#endif - #endif diff --git a/trunk/arch/x86/include/asm/hardirq.h b/trunk/arch/x86/include/asm/hardirq.h index 82e3e8f01043..9ebc5c255032 100644 --- a/trunk/arch/x86/include/asm/hardirq.h +++ b/trunk/arch/x86/include/asm/hardirq.h @@ -22,7 +22,7 @@ typedef struct { #endif #ifdef CONFIG_X86_MCE unsigned int irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +# ifdef CONFIG_X86_64 unsigned int irq_threshold_count; # endif #endif diff --git a/trunk/arch/x86/include/asm/hw_irq.h b/trunk/arch/x86/include/asm/hw_irq.h index ba180d93b08c..6df45f639666 100644 --- a/trunk/arch/x86/include/asm/hw_irq.h +++ b/trunk/arch/x86/include/asm/hw_irq.h @@ -34,7 +34,6 @@ extern void perf_pending_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); extern void reschedule_interrupt(void); -extern void mce_self_interrupt(void); extern void invalidate_interrupt(void); extern void invalidate_interrupt0(void); @@ -47,7 +46,6 @@ extern void invalidate_interrupt6(void); extern void invalidate_interrupt7(void); extern void irq_move_cleanup_interrupt(void); -extern void reboot_interrupt(void); extern void threshold_interrupt(void); extern void call_function_interrupt(void); diff --git a/trunk/arch/x86/include/asm/irq_vectors.h b/trunk/arch/x86/include/asm/irq_vectors.h index 5b21f0ec3df2..e997be98c9b9 100644 --- a/trunk/arch/x86/include/asm/irq_vectors.h +++ b/trunk/arch/x86/include/asm/irq_vectors.h @@ -25,7 +25,6 @@ */ #define NMI_VECTOR 0x02 -#define MCE_VECTOR 0x12 /* * IDT vectors usable for external interrupt sources start @@ -88,8 +87,13 @@ #define CALL_FUNCTION_VECTOR 0xfc #define CALL_FUNCTION_SINGLE_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -#define REBOOT_VECTOR 0xf8 + +#ifdef CONFIG_X86_32 +/* 0xf8 - 0xf9 : free */ +#else +# define THRESHOLD_APIC_VECTOR 0xf9 +# 
define UV_BAU_MESSAGE 0xf8 +#endif /* f0-f7 used for spreading out TLB flushes: */ #define INVALIDATE_TLB_VECTOR_END 0xf7 @@ -113,13 +117,6 @@ */ #define LOCAL_PENDING_VECTOR 0xec -#define UV_BAU_MESSAGE 0xec - -/* - * Self IPI vector for machine checks - */ -#define MCE_SELF_VECTOR 0xeb - /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority diff --git a/trunk/arch/x86/include/asm/mce.h b/trunk/arch/x86/include/asm/mce.h index 540a466e50f5..4f8c199584e7 100644 --- a/trunk/arch/x86/include/asm/mce.h +++ b/trunk/arch/x86/include/asm/mce.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_MCE_H #define _ASM_X86_MCE_H +#ifdef __x86_64__ + #include #include @@ -8,35 +10,21 @@ * Machine Check support for x86 */ -#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ -#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ -#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ -#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ -#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ -#define MCG_EXT_CNT_SHIFT 16 -#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) -#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ - -#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ -#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ -#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ - -#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ -#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ -#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ -#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ -#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ -#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ -#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ -#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ -#define MCI_STATUS_AR (1ULL<<55) /* Action required */ - -/* MISC register defines */ -#define MCM_ADDR_SEGOFF 0 /* segment offset */ -#define MCM_ADDR_LINEAR 1 /* linear address */ -#define MCM_ADDR_PHYS 2 /* physical address */ -#define MCM_ADDR_MEM 3 /* memory address */ -#define MCM_ADDR_GENERIC 7 /* generic */ +#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ +#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ +#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ + +#define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */ + +#define MCI_STATUS_VAL (1UL<<63) /* valid error */ +#define MCI_STATUS_OVER (1UL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1UL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1UL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1UL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1UL<<58) /* addr reg. valid */ +#define MCI_STATUS_PCC (1UL<<57) /* processor context corrupt */ /* Fields are zero when not available */ struct mce { @@ -46,19 +34,13 @@ struct mce { __u64 mcgstatus; __u64 ip; __u64 tsc; /* cpu time stamp counter */ - __u64 time; /* wall time_t when error was detected */ - __u8 cpuvendor; /* cpu vendor as encoded in system.h */ - __u8 pad1; - __u16 pad2; - __u32 cpuid; /* CPUID 1 EAX */ + __u64 res1; /* for future extension */ + __u64 res2; /* dito. 
*/ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ - __u8 cpu; /* cpu number; obsolete; use extcpu now */ + __u8 cpu; /* cpu that raised the error */ __u8 finished; /* entry is valid */ - __u32 extcpu; /* linux cpu number that detected the error */ - __u32 socketid; /* CPU socket ID */ - __u32 apicid; /* CPU initial apic ID */ - __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ + __u32 pad; }; /* @@ -75,7 +57,7 @@ struct mce_log { unsigned len; /* = MCE_LOG_LEN */ unsigned next; unsigned flags; - unsigned recordlen; /* length of struct mce */ + unsigned pad0; struct mce entry[MCE_LOG_LEN]; }; @@ -100,16 +82,19 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) +#endif /* __x86_64__ */ + #ifdef __KERNEL__ +#ifdef CONFIG_X86_32 extern int mce_disabled; +#else /* CONFIG_X86_32 */ #include -#include void mce_setup(struct mce *m); void mce_log(struct mce *m); -DECLARE_PER_CPU(struct sys_device, mce_dev); +DECLARE_PER_CPU(struct sys_device, device_mce); extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); /* @@ -119,8 +104,6 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) #ifdef CONFIG_X86_MCE_INTEL -extern int mce_cmci_disabled; -extern int mce_ignore_ce; void mce_intel_feature_init(struct cpuinfo_x86 *c); void cmci_clear(void); void cmci_reenable(void); @@ -140,16 +123,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c); static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif -int mce_available(struct cpuinfo_x86 *c); - -DECLARE_PER_CPU(unsigned, mce_exception_count); -DECLARE_PER_CPU(unsigned, mce_poll_count); +extern int mce_available(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); extern atomic_t mce_entry; -void do_machine_check(struct pt_regs *, long); +extern void do_machine_check(struct pt_regs *, long); typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); @@ -159,16 +139,14 @@ enum mcp_flags { MCP_UC = (1 << 1), /* log uncorrected errors */ MCP_DONTLOG = (1 << 2), /* only clear, don't log */ }; -void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); +extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); -int mce_notify_irq(void); -void mce_notify_process(void); +extern int mce_notify_user(void); -DECLARE_PER_CPU(struct mce, injectm); -extern struct file_operations mce_chrdev_ops; +#endif /* !CONFIG_X86_32 */ #ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); +extern void mcheck_init(struct cpuinfo_x86 *c); #else #define mcheck_init(c) do { } while (0) #endif diff --git a/trunk/arch/x86/include/asm/msr-index.h b/trunk/arch/x86/include/asm/msr-index.h index 1692fb5050e3..4d58d04fca83 100644 --- a/trunk/arch/x86/include/asm/msr-index.h +++ b/trunk/arch/x86/include/asm/msr-index.h @@ -207,14 +207,7 @@ #define MSR_IA32_THERM_CONTROL 0x0000019a #define MSR_IA32_THERM_INTERRUPT 0x0000019b - -#define THERM_INT_LOW_ENABLE (1 << 0) -#define THERM_INT_HIGH_ENABLE (1 << 1) - #define MSR_IA32_THERM_STATUS 0x0000019c - -#define THERM_STATUS_PROCHOT (1 << 0) - #define MSR_IA32_MISC_ENABLE 0x000001a0 /* MISC_ENABLE bits: architectural */ diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c index 8c7c042ecad1..076d3881f3da 100644 --- a/trunk/arch/x86/kernel/apic/apic.c +++ b/trunk/arch/x86/kernel/apic/apic.c @@ -899,7 +899,7 @@ 
void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_THERMAL_VECTOR +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -2017,7 +2017,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_THERMAL_VECTOR +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif diff --git a/trunk/arch/x86/kernel/apic/nmi.c b/trunk/arch/x86/kernel/apic/nmi.c index b3025b43b63a..a691302dc3ff 100644 --- a/trunk/arch/x86/kernel/apic/nmi.c +++ b/trunk/arch/x86/kernel/apic/nmi.c @@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu) static inline int mce_in_progress(void) { -#if defined(CONFIG_X86_NEW_MCE) +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) return atomic_read(&mce_entry) > 0; #endif return 0; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/Makefile b/trunk/arch/x86/kernel/cpu/mcheck/Makefile index 45004faf67ea..b2f89829bbe8 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/Makefile +++ b/trunk/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,11 +1,7 @@ -obj-y = mce.o therm_throt.o +obj-y = mce_$(BITS).o therm_throt.o -obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o -obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o -obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o -obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o -obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o +obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o +obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o -obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o diff --git a/trunk/arch/x86/kernel/cpu/mcheck/k7.c b/trunk/arch/x86/kernel/cpu/mcheck/k7.c index 89e510424152..dd3af6e7b39a 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/k7.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/k7.c @@ -2,10 +2,11 @@ * Athlon specific Machine Check Exception Reporting * (C) Copyright 2002 Dave Jones */ -#include -#include -#include + #include +#include +#include +#include #include #include @@ -14,12 +15,12 @@ #include "mce.h" -/* Machine Check Handler For AMD Athlon/Duron: */ +/* Machine Check Handler For AMD Athlon/Duron */ static void k7_machine_check(struct pt_regs *regs, long error_code) { + int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; - int recover = 1; int i; rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); @@ -31,19 +32,15 @@ static void k7_machine_check(struct pt_regs *regs, long error_code) for (i = 1; i < nr_mce_banks; i++) { rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); - if (high & (1<<31)) { + if (high&(1<<31)) { char misc[20]; char addr[24]; - - misc[0] = '\0'; - addr[0] = '\0'; - + misc[0] = addr[0] = '\0'; if (high & (1<<29)) recover |= 1; if (high & (1<<25)) recover |= 2; high &= ~(1<<31); - if (high & (1<<27)) { rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); snprintf(misc, 20, "[%08x%08x]", ahigh, alow); @@ -52,31 +49,27 @@ static void k7_machine_check(struct pt_regs *regs, long error_code) rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); snprintf(addr, 24, " at %08x%08x", ahigh, alow); } - printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", smp_processor_id(), i, high, low, misc, addr); - - 
/* Clear it: */ + /* Clear it */ wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); - /* Serialize: */ + /* Serialize */ wmb(); add_taint(TAINT_MACHINE_CHECK); } } - if (recover & 2) + if (recover&2) panic("CPU context corrupt"); - if (recover & 1) + if (recover&1) panic("Unable to continue"); - printk(KERN_EMERG "Attempting to continue.\n"); - mcgstl &= ~(1<<2); wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } -/* AMD K7 machine check is Intel like: */ +/* AMD K7 machine check is Intel like */ void amd_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; @@ -86,26 +79,21 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) return; machine_check_vector = k7_machine_check; - /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); printk(KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<8)) /* Control register present ? */ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; - /* - * Clear status for MC index 0 separately, we don't touch CTL, - * as some K7 Athlons cause spurious MCEs when its enabled: - */ + /* Clear status for MC index 0 separately, we don't touch CTL, + * as some K7 Athlons cause spurious MCEs when its enabled. */ if (boot_cpu_data.x86 == 6) { wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); i = 1; } else i = 0; - for (; i < nr_mce_banks; i++) { wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c deleted file mode 100644 index a3a235a53f09..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Machine check injection support. - * Copyright 2008 Intel Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Authors: - * Andi Kleen - * Ying Huang - */ -#include -#include -#include -#include -#include -#include -#include -#include - -/* Update fake mce registers on current CPU. 
*/ -static void inject_mce(struct mce *m) -{ - struct mce *i = &per_cpu(injectm, m->extcpu); - - /* Make sure noone reads partially written injectm */ - i->finished = 0; - mb(); - m->finished = 0; - /* First set the fields after finished */ - i->extcpu = m->extcpu; - mb(); - /* Now write record in order, finished last (except above) */ - memcpy(i, m, sizeof(struct mce)); - /* Finally activate it */ - mb(); - i->finished = 1; -} - -struct delayed_mce { - struct timer_list timer; - struct mce m; -}; - -/* Inject mce on current CPU */ -static void raise_mce(unsigned long data) -{ - struct delayed_mce *dm = (struct delayed_mce *)data; - struct mce *m = &dm->m; - int cpu = m->extcpu; - - inject_mce(m); - if (m->status & MCI_STATUS_UC) { - struct pt_regs regs; - memset(®s, 0, sizeof(struct pt_regs)); - regs.ip = m->ip; - regs.cs = m->cs; - printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); - do_machine_check(®s, 0); - printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); - } else { - mce_banks_t b; - memset(&b, 0xff, sizeof(mce_banks_t)); - printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); - machine_check_poll(0, &b); - mce_notify_irq(); - printk(KERN_INFO "Finished machine check poll on CPU %d\n", - cpu); - } - kfree(dm); -} - -/* Error injection interface */ -static ssize_t mce_write(struct file *filp, const char __user *ubuf, - size_t usize, loff_t *off) -{ - struct delayed_mce *dm; - struct mce m; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - /* - * There are some cases where real MSR reads could slip - * through. - */ - if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) - return -EIO; - - if ((unsigned long)usize > sizeof(struct mce)) - usize = sizeof(struct mce); - if (copy_from_user(&m, ubuf, usize)) - return -EFAULT; - - if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) - return -EINVAL; - - dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); - if (!dm) - return -ENOMEM; - - /* - * Need to give user space some time to set everything up, - * so do it a jiffie or two later everywhere. - * Should we use a hrtimer here for better synchronization? - */ - memcpy(&dm->m, &m, sizeof(struct mce)); - setup_timer(&dm->timer, raise_mce, (unsigned long)dm); - dm->timer.expires = jiffies + 2; - add_timer_on(&dm->timer, m.extcpu); - return usize; -} - -static int inject_init(void) -{ - printk(KERN_INFO "Machine check injector initialized\n"); - mce_chrdev_ops.write = mce_write; - return 0; -} - -module_init(inject_init); -/* - * Cannot tolerate unloading currently because we cannot - * guarantee all openers of mce_chrdev will get a reference to us. - */ -MODULE_LICENSE("GPL"); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-internal.h b/trunk/arch/x86/kernel/cpu/mcheck/mce-internal.h deleted file mode 100644 index 54dcb8ff12e5..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ /dev/null @@ -1,15 +0,0 @@ -#include - -enum severity_level { - MCE_NO_SEVERITY, - MCE_KEEP_SEVERITY, - MCE_SOME_SEVERITY, - MCE_AO_SEVERITY, - MCE_UC_SEVERITY, - MCE_AR_SEVERITY, - MCE_PANIC_SEVERITY, -}; - -int mce_severity(struct mce *a, int tolerant, char **msg); - -extern int mce_ser; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-severity.c b/trunk/arch/x86/kernel/cpu/mcheck/mce-severity.c deleted file mode 100644 index ff0807f97056..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * MCE grading rules. - * Copyright 2008, 2009 Intel Corporation. 
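/*
 * The deleted inject_mce() above publishes a record with a "finished"
 * flag and explicit barriers: clear the flag, copy the payload, then
 * set the flag last, so a racing reader either sees finished == 0 or a
 * fully written record. A minimal single-writer sketch of that publish
 * protocol using C11 atomics (the kernel uses mb() instead):
 */
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

struct record {
	atomic_int finished;	/* 1 only when payload is complete */
	char payload[32];	/* stand-in for the struct mce fields */
};

static struct record slot;

static void publish(const char *msg)
{
	atomic_store(&slot.finished, 0);	/* invalidate old contents */
	strncpy(slot.payload, msg, sizeof(slot.payload) - 1);
	atomic_store(&slot.finished, 1);	/* activate, strictly last */
}

static int consume(char *buf, size_t len)
{
	if (!atomic_load(&slot.finished))
		return 0;			/* not ready; retry later */
	strncpy(buf, slot.payload, len - 1);
	return 1;
}

int main(void)
{
	char buf[32] = "";

	publish("fake mce record");
	if (consume(buf, sizeof(buf)))
		printf("consumed: %s\n", buf);
	return 0;
}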
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Author: Andi Kleen - */ -#include -#include -#include -#include -#include - -#include "mce-internal.h" - -/* - * Grade an mce by severity. In general the most severe ones are processed - * first. Since there are quite a lot of combinations test the bits in a - * table-driven way. The rules are simply processed in order, first - * match wins. - * - * Note this is only used for machine check exceptions, the corrected - * errors use much simpler rules. The exceptions still check for the corrected - * errors, but only to leave them alone for the CMCI handler (except for - * panic situations) - */ - -enum context { IN_KERNEL = 1, IN_USER = 2 }; -enum ser { SER_REQUIRED = 1, NO_SER = 2 }; - -static struct severity { - u64 mask; - u64 result; - unsigned char sev; - unsigned char mcgmask; - unsigned char mcgres; - unsigned char ser; - unsigned char context; - unsigned char covered; - char *msg; -} severities[] = { -#define KERNEL .context = IN_KERNEL -#define USER .context = IN_USER -#define SER .ser = SER_REQUIRED -#define NOSER .ser = NO_SER -#define SEV(s) .sev = MCE_ ## s ## _SEVERITY -#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } -#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } -#define MCGMASK(x, res, s, m, r...) \ - { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } -#define MASK(x, y, s, m, r...) \ - { .mask = x, .result = y, SEV(s), .msg = m, ## r } -#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) -#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) -#define MCACOD 0xffff - - BITCLR(MCI_STATUS_VAL, NO, "Invalid"), - BITCLR(MCI_STATUS_EN, NO, "Not enabled"), - BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), - /* When MCIP is not set something is very confused */ - MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), - /* Neither return not error IP -- no chance to recover -> PANIC */ - MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, - "Neither restart nor error IP"), - MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", - KERNEL), - BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), - MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, - "Spurious not enabled", SER), - - /* ignore OVER for UCNA */ - MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, - "Uncorrected no action required", SER), - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, - "Illegal combination (UCNA with AR=1)", SER), - MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), - - /* AR add known MCACODs here */ - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, - "Action required with lost events", SER), - MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, - "Action required; unknown MCACOD", SER), - - /* known AO MCACODs: */ - MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, - "Action optional: memory scrubbing error", SER), - MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, - "Action optional: last level cache writeback error", SER), - - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, - "Action optional unknown MCACOD", SER), - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, - "Action optional with lost events", SER), - BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed 
uncorrected"), - BITSET(MCI_STATUS_UC, UC, "Uncorrected"), - BITSET(0, SOME, "No match") /* always matches. keep at end */ -}; - -/* - * If the EIPV bit is set, it means the saved IP is the - * instruction which caused the MCE. - */ -static int error_context(struct mce *m) -{ - if (m->mcgstatus & MCG_STATUS_EIPV) - return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; - /* Unknown, assume kernel */ - return IN_KERNEL; -} - -int mce_severity(struct mce *a, int tolerant, char **msg) -{ - enum context ctx = error_context(a); - struct severity *s; - - for (s = severities;; s++) { - if ((a->status & s->mask) != s->result) - continue; - if ((a->mcgstatus & s->mcgmask) != s->mcgres) - continue; - if (s->ser == SER_REQUIRED && !mce_ser) - continue; - if (s->ser == NO_SER && mce_ser) - continue; - if (s->context && ctx != s->context) - continue; - if (msg) - *msg = s->msg; - s->covered = 1; - if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { - if (panic_on_oops || tolerant < 1) - return MCE_PANIC_SEVERITY; - } - return s->sev; - } -} - -static void *s_start(struct seq_file *f, loff_t *pos) -{ - if (*pos >= ARRAY_SIZE(severities)) - return NULL; - return &severities[*pos]; -} - -static void *s_next(struct seq_file *f, void *data, loff_t *pos) -{ - if (++(*pos) >= ARRAY_SIZE(severities)) - return NULL; - return &severities[*pos]; -} - -static void s_stop(struct seq_file *f, void *data) -{ -} - -static int s_show(struct seq_file *f, void *data) -{ - struct severity *ser = data; - seq_printf(f, "%d\t%s\n", ser->covered, ser->msg); - return 0; -} - -static const struct seq_operations severities_seq_ops = { - .start = s_start, - .next = s_next, - .stop = s_stop, - .show = s_show, -}; - -static int severities_coverage_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &severities_seq_ops); -} - -static ssize_t severities_coverage_write(struct file *file, - const char __user *ubuf, - size_t count, loff_t *ppos) -{ - int i; - for (i = 0; i < ARRAY_SIZE(severities); i++) - severities[i].covered = 0; - return count; -} - -static const struct file_operations severities_coverage_fops = { - .open = severities_coverage_open, - .release = seq_release, - .read = seq_read, - .write = severities_coverage_write, -}; - -static int __init severities_debugfs_init(void) -{ - struct dentry *dmce = NULL, *fseverities_coverage = NULL; - - dmce = debugfs_create_dir("mce", NULL); - if (dmce == NULL) - goto err_out; - fseverities_coverage = debugfs_create_file("severities-coverage", - 0444, dmce, NULL, - &severities_coverage_fops); - if (fseverities_coverage == NULL) - goto err_out; - - return 0; - -err_out: - if (fseverities_coverage) - debugfs_remove(fseverities_coverage); - if (dmce) - debugfs_remove(dmce); - return -ENOMEM; -} -late_initcall(severities_debugfs_init); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce.c b/trunk/arch/x86/kernel/cpu/mcheck/mce.c deleted file mode 100644 index fabba15e4558..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce.c +++ /dev/null @@ -1,1964 +0,0 @@ -/* - * Machine check handler. - * - * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. - * Rest from unknown author(s). - * 2004 Andi Kleen. Rewrote most of it. 
- * Copyright 2008 Intel Corporation - * Author: Andi Kleen - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "mce-internal.h" -#include "mce.h" - -/* Handle unconfigured int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs *regs, long error_code) -{ - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", - smp_processor_id()); -} - -/* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; - -int mce_disabled; - -#ifdef CONFIG_X86_NEW_MCE - -#define MISC_MCELOG_MINOR 227 - -#define SPINUNIT 100 /* 100ns */ - -atomic_t mce_entry; - -DEFINE_PER_CPU(unsigned, mce_exception_count); - -/* - * Tolerant levels: - * 0: always panic on uncorrected errors, log corrected errors - * 1: panic or SIGBUS on uncorrected errors, log corrected errors - * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors - * 3: never panic or SIGBUS, log all errors (for testing only) - */ -static int tolerant = 1; -static int banks; -static u64 *bank; -static unsigned long notify_user; -static int rip_msr; -static int mce_bootlog = -1; -static int monarch_timeout = -1; -static int mce_panic_timeout; -static int mce_dont_log_ce; -int mce_cmci_disabled; -int mce_ignore_ce; -int mce_ser; - -static char trigger[128]; -static char *trigger_argv[2] = { trigger, NULL }; - -static unsigned long dont_init_banks; - -static DECLARE_WAIT_QUEUE_HEAD(mce_wait); -static DEFINE_PER_CPU(struct mce, mces_seen); -static int cpu_missing; - - -/* MCA banks polled by the period polling timer for corrected events */ -DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { - [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL -}; - -static inline int skip_bank_init(int i) -{ - return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); -} - -static DEFINE_PER_CPU(struct work_struct, mce_work); - -/* Do initial initialization of a struct mce */ -void mce_setup(struct mce *m) -{ - memset(m, 0, sizeof(struct mce)); - m->cpu = m->extcpu = smp_processor_id(); - rdtscll(m->tsc); - /* We hope get_seconds stays lockless */ - m->time = get_seconds(); - m->cpuvendor = boot_cpu_data.x86_vendor; - m->cpuid = cpuid_eax(1); -#ifdef CONFIG_SMP - m->socketid = cpu_data(m->extcpu).phys_proc_id; -#endif - m->apicid = cpu_data(m->extcpu).initial_apicid; - rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); -} - -DEFINE_PER_CPU(struct mce, injectm); -EXPORT_PER_CPU_SYMBOL_GPL(injectm); - -/* - * Lockless MCE logging infrastructure. - * This avoids deadlocks on printk locks without having to break locks. Also - * separate MCEs from kernel messages to avoid bogus bug reports. - */ - -static struct mce_log mcelog = { - .signature = MCE_LOG_SIGNATURE, - .len = MCE_LOG_LEN, - .recordlen = sizeof(struct mce), -}; - -void mce_log(struct mce *mce) -{ - unsigned next, entry; - - mce->finished = 0; - wmb(); - for (;;) { - entry = rcu_dereference(mcelog.next); - for (;;) { - /* - * When the buffer fills up discard new entries. - * Assume that the earlier errors are the more - * interesting ones: - */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, - (unsigned long *)&mcelog.flags); - return; - } - /* Old left over entry. 
Skip: */ - if (mcelog.entry[entry].finished) { - entry++; - continue; - } - break; - } - smp_rmb(); - next = entry + 1; - if (cmpxchg(&mcelog.next, entry, next) == entry) - break; - } - memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); - wmb(); - mcelog.entry[entry].finished = 1; - wmb(); - - mce->finished = 1; - set_bit(0, ¬ify_user); -} - -static void print_mce(struct mce *m) -{ - printk(KERN_EMERG - "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", - m->extcpu, m->mcgstatus, m->bank, m->status); - if (m->ip) { - printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); - if (m->cs == __KERNEL_CS) - print_symbol("{%s}", m->ip); - printk("\n"); - } - printk(KERN_EMERG "TSC %llx ", m->tsc); - if (m->addr) - printk("ADDR %llx ", m->addr); - if (m->misc) - printk("MISC %llx ", m->misc); - printk("\n"); - printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, - m->apicid); -} - -static void print_mce_head(void) -{ - printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); -} - -static void print_mce_tail(void) -{ - printk(KERN_EMERG "This is not a software problem!\n" - KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); -} - -#define PANIC_TIMEOUT 5 /* 5 seconds */ - -static atomic_t mce_paniced; - -/* Panic in progress. Enable interrupts and wait for final IPI */ -static void wait_for_panic(void) -{ - long timeout = PANIC_TIMEOUT*USEC_PER_SEC; - preempt_disable(); - local_irq_enable(); - while (timeout-- > 0) - udelay(1); - if (panic_timeout == 0) - panic_timeout = mce_panic_timeout; - panic("Panicing machine check CPU died"); -} - -static void mce_panic(char *msg, struct mce *final, char *exp) -{ - int i; - - /* - * Make sure only one CPU runs in machine check panic - */ - if (atomic_add_return(1, &mce_paniced) > 1) - wait_for_panic(); - barrier(); - - bust_spinlocks(1); - console_verbose(); - print_mce_head(); - /* First print corrected ones that are still unlogged */ - for (i = 0; i < MCE_LOG_LEN; i++) { - struct mce *m = &mcelog.entry[i]; - if (!(m->status & MCI_STATUS_VAL)) - continue; - if (!(m->status & MCI_STATUS_UC)) - print_mce(m); - } - /* Now print uncorrected but with the final one last */ - for (i = 0; i < MCE_LOG_LEN; i++) { - struct mce *m = &mcelog.entry[i]; - if (!(m->status & MCI_STATUS_VAL)) - continue; - if (!(m->status & MCI_STATUS_UC)) - continue; - if (!final || memcmp(m, final, sizeof(struct mce))) - print_mce(m); - } - if (final) - print_mce(final); - if (cpu_missing) - printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); - print_mce_tail(); - if (exp) - printk(KERN_EMERG "Machine check: %s\n", exp); - if (panic_timeout == 0) - panic_timeout = mce_panic_timeout; - panic(msg); -} - -/* Support code for software error injection */ - -static int msr_to_offset(u32 msr) -{ - unsigned bank = __get_cpu_var(injectm.bank); - if (msr == rip_msr) - return offsetof(struct mce, ip); - if (msr == MSR_IA32_MC0_STATUS + bank*4) - return offsetof(struct mce, status); - if (msr == MSR_IA32_MC0_ADDR + bank*4) - return offsetof(struct mce, addr); - if (msr == MSR_IA32_MC0_MISC + bank*4) - return offsetof(struct mce, misc); - if (msr == MSR_IA32_MCG_STATUS) - return offsetof(struct mce, mcgstatus); - return -1; -} - -/* MSR access wrappers used for error injection */ -static u64 mce_rdmsrl(u32 msr) -{ - u64 v; - if (__get_cpu_var(injectm).finished) { - int offset = msr_to_offset(msr); - if (offset < 0) - 
return 0; - return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); - } - rdmsrl(msr, v); - return v; -} - -static void mce_wrmsrl(u32 msr, u64 v) -{ - if (__get_cpu_var(injectm).finished) { - int offset = msr_to_offset(msr); - if (offset >= 0) - *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; - return; - } - wrmsrl(msr, v); -} - -/* - * Simple lockless ring to communicate PFNs from the exception handler with the - * process context work function. This is vastly simplified because there's - * only a single reader and a single writer. - */ -#define MCE_RING_SIZE 16 /* we use one entry less */ - -struct mce_ring { - unsigned short start; - unsigned short end; - unsigned long ring[MCE_RING_SIZE]; -}; -static DEFINE_PER_CPU(struct mce_ring, mce_ring); - -/* Runs with CPU affinity in workqueue */ -static int mce_ring_empty(void) -{ - struct mce_ring *r = &__get_cpu_var(mce_ring); - - return r->start == r->end; -} - -static int mce_ring_get(unsigned long *pfn) -{ - struct mce_ring *r; - int ret = 0; - - *pfn = 0; - get_cpu(); - r = &__get_cpu_var(mce_ring); - if (r->start == r->end) - goto out; - *pfn = r->ring[r->start]; - r->start = (r->start + 1) % MCE_RING_SIZE; - ret = 1; -out: - put_cpu(); - return ret; -} - -/* Always runs in MCE context with preempt off */ -static int mce_ring_add(unsigned long pfn) -{ - struct mce_ring *r = &__get_cpu_var(mce_ring); - unsigned next; - - next = (r->end + 1) % MCE_RING_SIZE; - if (next == r->start) - return -1; - r->ring[r->end] = pfn; - wmb(); - r->end = next; - return 0; -} - -int mce_available(struct cpuinfo_x86 *c) -{ - if (mce_disabled) - return 0; - return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); -} - -static void mce_schedule_work(void) -{ - if (!mce_ring_empty()) { - struct work_struct *work = &__get_cpu_var(mce_work); - if (!work_pending(work)) - schedule_work(work); - } -} - -/* - * Get the address of the instruction at the time of the machine check - * error. - */ -static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) -{ - - if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { - m->ip = regs->ip; - m->cs = regs->cs; - } else { - m->ip = 0; - m->cs = 0; - } - if (rip_msr) - m->ip = mce_rdmsrl(rip_msr); -} - -#ifdef CONFIG_X86_LOCAL_APIC -/* - * Called after interrupts have been reenabled again - * when a MCE happened during an interrupts off region - * in the kernel. - */ -asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) -{ - ack_APIC_irq(); - exit_idle(); - irq_enter(); - mce_notify_irq(); - mce_schedule_work(); - irq_exit(); -} -#endif - -static void mce_report_event(struct pt_regs *regs) -{ - if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { - mce_notify_irq(); - /* - * Triggering the work queue here is just an insurance - * policy in case the syscall exit notify handler - * doesn't run soon enough or ends up running on the - * wrong CPU (can happen when audit sleeps) - */ - mce_schedule_work(); - return; - } - -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Without APIC do not notify. The event will be picked - * up eventually. - */ - if (!cpu_has_apic) - return; - - /* - * When interrupts are disabled we cannot use - * kernel services safely. Trigger an self interrupt - * through the APIC to instead do the notification - * after interrupts are reenabled again. - */ - apic->send_IPI_self(MCE_SELF_VECTOR); - - /* - * Wait for idle afterwards again so that we don't leave the - * APIC in a non idle state because the normal APIC writes - * cannot exclude us. 
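/*
 * The mce_ring above is a classic single-producer/single-consumer ring
 * that deliberately keeps one slot empty: "next == start" means full
 * and "start == end" means empty, so producer and consumer never write
 * the same index. A self-contained userspace sketch of the same scheme
 * (single-threaded here; the kernel version adds wmb() so the payload
 * is visible before the published end index):
 */
#include <stdio.h>

#define RING_SIZE 16			/* usable capacity is RING_SIZE - 1 */

struct ring {
	unsigned short start, end;
	unsigned long slot[RING_SIZE];
};

static int ring_add(struct ring *r, unsigned long v)	/* producer only */
{
	unsigned next = (r->end + 1) % RING_SIZE;

	if (next == r->start)
		return -1;	/* full: drop, as the MCE path does */
	r->slot[r->end] = v;
	r->end = next;		/* publish after the payload write */
	return 0;
}

static int ring_get(struct ring *r, unsigned long *v)	/* consumer only */
{
	if (r->start == r->end)
		return 0;	/* empty */
	*v = r->slot[r->start];
	r->start = (r->start + 1) % RING_SIZE;
	return 1;
}

int main(void)
{
	struct ring r = { 0, 0, { 0 } };
	unsigned long v;

	ring_add(&r, 0x1234);	/* e.g. a page frame number */
	while (ring_get(&r, &v))
		printf("pfn %#lx\n", v);
	return 0;
}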
- */ - apic_wait_icr_idle(); -#endif -} - -DEFINE_PER_CPU(unsigned, mce_poll_count); - -/* - * Poll for corrected events or events that happened before reset. - * Those are just logged through /dev/mcelog. - * - * This is executed in standard interrupt context. - * - * Note: spec recommends to panic for fatal unsignalled - * errors here. However this would be quite problematic -- - * we would need to reimplement the Monarch handling and - * it would mess up the exclusion between exception handler - * and poll hander -- * so we skip this for now. - * These cases should not happen anyways, or only when the CPU - * is already totally * confused. In this case it's likely it will - * not fully execute the machine check handler either. - */ -void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) -{ - struct mce m; - int i; - - __get_cpu_var(mce_poll_count)++; - - mce_setup(&m); - - m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); - for (i = 0; i < banks; i++) { - if (!bank[i] || !test_bit(i, *b)) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - m.tsc = 0; - - barrier(); - m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); - if (!(m.status & MCI_STATUS_VAL)) - continue; - - /* - * Uncorrected or signalled events are handled by the exception - * handler when it is enabled, so don't process those here. - * - * TBD do the same check for MCI_STATUS_EN here? - */ - if (!(flags & MCP_UC) && - (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) - continue; - - if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); - if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); - - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; - /* - * Don't get the IP here because it's unlikely to - * have anything to do with the actual error location. - */ - if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { - mce_log(&m); - add_taint(TAINT_MACHINE_CHECK); - } - - /* - * Clear state for this bank. - */ - mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } - - /* - * Don't clear MCG_STATUS here because it's only defined for - * exceptions. - */ - - sync_core(); -} -EXPORT_SYMBOL_GPL(machine_check_poll); - -/* - * Do a quick check if any of the events requires a panic. - * This decides if we keep the events around or clear them. - */ -static int mce_no_way_out(struct mce *m, char **msg) -{ - int i; - - for (i = 0; i < banks; i++) { - m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); - if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) - return 1; - } - return 0; -} - -/* - * Variable to establish order between CPUs while scanning. - * Each CPU spins initially until executing is equal its number. - */ -static atomic_t mce_executing; - -/* - * Defines order of CPUs on entry. First CPU becomes Monarch. - */ -static atomic_t mce_callin; - -/* - * Check if a timeout waiting for other CPUs happened. - */ -static int mce_timed_out(u64 *t) -{ - /* - * The others already did panic for some reason. - * Bail out like in a timeout. - * rmb() to tell the compiler that system_state - * might have been modified by someone else. - */ - rmb(); - if (atomic_read(&mce_paniced)) - wait_for_panic(); - if (!monarch_timeout) - goto out; - if ((s64)*t < SPINUNIT) { - /* CHECKME: Make panic default for 1 too? */ - if (tolerant < 1) - mce_panic("Timeout synchronizing machine check over CPUs", - NULL, NULL); - cpu_missing = 1; - return 1; - } - *t -= SPINUNIT; -out: - touch_nmi_watchdog(); - return 0; -} - -/* - * The Monarch's reign. 
The Monarch is the CPU who entered - * the machine check handler first. It waits for the others to - * raise the exception too and then grades them. When any - * error is fatal panic. Only then let the others continue. - * - * The other CPUs entering the MCE handler will be controlled by the - * Monarch. They are called Subjects. - * - * This way we prevent any potential data corruption in a unrecoverable case - * and also makes sure always all CPU's errors are examined. - * - * Also this detects the case of an machine check event coming from outer - * space (not detected by any CPUs) In this case some external agent wants - * us to shut down, so panic too. - * - * The other CPUs might still decide to panic if the handler happens - * in a unrecoverable place, but in this case the system is in a semi-stable - * state and won't corrupt anything by itself. It's ok to let the others - * continue for a bit first. - * - * All the spin loops have timeouts; when a timeout happens a CPU - * typically elects itself to be Monarch. - */ -static void mce_reign(void) -{ - int cpu; - struct mce *m = NULL; - int global_worst = 0; - char *msg = NULL; - char *nmsg = NULL; - - /* - * This CPU is the Monarch and the other CPUs have run - * through their handlers. - * Grade the severity of the errors of all the CPUs. - */ - for_each_possible_cpu(cpu) { - int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, - &nmsg); - if (severity > global_worst) { - msg = nmsg; - global_worst = severity; - m = &per_cpu(mces_seen, cpu); - } - } - - /* - * Cannot recover? Panic here then. - * This dumps all the mces in the log buffer and stops the - * other CPUs. - */ - if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) - mce_panic("Fatal Machine check", m, msg); - - /* - * For UC somewhere we let the CPU who detects it handle it. - * Also must let continue the others, otherwise the handling - * CPU could deadlock on a lock. - */ - - /* - * No machine check event found. Must be some external - * source or one CPU is hung. Panic. - */ - if (!m && tolerant < 3) - mce_panic("Machine check from unknown source", NULL, NULL); - - /* - * Now clear all the mces_seen so that they don't reappear on - * the next mce. - */ - for_each_possible_cpu(cpu) - memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); -} - -static atomic_t global_nwo; - -/* - * Start of Monarch synchronization. This waits until all CPUs have - * entered the exception handler and then determines if any of them - * saw a fatal event that requires panic. Then it executes them - * in the entry order. - * TBD double check parallel CPU hotunplug - */ -static int mce_start(int no_way_out, int *order) -{ - int nwo; - int cpus = num_online_cpus(); - u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; - - if (!timeout) { - *order = -1; - return no_way_out; - } - - atomic_add(no_way_out, &global_nwo); - - /* - * Wait for everyone. - */ - while (atomic_read(&mce_callin) != cpus) { - if (mce_timed_out(&timeout)) { - atomic_set(&global_nwo, 0); - *order = -1; - return no_way_out; - } - ndelay(SPINUNIT); - } - - /* - * Cache the global no_way_out state. - */ - nwo = atomic_read(&global_nwo); - - /* - * Monarch starts executing now, the others wait. - */ - if (*order == 1) { - atomic_set(&mce_executing, 1); - return nwo; - } - - /* - * Now start the scanning loop one by one - * in the original callin order. - * This way when there are any shared banks it will - * be only seen by one CPU before cleared, avoiding duplicates. 
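/*
 * A minimal pthread model of the Monarch election in mce_start() above:
 * every CPU atomically takes a ticket from "callin"; ticket 1 becomes
 * Monarch, the rest are Subjects that spin until "executing" reaches
 * their ticket, giving the strict entry-order bank scan the comment
 * describes. (No timeouts here; the real code spins under
 * mce_timed_out() and can elect a new Monarch.) Build with -pthread.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int callin, executing;

static void *mce_entry_point(void *arg)
{
	int order = atomic_fetch_add(&callin, 1) + 1;	/* 1..NCPUS */

	(void)arg;
	if (order == 1)
		atomic_store(&executing, 1);	/* Monarch scans first */
	else
		while (atomic_load(&executing) < order)
			;			/* Subject waits its turn */

	printf("cpu with ticket %d scans its banks\n", order);
	atomic_fetch_add(&executing, 1);	/* let the next one run */
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];
	int i;

	for (i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, mce_entry_point, NULL);
	for (i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}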
- */ - while (atomic_read(&mce_executing) < *order) { - if (mce_timed_out(&timeout)) { - atomic_set(&global_nwo, 0); - *order = -1; - return no_way_out; - } - ndelay(SPINUNIT); - } - return nwo; -} - -/* - * Synchronize between CPUs after main scanning loop. - * This invokes the bulk of the Monarch processing. - */ -static int mce_end(int order) -{ - int ret = -1; - u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; - - if (!timeout) - goto reset; - if (order < 0) - goto reset; - - /* - * Allow others to run. - */ - atomic_inc(&mce_executing); - - if (order == 1) { - /* CHECKME: Can this race with a parallel hotplug? */ - int cpus = num_online_cpus(); - - /* - * Monarch: Wait for everyone to go through their scanning - * loops. - */ - while (atomic_read(&mce_executing) <= cpus) { - if (mce_timed_out(&timeout)) - goto reset; - ndelay(SPINUNIT); - } - - mce_reign(); - barrier(); - ret = 0; - } else { - /* - * Subject: Wait for Monarch to finish. - */ - while (atomic_read(&mce_executing) != 0) { - if (mce_timed_out(&timeout)) - goto reset; - ndelay(SPINUNIT); - } - - /* - * Don't reset anything. That's done by the Monarch. - */ - return 0; - } - - /* - * Reset all global state. - */ -reset: - atomic_set(&global_nwo, 0); - atomic_set(&mce_callin, 0); - barrier(); - - /* - * Let others run again. - */ - atomic_set(&mce_executing, 0); - return ret; -} - -/* - * Check if the address reported by the CPU is in a format we can parse. - * It would be possible to add code for most other cases, but all would - * be somewhat complicated (e.g. segment offset would require an instruction - * parser). So only support physical addresses upto page granuality for now. - */ -static int mce_usable_address(struct mce *m) -{ - if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) - return 0; - if ((m->misc & 0x3f) > PAGE_SHIFT) - return 0; - if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS) - return 0; - return 1; -} - -static void mce_clear_state(unsigned long *toclear) -{ - int i; - - for (i = 0; i < banks; i++) { - if (test_bit(i, toclear)) - mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } -} - -/* - * The actual machine check handler. This only handles real - * exceptions when something got corrupted coming in through int 18. - * - * This is executed in NMI context not subject to normal locking rules. This - * implies that most kernel services cannot be safely used. Don't even - * think about putting a printk in there! - * - * On Intel systems this is entered on all CPUs in parallel through - * MCE broadcast. However some CPUs might be broken beyond repair, - * so be always careful when synchronizing with others. - */ -void do_machine_check(struct pt_regs *regs, long error_code) -{ - struct mce m, *final; - int i; - int worst = 0; - int severity; - /* - * Establish sequential order between the CPUs entering the machine - * check handler. - */ - int order; - - /* - * If no_way_out gets set, there is no safe way to recover from this - * MCE. If tolerant is cranked up, we'll try anyway. - */ - int no_way_out = 0; - /* - * If kill_it gets set, there might be a way to recover from this - * error. 
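/*
 * The mce_usable_address() test above decodes the MISC register layout:
 * bits 5:0 give the least significant valid address bit (granularity)
 * and bits 8:6 the address mode. A small standalone decoder with the
 * same field arithmetic; the MCM_ADDR_PHYS value is an assumption
 * restated here for the demo.
 */
#include <stdint.h>
#include <stdio.h>

#define MCM_ADDR_PHYS 2		/* assumed: physical-address mode */
#define PAGE_SHIFT    12

static int usable_address(uint64_t misc)
{
	unsigned lsb  = misc & 0x3f;	  /* address granularity in bits */
	unsigned mode = (misc >> 6) & 7;  /* how to interpret the address */

	return lsb <= PAGE_SHIFT && mode == MCM_ADDR_PHYS;
}

int main(void)
{
	/* page-granular physical address: recoverable */
	printf("%d\n", usable_address((MCM_ADDR_PHYS << 6) | PAGE_SHIFT));
	/* other modes (e.g. segment offset) are rejected */
	printf("%d\n", usable_address((5ull << 6) | PAGE_SHIFT));
	return 0;
}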
- */ - int kill_it = 0; - DECLARE_BITMAP(toclear, MAX_NR_BANKS); - char *msg = "Unknown"; - - atomic_inc(&mce_entry); - - __get_cpu_var(mce_exception_count)++; - - if (notify_die(DIE_NMI, "machine check", regs, error_code, - 18, SIGKILL) == NOTIFY_STOP) - goto out; - if (!banks) - goto out; - - order = atomic_add_return(1, &mce_callin); - mce_setup(&m); - - m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); - no_way_out = mce_no_way_out(&m, &msg); - - final = &__get_cpu_var(mces_seen); - *final = m; - - barrier(); - - /* - * When no restart IP must always kill or panic. - */ - if (!(m.mcgstatus & MCG_STATUS_RIPV)) - kill_it = 1; - - /* - * Go through all the banks in exclusion of the other CPUs. - * This way we don't report duplicated events on shared banks - * because the first one to see it will clear it. - */ - no_way_out = mce_start(no_way_out, &order); - for (i = 0; i < banks; i++) { - __clear_bit(i, toclear); - if (!bank[i]) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - - m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); - if ((m.status & MCI_STATUS_VAL) == 0) - continue; - - /* - * Non uncorrected or non signaled errors are handled by - * machine_check_poll. Leave them alone, unless this panics. - */ - if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && - !no_way_out) - continue; - - /* - * Set taint even when machine check was not enabled. - */ - add_taint(TAINT_MACHINE_CHECK); - - severity = mce_severity(&m, tolerant, NULL); - - /* - * When machine check was for corrected handler don't touch, - * unless we're panicing. - */ - if (severity == MCE_KEEP_SEVERITY && !no_way_out) - continue; - __set_bit(i, toclear); - if (severity == MCE_NO_SEVERITY) { - /* - * Machine check event was not enabled. Clear, but - * ignore. - */ - continue; - } - - /* - * Kill on action required. - */ - if (severity == MCE_AR_SEVERITY) - kill_it = 1; - - if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); - if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); - - /* - * Action optional error. Queue address for later processing. - * When the ring overflows we just ignore the AO error. - * RED-PEN add some logging mechanism when - * usable_address or mce_add_ring fails. - * RED-PEN don't ignore overflow for tolerant == 0 - */ - if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) - mce_ring_add(m.addr >> PAGE_SHIFT); - - mce_get_rip(&m, regs); - mce_log(&m); - - if (severity > worst) { - *final = m; - worst = severity; - } - } - - if (!no_way_out) - mce_clear_state(toclear); - - /* - * Do most of the synchronization with other CPUs. - * When there's any problem use only local no_way_out state. - */ - if (mce_end(order) < 0) - no_way_out = worst >= MCE_PANIC_SEVERITY; - - /* - * If we have decided that we just CAN'T continue, and the user - * has not set tolerant to an insane level, give up and die. - * - * This is mainly used in the case when the system doesn't - * support MCE broadcasting or it has been disabled. - */ - if (no_way_out && tolerant < 3) - mce_panic("Fatal machine check on current CPU", final, msg); - - /* - * If the error seems to be unrecoverable, something should be - * done. Try to kill as little as possible. If we can kill just - * one task, do that. If the user has set the tolerance very - * high, don't try to do anything at all. 
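/*
 * A side-effect-free sketch of the exit policy the surrounding handler
 * code implements, keyed by the "tolerant" levels documented at the top
 * of the file: panic when there is no way out, SIGBUS when a kill might
 * suffice, and at tolerant == 3 never do either.
 */
#include <stdio.h>

static const char *mce_policy(int no_way_out, int kill_it, int tolerant)
{
	if (no_way_out && tolerant < 3)
		return "panic";			/* cannot safely continue */
	if (kill_it && tolerant < 3)
		return "SIGBUS to current task";
	return "log and continue";		/* tolerant==3: testing only */
}

int main(void)
{
	printf("%s\n", mce_policy(1, 1, 1));	/* panic */
	printf("%s\n", mce_policy(0, 1, 2));	/* SIGBUS */
	printf("%s\n", mce_policy(1, 1, 3));	/* log and continue */
	return 0;
}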
- */ - - if (kill_it && tolerant < 3) - force_sig(SIGBUS, current); - - /* notify userspace ASAP */ - set_thread_flag(TIF_MCE_NOTIFY); - - if (worst > 0) - mce_report_event(regs); - mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); -out: - atomic_dec(&mce_entry); - sync_core(); -} -EXPORT_SYMBOL_GPL(do_machine_check); - -/* dummy to break dependency. actual code is in mm/memory-failure.c */ -void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) -{ - printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); -} - -/* - * Called after mce notification in process context. This code - * is allowed to sleep. Call the high level VM handler to process - * any corrupted pages. - * Assume that the work queue code only calls this one at a time - * per CPU. - * Note we don't disable preemption, so this code might run on the wrong - * CPU. In this case the event is picked up by the scheduled work queue. - * This is merely a fast path to expedite processing in some common - * cases. - */ -void mce_notify_process(void) -{ - unsigned long pfn; - mce_notify_irq(); - while (mce_ring_get(&pfn)) - memory_failure(pfn, MCE_VECTOR); -} - -static void mce_process_work(struct work_struct *dummy) -{ - mce_notify_process(); -} - -#ifdef CONFIG_X86_MCE_INTEL -/*** - * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog - * @cpu: The CPU on which the event occurred. - * @status: Event status information - * - * This function should be called by the thermal interrupt after the - * event has been processed and the decision was made to log the event - * further. - * - * The status parameter will be saved to the 'status' field of 'struct mce' - * and historically has been the register value of the - * MSR_IA32_THERMAL_STATUS (Intel) msr. - */ -void mce_log_therm_throt_event(__u64 status) -{ - struct mce m; - - mce_setup(&m); - m.bank = MCE_THERMAL_BANK; - m.status = status; - mce_log(&m); -} -#endif /* CONFIG_X86_MCE_INTEL */ - -/* - * Periodic polling timer for "silent" machine check errors. If the - * poller finds an MCE, poll 2x faster. When the poller finds no more - * errors, poll 2x slower (up to check_interval seconds). - */ -static int check_interval = 5 * 60; /* 5 minutes */ - -static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct timer_list, mce_timer); - -static void mcheck_timer(unsigned long data) -{ - struct timer_list *t = &per_cpu(mce_timer, data); - int *n; - - WARN_ON(smp_processor_id() != data); - - if (mce_available(¤t_cpu_data)) { - machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); - } - - /* - * Alert userspace if needed. If we logged an MCE, reduce the - * polling interval, otherwise increase the polling interval. - */ - n = &__get_cpu_var(next_interval); - if (mce_notify_irq()) - *n = max(*n/2, HZ/100); - else - *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); - - t->expires = jiffies + *n; - add_timer(t); -} - -static void mce_do_trigger(struct work_struct *work) -{ - call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); -} - -static DECLARE_WORK(mce_trigger_work, mce_do_trigger); - -/* - * Notify the user(s) about new machine check events. - * Can be called from interrupt context, but not from machine check/NMI - * context. 
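/*
 * The polling timer above adapts its period: halve it (down to a floor)
 * whenever an event was found, double it (up to the configured ceiling)
 * when the poll came back clean. A sketch of that feedback loop,
 * assuming HZ = 100 and the 5-minute check_interval default from the
 * code (round_jiffies_relative() is omitted for brevity):
 */
#include <stdio.h>

#define HZ 100				/* assumed tick rate for the demo */
static const int ceiling = 5 * 60 * HZ;	/* check_interval * HZ */
static const int floor_j = HZ / 100;	/* from max(n/2, HZ/100) */

static int next_interval(int n, int found_event)
{
	if (found_event)
		return n / 2 > floor_j ? n / 2 : floor_j;
	return n * 2 < ceiling ? n * 2 : ceiling;
}

int main(void)
{
	int n = ceiling;

	n = next_interval(n, 1);	/* storm: poll twice as fast */
	n = next_interval(n, 1);
	printf("after two events: %d jiffies\n", n);
	n = next_interval(n, 0);	/* quiet again: back off */
	printf("after a clean poll: %d jiffies\n", n);
	return 0;
}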
- */ -int mce_notify_irq(void) -{ - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - - clear_thread_flag(TIF_MCE_NOTIFY); - - if (test_and_clear_bit(0, ¬ify_user)) { - wake_up_interruptible(&mce_wait); - - /* - * There is no risk of missing notifications because - * work_pending is always cleared before the function is - * executed. - */ - if (trigger[0] && !work_pending(&mce_trigger_work)) - schedule_work(&mce_trigger_work); - - if (__ratelimit(&ratelimit)) - printk(KERN_INFO "Machine check events logged\n"); - - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(mce_notify_irq); - -/* - * Initialize Machine Checks for a CPU. - */ -static int mce_cap_init(void) -{ - unsigned b; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - - b = cap & MCG_BANKCNT_MASK; - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - printk(KERN_WARNING - "MCE: Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); - b = MAX_NR_BANKS; - } - - /* Don't support asymmetric configurations today */ - WARN_ON(banks != 0 && b != banks); - banks = b; - if (!bank) { - bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); - if (!bank) - return -ENOMEM; - memset(bank, 0xff, banks * sizeof(u64)); - } - - /* Use accurate RIP reporting if available. */ - if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) - rip_msr = MSR_IA32_MCG_EIP; - - if (cap & MCG_SER_P) - mce_ser = 1; - - return 0; -} - -static void mce_init(void) -{ - mce_banks_t all_banks; - u64 cap; - int i; - - /* - * Log the machine checks left over from the previous reset. - */ - bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); - - set_in_cr4(X86_CR4_MCE); - - rdmsrl(MSR_IA32_MCG_CAP, cap); - if (cap & MCG_CTL_P) - wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - - for (i = 0; i < banks; i++) { - if (skip_bank_init(i)) - continue; - wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } -} - -/* Add per CPU specific workarounds here */ -static void mce_cpu_quirks(struct cpuinfo_x86 *c) -{ - /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && banks > 4) { - /* - * disable GART TBL walk error reporting, which - * trips off incorrectly with the IOMMU & 3ware - * & Cerberus: - */ - clear_bit(10, (unsigned long *)&bank[4]); - } - if (c->x86 <= 17 && mce_bootlog < 0) { - /* - * Lots of broken BIOS around that don't clear them - * by default and leave crap in there. Don't log: - */ - mce_bootlog = 0; - } - /* - * Various K7s with broken bank 0 around. Always disable - * by default. - */ - if (c->x86 == 6) - bank[0] = 0; - } - - if (c->x86_vendor == X86_VENDOR_INTEL) { - /* - * SDM documents that on family 6 bank 0 should not be written - * because it aliases to another special BIOS controlled - * register. - * But it's not aliased anymore on model 0x1a+ - * Don't ignore bank 0 completely because there could be a - * valid event later, merely don't write CTL0. - */ - - if (c->x86 == 6 && c->x86_model < 0x1A) - __set_bit(0, &dont_init_banks); - - /* - * All newer Intel systems support MCE broadcasting. Enable - * synchronization with a one second timeout. 
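/*
 * mce_cap_init() above unpacks MCG_CAP once at boot. A standalone
 * decoder with the same fields; the bit positions (bank count in bits
 * 7:0, CTL_P bit 8, EXT_P bit 9, extended-register count in bits 23:16,
 * SER_P bit 24) are restated here as assumptions, and the sample
 * register value is invented.
 */
#include <stdint.h>
#include <stdio.h>

#define MCG_BANKCNT_MASK 0xffull
#define MCG_CTL_P        (1ull << 8)
#define MCG_EXT_P        (1ull << 9)
#define MCG_SER_P        (1ull << 24)
#define MCG_EXT_CNT(c)   (((c) >> 16) & 0xff)

int main(void)
{
	uint64_t cap = 0x1090306;	/* invented sample register value */

	printf("banks: %llu\n", (unsigned long long)(cap & MCG_BANKCNT_MASK));
	printf("MCG_CTL present: %d\n", !!(cap & MCG_CTL_P));
	printf("extended regs: %llu\n",
	       (unsigned long long)(cap & MCG_EXT_P ? MCG_EXT_CNT(cap) : 0));
	printf("software error recovery: %d\n", !!(cap & MCG_SER_P));
	return 0;
}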
- */ - if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && - monarch_timeout < 0) - monarch_timeout = USEC_PER_SEC; - } - if (monarch_timeout < 0) - monarch_timeout = 0; - if (mce_bootlog != 0) - mce_panic_timeout = 30; -} - -static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) -{ - if (c->x86 != 5) - return; - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - if (mce_p5_enabled()) - intel_p5_mcheck_init(c); - break; - case X86_VENDOR_CENTAUR: - winchip_mcheck_init(c); - break; - } -} - -static void mce_cpu_features(struct cpuinfo_x86 *c) -{ - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - mce_intel_feature_init(c); - break; - case X86_VENDOR_AMD: - mce_amd_feature_init(c); - break; - default: - break; - } -} - -static void mce_init_timer(void) -{ - struct timer_list *t = &__get_cpu_var(mce_timer); - int *n = &__get_cpu_var(next_interval); - - if (mce_ignore_ce) - return; - - *n = check_interval * HZ; - if (!*n) - return; - setup_timer(t, mcheck_timer, smp_processor_id()); - t->expires = round_jiffies(jiffies + *n); - add_timer(t); -} - -/* - * Called for each booted CPU to set up machine checks. - * Must be called with preempt off: - */ -void __cpuinit mcheck_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled) - return; - - mce_ancient_init(c); - - if (!mce_available(c)) - return; - - if (mce_cap_init() < 0) { - mce_disabled = 1; - return; - } - mce_cpu_quirks(c); - - machine_check_vector = do_machine_check; - - mce_init(); - mce_cpu_features(c); - mce_init_timer(); - INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); -} - -/* - * Character device to read and clear the MCE log. - */ - -static DEFINE_SPINLOCK(mce_state_lock); -static int open_count; /* #times opened */ -static int open_exclu; /* already open exclusive? */ - -static int mce_open(struct inode *inode, struct file *file) -{ - spin_lock(&mce_state_lock); - - if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { - spin_unlock(&mce_state_lock); - - return -EBUSY; - } - - if (file->f_flags & O_EXCL) - open_exclu = 1; - open_count++; - - spin_unlock(&mce_state_lock); - - return nonseekable_open(inode, file); -} - -static int mce_release(struct inode *inode, struct file *file) -{ - spin_lock(&mce_state_lock); - - open_count--; - open_exclu = 0; - - spin_unlock(&mce_state_lock); - - return 0; -} - -static void collect_tscs(void *data) -{ - unsigned long *cpu_tsc = (unsigned long *)data; - - rdtscll(cpu_tsc[smp_processor_id()]); -} - -static DEFINE_MUTEX(mce_read_mutex); - -static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, - loff_t *off) -{ - char __user *buf = ubuf; - unsigned long *cpu_tsc; - unsigned prev, next; - int i, err; - - cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); - if (!cpu_tsc) - return -ENOMEM; - - mutex_lock(&mce_read_mutex); - next = rcu_dereference(mcelog.next); - - /* Only supports full reads right now */ - if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { - mutex_unlock(&mce_read_mutex); - kfree(cpu_tsc); - - return -EINVAL; - } - - err = 0; - prev = 0; - do { - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - - while (!mcelog.entry[i].finished) { - if (time_after_eq(jiffies, start + 2)) { - memset(mcelog.entry + i, 0, - sizeof(struct mce)); - goto timeout; - } - cpu_relax(); - } - smp_rmb(); - err |= copy_to_user(buf, mcelog.entry + i, - sizeof(struct mce)); - buf += sizeof(struct mce); -timeout: - ; - } - - memset(mcelog.entry + prev, 0, - (next - prev) * sizeof(struct mce)); - prev = next; - next = 
cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); - - synchronize_sched(); - - /* - * Collect entries that were still getting written before the - * synchronize. - */ - on_each_cpu(collect_tscs, cpu_tsc, 1); - - for (i = next; i < MCE_LOG_LEN; i++) { - if (mcelog.entry[i].finished && - mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { - err |= copy_to_user(buf, mcelog.entry+i, - sizeof(struct mce)); - smp_rmb(); - buf += sizeof(struct mce); - memset(&mcelog.entry[i], 0, sizeof(struct mce)); - } - } - mutex_unlock(&mce_read_mutex); - kfree(cpu_tsc); - - return err ? -EFAULT : buf - ubuf; -} - -static unsigned int mce_poll(struct file *file, poll_table *wait) -{ - poll_wait(file, &mce_wait, wait); - if (rcu_dereference(mcelog.next)) - return POLLIN | POLLRDNORM; - return 0; -} - -static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) -{ - int __user *p = (int __user *)arg; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - switch (cmd) { - case MCE_GET_RECORD_LEN: - return put_user(sizeof(struct mce), p); - case MCE_GET_LOG_LEN: - return put_user(MCE_LOG_LEN, p); - case MCE_GETCLEAR_FLAGS: { - unsigned flags; - - do { - flags = mcelog.flags; - } while (cmpxchg(&mcelog.flags, flags, 0) != flags); - - return put_user(flags, p); - } - default: - return -ENOTTY; - } -} - -/* Modified in mce-inject.c, so not static or const */ -struct file_operations mce_chrdev_ops = { - .open = mce_open, - .release = mce_release, - .read = mce_read, - .poll = mce_poll, - .unlocked_ioctl = mce_ioctl, -}; -EXPORT_SYMBOL_GPL(mce_chrdev_ops); - -static struct miscdevice mce_log_device = { - MISC_MCELOG_MINOR, - "mcelog", - &mce_chrdev_ops, -}; - -/* - * mce=off Disables machine check - * mce=no_cmci Disables CMCI - * mce=dont_log_ce Clears corrected events silently, no log created for CEs. - * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. - * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) - * monarchtimeout is how long to wait for other CPUs on machine - * check, or 0 to not wait - * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. - * mce=nobootlog Don't log MCEs from before booting. - */ -static int __init mcheck_enable(char *str) -{ - if (*str == 0) - enable_p5_mce(); - if (*str == '=') - str++; - if (!strcmp(str, "off")) - mce_disabled = 1; - else if (!strcmp(str, "no_cmci")) - mce_cmci_disabled = 1; - else if (!strcmp(str, "dont_log_ce")) - mce_dont_log_ce = 1; - else if (!strcmp(str, "ignore_ce")) - mce_ignore_ce = 1; - else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) - mce_bootlog = (str[0] == 'b'); - else if (isdigit(str[0])) { - get_option(&str, &tolerant); - if (*str == ',') { - ++str; - get_option(&str, &monarch_timeout); - } - } else { - printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", - str); - return 0; - } - return 1; -} -__setup("mce", mcheck_enable); - -/* - * Sysfs support - */ - -/* - * Disable machine checks on suspend and shutdown. We can't really handle - * them later. - */ -static int mce_disable(void) -{ - int i; - - for (i = 0; i < banks; i++) { - if (!skip_bank_init(i)) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); - } - return 0; -} - -static int mce_suspend(struct sys_device *dev, pm_message_t state) -{ - return mce_disable(); -} - -static int mce_shutdown(struct sys_device *dev) -{ - return mce_disable(); -} - -/* - * On resume clear all MCE state. Don't want to see leftovers from the BIOS. 
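/*
 * The mce= parser above accepts either keywords or
 * "TOLERANCELEVEL[,monarchtimeout]". A standalone sketch of just the
 * numeric branch, to make the accepted grammar concrete (the keyword
 * side effects are omitted, and strtol stands in for get_option):
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int tolerant = 1, monarch_timeout = -1;

static int parse_mce(const char *str)
{
	char *end;

	if (!strcmp(str, "off") || !strcmp(str, "no_cmci") ||
	    !strcmp(str, "dont_log_ce") || !strcmp(str, "ignore_ce") ||
	    !strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		return 1;		/* keyword forms, handled elsewhere */

	tolerant = (int)strtol(str, &end, 10);
	if (end == str)
		return 0;		/* not a number: argument ignored */
	if (*end == ',')
		monarch_timeout = (int)strtol(end + 1, NULL, 10);
	return 1;
}

int main(void)
{
	parse_mce("2,30");	/* as in "mce=2,30" on the command line */
	printf("tolerant=%d monarch_timeout=%dus\n", tolerant, monarch_timeout);
	return 0;
}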
- * Only one CPU is active at this time, the others get re-added later using - * CPU hotplug: - */ -static int mce_resume(struct sys_device *dev) -{ - mce_init(); - mce_cpu_features(¤t_cpu_data); - - return 0; -} - -static void mce_cpu_restart(void *data) -{ - del_timer_sync(&__get_cpu_var(mce_timer)); - if (mce_available(¤t_cpu_data)) - mce_init(); - mce_init_timer(); -} - -/* Reinit MCEs after user configuration changes */ -static void mce_restart(void) -{ - on_each_cpu(mce_cpu_restart, NULL, 1); -} - -static struct sysdev_class mce_sysclass = { - .suspend = mce_suspend, - .shutdown = mce_shutdown, - .resume = mce_resume, - .name = "machinecheck", -}; - -DEFINE_PER_CPU(struct sys_device, mce_dev); - -__cpuinitdata -void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); - -static struct sysdev_attribute *bank_attrs; - -static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, - char *buf) -{ - u64 b = bank[attr - bank_attrs]; - - return sprintf(buf, "%llx\n", b); -} - -static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf, size_t size) -{ - u64 new; - - if (strict_strtoull(buf, 0, &new) < 0) - return -EINVAL; - - bank[attr - bank_attrs] = new; - mce_restart(); - - return size; -} - -static ssize_t -show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) -{ - strcpy(buf, trigger); - strcat(buf, "\n"); - return strlen(trigger) + 1; -} - -static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf, size_t siz) -{ - char *p; - int len; - - strncpy(trigger, buf, sizeof(trigger)); - trigger[sizeof(trigger)-1] = 0; - len = strlen(trigger); - p = strchr(trigger, '\n'); - - if (*p) - *p = 0; - - return len; -} - -static ssize_t store_int_with_restart(struct sys_device *s, - struct sysdev_attribute *attr, - const char *buf, size_t size) -{ - ssize_t ret = sysdev_store_int(s, attr, buf, size); - mce_restart(); - return ret; -} - -static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); -static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); -static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); - -static struct sysdev_ext_attribute attr_check_interval = { - _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, - store_int_with_restart), - &check_interval -}; - -static struct sysdev_attribute *mce_attrs[] = { - &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, - &attr_monarch_timeout.attr, - NULL -}; - -static cpumask_var_t mce_dev_initialized; - -/* Per cpu sysdev init. 
All of the cpus still share the same ctrl bank: */ -static __cpuinit int mce_create_device(unsigned int cpu) -{ - int err; - int i; - - if (!mce_available(&boot_cpu_data)) - return -EIO; - - memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject)); - per_cpu(mce_dev, cpu).id = cpu; - per_cpu(mce_dev, cpu).cls = &mce_sysclass; - - err = sysdev_register(&per_cpu(mce_dev, cpu)); - if (err) - return err; - - for (i = 0; mce_attrs[i]; i++) { - err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); - if (err) - goto error; - } - for (i = 0; i < banks; i++) { - err = sysdev_create_file(&per_cpu(mce_dev, cpu), - &bank_attrs[i]); - if (err) - goto error2; - } - cpumask_set_cpu(cpu, mce_dev_initialized); - - return 0; -error2: - while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); -error: - while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); - - sysdev_unregister(&per_cpu(mce_dev, cpu)); - - return err; -} - -static __cpuinit void mce_remove_device(unsigned int cpu) -{ - int i; - - if (!cpumask_test_cpu(cpu, mce_dev_initialized)) - return; - - for (i = 0; mce_attrs[i]; i++) - sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); - - for (i = 0; i < banks; i++) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); - - sysdev_unregister(&per_cpu(mce_dev, cpu)); - cpumask_clear_cpu(cpu, mce_dev_initialized); -} - -/* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) -{ - unsigned long action = *(unsigned long *)h; - int i; - - if (!mce_available(¤t_cpu_data)) - return; - if (!(action & CPU_TASKS_FROZEN)) - cmci_clear(); - for (i = 0; i < banks; i++) { - if (!skip_bank_init(i)) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); - } -} - -static void mce_reenable_cpu(void *h) -{ - unsigned long action = *(unsigned long *)h; - int i; - - if (!mce_available(¤t_cpu_data)) - return; - - if (!(action & CPU_TASKS_FROZEN)) - cmci_reenable(); - for (i = 0; i < banks; i++) { - if (!skip_bank_init(i)) - wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); - } -} - -/* Get notified when a cpu comes on/off. Be hotplug friendly. 
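/*
 * mce_create_device() above uses the kernel's usual unwind idiom: on
 * failure at step N, walk back through steps N-1..0 before returning,
 * so nothing registered is leaked. A generic illustration of that
 * shape; the numbered resources are stand-ins for the sysdev files.
 */
#include <stdio.h>

static int setup_one(int i)
{
	printf("setup %d\n", i);
	return i == 3 ? -1 : 0;		/* simulate failure at step 3 */
}

static void teardown_one(int i)
{
	printf("teardown %d\n", i);
}

static int setup_all(int n)
{
	int i, err = 0;

	for (i = 0; i < n; i++) {
		err = setup_one(i);
		if (err)
			goto unwind;
	}
	return 0;
unwind:
	while (--i >= 0)	/* release only what was acquired */
		teardown_one(i);
	return err;
}

int main(void)
{
	return setup_all(5) ? 1 : 0;
}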
*/ -static int __cpuinit -mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - mce_create_device(cpu); - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - mce_remove_device(cpu); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); - smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies(jiffies + - __get_cpu_var(next_interval)); - add_timer_on(t, cpu); - smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - break; - case CPU_POST_DEAD: - /* intentionally ignoring frozen here */ - cmci_rediscover(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block mce_cpu_notifier __cpuinitdata = { - .notifier_call = mce_cpu_callback, -}; - -static __init int mce_init_banks(void) -{ - int i; - - bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, - GFP_KERNEL); - if (!bank_attrs) - return -ENOMEM; - - for (i = 0; i < banks; i++) { - struct sysdev_attribute *a = &bank_attrs[i]; - - a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); - if (!a->attr.name) - goto nomem; - - a->attr.mode = 0644; - a->show = show_bank; - a->store = set_bank; - } - return 0; - -nomem: - while (--i >= 0) - kfree(bank_attrs[i].attr.name); - kfree(bank_attrs); - bank_attrs = NULL; - - return -ENOMEM; -} - -static __init int mce_init_device(void) -{ - int err; - int i = 0; - - if (!mce_available(&boot_cpu_data)) - return -EIO; - - alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); - - err = mce_init_banks(); - if (err) - return err; - - err = sysdev_class_register(&mce_sysclass); - if (err) - return err; - - for_each_online_cpu(i) { - err = mce_create_device(i); - if (err) - return err; - } - - register_hotcpu_notifier(&mce_cpu_notifier); - misc_register(&mce_log_device); - - return err; -} - -device_initcall(mce_init_device); - -#else /* CONFIG_X86_OLD_MCE: */ - -int nr_mce_banks; -EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ - -/* This has to be run for each processor */ -void mcheck_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled == 1) - return; - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - amd_mcheck_init(c); - break; - - case X86_VENDOR_INTEL: - if (c->x86 == 5) - intel_p5_mcheck_init(c); - if (c->x86 == 6) - intel_p6_mcheck_init(c); - if (c->x86 == 15) - intel_p4_mcheck_init(c); - break; - - case X86_VENDOR_CENTAUR: - if (c->x86 == 5) - winchip_mcheck_init(c); - break; - - default: - break; - } - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); -} - -static int __init mcheck_enable(char *str) -{ - mce_disabled = -1; - return 1; -} - -__setup("mce", mcheck_enable); - -#endif /* CONFIG_X86_OLD_MCE */ - -/* - * Old style boot options parsing. Only for compatibility. 
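/*
 * The hotplug callback above is a plain switch over notifier actions,
 * with DOWN_FAILED undoing exactly what DOWN_PREPARE did. A compact
 * model of that state machine, folding the FROZEN variants onto their
 * base events the way the code pairs its case labels:
 */
#include <stdio.h>

enum action { CPU_ONLINE, CPU_DEAD, CPU_DOWN_PREPARE, CPU_DOWN_FAILED };

static void cpu_event(enum action a, int cpu)
{
	switch (a) {
	case CPU_ONLINE:	/* create sysfs device, tell threshold code */
		printf("cpu%d: create mce device\n", cpu);
		break;
	case CPU_DEAD:		/* tear the device back down */
		printf("cpu%d: remove mce device\n", cpu);
		break;
	case CPU_DOWN_PREPARE:	/* quiesce: stop the timer, mask banks */
		printf("cpu%d: disable polling and banks\n", cpu);
		break;
	case CPU_DOWN_FAILED:	/* offline aborted: undo the quiesce */
		printf("cpu%d: re-enable polling and banks\n", cpu);
		break;
	}
}

int main(void)
{
	cpu_event(CPU_DOWN_PREPARE, 1);
	cpu_event(CPU_DOWN_FAILED, 1);	/* must mirror DOWN_PREPARE exactly */
	return 0;
}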
- */ -static int __init mcheck_disable(char *str) -{ - mce_disabled = 1; - return 1; -} -__setup("nomce", mcheck_disable); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce.h b/trunk/arch/x86/kernel/cpu/mcheck/mce.h index 84a552b458c8..ae9f628838f1 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce.h +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce.h @@ -1,38 +1,14 @@ #include #include -#ifdef CONFIG_X86_OLD_MCE void amd_mcheck_init(struct cpuinfo_x86 *c); void intel_p4_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); -#endif - -#ifdef CONFIG_X86_ANCIENT_MCE void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void intel_p6_mcheck_init(struct cpuinfo_x86 *c); void winchip_mcheck_init(struct cpuinfo_x86 *c); -extern int mce_p5_enable; -static inline int mce_p5_enabled(void) { return mce_p5_enable; } -static inline void enable_p5_mce(void) { mce_p5_enable = 1; } -#else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline int mce_p5_enabled(void) { return 0; } -static inline void enable_p5_mce(void) { } -#endif /* Call the installed machine check handler for this CPU setup. */ extern void (*machine_check_vector)(struct pt_regs *, long error_code); -#ifdef CONFIG_X86_OLD_MCE - extern int nr_mce_banks; -void intel_set_thermal_handler(void); - -#else - -static inline void intel_set_thermal_handler(void) { } - -#endif - -void intel_init_thermal(struct cpuinfo_x86 *c); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c new file mode 100644 index 000000000000..3552119b091d --- /dev/null +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -0,0 +1,76 @@ +/* + * mce.c - x86 Machine Check Exception Reporting + * (c) 2002 Alan Cox , Dave Jones + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +int mce_disabled; +int nr_mce_banks; + +EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ + +/* Handle unconfigured int18 (should never happen) */ +static void unexpected_machine_check(struct pt_regs *regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; + +/* This has to be run for each processor */ +void mcheck_init(struct cpuinfo_x86 *c) +{ + if (mce_disabled == 1) + return; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + if (c->x86 == 5) + intel_p5_mcheck_init(c); + if (c->x86 == 6) + intel_p6_mcheck_init(c); + if (c->x86 == 15) + intel_p4_mcheck_init(c); + break; + + case X86_VENDOR_CENTAUR: + if (c->x86 == 5) + winchip_mcheck_init(c); + break; + + default: + break; + } +} + +static int __init mcheck_disable(char *str) +{ + mce_disabled = 1; + return 1; +} + +static int __init mcheck_enable(char *str) +{ + mce_disabled = -1; + return 1; +} + +__setup("nomce", mcheck_disable); +__setup("mce", mcheck_enable); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c new file mode 100644 index 000000000000..289cc4815028 --- /dev/null +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -0,0 +1,1188 @@ +/* + * Machine check handler. + * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. + * Rest from unknown author(s). + * 2004 Andi Kleen. 
Rewrote most of it. + * Copyright 2008 Intel Corporation + * Author: Andi Kleen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MISC_MCELOG_MINOR 227 + +atomic_t mce_entry; + +static int mce_dont_init; + +/* + * Tolerant levels: + * 0: always panic on uncorrected errors, log corrected errors + * 1: panic or SIGBUS on uncorrected errors, log corrected errors + * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors + * 3: never panic or SIGBUS, log all errors (for testing only) + */ +static int tolerant = 1; +static int banks; +static u64 *bank; +static unsigned long notify_user; +static int rip_msr; +static int mce_bootlog = -1; +static atomic_t mce_events; + +static char trigger[128]; +static char *trigger_argv[2] = { trigger, NULL }; + +static DECLARE_WAIT_QUEUE_HEAD(mce_wait); + +/* MCA banks polled by the period polling timer for corrected events */ +DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { + [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL +}; + +/* Do initial initialization of a struct mce */ +void mce_setup(struct mce *m) +{ + memset(m, 0, sizeof(struct mce)); + m->cpu = smp_processor_id(); + rdtscll(m->tsc); +} + +/* + * Lockless MCE logging infrastructure. + * This avoids deadlocks on printk locks without having to break locks. Also + * separate MCEs from kernel messages to avoid bogus bug reports. + */ + +static struct mce_log mcelog = { + MCE_LOG_SIGNATURE, + MCE_LOG_LEN, +}; + +void mce_log(struct mce *mce) +{ + unsigned next, entry; + atomic_inc(&mce_events); + mce->finished = 0; + wmb(); + for (;;) { + entry = rcu_dereference(mcelog.next); + for (;;) { + /* When the buffer fills up discard new entries. Assume + that the earlier errors are the more interesting. */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); + return; + } + /* Old left over entry. Skip. */ + if (mcelog.entry[entry].finished) { + entry++; + continue; + } + break; + } + smp_rmb(); + next = entry + 1; + if (cmpxchg(&mcelog.next, entry, next) == entry) + break; + } + memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); + wmb(); + mcelog.entry[entry].finished = 1; + wmb(); + + set_bit(0, ¬ify_user); +} + +static void print_mce(struct mce *m) +{ + printk(KERN_EMERG "\n" + KERN_EMERG "HARDWARE ERROR\n" + KERN_EMERG + "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + m->cpu, m->mcgstatus, m->bank, m->status); + if (m->ip) { + printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" 
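mce_log() reserves a slot in the fixed ring without taking any lock: a cmpxchg on mcelog.next claims the next free entry, the record is copied in, and only then is the entry published to readers through the finished flag. A simplified user-space analog with C11 atomics (it deliberately omits the kernel version's skipping of stale unfinished slots and the RCU accessors):

/* Lock-free slot reservation in the style of mce_log(): racing
 * writers CAS the ring cursor forward, then publish via "finished".
 * Illustrative sketch only, not kernel code. */
#include <stdatomic.h>
#include <stdio.h>

#define LOG_LEN 32

struct entry { _Atomic int finished; int payload; };
static struct entry ring[LOG_LEN];
static _Atomic unsigned next;

static int log_event(int payload)
{
	unsigned slot;
	do {
		slot = atomic_load(&next);
		if (slot >= LOG_LEN)
			return -1;	/* full: drop, keep the older entries */
	} while (!atomic_compare_exchange_weak(&next, &slot, slot + 1));
	ring[slot].payload = payload;	/* the slot is exclusively ours now */
	atomic_store(&ring[slot].finished, 1);	/* publish to readers */
	return 0;
}

int main(void)
{
	log_event(42);
	printf("next=%u finished=%d\n", atomic_load(&next),
	       atomic_load(&ring[0].finished));
	return 0;
}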
: "", + m->cs, m->ip); + if (m->cs == __KERNEL_CS) + print_symbol("{%s}", m->ip); + printk("\n"); + } + printk(KERN_EMERG "TSC %llx ", m->tsc); + if (m->addr) + printk("ADDR %llx ", m->addr); + if (m->misc) + printk("MISC %llx ", m->misc); + printk("\n"); + printk(KERN_EMERG "This is not a software problem!\n"); + printk(KERN_EMERG "Run through mcelog --ascii to decode " + "and contact your hardware vendor\n"); +} + +static void mce_panic(char *msg, struct mce *backup, unsigned long start) +{ + int i; + + oops_begin(); + for (i = 0; i < MCE_LOG_LEN; i++) { + unsigned long tsc = mcelog.entry[i].tsc; + + if (time_before(tsc, start)) + continue; + print_mce(&mcelog.entry[i]); + if (backup && mcelog.entry[i].tsc == backup->tsc) + backup = NULL; + } + if (backup) + print_mce(backup); + panic(msg); +} + +int mce_available(struct cpuinfo_x86 *c) +{ + if (mce_dont_init) + return 0; + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); +} + +static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) +{ + if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { + m->ip = regs->ip; + m->cs = regs->cs; + } else { + m->ip = 0; + m->cs = 0; + } + if (rip_msr) { + /* Assume the RIP in the MSR is exact. Is this true? */ + m->mcgstatus |= MCG_STATUS_EIPV; + rdmsrl(rip_msr, m->ip); + m->cs = 0; + } +} + +/* + * Poll for corrected events or events that happened before reset. + * Those are just logged through /dev/mcelog. + * + * This is executed in standard interrupt context. + */ +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) +{ + struct mce m; + int i; + + mce_setup(&m); + + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + for (i = 0; i < banks; i++) { + if (!bank[i] || !test_bit(i, *b)) + continue; + + m.misc = 0; + m.addr = 0; + m.bank = i; + m.tsc = 0; + + barrier(); + rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + if (!(m.status & MCI_STATUS_VAL)) + continue; + + /* + * Uncorrected events are handled by the exception handler + * when it is enabled. But when the exception is disabled log + * everything. + * + * TBD do the same check for MCI_STATUS_EN here? + */ + if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) + continue; + + if (m.status & MCI_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + if (m.status & MCI_STATUS_ADDRV) + rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + + if (!(flags & MCP_TIMESTAMP)) + m.tsc = 0; + /* + * Don't get the IP here because it's unlikely to + * have anything to do with the actual error location. + */ + if (!(flags & MCP_DONTLOG)) { + mce_log(&m); + add_taint(TAINT_MACHINE_CHECK); + } + + /* + * Clear state for this bank. + */ + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + } + + /* + * Don't clear MCG_STATUS here because it's only defined for + * exceptions. + */ +} + +/* + * The actual machine check handler. This only handles real + * exceptions when something got corrupted coming in through int 18. + * + * This is executed in NMI context not subject to normal locking rules. This + * implies that most kernel services cannot be safely used. Don't even + * think about putting a printk in there! + */ +void do_machine_check(struct pt_regs * regs, long error_code) +{ + struct mce m, panicm; + u64 mcestart = 0; + int i; + int panicm_found = 0; + /* + * If no_way_out gets set, there is no safe way to recover from this + * MCE. If tolerant is cranked up, we'll try anyway. + */ + int no_way_out = 0; + /* + * If kill_it gets set, there might be a way to recover from this + * error. 
+ */ + int kill_it = 0; + DECLARE_BITMAP(toclear, MAX_NR_BANKS); + + atomic_inc(&mce_entry); + + if (notify_die(DIE_NMI, "machine check", regs, error_code, + 18, SIGKILL) == NOTIFY_STOP) + goto out2; + if (!banks) + goto out2; + + mce_setup(&m); + + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + /* if the restart IP is not valid, we're done for */ + if (!(m.mcgstatus & MCG_STATUS_RIPV)) + no_way_out = 1; + + rdtscll(mcestart); + barrier(); + + for (i = 0; i < banks; i++) { + __clear_bit(i, toclear); + if (!bank[i]) + continue; + + m.misc = 0; + m.addr = 0; + m.bank = i; + + rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + if ((m.status & MCI_STATUS_VAL) == 0) + continue; + + /* + * Non uncorrected errors are handled by machine_check_poll + * Leave them alone. + */ + if ((m.status & MCI_STATUS_UC) == 0) + continue; + + /* + * Set taint even when machine check was not enabled. + */ + add_taint(TAINT_MACHINE_CHECK); + + __set_bit(i, toclear); + + if (m.status & MCI_STATUS_EN) { + /* if PCC was set, there's no way out */ + no_way_out |= !!(m.status & MCI_STATUS_PCC); + /* + * If this error was uncorrectable and there was + * an overflow, we're in trouble. If no overflow, + * we might get away with just killing a task. + */ + if (m.status & MCI_STATUS_UC) { + if (tolerant < 1 || m.status & MCI_STATUS_OVER) + no_way_out = 1; + kill_it = 1; + } + } else { + /* + * Machine check event was not enabled. Clear, but + * ignore. + */ + continue; + } + + if (m.status & MCI_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + if (m.status & MCI_STATUS_ADDRV) + rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + + mce_get_rip(&m, regs); + mce_log(&m); + + /* Did this bank cause the exception? */ + /* Assume that the bank with uncorrectable errors did it, + and that there is only a single one. */ + if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { + panicm = m; + panicm_found = 1; + } + } + + /* If we didn't find an uncorrectable error, pick + the last one (shouldn't happen, just being safe). */ + if (!panicm_found) + panicm = m; + + /* + * If we have decided that we just CAN'T continue, and the user + * has not set tolerant to an insane level, give up and die. + */ + if (no_way_out && tolerant < 3) + mce_panic("Machine check", &panicm, mcestart); + + /* + * If the error seems to be unrecoverable, something should be + * done. Try to kill as little as possible. If we can kill just + * one task, do that. If the user has set the tolerance very + * high, don't try to do anything at all. + */ + if (kill_it && tolerant < 3) { + int user_space = 0; + + /* + * If the EIPV bit is set, it means the saved IP is the + * instruction which caused the MCE. + */ + if (m.mcgstatus & MCG_STATUS_EIPV) + user_space = panicm.ip && (panicm.cs & 3); + + /* + * If we know that the error was in user space, send a + * SIGBUS. Otherwise, panic if tolerance is low. + * + * force_sig() takes an awful lot of locks and has a slight + * risk of deadlocking. 
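The handler above boils down to two verdicts derived per bank: no_way_out (invalid restart IP, PCC set, or an uncorrectable overflow at low tolerance) forces a panic, while kill_it marks errors where killing the faulting task may suffice. A condensed restatement of that decision table, assuming the faulting context is user space when a signal is considered:

/* One-bank version of do_machine_check()'s severity logic.
 * Sketch: the kernel folds this across all banks and also checks
 * panic_on_oops and the user/kernel split before signalling. */
#include <stdint.h>
#include <stdio.h>

#define MCI_STATUS_VAL  (1ULL << 63)
#define MCI_STATUS_OVER (1ULL << 62)
#define MCI_STATUS_UC   (1ULL << 61)
#define MCI_STATUS_EN   (1ULL << 60)
#define MCI_STATUS_PCC  (1ULL << 57)

enum action { ACT_LOG, ACT_SIGBUS, ACT_PANIC };

static enum action classify(uint64_t status, int tolerant, int ripv)
{
	int no_way_out = !ripv;		/* cannot restart: fatal */
	int kill_it = 0;

	if ((status & (MCI_STATUS_VAL | MCI_STATUS_EN)) !=
	    (MCI_STATUS_VAL | MCI_STATUS_EN))
		return ACT_LOG;		/* invalid or not enabled */
	if (status & MCI_STATUS_PCC)
		no_way_out = 1;		/* processor context corrupt */
	if (status & MCI_STATUS_UC) {
		if (tolerant < 1 || (status & MCI_STATUS_OVER))
			no_way_out = 1;
		kill_it = 1;
	}
	if (no_way_out && tolerant < 3)
		return ACT_PANIC;
	if (kill_it && tolerant < 3)
		return ACT_SIGBUS;	/* user-space context assumed */
	return ACT_LOG;
}

int main(void)
{
	uint64_t s = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_UC;
	printf("%d\n", classify(s, 1, 1)); /* UC but restartable: SIGBUS */
	return 0;
}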
+
+#ifdef CONFIG_X86_MCE_INTEL
+/***
+ * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
+ * @cpu: The CPU on which the event occurred.
+ * @status: Event status information
+ *
+ * This function should be called by the thermal interrupt after the
+ * event has been processed and the decision was made to log the event
+ * further.
+ *
+ * The status parameter will be saved to the 'status' field of 'struct mce'
+ * and historically has been the register value of the
+ * MSR_IA32_THERMAL_STATUS (Intel) msr.
+ */
+void mce_log_therm_throt_event(__u64 status)
+{
+	struct mce m;
+
+	mce_setup(&m);
+	m.bank = MCE_THERMAL_BANK;
+	m.status = status;
+	mce_log(&m);
+}
+#endif /* CONFIG_X86_MCE_INTEL */
+
+/*
+ * Periodic polling timer for "silent" machine check errors.  If the
+ * poller finds an MCE, poll 2x faster.  When the poller finds no more
+ * errors, poll 2x slower (up to check_interval seconds).
+ */
+
+static int check_interval = 5 * 60; /* 5 minutes */
+static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
+static void mcheck_timer(unsigned long);
+static DEFINE_PER_CPU(struct timer_list, mce_timer);
+
+static void mcheck_timer(unsigned long data)
+{
+	struct timer_list *t = &per_cpu(mce_timer, data);
+	int *n;
+
+	WARN_ON(smp_processor_id() != data);
+
+	if (mce_available(&current_cpu_data))
+		machine_check_poll(MCP_TIMESTAMP,
+				&__get_cpu_var(mce_poll_banks));
+
+	/*
+	 * Alert userspace if needed.  If we logged an MCE, reduce the
+	 * polling interval, otherwise increase the polling interval.
+	 */
+	n = &__get_cpu_var(next_interval);
+	if (mce_notify_user()) {
+		*n = max(*n/2, HZ/100);
+	} else {
+		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+	}
+
+	t->expires = jiffies + *n;
+	add_timer(t);
+}
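The interval adaptation above is exponential in both directions: halve on activity with a floor of HZ/100, double when quiet up to check_interval. The same arithmetic in isolation, with HZ=1000 assumed for the sketch:

/* mcheck_timer()'s adaptive poll interval, extracted.  HZ and the
 * jiffies rounding are simplified away. */
#include <stdio.h>

#define HZ 1000
#define CHECK_INTERVAL (5 * 60 * HZ)	/* 5 minutes in jiffies */

static int next_interval(int n, int logged_event)
{
	if (logged_event)
		return n / 2 > HZ / 100 ? n / 2 : HZ / 100;
	return 2 * n < CHECK_INTERVAL ? 2 * n : CHECK_INTERVAL;
}

int main(void)
{
	int n = CHECK_INTERVAL;
	for (int i = 0; i < 5; i++)
		printf("burst -> %d jiffies\n", n = next_interval(n, 1));
	for (int i = 0; i < 5; i++)
		printf("quiet -> %d jiffies\n", n = next_interval(n, 0));
	return 0;
}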
+
+static void mce_do_trigger(struct work_struct *work)
+{
+	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+}
+
+static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
+
+/*
+ * Notify the user(s) about new machine check events.
+ * Can be called from interrupt context, but not from machine check/NMI
+ * context.
+ */
+int mce_notify_user(void)
+{
+	/* Not more than two messages every minute */
+	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
+
+	clear_thread_flag(TIF_MCE_NOTIFY);
+	if (test_and_clear_bit(0, &notify_user)) {
+		wake_up_interruptible(&mce_wait);
+
+		/*
+		 * There is no risk of missing notifications because
+		 * work_pending is always cleared before the function is
+		 * executed.
+		 */
+		if (trigger[0] && !work_pending(&mce_trigger_work))
+			schedule_work(&mce_trigger_work);
+
+		if (__ratelimit(&ratelimit))
+			printk(KERN_INFO "Machine check events logged\n");
+
+		return 1;
+	}
+	return 0;
+}
+
+/* see if the idle task needs to notify userspace */
+static int
+mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
+{
+	/* IDLE_END should be safe - interrupts are back on */
+	if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
+		mce_notify_user();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_idle_notifier = {
+	.notifier_call = mce_idle_callback,
+};
+
+static __init int periodic_mcheck_init(void)
+{
+	idle_notifier_register(&mce_idle_notifier);
+	return 0;
+}
+__initcall(periodic_mcheck_init);
+
+/*
+ * Initialize Machine Checks for a CPU.
+ */
+static int mce_cap_init(void)
+{
+	u64 cap;
+	unsigned b;
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	b = cap & 0xff;
+	if (b > MAX_NR_BANKS) {
+		printk(KERN_WARNING
+		       "MCE: Using only %u machine check banks out of %u\n",
+			MAX_NR_BANKS, b);
+		b = MAX_NR_BANKS;
+	}
+
+	/* Don't support asymmetric configurations today */
+	WARN_ON(banks != 0 && b != banks);
+	banks = b;
+	if (!bank) {
+		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
+		if (!bank)
+			return -ENOMEM;
+		memset(bank, 0xff, banks * sizeof(u64));
+	}
+
+	/* Use accurate RIP reporting if available. */
+	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
+		rip_msr = MSR_IA32_MCG_EIP;
+
+	return 0;
+}
+
+static void mce_init(void *dummy)
+{
+	u64 cap;
+	int i;
+	mce_banks_t all_banks;
+
+	/*
+	 * Log the machine checks left over from the previous reset.
+	 */
+	bitmap_fill(all_banks, MAX_NR_BANKS);
+	machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
+
+	set_in_cr4(X86_CR4_MCE);
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	if (cap & MCG_CTL_P)
+		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+	for (i = 0; i < banks; i++) {
+		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+	}
+}
+
+/* Add per CPU specific workarounds here */
+static void mce_cpu_quirks(struct cpuinfo_x86 *c)
+{
+	/* This should be disabled by the BIOS, but isn't always */
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		if (c->x86 == 15 && banks > 4)
+			/* disable GART TBL walk error reporting, which trips off
+			   incorrectly with the IOMMU & 3ware & Cerberus. */
+			clear_bit(10, (unsigned long *)&bank[4]);
+		if (c->x86 <= 17 && mce_bootlog < 0)
+			/* Lots of broken BIOS around that don't clear them
+			   by default and leave crap in there. Don't log. */
+			mce_bootlog = 0;
+	}
+}
+
+static void mce_cpu_features(struct cpuinfo_x86 *c)
+{
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		mce_intel_feature_init(c);
+		break;
+	case X86_VENDOR_AMD:
+		mce_amd_feature_init(c);
+		break;
+	default:
+		break;
+	}
+}
+
+static void mce_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	int *n = &__get_cpu_var(next_interval);
+
+	*n = check_interval * HZ;
+	if (!*n)
+		return;
+	setup_timer(t, mcheck_timer, smp_processor_id());
+	t->expires = round_jiffies(jiffies + *n);
+	add_timer(t);
+}
+
+/*
+ * Called for each booted CPU to set up machine checks.
+ * Must be called with preempt off.
+ */
+void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+{
+	if (!mce_available(c))
+		return;
+
+	if (mce_cap_init() < 0) {
+		mce_dont_init = 1;
+		return;
+	}
+	mce_cpu_quirks(c);
+
+	mce_init(NULL);
+	mce_cpu_features(c);
+	mce_init_timer();
+}
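mce_cap_init() trusts only a few fields of MCG_CAP: the bank count in bits 0-7, MCG_CTL presence in bit 8, extended-register presence in bit 9 and the extended-register count in bits 16-23 (the accurate RIP MSR is used only when at least 9 extended registers exist). A small decoder for that layout, fed a made-up sample value:

/* MCG_CAP field decoder mirroring mce_cap_init()'s checks.
 * The cap value below is a fabricated example, not real hardware. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cap = 0x0000000000090306ULL; /* sample: 6 banks, ext_cnt 9 */
	unsigned banks = cap & 0xff;
	int has_ctl = !!(cap & (1 << 8));
	int has_ext = !!(cap & (1 << 9));
	unsigned ext_cnt = (cap >> 16) & 0xff;

	printf("banks=%u MCG_CTL=%d ext=%d ext_cnt=%u rip_msr=%s\n",
	       banks, has_ctl, has_ext, ext_cnt,
	       (has_ext && ext_cnt >= 9) ? "usable" : "no");
	return 0;
}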
+
+/*
+ * Character device to read and clear the MCE log.
+ */
+
+static DEFINE_SPINLOCK(mce_state_lock);
+static int open_count;	/* #times opened */
+static int open_exclu;	/* already open exclusive? */
+
+static int mce_open(struct inode *inode, struct file *file)
+{
+	lock_kernel();
+	spin_lock(&mce_state_lock);
+
+	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
+		spin_unlock(&mce_state_lock);
+		unlock_kernel();
+		return -EBUSY;
+	}
+
+	if (file->f_flags & O_EXCL)
+		open_exclu = 1;
+	open_count++;
+
+	spin_unlock(&mce_state_lock);
+	unlock_kernel();
+
+	return nonseekable_open(inode, file);
+}
+
+static int mce_release(struct inode *inode, struct file *file)
+{
+	spin_lock(&mce_state_lock);
+
+	open_count--;
+	open_exclu = 0;
+
+	spin_unlock(&mce_state_lock);
+
+	return 0;
+}
+
+static void collect_tscs(void *data)
+{
+	unsigned long *cpu_tsc = (unsigned long *)data;
+
+	rdtscll(cpu_tsc[smp_processor_id()]);
+}
+
+static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
+			loff_t *off)
+{
+	unsigned long *cpu_tsc;
+	static DEFINE_MUTEX(mce_read_mutex);
+	unsigned prev, next;
+	char __user *buf = ubuf;
+	int i, err;
+
+	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
+	if (!cpu_tsc)
+		return -ENOMEM;
+
+	mutex_lock(&mce_read_mutex);
+	next = rcu_dereference(mcelog.next);
+
+	/* Only supports full reads right now */
+	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
+		mutex_unlock(&mce_read_mutex);
+		kfree(cpu_tsc);
+		return -EINVAL;
+	}
+
+	err = 0;
+	prev = 0;
+	do {
+		for (i = prev; i < next; i++) {
+			unsigned long start = jiffies;
+
+			while (!mcelog.entry[i].finished) {
+				if (time_after_eq(jiffies, start + 2)) {
+					memset(mcelog.entry + i, 0,
+					       sizeof(struct mce));
+					goto timeout;
+				}
+				cpu_relax();
+			}
+			smp_rmb();
+			err |= copy_to_user(buf, mcelog.entry + i,
+					    sizeof(struct mce));
+			buf += sizeof(struct mce);
+timeout:
+			;
+		}
+
+		memset(mcelog.entry + prev, 0,
+		       (next - prev) * sizeof(struct mce));
+		prev = next;
+		next = cmpxchg(&mcelog.next, prev, 0);
+	} while (next != prev);
+
+	synchronize_sched();
+
+	/*
+	 * Collect entries that were still getting written before the
+	 * synchronize.
+	 */
+	on_each_cpu(collect_tscs, cpu_tsc, 1);
+	for (i = next; i < MCE_LOG_LEN; i++) {
+		if (mcelog.entry[i].finished &&
+		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
+			err |= copy_to_user(buf, mcelog.entry+i,
+					    sizeof(struct mce));
+			smp_rmb();
+			buf += sizeof(struct mce);
+			memset(&mcelog.entry[i], 0, sizeof(struct mce));
+		}
+	}
+	mutex_unlock(&mce_read_mutex);
+	kfree(cpu_tsc);
+	return err ? -EFAULT : buf - ubuf;
+}
+
+static unsigned int mce_poll(struct file *file, poll_table *wait)
+{
+	poll_wait(file, &mce_wait, wait);
+	if (rcu_dereference(mcelog.next))
+		return POLLIN | POLLRDNORM;
+	return 0;
+}
+
+static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	int __user *p = (int __user *)arg;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	switch (cmd) {
+	case MCE_GET_RECORD_LEN:
+		return put_user(sizeof(struct mce), p);
+	case MCE_GET_LOG_LEN:
+		return put_user(MCE_LOG_LEN, p);
+	case MCE_GETCLEAR_FLAGS: {
+		unsigned flags;
+
+		do {
+			flags = mcelog.flags;
+		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+		return put_user(flags, p);
+	}
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations mce_chrdev_ops = {
+	.open = mce_open,
+	.release = mce_release,
+	.read = mce_read,
+	.poll = mce_poll,
+	.unlocked_ioctl = mce_ioctl,
+};
+
+static struct miscdevice mce_log_device = {
+	MISC_MCELOG_MINOR,
+	"mcelog",
+	&mce_chrdev_ops,
+};
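Note the contract mce_read() imposes on consumers: only whole-buffer reads from offset 0 are honored, so a client should size its buffer from the two length ioctls first. A sketch of such a consumer; the ioctl numbers here follow the historical asm/mce.h definitions and should be treated as an assumption, verify them against your kernel headers:

/* Minimal /dev/mcelog consumer.  Assumes the mcelog misc device is
 * registered and the ioctl encodings below match asm/mce.h. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
#define MCE_GET_LOG_LEN    _IOR('M', 2, int)

int main(void)
{
	int fd = open("/dev/mcelog", O_RDONLY);
	int rec = 0, len = 0;
	if (fd < 0) { perror("mcelog"); return 1; }
	ioctl(fd, MCE_GET_RECORD_LEN, &rec);
	ioctl(fd, MCE_GET_LOG_LEN, &len);
	char *buf = malloc((size_t)rec * len);
	/* partial reads return -EINVAL, so always ask for the full log */
	ssize_t n = read(fd, buf, (size_t)rec * len);
	if (n >= 0 && rec > 0)
		printf("%zd bytes = %zd records of %d bytes\n", n, n / rec, rec);
	free(buf);
	close(fd);
	return 0;
}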
+
+/*
+ * Old style boot options parsing. Only for compatibility.
+ */
+static int __init mcheck_disable(char *str)
+{
+	mce_dont_init = 1;
+	return 1;
+}
+
+/* mce=off disables machine check.
+   mce=TOLERANCELEVEL (number, see above)
+   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+   mce=nobootlog Don't log MCEs from before booting. */
+static int __init mcheck_enable(char *str)
+{
+	if (!strcmp(str, "off"))
+		mce_dont_init = 1;
+	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
+		mce_bootlog = str[0] == 'b';
+	else if (isdigit(str[0]))
+		get_option(&str, &tolerant);
+	else
+		printk("mce= argument %s ignored. Please use /sys", str);
+	return 1;
+}
+
+__setup("nomce", mcheck_disable);
+__setup("mce=", mcheck_enable);
+
+/*
+ * Sysfs support
+ */
+
+/*
+ * Disable machine checks on suspend and shutdown. We can't really handle
+ * them later.
+ */
+static int mce_disable(void)
+{
+	int i;
+
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+	return 0;
+}
+
+static int mce_suspend(struct sys_device *dev, pm_message_t state)
+{
+	return mce_disable();
+}
+
+static int mce_shutdown(struct sys_device *dev)
+{
+	return mce_disable();
+}
+
+/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
+   Only one CPU is active at this time, the others get readded later using
+   CPU hotplug. */
+static int mce_resume(struct sys_device *dev)
+{
+	mce_init(NULL);
+	mce_cpu_features(&current_cpu_data);
+	return 0;
+}
+
+static void mce_cpu_restart(void *data)
+{
+	del_timer_sync(&__get_cpu_var(mce_timer));
+	if (mce_available(&current_cpu_data))
+		mce_init(NULL);
+	mce_init_timer();
+}
+
+/* Reinit MCEs after user configuration changes */
+static void mce_restart(void)
+{
+	on_each_cpu(mce_cpu_restart, NULL, 1);
+}
+
+static struct sysdev_class mce_sysclass = {
+	.suspend = mce_suspend,
+	.shutdown = mce_shutdown,
+	.resume = mce_resume,
+	.name = "machinecheck",
+};
+
+DEFINE_PER_CPU(struct sys_device, device_mce);
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
+
+/* Why are there no generic functions for this? */
+#define ACCESSOR(name, var, start) \
+	static ssize_t show_ ## name(struct sys_device *s,		\
+				     struct sysdev_attribute *attr,	\
+				     char *buf) {			\
+		return sprintf(buf, "%lx\n", (unsigned long)var);	\
+	}								\
+	static ssize_t set_ ## name(struct sys_device *s,		\
+				    struct sysdev_attribute *attr,	\
+				    const char *buf, size_t siz) {	\
+		char *end;						\
+		unsigned long new = simple_strtoul(buf, &end, 0);	\
+		if (end == buf) return -EINVAL;				\
+		var = new;						\
+		start;							\
+		return end-buf;						\
+	}								\
+	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
+
+static struct sysdev_attribute *bank_attrs;
+
+static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			 char *buf)
+{
+	u64 b = bank[attr - bank_attrs];
+	return sprintf(buf, "%llx\n", b);
+}
+
+static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			const char *buf, size_t siz)
+{
+	char *end;
+	u64 new = simple_strtoull(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	bank[attr - bank_attrs] = new;
+	mce_restart();
+	return end-buf;
+}
+
+static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+			    char *buf)
+{
+	strcpy(buf, trigger);
+	strcat(buf, "\n");
+	return strlen(trigger) + 1;
+}
+
+static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+			   const char *buf, size_t siz)
+{
+	char *p;
+	int len;
+	strncpy(trigger, buf, sizeof(trigger));
+	trigger[sizeof(trigger)-1] = 0;
+	len = strlen(trigger);
+	p = strchr(trigger, '\n');
+	if (*p) *p = 0;
+	return len;
+}
+
+static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
+static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
+ACCESSOR(check_interval, check_interval, mce_restart())
+static struct sysdev_attribute *mce_attributes[] = {
+	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
+	NULL
+};
+
+static cpumask_var_t mce_device_initialized;
+
+/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
+static __cpuinit int mce_create_device(unsigned int cpu)
+{
+	int err;
+	int i;
+
+	if (!mce_available(&boot_cpu_data))
+		return -EIO;
+
+	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
+	per_cpu(device_mce, cpu).id = cpu;
+	per_cpu(device_mce, cpu).cls = &mce_sysclass;
+
+	err = sysdev_register(&per_cpu(device_mce, cpu));
+	if (err)
+		return err;
+
+	for (i = 0; mce_attributes[i]; i++) {
+		err = sysdev_create_file(&per_cpu(device_mce, cpu),
+					 mce_attributes[i]);
+		if (err)
+			goto error;
+	}
+	for (i = 0; i < banks; i++) {
+		err = sysdev_create_file(&per_cpu(device_mce, cpu),
+					 &bank_attrs[i]);
+		if (err)
+			goto error2;
+	}
+	cpumask_set_cpu(cpu, mce_device_initialized);
+
+	return 0;
+error2:
+	while (--i >= 0) {
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+				   &bank_attrs[i]);
+	}
+error:
+	while (--i >= 0) {
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+				   mce_attributes[i]);
+	}
+	sysdev_unregister(&per_cpu(device_mce, cpu));
+
+	return err;
+}
+
+static __cpuinit void mce_remove_device(unsigned int cpu)
+{
+	int i;
+
+	if (!cpumask_test_cpu(cpu, mce_device_initialized))
+		return;
+
+	for (i = 0; mce_attributes[i]; i++)
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+				   mce_attributes[i]);
+	for (i = 0; i < banks; i++)
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+				   &bank_attrs[i]);
+	sysdev_unregister(&per_cpu(device_mce, cpu));
+	cpumask_clear_cpu(cpu, mce_device_initialized);
+}
+
+/* Make sure there are no machine checks on offlined CPUs. */
+static void mce_disable_cpu(void *h)
+{
+	int i;
+	unsigned long action = *(unsigned long *)h;
+
+	if (!mce_available(&current_cpu_data))
+		return;
+	if (!(action & CPU_TASKS_FROZEN))
+		cmci_clear();
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+}
+
+static void mce_reenable_cpu(void *h)
+{
+	int i;
+	unsigned long action = *(unsigned long *)h;
+
+	if (!mce_available(&current_cpu_data))
+		return;
+	if (!(action & CPU_TASKS_FROZEN))
+		cmci_reenable();
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+}
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
+				      unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct timer_list *t = &per_cpu(mce_timer, cpu);
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		mce_create_device(cpu);
+		if (threshold_cpu_callback)
+			threshold_cpu_callback(action, cpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		if (threshold_cpu_callback)
+			threshold_cpu_callback(action, cpu);
+		mce_remove_device(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		del_timer_sync(t);
+		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		break;
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+		t->expires = round_jiffies(jiffies +
+						__get_cpu_var(next_interval));
+		add_timer_on(t, cpu);
+		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		break;
+	case CPU_POST_DEAD:
+		/* intentionally ignoring frozen here */
+		cmci_rediscover(cpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_cpu_notifier __cpuinitdata = {
+	.notifier_call = mce_cpu_callback,
+};
+
+static __init int mce_init_banks(void)
+{
+	int i;
+
+	bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
+			     GFP_KERNEL);
+	if (!bank_attrs)
+		return -ENOMEM;
+
+	for (i = 0; i < banks; i++) {
+		struct sysdev_attribute *a = &bank_attrs[i];
+		a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
+		if (!a->attr.name)
+			goto nomem;
+		a->attr.mode = 0644;
+		a->show = show_bank;
+		a->store = set_bank;
+	}
+	return 0;
+
+nomem:
+	while (--i >= 0)
+		kfree(bank_attrs[i].attr.name);
+	kfree(bank_attrs);
+	bank_attrs = NULL;
+	return -ENOMEM;
+}
+
+static __init int mce_init_device(void)
+{
+	int err;
+	int i = 0;
+
+	if (!mce_available(&boot_cpu_data))
+		return -EIO;
+
+	zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+
+	err = mce_init_banks();
+	if (err)
+		return err;
+
+	err = sysdev_class_register(&mce_sysclass);
+	if (err)
+		return err;
+
+	for_each_online_cpu(i) {
+		err = mce_create_device(i);
+		if (err)
+			return err;
+	}
+
+	register_hotcpu_notifier(&mce_cpu_notifier);
+	misc_register(&mce_log_device);
+	return err;
+}
+
+device_initcall(mce_init_device);
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index ddae21620bda..56dde9c4bc96 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -13,22 +13,22 @@
  *
  *  All MC4_MISCi registers are shared between multi-cores
  */
+
+#include
+#include
+#include
 #include
-#include
 #include
-#include
-#include
-#include
+#include
 #include
-#include
-#include
-#include
 #include
-
+#include
+#include
 #include
-#include
 #include
 #include
+#include
+#include
 
 #define PFX               "mce_threshold: "
 #define VERSION           "version 1.1.1"
@@ -48,26 +48,26 @@
 #define MCG_XBLK_ADDR      0xC0000400
 
 struct threshold_block {
-	unsigned int block;
-	unsigned int bank;
-	unsigned int cpu;
-	u32 address;
-	u16 interrupt_enable;
-	u16 threshold_limit;
-	struct kobject kobj;
-	struct list_head miscj;
+	unsigned int		block;
+	unsigned int		bank;
+	unsigned int		cpu;
+	u32			address;
+	u16			interrupt_enable;
+	u16			threshold_limit;
+	struct kobject		kobj;
+	struct list_head	miscj;
 };
 
 /* defaults used early on boot */
 static struct threshold_block threshold_defaults = {
-	.interrupt_enable = 0,
-	.threshold_limit = THRESHOLD_MAX,
+	.interrupt_enable	= 0,
+	.threshold_limit	= THRESHOLD_MAX,
 };
 
 struct threshold_bank {
-	struct kobject *kobj;
-	struct threshold_block *blocks;
-	cpumask_var_t cpus;
+	struct kobject		*kobj;
+	struct threshold_block	*blocks;
+	cpumask_var_t		cpus;
 };
 static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
@@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void);
  */
 struct thresh_restart {
-	struct threshold_block *b;
-	int reset;
-	u16 old_limit;
+	struct threshold_block	*b;
+	int			reset;
+	u16			old_limit;
 };
 
 /* must be called with correct cpu affinity */
@@ -110,7 +110,6 @@ static void threshold_restart_bank(void *_tr)
 	} else if (tr->old_limit) {	/* change limit w/o reset */
 		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
 		    (tr->old_limit - tr->b->threshold_limit);
-
 		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
@@ -126,11 +125,11 @@
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
+	unsigned int bank, block;
 	unsigned int cpu = smp_processor_id();
+	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
-	unsigned int bank, block;
 	struct thresh_restart tr;
-	u8 lvt_off;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -141,7 +140,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 				if (!address)
 					break;
 				address += MCG_XBLK_ADDR;
-			} else
+			}
+			else
 				++address;
 
 			if (rdmsr_safe(address, &low, &high))
@@ -193,9 +193,9 @@
  */
 static void amd_threshold_interrupt(void)
 {
-	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
 	struct mce m;
+	u32 low = 0, high = 0, address = 0;
 
 	mce_setup(&m);
 
@@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void)
 		if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			if (block == 0) {
+			if (block == 0)
 				address = MSR_IA32_MC0_MISC + bank * 4;
-			} else if (block == 1) {
+			else if (block == 1) {
 				address = (low & MASK_BLKPTR_LO) >> 21;
 				if (!address)
 					break;
 				address += MCG_XBLK_ADDR;
-			} else {
-				++address;
 			}
+			else
+				++address;
 
 			if (rdmsr_safe(address, &low, &high))
 				break;
@@ -229,10 +229,8 @@ static void amd_threshold_interrupt(void)
 			    (high & MASK_LOCKED_HI))
 				continue;
 
-			/*
-			 * Log the machine check that caused the threshold
-			 * event.
-			 */
+			/* Log the machine check that caused the threshold
+			   event. */
 			machine_check_poll(MCP_TIMESTAMP,
 					   &__get_cpu_var(mce_poll_banks));
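Both loops above walk the same MSR chain: block 0 of a bank lives at MC<bank>_MISC, block 1 is located through the BLKPTR field in block 0's high/low pair, and subsequent blocks are at consecutive MSR addresses. The address computation in isolation; MASK_BLKPTR_LO = 0xFF000000 is assumed here to match the usual definition in this driver, and the inputs are samples:

/* The threshold-block MSR address walk, extracted from the loops in
 * mce_amd_feature_init()/amd_threshold_interrupt().  Sketch only. */
#include <stdint.h>
#include <stdio.h>

#define MSR_IA32_MC0_MISC 0x403
#define MCG_XBLK_ADDR     0xC0000400u
#define MASK_BLKPTR_LO    0xFF000000u	/* assumed definition */

static uint32_t next_block_addr(unsigned bank, unsigned block,
				uint32_t prev_addr, uint32_t prev_low)
{
	if (block == 0)
		return MSR_IA32_MC0_MISC + bank * 4;
	if (block == 1) {
		uint32_t off = (prev_low & MASK_BLKPTR_LO) >> 21;
		return off ? MCG_XBLK_ADDR + off : 0;	/* 0 = chain ends */
	}
	return prev_addr + 1;	/* later blocks are consecutive */
}

int main(void)
{
	uint32_t low = 0x01000000;	/* sample BLKPTR value */
	uint32_t a0 = next_block_addr(4, 0, 0, 0);
	uint32_t a1 = next_block_addr(4, 1, a0, low);
	printf("block0=%#x block1=%#x block2=%#x\n", (unsigned)a0,
	       (unsigned)a1, (unsigned)next_block_addr(4, 2, a1, 0));
	return 0;
}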
@@ -256,52 +254,48 @@
 
 struct threshold_attr {
 	struct attribute attr;
-	ssize_t (*show) (struct threshold_block *, char *);
-	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
+	ssize_t(*show) (struct threshold_block *, char *);
+	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 
-#define SHOW_FIELDS(name)						\
-static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
-{									\
-	return sprintf(buf, "%lx\n", (unsigned long) b->name);		\
+#define SHOW_FIELDS(name)                                           \
+static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
+{                                                                   \
+	return sprintf(buf, "%lx\n", (unsigned long) b->name);      \
 }
 SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(threshold_limit)
 
-static ssize_t
-store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
+static ssize_t store_interrupt_enable(struct threshold_block *b,
+				      const char *buf, size_t count)
 {
+	char *end;
 	struct thresh_restart tr;
-	unsigned long new;
-
-	if (strict_strtoul(buf, 0, &new) < 0)
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
 		return -EINVAL;
-
 	b->interrupt_enable = !!new;
 
-	tr.b		= b;
-	tr.reset	= 0;
-	tr.old_limit	= 0;
-
+	tr.b = b;
+	tr.reset = 0;
+	tr.old_limit = 0;
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
-	return size;
+	return end - buf;
 }
 
-static ssize_t
-store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
+static ssize_t store_threshold_limit(struct threshold_block *b,
+				     const char *buf, size_t count)
 {
+	char *end;
 	struct thresh_restart tr;
-	unsigned long new;
-
-	if (strict_strtoul(buf, 0, &new) < 0)
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
 		return -EINVAL;
-
 	if (new > THRESHOLD_MAX)
 		new = THRESHOLD_MAX;
 	if (new < 1)
 		new = 1;
-
 	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
 	tr.b = b;
@@ -309,12 +303,12 @@
 
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
-	return size;
+	return end - buf;
 }
 
 struct threshold_block_cross_cpu {
-	struct threshold_block *tb;
-	long retval;
+	struct threshold_block	*tb;
+	long			retval;
 };
 
 static void local_error_count_handler(void *_tbcc)
@@ -344,13 +338,16 @@ static ssize_t store_error_count(struct threshold_block *b,
 	return 1;
 }
 
-#define RW_ATTR(val)							\
-static struct threshold_attr val = {					\
-	.attr	= {.name = __stringify(val), .mode = 0644 },		\
-	.show	= show_## val,						\
-	.store	= store_## val,						\
+#define THRESHOLD_ATTR(_name,_mode,_show,_store) {            \
+	.attr = {.name = __stringify(_name), .mode = _mode }, \
+	.show = _show,                                        \
+	.store = _store,                                      \
 };
 
+#define RW_ATTR(name)                                           \
+static struct threshold_attr name =                             \
+	THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
+
 RW_ATTR(interrupt_enable);
 RW_ATTR(threshold_limit);
 RW_ATTR(error_count);
@@ -362,17 +359,15 @@ static struct attribute *default_attrs[] = {
 	NULL
 };
 
-#define to_block(k)	container_of(k, struct threshold_block, kobj)
-#define to_attr(a)	container_of(a, struct threshold_attr, attr)
+#define to_block(k) container_of(k, struct threshold_block, kobj)
+#define to_attr(a) container_of(a, struct threshold_attr, attr)
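The store hooks just reverted to simple_strtoul share one parse-and-clamp pattern: reject an empty parse via the end pointer, then clamp the new limit into [1, THRESHOLD_MAX]. Restated with standard strtoul; THRESHOLD_MAX = 0xFFF is an assumption of the sketch:

/* Parse-and-clamp in the style of store_threshold_limit().
 * Returns bytes consumed (like the sysfs hook) or -1 on bad input. */
#include <stdio.h>
#include <stdlib.h>

#define THRESHOLD_MAX 0xFFFUL	/* assumed counter width */

static long parse_limit(const char *buf, unsigned long *out)
{
	char *end;
	unsigned long new = strtoul(buf, &end, 0);

	if (end == buf)
		return -1;	/* -EINVAL in the kernel version */
	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new == 0)
		new = 1;
	*out = new;
	return end - buf;
}

int main(void)
{
	unsigned long v;
	printf("%ld -> %lu\n", parse_limit("99999", &v), v);	/* clamped */
	printf("%ld\n", parse_limit("junk", &v));		/* rejected */
	return 0;
}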
 
 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
-
 	ret = a->show ? a->show(b, buf) : -EIO;
-
 	return ret;
 }
 
@@ -382,20 +377,18 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
-
 	ret = a->store ? a->store(b, buf, count) : -EIO;
-
 	return ret;
 }
 
 static struct sysfs_ops threshold_ops = {
-	.show			= show,
-	.store			= store,
+	.show = show,
+	.store = store,
 };
 
 static struct kobj_type threshold_ktype = {
-	.sysfs_ops		= &threshold_ops,
-	.default_attrs		= default_attrs,
+	.sysfs_ops = &threshold_ops,
+	.default_attrs = default_attrs,
 };
 
 static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
@@ -403,9 +396,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 					       unsigned int block,
 					       u32 address)
 {
-	struct threshold_block *b = NULL;
-	u32 low, high;
 	int err;
+	u32 low, high;
+	struct threshold_block *b = NULL;
 
 	if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
 		return 0;
@@ -428,21 +421,20 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 	if (!b)
 		return -ENOMEM;
 
-	b->block		= block;
-	b->bank			= bank;
-	b->cpu			= cpu;
-	b->address		= address;
-	b->interrupt_enable	= 0;
-	b->threshold_limit	= THRESHOLD_MAX;
+	b->block = block;
+	b->bank = bank;
+	b->cpu = cpu;
+	b->address = address;
+	b->interrupt_enable = 0;
+	b->threshold_limit = THRESHOLD_MAX;
 
 	INIT_LIST_HEAD(&b->miscj);
 
-	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
+	if (per_cpu(threshold_banks, cpu)[bank]->blocks)
 		list_add(&b->miscj,
 			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
-	} else {
+	else
 		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
-	}
 
 	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
 				   per_cpu(threshold_banks, cpu)[bank]->kobj,
@@ -455,9 +447,8 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 		if (!address)
 			return 0;
 		address += MCG_XBLK_ADDR;
-	} else {
+	} else
 		++address;
-	}
 
 	err = allocate_threshold_blocks(cpu, bank, ++block, address);
 	if (err)
@@ -509,14 +500,13 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (!b)
 			goto out;
 
-		err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
+		err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
 
 		cpumask_copy(b->cpus, cpu_core_mask(cpu));
 		per_cpu(threshold_banks, cpu)[bank] = b;
-
 		goto out;
 	}
 #endif
@@ -532,7 +522,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		goto out;
 	}
 
-	b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
+	b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
 	if (!b->kobj)
 		goto out_free;
 
@@ -552,7 +542,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (i == cpu)
 			continue;
 
-		err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
+		err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
@@ -615,13 +605,15 @@ static void deallocate_threshold_block(unsigned int cpu,
 
 static void threshold_remove_bank(unsigned int cpu, int bank)
 {
+	int i = 0;
 	struct threshold_bank *b;
 	char name[32];
-	int i = 0;
 
 	b = per_cpu(threshold_banks, cpu)[bank];
+
 	if (!b)
 		return;
+
 	if (!b->blocks)
 		goto free_out;
 
@@ -630,9 +622,8 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 #ifdef CONFIG_SMP
 	/* sibling symlink */
 	if (shared_bank[bank] && b->blocks->cpu != cpu) {
-		sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
+		sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
 		per_cpu(threshold_banks, cpu)[bank] = NULL;
-
 		return;
 	}
 #endif
@@ -642,7 +633,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 		if (i == cpu)
 			continue;
 
-		sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
+		sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
 		per_cpu(threshold_banks, i)[bank] = NULL;
 	}
 
@@ -668,9 +659,12 @@ static void threshold_remove_device(unsigned int cpu)
 }
 
 /* get notified when a cpu comes on/off */
-static void __cpuinit
-amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
+static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
+						    unsigned int cpu)
 {
+	if (cpu >= NR_CPUS)
+		return;
+
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
@@ -692,12 +686,11 @@ static __init int threshold_init_device(void)
 	/* to hit CPUs online before the notifier is up */
 	for_each_online_cpu(lcpu) {
 		int err = threshold_create_device(lcpu);
-
 		if (err)
 			return err;
 	}
 	threshold_cpu_callback = amd_64_threshold_cpu_callback;
-
 	return 0;
 }
+
 device_initcall(threshold_init_device);
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c
deleted file mode 100644
index 2b011d2d8579..000000000000
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Common code for Intel machine checks
- */
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-#include "mce.h"
-
-void intel_init_thermal(struct cpuinfo_x86 *c)
-{
-	unsigned int cpu = smp_processor_id();
-	int tm2 = 0;
-	u32 l, h;
-
-	/* Thermal monitoring depends on ACPI and clock modulation*/
-	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
-		return;
-
-	/*
-	 * First check if its enabled already, in which case there might
-	 * be some SMM goo which handles it, so we can't even put a handler
-	 * since it might be delivered via SMI already:
-	 */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
-	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
-		return;
-	}
-
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
-		tm2 = 1;
-
-	/* Check whether a vector already exists */
-	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
-		       cpu, (h & APIC_VECTOR_MASK));
-		return;
-	}
-
-	/* We'll mask the thermal vector in the lapic till we're ready: */
-	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
-	apic_write(APIC_LVTTHMR, h);
-
-	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT,
-	      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
-
-	intel_set_thermal_handler();
-
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
-	/* Unmask the thermal vector: */
-	l = apic_read(APIC_LVTTHMR);
-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-
-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
-	       cpu, tm2 ? "TM2" : "TM1");
-
-	/* enable thermal throttle processing */
-	atomic_set(&therm_throt_en, 1);
-}
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index f2ef6952c400..65a0fceedcd7 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -16,8 +16,6 @@
 #include
 #include
 
-#include "mce.h"
-
 asmlinkage void smp_thermal_interrupt(void)
 {
 	__u64 msr_val;
@@ -28,13 +26,67 @@ asmlinkage void smp_thermal_interrupt(void)
 	irq_enter();
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
+	if (therm_throt_process(msr_val & 1))
 		mce_log_therm_throt_event(msr_val);
 
 	inc_irq_stat(irq_thermal_count);
 	irq_exit();
 }
 
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int tm2 = 0;
+	unsigned int cpu = smp_processor_id();
+
+	if (!cpu_has(c, X86_FEATURE_ACPI))
+		return;
+
+	if (!cpu_has(c, X86_FEATURE_ACC))
+		return;
+
+	/* first check if TM1 is already enabled by the BIOS, in which
+	 * case there might be some SMM goo which handles it, so we can't even
+	 * put a handler since it might be delivered via SMI already.
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+		return;
+	}
+
+	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
+		tm2 = 1;
+
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal LVT vector (%#x) already "
+		       "installed\n", cpu, (h & APIC_VECTOR_MASK));
+		return;
+	}
+
+	h = THERMAL_APIC_VECTOR;
+	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);
+	apic_write(APIC_LVTTHMR, h);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
+
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+	       cpu, tm2 ? "TM2" : "TM1");
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
+	return;
+}
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -56,9 +108,6 @@ static int cmci_supported(int *banks)
 {
 	u64 cap;
 
-	if (mce_cmci_disabled || mce_ignore_ce)
-		return 0;
-
 	/*
 	 * Vendor check is not strictly needed, but the initial
 	 * initialization is vendor keyed and this
@@ -82,7 +131,7 @@ static int cmci_supported(int *banks)
 static void intel_threshold_interrupt(void)
 {
 	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
-	mce_notify_irq();
+	mce_notify_user();
 }
 
 static void print_update(char *type, int *hdr, int num)
@@ -198,7 +247,7 @@ void cmci_rediscover(int dying)
 		return;
 	cpumask_copy(old, &current->cpus_allowed);
 
-	for_each_online_cpu(cpu) {
+	for_each_online_cpu (cpu) {
 		if (cpu == dying)
 			continue;
 		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c b/trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 70b710420f74..a74af128efc9 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -6,14 +6,15 @@
  * This file contains routines to check for non-fatal MCEs every 15s
  *
  */
-#include
-#include
-#include
-#include
-#include
-#include
+
 #include
+#include
+#include
+#include
+#include
+#include
 #include
+#include
 #include
 #include
@@ -21,9 +22,9 @@
 
 #include "mce.h"
 
-static int firstbank;
+static int firstbank;
 
-#define MCE_RATE	(15*HZ)	/* timer rate is 15s */
+#define MCE_RATE	15*HZ	/* timer rate is 15s */
 
 static void mce_checkregs(void *info)
 {
@@ -33,24 +34,23 @@ static void mce_checkregs(void *info)
 	for (i = firstbank; i < nr_mce_banks; i++) {
 		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
 
-		if (!(high & (1<<31)))
-			continue;
-
-		printk(KERN_INFO "MCE: The hardware reports a non fatal, "
-			"correctable incident occurred on CPU %d.\n",
+		if (high & (1<<31)) {
+			printk(KERN_INFO "MCE: The hardware reports a non "
+				"fatal, correctable incident occurred on "
+				"CPU %d.\n",
 				smp_processor_id());
-
-		printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
-
-		/*
-		 * Scrub the error so we don't pick it up in MCE_RATE
-		 * seconds time:
-		 */
-		wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-
-		/* Serialize: */
-		wmb();
-		add_taint(TAINT_MACHINE_CHECK);
+			printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
+
+			/*
+			 * Scrub the error so we don't pick it up in MCE_RATE
+			 * seconds time.
+			 */
+			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
 	}
 }
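mce_checkregs() keys off bit 31 of the status high word, which is bit 63 (the VAL flag) of the full 64-bit MCi_STATUS register. A decoder for the architectural status layout, fed a sample value:

/* MCi_STATUS field decoder; the input is a fabricated example. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t s = 0xB200000000000000ULL;	/* sample: VAL|UC|EN set */
	printf("VAL=%d OVER=%d UC=%d EN=%d MISCV=%d ADDRV=%d PCC=%d\n",
	       (int)(s >> 63 & 1), (int)(s >> 62 & 1), (int)(s >> 61 & 1),
	       (int)(s >> 60 & 1), (int)(s >> 59 & 1), (int)(s >> 58 & 1),
	       (int)(s >> 57 & 1));
	printf("MCA error code=0x%04x\n", (unsigned)(s & 0xffff));
	return 0;
}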
@@ -77,17 +77,16 @@ static int __init init_nonfatal_mce_checker(void)
 
 	/* Some Athlons misbehave when we frob bank 0 */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-	    boot_cpu_data.x86 == 6)
-		firstbank = 1;
+		boot_cpu_data.x86 == 6)
+			firstbank = 1;
 	else
-		firstbank = 0;
+			firstbank = 0;
 
 	/*
 	 * Check for non-fatal errors every MCE_RATE s
 	 */
 	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
 	printk(KERN_INFO "Machine check exception polling timer started.\n");
-
 	return 0;
 }
 module_init(init_nonfatal_mce_checker);
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/p4.c b/trunk/arch/x86/kernel/cpu/mcheck/p4.c
index 82cee108a2d3..f53bdcbaf382 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/p4.c
@@ -2,17 +2,18 @@
  * P4 specific Machine Check Exception Reporting
  */
-#include
-#include
-#include
 #include
+#include
+#include
+#include
 #include
-#include
 #include
 #include
-#include
 #include
+#include
+
+#include
 
 #include "mce.h"
 
@@ -35,7 +36,6 @@ static int mce_num_extended_msrs;
 
 #ifdef CONFIG_X86_MCE_P4THERMAL
-
 static void unexpected_thermal_interrupt(struct pt_regs *regs)
 {
 	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
@@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
 	add_taint(TAINT_MACHINE_CHECK);
 }
 
-/* P4/Xeon Thermal transition interrupt handler: */
+/* P4/Xeon Thermal transition interrupt handler */
 static void intel_thermal_interrupt(struct pt_regs *regs)
 {
 	__u64 msr_val;
@@ -51,12 +51,11 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
 	ack_APIC_irq();
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
+	therm_throt_process(msr_val & 0x1);
 }
 
-/* Thermal interrupt handler for this CPU setup: */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
-						unexpected_thermal_interrupt;
+/* Thermal interrupt handler for this CPU setup */
+static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
 
 void smp_thermal_interrupt(struct pt_regs *regs)
 {
@@ -66,15 +65,67 @@ void smp_thermal_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
-void intel_set_thermal_handler(void)
+/* P4/Xeon Thermal regulation detect and init */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
 {
+	u32 l, h;
+	unsigned int cpu = smp_processor_id();
+
+	/* Thermal monitoring */
+	if (!cpu_has(c, X86_FEATURE_ACPI))
+		return;	/* -ENODEV */
+
+	/* Clock modulation */
+	if (!cpu_has(c, X86_FEATURE_ACC))
+		return;	/* -ENODEV */
+
+	/* first check if its enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already -zwanem.
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
+				cpu);
+		return;	/* -EBUSY */
+	}
+
+	/* check whether a vector already exists, temporarily masked? */
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
+				"installed\n",
+			cpu, (h & APIC_VECTOR_MASK));
+		return;	/* -EBUSY */
+	}
+
+	/* The temperature transition interrupt handler setup */
+	h = THERMAL_APIC_VECTOR;		/* our delivery vector */
+	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
+	apic_write(APIC_LVTTHMR, h);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+	/* ok we're good to go... */
 	vendor_thermal_interrupt = intel_thermal_interrupt;
-}
 
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
+	return;
+}
 #endif /* CONFIG_X86_MCE_P4THERMAL */
 
+
 /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 {
 	u32 h;
 
@@ -92,9 +143,9 @@ static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 
 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
+	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
-	int recover = 1;
 	int i;
 
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -106,9 +157,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	if (mce_num_extended_msrs > 0) {
 		struct intel_mce_extended_msrs dbg;
-
 		intel_get_extended_msrs(&dbg);
-
 		printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
 			"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
 			"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
@@ -122,7 +171,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-
 			misc[0] = addr[0] = '\0';
 			if (high & (1<<29))
 				recover |= 1;
@@ -148,7 +196,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		panic("Unable to continue");
 
 	printk(KERN_EMERG "Attempting to continue.\n");
-
 	/*
 	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 	 * recoverable/continuable.This will allow BIOS to look at the MSRs
@@ -170,6 +217,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
+
 void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/p5.c b/trunk/arch/x86/kernel/cpu/mcheck/p5.c
index 015f481ab1b0..c9f77ea69edc 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/p5.c
@@ -2,10 +2,11 @@
  * P5 specific Machine Check Exception Reporting
  * (C) Copyright 2002 Alan Cox
  */
-#include
-#include
-#include
+
 #include
+#include
+#include
+#include
 #include
 #include
@@ -14,58 +15,39 @@
 
 #include "mce.h"
 
-/* By default disabled */
-int mce_p5_enable;
-
-/* Machine check handler for Pentium class Intel CPUs: */
+/* Machine check handler for Pentium class Intel */
 static void pentium_machine_check(struct pt_regs *regs, long error_code)
 {
 	u32 loaddr, hi, lotype;
-
 	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
 	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
-
-	printk(KERN_EMERG
-		"CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
-		smp_processor_id(), loaddr, lotype);
-
-	if (lotype & (1<<5)) {
-		printk(KERN_EMERG
-			"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
-			smp_processor_id());
-	}
-
+	printk(KERN_EMERG "CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
+	if (lotype&(1<<5))
+		printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
 	add_taint(TAINT_MACHINE_CHECK);
 }
 
-/* Set up machine check reporting for processors with Intel style MCE: */
+/* Set up machine check reporting for processors with Intel style MCE */
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 
-	/* Check for MCE support: */
+	/*Check for MCE support */
 	if (!cpu_has(c, X86_FEATURE_MCE))
 		return;
 
-#ifdef CONFIG_X86_OLD_MCE
-	/* Default P5 to off as its often misconnected: */
+	/* Default P5 to off as its often misconnected */
 	if (mce_disabled != -1)
 		return;
-#endif
-
 	machine_check_vector = pentium_machine_check;
-	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
-	/* Read registers before enabling: */
+	/* Read registers before enabling */
 	rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
 	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
-	printk(KERN_INFO
-	       "Intel old style machine check architecture supported.\n");
+	printk(KERN_INFO "Intel old style machine check architecture supported.\n");
 
-	/* Enable MCE: */
+	/* Enable MCE */
 	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO
-	       "Intel old style machine check reporting enabled on CPU#%d.\n",
-	       smp_processor_id());
+	printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
 }
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/p6.c b/trunk/arch/x86/kernel/cpu/mcheck/p6.c
index 43c24e667457..2ac52d7b434b 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/p6.c
@@ -2,10 +2,11 @@
  * P6 specific Machine Check Exception Reporting
  * (C) Copyright 2002 Alan Cox
 */
-#include
-#include
-#include
+
 #include
+#include
+#include
+#include
 #include
 #include
@@ -17,9 +18,9 @@
 /* Machine Check Handler For PII/PIII */
 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
+	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
-	int recover = 1;
 	int i;
 
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -34,16 +35,12 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-
-			misc[0] = '\0';
-			addr[0] = '\0';
-
+			misc[0] = addr[0] = '\0';
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
 			high &= ~(1<<31);
-
 			if (high & (1<<27)) {
 				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
 				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -52,7 +49,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
 				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
-
 			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
 		}
@@ -67,17 +63,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	/*
 	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 	 * recoverable/continuable.This will allow BIOS to look at the MSRs
-	 * for errors if the OS could not log the error:
+	 * for errors if the OS could not log the error.
 	 */
 	for (i = 0; i < nr_mce_banks; i++) {
 		unsigned int msr;
-
 		msr = MSR_IA32_MC0_STATUS+i*4;
 		rdmsr(msr, low, high);
 		if (high & (1<<31)) {
-			/* Clear it: */
+			/* Clear it */
 			wrmsr(msr, 0UL, 0UL);
-			/* Serialize: */
+			/* Serialize */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
 		}
@@ -86,7 +81,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
-/* Set up machine check reporting for processors with Intel style MCE: */
+/* Set up machine check reporting for processors with Intel style MCE */
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -102,7 +97,6 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 
 	/* Ok machine check is available */
 	machine_check_vector = intel_machine_check;
-	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
 	printk(KERN_INFO "Intel machine check architecture supported.\n");
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 7b1ae2e20ba5..d5ae2243f0b9 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -1,7 +1,7 @@
 /*
+ *
  * Thermal throttle event support code (such as syslog messaging and rate
  * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
- *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
@@ -13,43 +13,43 @@
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
-#include
-#include
+
 #include
 #include
 #include
-
+#include
+#include
+#include
 #include
 
 /* How long to wait between reporting thermal events */
-#define CHECK_INTERVAL		(300 * HZ)
+#define CHECK_INTERVAL              (300 * HZ)
 
 static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-
-atomic_t therm_throt_en	= ATOMIC_INIT(0);
+atomic_t therm_throt_en = ATOMIC_INIT(0);
 
 #ifdef CONFIG_SYSFS
-#define define_therm_throt_sysdev_one_ro(_name)				\
-	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
-
-#define define_therm_throt_sysdev_show_func(name)			\
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,	\
-					struct sysdev_attribute *attr,	\
-					      char *buf)		\
-{									\
-	unsigned int cpu = dev->id;					\
-	ssize_t ret;							\
-									\
-	preempt_disable();	/* CPU hotplug */			\
-	if (cpu_online(cpu))						\
-		ret = sprintf(buf, "%lu\n",				\
-			      per_cpu(thermal_throttle_##name, cpu));	\
-	else								\
-		ret = 0;						\
-	preempt_enable();						\
-									\
-	return ret;							\
+#define define_therm_throt_sysdev_one_ro(_name)                              \
+	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
+
+#define define_therm_throt_sysdev_show_func(name)                            \
+static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,        \
+					struct sysdev_attribute *attr,	      \
+					      char *buf)                      \
+{                                                                             \
+	unsigned int cpu = dev->id;                                           \
+	ssize_t ret;                                                          \
+                                                                              \
+	preempt_disable();              /* CPU hotplug */                     \
+	if (cpu_online(cpu))                                                  \
+		ret = sprintf(buf, "%lu\n",                                   \
+			      per_cpu(thermal_throttle_##name, cpu));         \
+	else                                                                  \
+		ret = 0;                                                      \
+	preempt_enable();                                                     \
+                                                                              \
+	return ret;                                                           \
}
 
 define_therm_throt_sysdev_show_func(count);
@@ -61,8 +61,8 @@ static struct attribute *thermal_throttle_attrs[] = {
 };
 
 static struct attribute_group thermal_throttle_attr_group = {
-	.attrs	= thermal_throttle_attrs,
-	.name	= "thermal_throttle"
+	.attrs = thermal_throttle_attrs,
+	.name = "thermal_throttle"
 };
 #endif /* CONFIG_SYSFS */
@@
-110,11 +110,10 @@ int therm_throt_process(int curr) } #ifdef CONFIG_SYSFS -/* Add/Remove thermal_throttle interface for CPU device: */ +/* Add/Remove thermal_throttle interface for CPU device */ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) { - return sysfs_create_group(&sys_dev->kobj, - &thermal_throttle_attr_group); + return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); } static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) @@ -122,21 +121,19 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); } -/* Mutex protecting device creation against CPU hotplug: */ +/* Mutex protecting device creation against CPU hotplug */ static DEFINE_MUTEX(therm_cpu_lock); /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int -thermal_throttle_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; struct sys_device *sys_dev; int err = 0; sys_dev = get_cpu_sysdev(cpu); - switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: diff --git a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c b/trunk/arch/x86/kernel/cpu/mcheck/threshold.c index d746df2909c9..23ee9e730f78 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/threshold.c @@ -17,7 +17,7 @@ static void default_threshold_interrupt(void) void (*mce_threshold_vector)(void) = default_threshold_interrupt; -asmlinkage void smp_threshold_interrupt(void) +asmlinkage void mce_threshold_interrupt(void) { exit_idle(); irq_enter(); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/winchip.c b/trunk/arch/x86/kernel/cpu/mcheck/winchip.c index 81b02487090b..2a043d89811d 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/winchip.c @@ -2,10 +2,11 @@ * IDT Winchip specific Machine Check Exception Reporting * (C) Copyright 2002 Alan Cox */ -#include -#include -#include + #include +#include +#include +#include #include #include @@ -13,7 +14,7 @@ #include "mce.h" -/* Machine check handler for WinChip C6: */ +/* Machine check handler for WinChip C6 */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); @@ -24,18 +25,12 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) void winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; - machine_check_vector = winchip_machine_check; - /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); - rdmsr(MSR_IDT_FCR1, lo, hi); lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ lo &= ~(1<<4); /* Enable MCE */ wrmsr(MSR_IDT_FCR1, lo, hi); - set_in_cr4(X86_CR4_MCE); - - printk(KERN_INFO - "Winchip machine check reporting enabled on CPU#0.\n"); + printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); } diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index de74f0a3e0ed..a4742a340d8d 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -963,8 +963,6 @@ END(\sym) #ifdef CONFIG_SMP apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt -apicinterrupt REBOOT_VECTOR \ - reboot_interrupt smp_reboot_interrupt #endif #ifdef CONFIG_X86_UV 
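Aside: the therm_throt.c hunks above register a sysfs attribute group named "thermal_throttle" with a read-only per-CPU "count" attribute. A minimal userspace sketch for reading that counter follows; the exact /sys path is an assumption derived from the sysdev registration above, so verify it against the running kernel:

    /* Read the per-CPU thermal throttle event counter exported by the
     * "thermal_throttle" attribute group registered in therm_throt.c.
     * The path below is assumed from the sysdev layout; adjust as needed. */
    #include <stdio.h>

    int main(void)
    {
        const char *path =
            "/sys/devices/system/cpu/cpu0/thermal_throttle/count";
        unsigned long count;
        FILE *f = fopen(path, "r");

        if (!f) {
            perror(path);
            return 1;
        }
        if (fscanf(f, "%lu", &count) == 1)
            printf("CPU0 thermal throttle events: %lu\n", count);
        fclose(f);
        return 0;
    }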
@@ -996,15 +994,10 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ #endif apicinterrupt THRESHOLD_APIC_VECTOR \ - threshold_interrupt smp_threshold_interrupt + threshold_interrupt mce_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ thermal_interrupt smp_thermal_interrupt -#ifdef CONFIG_X86_MCE -apicinterrupt MCE_SELF_VECTOR \ - mce_self_interrupt smp_mce_self_interrupt -#endif - #ifdef CONFIG_SMP apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ call_function_single_interrupt smp_call_function_single_interrupt @@ -1386,7 +1379,7 @@ errorentry xen_stack_segment do_stack_segment errorentry general_protection do_general_protection errorentry page_fault do_page_fault #ifdef CONFIG_X86_MCE -paranoidzeroentry machine_check *machine_check_vector(%rip) +paranoidzeroentry machine_check do_machine_check #endif /* diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index b0cdde6932f5..38287b5f116e 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -12,7 +12,6 @@ #include #include #include -#include #include atomic_t irq_err_count; @@ -97,22 +96,12 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); -# ifdef CONFIG_X86_MCE_THRESHOLD +# ifdef CONFIG_X86_64 seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); # endif -#endif -#ifdef CONFIG_X86_NEW_MCE - seq_printf(p, "%*s: ", prec, "MCE"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); - seq_printf(p, " Machine check exceptions\n"); - seq_printf(p, "%*s: ", prec, "MCP"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); - seq_printf(p, " Machine check polls\n"); #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) @@ -196,13 +185,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #endif #ifdef CONFIG_X86_MCE sum += irq_stats(cpu)->irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +# ifdef CONFIG_X86_64 sum += irq_stats(cpu)->irq_threshold_count; # endif -#endif -#ifdef CONFIG_X86_NEW_MCE - sum += per_cpu(mce_exception_count, cpu); - sum += per_cpu(mce_poll_count, cpu); #endif return sum; } diff --git a/trunk/arch/x86/kernel/irqinit.c b/trunk/arch/x86/kernel/irqinit.c index 696f0e475c2d..267c6624c77f 100644 --- a/trunk/arch/x86/kernel/irqinit.c +++ b/trunk/arch/x86/kernel/irqinit.c @@ -173,9 +173,6 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); - - /* IPI used for rebooting/stopping */ - alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); #endif #endif /* CONFIG_SMP */ } diff --git a/trunk/arch/x86/kernel/signal.c b/trunk/arch/x86/kernel/signal.c index 4c578751e94e..0a813b17b172 100644 --- a/trunk/arch/x86/kernel/signal.c +++ b/trunk/arch/x86/kernel/signal.c @@ -24,11 +24,11 @@ #include #include #include -#include #ifdef CONFIG_X86_64 #include #include +#include #endif /* CONFIG_X86_64 */ #include @@ -856,10 +856,10 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { -#ifdef CONFIG_X86_NEW_MCE +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) /* notify userspace of pending MCEs */ if 
(thread_info_flags & _TIF_MCE_NOTIFY) - mce_notify_process(); + mce_notify_user(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ /* deal with pending signal delivery */ diff --git a/trunk/arch/x86/kernel/smp.c b/trunk/arch/x86/kernel/smp.c index ec1de97600e7..28f5fb495a66 100644 --- a/trunk/arch/x86/kernel/smp.c +++ b/trunk/arch/x86/kernel/smp.c @@ -150,40 +150,14 @@ void native_send_call_func_ipi(const struct cpumask *mask) * this function calls the 'stop' function on all other CPUs in the system. */ -asmlinkage void smp_reboot_interrupt(void) -{ - ack_APIC_irq(); - irq_enter(); - stop_this_cpu(NULL); - irq_exit(); -} - static void native_smp_send_stop(void) { unsigned long flags; - unsigned long wait; if (reboot_force) return; - /* - * Use an own vector here because smp_call_function - * does lots of things not suitable in a panic situation. - * On most systems we could also use an NMI here, - * but there are a few systems around where NMI - * is problematic so stay with an non NMI for now - * (this implies we cannot stop CPUs spinning with irq off - * currently) - */ - if (num_online_cpus() > 1) { - apic->send_IPI_allbutself(REBOOT_VECTOR); - - /* Don't wait longer than a second */ - wait = USEC_PER_SEC; - while (num_online_cpus() > 1 && wait--) - udelay(1); - } - + smp_call_function(stop_this_cpu, NULL, 0); local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index 1e1e27b7d438..07d60c870ce2 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -798,15 +798,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) return new_kesp; } -#endif - +#else asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) { } -asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) +asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) { } +#endif /* * 'math_state_restore()' saves the current math information in the diff --git a/trunk/crypto/Kconfig b/trunk/crypto/Kconfig index 4dfdd03e708f..74d0e622a515 100644 --- a/trunk/crypto/Kconfig +++ b/trunk/crypto/Kconfig @@ -241,11 +241,6 @@ config CRYPTO_XTS key size 256, 384 or 512 bits. This implementation currently can't handle a sectorsize which is not a multiple of 16 bytes. -config CRYPTO_FPU - tristate - select CRYPTO_BLKCIPHER - select CRYPTO_MANAGER - comment "Hash modes" config CRYPTO_HMAC @@ -491,7 +486,6 @@ config CRYPTO_AES_NI_INTEL select CRYPTO_AES_X86_64 select CRYPTO_CRYPTD select CRYPTO_ALGAPI - select CRYPTO_FPU help Use Intel AES-NI instructions for AES algorithm. @@ -511,10 +505,6 @@ config CRYPTO_AES_NI_INTEL See for more information. - In addition to AES cipher algorithm support, the - acceleration for some popular block cipher mode is supported - too, including ECB, CBC, CTR, LRW, PCBC, XTS. 
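Aside: the traps.c hunk above keeps weak no-op definitions of smp_thermal_interrupt() and mce_threshold_interrupt() so that configurations without the corresponding MCE code still link, while a strong definition elsewhere overrides them. A self-contained sketch of that weak-symbol pattern, using a hypothetical do_event() handler and the GCC/Clang __attribute__((weak)) extension:

    /* Weak-symbol fallback, as used for the interrupt stubs above.
     * If no other translation unit defines do_event(), this no-op is
     * linked in; a strong definition elsewhere silently wins. */
    #include <stdio.h>

    void __attribute__((weak)) do_event(void)
    {
        /* default: nothing to do */
    }

    int main(void)
    {
        do_event();   /* calls the weak stub unless overridden */
        printf("do_event() returned\n");
        return 0;
    }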
- config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" select CRYPTO_ALGAPI diff --git a/trunk/crypto/algboss.c b/trunk/crypto/algboss.c index 9908dd830c26..6906f92aeac0 100644 --- a/trunk/crypto/algboss.c +++ b/trunk/crypto/algboss.c @@ -280,13 +280,29 @@ static struct notifier_block cryptomgr_notifier = { static int __init cryptomgr_init(void) { - return crypto_register_notifier(&cryptomgr_notifier); + int err; + + err = testmgr_init(); + if (err) + return err; + + err = crypto_register_notifier(&cryptomgr_notifier); + if (err) + goto free_testmgr; + + return 0; + +free_testmgr: + testmgr_exit(); + return err; } static void __exit cryptomgr_exit(void) { int err = crypto_unregister_notifier(&cryptomgr_notifier); BUG_ON(err); + + testmgr_exit(); } subsys_initcall(cryptomgr_init); diff --git a/trunk/crypto/api.c b/trunk/crypto/api.c index d5944f92b416..fd2545decb28 100644 --- a/trunk/crypto/api.c +++ b/trunk/crypto/api.c @@ -217,11 +217,14 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) alg = crypto_alg_lookup(name, type, mask); if (!alg) { - request_module("%s", name); + char tmp[CRYPTO_MAX_ALG_NAME]; + + request_module(name); if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask & - CRYPTO_ALG_NEED_FALLBACK)) - request_module("%s-all", name); + CRYPTO_ALG_NEED_FALLBACK) && + snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp)) + request_module(tmp); alg = crypto_alg_lookup(name, type, mask); } @@ -577,17 +580,20 @@ EXPORT_SYMBOL_GPL(crypto_alloc_tfm); void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm) { struct crypto_alg *alg; + int size; if (unlikely(!mem)) return; alg = tfm->__crt_alg; + size = ksize(mem); if (!tfm->exit && alg->cra_exit) alg->cra_exit(tfm); crypto_exit_ops(tfm); crypto_mod_put(alg); - kzfree(mem); + memset(mem, 0, size); + kfree(mem); } EXPORT_SYMBOL_GPL(crypto_destroy_tfm); diff --git a/trunk/crypto/cryptd.c b/trunk/crypto/cryptd.c index ae5fa99d5d36..d14b22658d7a 100644 --- a/trunk/crypto/cryptd.c +++ b/trunk/crypto/cryptd.c @@ -586,24 +586,20 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; - struct crypto_tfm *tfm; + struct crypto_ablkcipher *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) return ERR_PTR(-EINVAL); - type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); - type |= CRYPTO_ALG_TYPE_BLKCIPHER; - mask &= ~CRYPTO_ALG_TYPE_MASK; - mask |= (CRYPTO_ALG_GENIV | CRYPTO_ALG_TYPE_BLKCIPHER_MASK); - tfm = crypto_alloc_base(cryptd_alg_name, type, mask); + tfm = crypto_alloc_ablkcipher(cryptd_alg_name, type, mask); if (IS_ERR(tfm)) return ERR_CAST(tfm); - if (tfm->__crt_alg->cra_module != THIS_MODULE) { - crypto_free_tfm(tfm); + if (crypto_ablkcipher_tfm(tfm)->__crt_alg->cra_module != THIS_MODULE) { + crypto_free_ablkcipher(tfm); return ERR_PTR(-EINVAL); } - return __cryptd_ablkcipher_cast(__crypto_ablkcipher_cast(tfm)); + return __cryptd_ablkcipher_cast(tfm); } EXPORT_SYMBOL_GPL(cryptd_alloc_ablkcipher); diff --git a/trunk/crypto/internal.h b/trunk/crypto/internal.h index 113579a82dff..fc76e1f37fc3 100644 --- a/trunk/crypto/internal.h +++ b/trunk/crypto/internal.h @@ -121,6 +121,9 @@ int crypto_register_notifier(struct notifier_block *nb); int crypto_unregister_notifier(struct notifier_block *nb); int crypto_probing_notify(unsigned long val, void *v); +int __init testmgr_init(void); +void testmgr_exit(void); + static inline void crypto_alg_put(struct crypto_alg *alg) { if 
(atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) diff --git a/trunk/crypto/pcompress.c b/trunk/crypto/pcompress.c index bcadc03726b7..ca9a4af91efe 100644 --- a/trunk/crypto/pcompress.c +++ b/trunk/crypto/pcompress.c @@ -26,7 +26,6 @@ #include #include -#include #include "internal.h" diff --git a/trunk/crypto/tcrypt.c b/trunk/crypto/tcrypt.c index d59ba5079d14..c3c9124209a1 100644 --- a/trunk/crypto/tcrypt.c +++ b/trunk/crypto/tcrypt.c @@ -27,7 +27,6 @@ #include #include #include "tcrypt.h" -#include "internal.h" /* * Need slab memory for testing (size in number of pages). @@ -397,16 +396,16 @@ static void test_hash_speed(const char *algo, unsigned int sec, struct scatterlist sg[TVMEMSIZE]; struct crypto_hash *tfm; struct hash_desc desc; - static char output[1024]; + char output[1024]; int i; int ret; - printk(KERN_INFO "\ntesting speed of %s\n", algo); + printk("\ntesting speed of %s\n", algo); tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { - printk(KERN_ERR "failed to load transform for %s: %ld\n", algo, + printk("failed to load transform for %s: %ld\n", algo, PTR_ERR(tfm)); return; } @@ -415,7 +414,7 @@ static void test_hash_speed(const char *algo, unsigned int sec, desc.flags = 0; if (crypto_hash_digestsize(tfm) > sizeof(output)) { - printk(KERN_ERR "digestsize(%u) > outputbuffer(%zu)\n", + printk("digestsize(%u) > outputbuffer(%zu)\n", crypto_hash_digestsize(tfm), sizeof(output)); goto out; } @@ -428,14 +427,12 @@ static void test_hash_speed(const char *algo, unsigned int sec, for (i = 0; speed[i].blen != 0; i++) { if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { - printk(KERN_ERR - "template (%u) too big for tvmem (%lu)\n", + printk("template (%u) too big for tvmem (%lu)\n", speed[i].blen, TVMEMSIZE * PAGE_SIZE); goto out; } - printk(KERN_INFO "test%3u " - "(%5u byte blocks,%5u bytes per update,%4u updates): ", + printk("test%3u (%5u byte blocks,%5u bytes per update,%4u updates): ", i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); if (sec) @@ -446,7 +443,7 @@ static void test_hash_speed(const char *algo, unsigned int sec, speed[i].plen, output); if (ret) { - printk(KERN_ERR "hashing failed ret=%d\n", ret); + printk("hashing failed ret=%d\n", ret); break; } } @@ -469,255 +466,239 @@ static void test_available(void) static inline int tcrypt_test(const char *alg) { - int ret; - - ret = alg_test(alg, alg, 0, 0); - /* non-fips algs return -EINVAL in fips mode */ - if (fips_enabled && ret == -EINVAL) - ret = 0; - return ret; + return alg_test(alg, alg, 0, 0); } -static int do_test(int m) +static void do_test(int m) { int i; - int ret = 0; switch (m) { case 0: for (i = 1; i < 200; i++) - ret += do_test(i); + do_test(i); break; case 1: - ret += tcrypt_test("md5"); + tcrypt_test("md5"); break; case 2: - ret += tcrypt_test("sha1"); + tcrypt_test("sha1"); break; case 3: - ret += tcrypt_test("ecb(des)"); - ret += tcrypt_test("cbc(des)"); + tcrypt_test("ecb(des)"); + tcrypt_test("cbc(des)"); break; case 4: - ret += tcrypt_test("ecb(des3_ede)"); - ret += tcrypt_test("cbc(des3_ede)"); + tcrypt_test("ecb(des3_ede)"); + tcrypt_test("cbc(des3_ede)"); break; case 5: - ret += tcrypt_test("md4"); + tcrypt_test("md4"); break; case 6: - ret += tcrypt_test("sha256"); + tcrypt_test("sha256"); break; case 7: - ret += tcrypt_test("ecb(blowfish)"); - ret += tcrypt_test("cbc(blowfish)"); + tcrypt_test("ecb(blowfish)"); + tcrypt_test("cbc(blowfish)"); break; case 8: - ret += tcrypt_test("ecb(twofish)"); - ret += tcrypt_test("cbc(twofish)"); + 
tcrypt_test("ecb(twofish)"); + tcrypt_test("cbc(twofish)"); break; case 9: - ret += tcrypt_test("ecb(serpent)"); + tcrypt_test("ecb(serpent)"); break; case 10: - ret += tcrypt_test("ecb(aes)"); - ret += tcrypt_test("cbc(aes)"); - ret += tcrypt_test("lrw(aes)"); - ret += tcrypt_test("xts(aes)"); - ret += tcrypt_test("ctr(aes)"); - ret += tcrypt_test("rfc3686(ctr(aes))"); + tcrypt_test("ecb(aes)"); + tcrypt_test("cbc(aes)"); + tcrypt_test("lrw(aes)"); + tcrypt_test("xts(aes)"); + tcrypt_test("rfc3686(ctr(aes))"); break; case 11: - ret += tcrypt_test("sha384"); + tcrypt_test("sha384"); break; case 12: - ret += tcrypt_test("sha512"); + tcrypt_test("sha512"); break; case 13: - ret += tcrypt_test("deflate"); + tcrypt_test("deflate"); break; case 14: - ret += tcrypt_test("ecb(cast5)"); + tcrypt_test("ecb(cast5)"); break; case 15: - ret += tcrypt_test("ecb(cast6)"); + tcrypt_test("ecb(cast6)"); break; case 16: - ret += tcrypt_test("ecb(arc4)"); + tcrypt_test("ecb(arc4)"); break; case 17: - ret += tcrypt_test("michael_mic"); + tcrypt_test("michael_mic"); break; case 18: - ret += tcrypt_test("crc32c"); + tcrypt_test("crc32c"); break; case 19: - ret += tcrypt_test("ecb(tea)"); + tcrypt_test("ecb(tea)"); break; case 20: - ret += tcrypt_test("ecb(xtea)"); + tcrypt_test("ecb(xtea)"); break; case 21: - ret += tcrypt_test("ecb(khazad)"); + tcrypt_test("ecb(khazad)"); break; case 22: - ret += tcrypt_test("wp512"); + tcrypt_test("wp512"); break; case 23: - ret += tcrypt_test("wp384"); + tcrypt_test("wp384"); break; case 24: - ret += tcrypt_test("wp256"); + tcrypt_test("wp256"); break; case 25: - ret += tcrypt_test("ecb(tnepres)"); + tcrypt_test("ecb(tnepres)"); break; case 26: - ret += tcrypt_test("ecb(anubis)"); - ret += tcrypt_test("cbc(anubis)"); + tcrypt_test("ecb(anubis)"); + tcrypt_test("cbc(anubis)"); break; case 27: - ret += tcrypt_test("tgr192"); + tcrypt_test("tgr192"); break; case 28: - ret += tcrypt_test("tgr160"); + tcrypt_test("tgr160"); break; case 29: - ret += tcrypt_test("tgr128"); + tcrypt_test("tgr128"); break; case 30: - ret += tcrypt_test("ecb(xeta)"); + tcrypt_test("ecb(xeta)"); break; case 31: - ret += tcrypt_test("pcbc(fcrypt)"); + tcrypt_test("pcbc(fcrypt)"); break; case 32: - ret += tcrypt_test("ecb(camellia)"); - ret += tcrypt_test("cbc(camellia)"); + tcrypt_test("ecb(camellia)"); + tcrypt_test("cbc(camellia)"); break; case 33: - ret += tcrypt_test("sha224"); + tcrypt_test("sha224"); break; case 34: - ret += tcrypt_test("salsa20"); + tcrypt_test("salsa20"); break; case 35: - ret += tcrypt_test("gcm(aes)"); + tcrypt_test("gcm(aes)"); break; case 36: - ret += tcrypt_test("lzo"); + tcrypt_test("lzo"); break; case 37: - ret += tcrypt_test("ccm(aes)"); + tcrypt_test("ccm(aes)"); break; case 38: - ret += tcrypt_test("cts(cbc(aes))"); + tcrypt_test("cts(cbc(aes))"); break; case 39: - ret += tcrypt_test("rmd128"); + tcrypt_test("rmd128"); break; case 40: - ret += tcrypt_test("rmd160"); + tcrypt_test("rmd160"); break; case 41: - ret += tcrypt_test("rmd256"); + tcrypt_test("rmd256"); break; case 42: - ret += tcrypt_test("rmd320"); + tcrypt_test("rmd320"); break; case 43: - ret += tcrypt_test("ecb(seed)"); + tcrypt_test("ecb(seed)"); break; case 44: - ret += tcrypt_test("zlib"); - break; - - case 45: - ret += tcrypt_test("rfc4309(ccm(aes))"); + tcrypt_test("zlib"); break; case 100: - ret += tcrypt_test("hmac(md5)"); + tcrypt_test("hmac(md5)"); break; case 101: - ret += tcrypt_test("hmac(sha1)"); + tcrypt_test("hmac(sha1)"); break; case 102: - ret += tcrypt_test("hmac(sha256)"); + 
tcrypt_test("hmac(sha256)"); break; case 103: - ret += tcrypt_test("hmac(sha384)"); + tcrypt_test("hmac(sha384)"); break; case 104: - ret += tcrypt_test("hmac(sha512)"); + tcrypt_test("hmac(sha512)"); break; case 105: - ret += tcrypt_test("hmac(sha224)"); + tcrypt_test("hmac(sha224)"); break; case 106: - ret += tcrypt_test("xcbc(aes)"); + tcrypt_test("xcbc(aes)"); break; case 107: - ret += tcrypt_test("hmac(rmd128)"); + tcrypt_test("hmac(rmd128)"); break; case 108: - ret += tcrypt_test("hmac(rmd160)"); - break; - - case 150: - ret += tcrypt_test("ansi_cprng"); + tcrypt_test("hmac(rmd160)"); break; case 200: @@ -881,8 +862,6 @@ static int do_test(int m) test_available(); break; } - - return ret; } static int __init tcrypt_mod_init(void) @@ -896,21 +875,15 @@ static int __init tcrypt_mod_init(void) goto err_free_tv; } - err = do_test(mode); - if (err) { - printk(KERN_ERR "tcrypt: one or more tests failed!\n"); - goto err_free_tv; - } + do_test(mode); - /* We intentionaly return -EAGAIN to prevent keeping the module, - * unless we're running in fips mode. It does all its work from - * init() and doesn't offer any runtime functionality, but in - * the fips case, checking for a successful load is helpful. + /* We intentionaly return -EAGAIN to prevent keeping + * the module. It does all its work from init() + * and doesn't offer any runtime functionality * => we don't need it in the memory, do we? * -- mludvig */ - if (!fips_enabled) - err = -EAGAIN; + err = -EAGAIN; err_free_tv: for (i = 0; i < TVMEMSIZE && tvmem[i]; i++) diff --git a/trunk/crypto/testmgr.c b/trunk/crypto/testmgr.c index e9e9d84293b9..b50c3c6b17a2 100644 --- a/trunk/crypto/testmgr.c +++ b/trunk/crypto/testmgr.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "internal.h" #include "testmgr.h" @@ -85,16 +84,10 @@ struct hash_test_suite { unsigned int count; }; -struct cprng_test_suite { - struct cprng_testvec *vecs; - unsigned int count; -}; - struct alg_test_desc { const char *alg; int (*test)(const struct alg_test_desc *desc, const char *driver, u32 type, u32 mask); - int fips_allowed; /* set if alg is allowed in fips mode */ union { struct aead_test_suite aead; @@ -102,12 +95,14 @@ struct alg_test_desc { struct comp_test_suite comp; struct pcomp_test_suite pcomp; struct hash_test_suite hash; - struct cprng_test_suite cprng; } suite; }; static unsigned int IDX[8] = { IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 }; +static char *xbuf[XBUFSIZE]; +static char *axbuf[XBUFSIZE]; + static void hexdump(unsigned char *buf, unsigned int len) { print_hex_dump(KERN_CONT, "", DUMP_PREFIX_OFFSET, @@ -126,33 +121,6 @@ static void tcrypt_complete(struct crypto_async_request *req, int err) complete(&res->completion); } -static int testmgr_alloc_buf(char *buf[XBUFSIZE]) -{ - int i; - - for (i = 0; i < XBUFSIZE; i++) { - buf[i] = (void *)__get_free_page(GFP_KERNEL); - if (!buf[i]) - goto err_free_buf; - } - - return 0; - -err_free_buf: - while (i-- > 0) - free_page((unsigned long)buf[i]); - - return -ENOMEM; -} - -static void testmgr_free_buf(char *buf[XBUFSIZE]) -{ - int i; - - for (i = 0; i < XBUFSIZE; i++) - free_page((unsigned long)buf[i]); -} - static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, unsigned int tcount) { @@ -162,12 +130,8 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, char result[64]; struct ahash_request *req; struct tcrypt_result tresult; + int ret; void *hash_buff; - char *xbuf[XBUFSIZE]; - int ret = -ENOMEM; - - if (testmgr_alloc_buf(xbuf)) - 
goto out_nobuf; init_completion(&tresult.completion); @@ -175,25 +139,17 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, if (!req) { printk(KERN_ERR "alg: hash: Failed to allocate request for " "%s\n", algo); + ret = -ENOMEM; goto out_noreq; } ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, tcrypt_complete, &tresult); - j = 0; for (i = 0; i < tcount; i++) { - if (template[i].np) - continue; - - j++; memset(result, 0, 64); hash_buff = xbuf[0]; - ret = -EINVAL; - if (WARN_ON(template[i].psize > PAGE_SIZE)) - goto out; - memcpy(hash_buff, template[i].plaintext, template[i].psize); sg_init_one(&sg[0], hash_buff, template[i].psize); @@ -203,7 +159,7 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, template[i].ksize); if (ret) { printk(KERN_ERR "alg: hash: setkey failed on " - "test %d for %s: ret=%d\n", j, algo, + "test %d for %s: ret=%d\n", i + 1, algo, -ret); goto out; } @@ -225,14 +181,14 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, /* fall through */ default: printk(KERN_ERR "alg: hash: digest failed on test %d " - "for %s: ret=%d\n", j, algo, -ret); + "for %s: ret=%d\n", i + 1, algo, -ret); goto out; } if (memcmp(result, template[i].digest, crypto_ahash_digestsize(tfm))) { printk(KERN_ERR "alg: hash: Test %d failed for %s\n", - j, algo); + i + 1, algo); hexdump(result, crypto_ahash_digestsize(tfm)); ret = -EINVAL; goto out; @@ -247,11 +203,7 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, temp = 0; sg_init_table(sg, template[i].np); - ret = -EINVAL; for (k = 0; k < template[i].np; k++) { - if (WARN_ON(offset_in_page(IDX[k]) + - template[i].tap[k] > PAGE_SIZE)) - goto out; sg_set_buf(&sg[k], memcpy(xbuf[IDX[k] >> PAGE_SHIFT] + offset_in_page(IDX[k]), @@ -313,8 +265,6 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, out: ahash_request_free(req); out_noreq: - testmgr_free_buf(xbuf); -out_nobuf: return ret; } @@ -323,7 +273,7 @@ static int test_aead(struct crypto_aead *tfm, int enc, { const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)); unsigned int i, j, k, n, temp; - int ret = -ENOMEM; + int ret = 0; char *q; char *key; struct aead_request *req; @@ -335,13 +285,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, void *input; void *assoc; char iv[MAX_IVLEN]; - char *xbuf[XBUFSIZE]; - char *axbuf[XBUFSIZE]; - - if (testmgr_alloc_buf(xbuf)) - goto out_noxbuf; - if (testmgr_alloc_buf(axbuf)) - goto out_noaxbuf; if (enc == ENCRYPT) e = "encryption"; @@ -354,6 +297,7 @@ static int test_aead(struct crypto_aead *tfm, int enc, if (!req) { printk(KERN_ERR "alg: aead: Failed to allocate request for " "%s\n", algo); + ret = -ENOMEM; goto out; } @@ -370,11 +314,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, input = xbuf[0]; assoc = axbuf[0]; - ret = -EINVAL; - if (WARN_ON(template[i].ilen > PAGE_SIZE || - template[i].alen > PAGE_SIZE)) - goto out; - memcpy(input, template[i].input, template[i].ilen); memcpy(assoc, template[i].assoc, template[i].alen); if (template[i].iv) @@ -424,16 +363,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, switch (ret) { case 0: - if (template[i].novrfy) { - /* verification was supposed to fail */ - printk(KERN_ERR "alg: aead: %s failed " - "on test %d for %s: ret was 0, " - "expected -EBADMSG\n", - e, j, algo); - /* so really, we got a bad message */ - ret = -EBADMSG; - goto out; - } break; case -EINPROGRESS: case -EBUSY: @@ -443,10 +372,6 @@ static int 
test_aead(struct crypto_aead *tfm, int enc, INIT_COMPLETION(result.completion); break; } - case -EBADMSG: - if (template[i].novrfy) - /* verification failure was expected */ - continue; /* fall through */ default: printk(KERN_ERR "alg: aead: %s failed on test " @@ -534,11 +459,7 @@ static int test_aead(struct crypto_aead *tfm, int enc, } sg_init_table(asg, template[i].anp); - ret = -EINVAL; for (k = 0, temp = 0; k < template[i].anp; k++) { - if (WARN_ON(offset_in_page(IDX[k]) + - template[i].atap[k] > PAGE_SIZE)) - goto out; sg_set_buf(&asg[k], memcpy(axbuf[IDX[k] >> PAGE_SHIFT] + offset_in_page(IDX[k]), @@ -560,16 +481,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, switch (ret) { case 0: - if (template[i].novrfy) { - /* verification was supposed to fail */ - printk(KERN_ERR "alg: aead: %s failed " - "on chunk test %d for %s: ret " - "was 0, expected -EBADMSG\n", - e, j, algo); - /* so really, we got a bad message */ - ret = -EBADMSG; - goto out; - } break; case -EINPROGRESS: case -EBUSY: @@ -579,10 +490,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, INIT_COMPLETION(result.completion); break; } - case -EBADMSG: - if (template[i].novrfy) - /* verification failure was expected */ - continue; /* fall through */ default: printk(KERN_ERR "alg: aead: %s failed on " @@ -639,10 +546,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, out: aead_request_free(req); - testmgr_free_buf(axbuf); -out_noaxbuf: - testmgr_free_buf(xbuf); -out_noxbuf: return ret; } @@ -651,14 +554,10 @@ static int test_cipher(struct crypto_cipher *tfm, int enc, { const char *algo = crypto_tfm_alg_driver_name(crypto_cipher_tfm(tfm)); unsigned int i, j, k; + int ret; char *q; const char *e; void *data; - char *xbuf[XBUFSIZE]; - int ret = -ENOMEM; - - if (testmgr_alloc_buf(xbuf)) - goto out_nobuf; if (enc == ENCRYPT) e = "encryption"; @@ -672,10 +571,6 @@ static int test_cipher(struct crypto_cipher *tfm, int enc, j++; - ret = -EINVAL; - if (WARN_ON(template[i].ilen > PAGE_SIZE)) - goto out; - data = xbuf[0]; memcpy(data, template[i].input, template[i].ilen); @@ -716,8 +611,6 @@ static int test_cipher(struct crypto_cipher *tfm, int enc, ret = 0; out: - testmgr_free_buf(xbuf); -out_nobuf: return ret; } @@ -727,6 +620,7 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, const char *algo = crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm)); unsigned int i, j, k, n, temp; + int ret; char *q; struct ablkcipher_request *req; struct scatterlist sg[8]; @@ -734,11 +628,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, struct tcrypt_result result; void *data; char iv[MAX_IVLEN]; - char *xbuf[XBUFSIZE]; - int ret = -ENOMEM; - - if (testmgr_alloc_buf(xbuf)) - goto out_nobuf; if (enc == ENCRYPT) e = "encryption"; @@ -751,6 +640,7 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, if (!req) { printk(KERN_ERR "alg: skcipher: Failed to allocate request " "for %s\n", algo); + ret = -ENOMEM; goto out; } @@ -767,10 +657,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, if (!(template[i].np)) { j++; - ret = -EINVAL; - if (WARN_ON(template[i].ilen > PAGE_SIZE)) - goto out; - data = xbuf[0]; memcpy(data, template[i].input, template[i].ilen); @@ -939,8 +825,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, out: ablkcipher_request_free(req); - testmgr_free_buf(xbuf); -out_nobuf: return ret; } @@ -953,8 +837,7 @@ static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate, int ret; for (i = 0; i < ctcount; 
i++) { - int ilen; - unsigned int dlen = COMP_BUF_SIZE; + int ilen, dlen = COMP_BUF_SIZE; memset(result, 0, sizeof (result)); @@ -986,8 +869,7 @@ static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate, } for (i = 0; i < dtcount; i++) { - int ilen; - unsigned int dlen = COMP_BUF_SIZE; + int ilen, dlen = COMP_BUF_SIZE; memset(result, 0, sizeof (result)); @@ -1032,25 +914,24 @@ static int test_pcomp(struct crypto_pcomp *tfm, const char *algo = crypto_tfm_alg_driver_name(crypto_pcomp_tfm(tfm)); unsigned int i; char result[COMP_BUF_SIZE]; - int res; + int error; for (i = 0; i < ctcount; i++) { struct comp_request req; - unsigned int produced = 0; - res = crypto_compress_setup(tfm, ctemplate[i].params, - ctemplate[i].paramsize); - if (res) { + error = crypto_compress_setup(tfm, ctemplate[i].params, + ctemplate[i].paramsize); + if (error) { pr_err("alg: pcomp: compression setup failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } - res = crypto_compress_init(tfm); - if (res) { + error = crypto_compress_init(tfm); + if (error) { pr_err("alg: pcomp: compression init failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } memset(result, 0, sizeof(result)); @@ -1060,37 +941,32 @@ static int test_pcomp(struct crypto_pcomp *tfm, req.next_out = result; req.avail_out = ctemplate[i].outlen / 2; - res = crypto_compress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_compress_update(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: compression update failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } - if (res > 0) - produced += res; /* Add remaining input data */ req.avail_in += (ctemplate[i].inlen + 1) / 2; - res = crypto_compress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_compress_update(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: compression update failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } - if (res > 0) - produced += res; /* Provide remaining output space */ req.avail_out += COMP_BUF_SIZE - ctemplate[i].outlen / 2; - res = crypto_compress_final(tfm, &req); - if (res < 0) { + error = crypto_compress_final(tfm, &req); + if (error) { pr_err("alg: pcomp: compression final failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } - produced += res; if (COMP_BUF_SIZE - req.avail_out != ctemplate[i].outlen) { pr_err("alg: comp: Compression test %d failed for %s: " @@ -1100,13 +976,6 @@ static int test_pcomp(struct crypto_pcomp *tfm, return -EINVAL; } - if (produced != ctemplate[i].outlen) { - pr_err("alg: comp: Compression test %d failed for %s: " - "returned len = %u (expected %d)\n", i + 1, - algo, produced, ctemplate[i].outlen); - return -EINVAL; - } - if (memcmp(result, ctemplate[i].output, ctemplate[i].outlen)) { pr_err("alg: pcomp: Compression test %d failed for " "%s\n", i + 1, algo); @@ -1117,21 +986,21 @@ static int test_pcomp(struct crypto_pcomp *tfm, for (i = 0; i < dtcount; i++) { struct comp_request req; - unsigned int produced = 0; - res = crypto_decompress_setup(tfm, 
dtemplate[i].params, - dtemplate[i].paramsize); - if (res) { + error = crypto_decompress_setup(tfm, dtemplate[i].params, + dtemplate[i].paramsize); + if (error) { pr_err("alg: pcomp: decompression setup failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; + "test %d for %s: error=%d\n", i + 1, algo, + error); + return error; } - res = crypto_decompress_init(tfm); - if (res) { + error = crypto_decompress_init(tfm); + if (error) { pr_err("alg: pcomp: decompression init failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } memset(result, 0, sizeof(result)); @@ -1141,38 +1010,35 @@ static int test_pcomp(struct crypto_pcomp *tfm, req.next_out = result; req.avail_out = dtemplate[i].outlen / 2; - res = crypto_decompress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_decompress_update(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: decompression update failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; + "test %d for %s: error=%d\n", i + 1, algo, + error); + return error; } - if (res > 0) - produced += res; /* Add remaining input data */ req.avail_in += (dtemplate[i].inlen + 1) / 2; - res = crypto_decompress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_decompress_update(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: decompression update failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; + "test %d for %s: error=%d\n", i + 1, algo, + error); + return error; } - if (res > 0) - produced += res; /* Provide remaining output space */ req.avail_out += COMP_BUF_SIZE - dtemplate[i].outlen / 2; - res = crypto_decompress_final(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_decompress_final(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: decompression final failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; + "test %d for %s: error=%d\n", i + 1, algo, + error); + return error; } - if (res > 0) - produced += res; if (COMP_BUF_SIZE - req.avail_out != dtemplate[i].outlen) { pr_err("alg: comp: Decompression test %d failed for " @@ -1182,13 +1048,6 @@ static int test_pcomp(struct crypto_pcomp *tfm, return -EINVAL; } - if (produced != dtemplate[i].outlen) { - pr_err("alg: comp: Decompression test %d failed for " - "%s: returned len = %u (expected %d)\n", i + 1, - algo, produced, dtemplate[i].outlen); - return -EINVAL; - } - if (memcmp(result, dtemplate[i].output, dtemplate[i].outlen)) { pr_err("alg: pcomp: Decompression test %d failed for " "%s\n", i + 1, algo); @@ -1200,68 +1059,6 @@ static int test_pcomp(struct crypto_pcomp *tfm, return 0; } - -static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template, - unsigned int tcount) -{ - const char *algo = crypto_tfm_alg_driver_name(crypto_rng_tfm(tfm)); - int err, i, j, seedsize; - u8 *seed; - char result[32]; - - seedsize = crypto_rng_seedsize(tfm); - - seed = kmalloc(seedsize, GFP_KERNEL); - if (!seed) { - printk(KERN_ERR "alg: cprng: Failed to allocate seed space " - "for %s\n", algo); - return -ENOMEM; - } - - for (i = 0; i < tcount; i++) { - memset(result, 0, 32); - - memcpy(seed, template[i].v, template[i].vlen); - memcpy(seed + template[i].vlen, template[i].key, - template[i].klen); - memcpy(seed + template[i].vlen + 
template[i].klen, - template[i].dt, template[i].dtlen); - - err = crypto_rng_reset(tfm, seed, seedsize); - if (err) { - printk(KERN_ERR "alg: cprng: Failed to reset rng " - "for %s\n", algo); - goto out; - } - - for (j = 0; j < template[i].loops; j++) { - err = crypto_rng_get_bytes(tfm, result, - template[i].rlen); - if (err != template[i].rlen) { - printk(KERN_ERR "alg: cprng: Failed to obtain " - "the correct amount of random data for " - "%s (requested %d, got %d)\n", algo, - template[i].rlen, err); - goto out; - } - } - - err = memcmp(result, template[i].result, - template[i].rlen); - if (err) { - printk(KERN_ERR "alg: cprng: Test %d failed for %s\n", - i, algo); - hexdump(result, template[i].rlen); - err = -EINVAL; - goto out; - } - } - -out: - kfree(seed); - return err; -} - static int alg_test_aead(const struct alg_test_desc *desc, const char *driver, u32 type, u32 mask) { @@ -1461,42 +1258,11 @@ static int alg_test_crc32c(const struct alg_test_desc *desc, return err; } -static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver, - u32 type, u32 mask) -{ - struct crypto_rng *rng; - int err; - - rng = crypto_alloc_rng(driver, type, mask); - if (IS_ERR(rng)) { - printk(KERN_ERR "alg: cprng: Failed to load transform for %s: " - "%ld\n", driver, PTR_ERR(rng)); - return PTR_ERR(rng); - } - - err = test_cprng(rng, desc->suite.cprng.vecs, desc->suite.cprng.count); - - crypto_free_rng(rng); - - return err; -} - /* Please keep this list sorted by algorithm name. */ static const struct alg_test_desc alg_test_descs[] = { { - .alg = "ansi_cprng", - .test = alg_test_cprng, - .fips_allowed = 1, - .suite = { - .cprng = { - .vecs = ansi_cprng_aes_tv_template, - .count = ANSI_CPRNG_AES_TEST_VECTORS - } - } - }, { .alg = "cbc(aes)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1572,7 +1338,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "cbc(des3_ede)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1603,7 +1368,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ccm(aes)", .test = alg_test_aead, - .fips_allowed = 1, .suite = { .aead = { .enc = { @@ -1619,29 +1383,12 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "crc32c", .test = alg_test_crc32c, - .fips_allowed = 1, .suite = { .hash = { .vecs = crc32c_tv_template, .count = CRC32C_TEST_VECTORS } } - }, { - .alg = "ctr(aes)", - .test = alg_test_skcipher, - .fips_allowed = 1, - .suite = { - .cipher = { - .enc = { - .vecs = aes_ctr_enc_tv_template, - .count = AES_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_dec_tv_template, - .count = AES_CTR_DEC_TEST_VECTORS - } - } - } }, { .alg = "cts(cbc(aes))", .test = alg_test_skcipher, @@ -1675,7 +1422,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ecb(aes)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1781,7 +1527,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ecb(des)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1797,7 +1542,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ecb(des3_ede)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1933,7 +1677,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "gcm(aes)", .test = alg_test_aead, - .fips_allowed = 1, .suite = { .aead = { .enc = { @@ -1976,7 +1719,6 @@ static const struct 
alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha1)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha1_tv_template, @@ -1986,7 +1728,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha224)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha224_tv_template, @@ -1996,7 +1737,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha256)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha256_tv_template, @@ -2006,7 +1746,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha384)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha384_tv_template, @@ -2016,7 +1755,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha512)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha512_tv_template, @@ -2098,32 +1836,15 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "rfc3686(ctr(aes))", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { - .vecs = aes_ctr_rfc3686_enc_tv_template, - .count = AES_CTR_3686_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_rfc3686_dec_tv_template, - .count = AES_CTR_3686_DEC_TEST_VECTORS - } - } - } - }, { - .alg = "rfc4309(ccm(aes))", - .test = alg_test_aead, - .fips_allowed = 1, - .suite = { - .aead = { - .enc = { - .vecs = aes_ccm_rfc4309_enc_tv_template, - .count = AES_CCM_4309_ENC_TEST_VECTORS + .vecs = aes_ctr_enc_tv_template, + .count = AES_CTR_ENC_TEST_VECTORS }, .dec = { - .vecs = aes_ccm_rfc4309_dec_tv_template, - .count = AES_CCM_4309_DEC_TEST_VECTORS + .vecs = aes_ctr_dec_tv_template, + .count = AES_CTR_DEC_TEST_VECTORS } } } @@ -2177,7 +1898,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha1", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha1_tv_template, @@ -2187,7 +1907,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha224", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha224_tv_template, @@ -2197,7 +1916,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha256", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha256_tv_template, @@ -2207,7 +1925,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha384", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha384_tv_template, @@ -2217,7 +1934,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha512", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha512_tv_template, @@ -2361,36 +2077,60 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask) if (i < 0) goto notest; - if (fips_enabled && !alg_test_descs[i].fips_allowed) - goto non_fips_alg; - - rc = alg_test_cipher(alg_test_descs + i, driver, type, mask); - goto test_done; + return alg_test_cipher(alg_test_descs + i, driver, type, mask); } i = alg_find_test(alg); if (i < 0) goto notest; - if (fips_enabled && !alg_test_descs[i].fips_allowed) - goto non_fips_alg; - rc = alg_test_descs[i].test(alg_test_descs + i, driver, type, mask); -test_done: if (fips_enabled && rc) panic("%s: %s alg self test failed in fips mode!\n", driver, alg); - if (fips_enabled && !rc) - printk(KERN_INFO "alg: self-tests for %s (%s) passed\n", - driver, alg); - return rc; notest: printk(KERN_INFO "alg: No 
test for %s (%s)\n", alg, driver); return 0; -non_fips_alg: - return -EINVAL; } EXPORT_SYMBOL_GPL(alg_test); + +int __init testmgr_init(void) +{ + int i; + + for (i = 0; i < XBUFSIZE; i++) { + xbuf[i] = (void *)__get_free_page(GFP_KERNEL); + if (!xbuf[i]) + goto err_free_xbuf; + } + + for (i = 0; i < XBUFSIZE; i++) { + axbuf[i] = (void *)__get_free_page(GFP_KERNEL); + if (!axbuf[i]) + goto err_free_axbuf; + } + + return 0; + +err_free_axbuf: + for (i = 0; i < XBUFSIZE && axbuf[i]; i++) + free_page((unsigned long)axbuf[i]); +err_free_xbuf: + for (i = 0; i < XBUFSIZE && xbuf[i]; i++) + free_page((unsigned long)xbuf[i]); + + return -ENOMEM; +} + +void testmgr_exit(void) +{ + int i; + + for (i = 0; i < XBUFSIZE; i++) + free_page((unsigned long)axbuf[i]); + for (i = 0; i < XBUFSIZE; i++) + free_page((unsigned long)xbuf[i]); +} diff --git a/trunk/crypto/testmgr.h b/trunk/crypto/testmgr.h index 69316228fc19..526f00a9c72f 100644 --- a/trunk/crypto/testmgr.h +++ b/trunk/crypto/testmgr.h @@ -62,7 +62,6 @@ struct aead_testvec { int np; int anp; unsigned char fail; - unsigned char novrfy; /* ccm dec verification failure expected */ unsigned char wk; /* weak key flag */ unsigned char klen; unsigned short ilen; @@ -70,18 +69,6 @@ struct aead_testvec { unsigned short rlen; }; -struct cprng_testvec { - char *key; - char *dt; - char *v; - char *result; - unsigned char klen; - unsigned short dtlen; - unsigned short vlen; - unsigned short rlen; - unsigned short loops; -}; - static char zeroed_string[48]; /* @@ -2854,16 +2841,12 @@ static struct cipher_testvec cast6_dec_tv_template[] = { #define AES_LRW_DEC_TEST_VECTORS 8 #define AES_XTS_ENC_TEST_VECTORS 4 #define AES_XTS_DEC_TEST_VECTORS 4 -#define AES_CTR_ENC_TEST_VECTORS 3 -#define AES_CTR_DEC_TEST_VECTORS 3 -#define AES_CTR_3686_ENC_TEST_VECTORS 7 -#define AES_CTR_3686_DEC_TEST_VECTORS 6 +#define AES_CTR_ENC_TEST_VECTORS 7 +#define AES_CTR_DEC_TEST_VECTORS 6 #define AES_GCM_ENC_TEST_VECTORS 9 #define AES_GCM_DEC_TEST_VECTORS 8 #define AES_CCM_ENC_TEST_VECTORS 7 #define AES_CCM_DEC_TEST_VECTORS 7 -#define AES_CCM_4309_ENC_TEST_VECTORS 7 -#define AES_CCM_4309_DEC_TEST_VECTORS 10 static struct cipher_testvec aes_enc_tv_template[] = { { /* From FIPS-197 */ @@ -4000,164 +3983,6 @@ static struct cipher_testvec aes_xts_dec_tv_template[] = { static struct cipher_testvec aes_ctr_enc_tv_template[] = { - { /* From NIST Special Publication 800-38A, Appendix F.5 */ - .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" - "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", - .klen = 16, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .ilen = 64, - .result = "\x87\x4d\x61\x91\xb6\x20\xe3\x26" - "\x1b\xef\x68\x64\x99\x0d\xb6\xce" - "\x98\x06\xf6\x6b\x79\x70\xfd\xff" - "\x86\x17\x18\x7b\xb9\xff\xfd\xff" - "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e" - "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab" - "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1" - "\x79\x21\x70\xa0\xf3\x00\x9c\xee", - .rlen = 64, - }, { - .key = "\x8e\x73\xb0\xf7\xda\x0e\x64\x52" - "\xc8\x10\xf3\x2b\x80\x90\x79\xe5" - "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", - .klen = 24, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - 
"\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .ilen = 64, - .result = "\x1a\xbc\x93\x24\x17\x52\x1c\xa2" - "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b" - "\x09\x03\x39\xec\x0a\xa6\xfa\xef" - "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94" - "\x1e\x36\xb2\x6b\xd1\xeb\xc6\x70" - "\xd1\xbd\x1d\x66\x56\x20\xab\xf7" - "\x4f\x78\xa7\xf6\xd2\x98\x09\x58" - "\x5a\x97\xda\xec\x58\xc6\xb0\x50", - .rlen = 64, - }, { - .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe" - "\x2b\x73\xae\xf0\x85\x7d\x77\x81" - "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" - "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", - .klen = 32, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .ilen = 64, - .result = "\x60\x1e\xc3\x13\x77\x57\x89\xa5" - "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28" - "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a" - "\xca\x84\xe9\x90\xca\xca\xf5\xc5" - "\x2b\x09\x30\xda\xa2\x3d\xe9\x4c" - "\xe8\x70\x17\xba\x2d\x84\x98\x8d" - "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6" - "\x13\xc2\xdd\x08\x45\x79\x41\xa6", - .rlen = 64, - } -}; - -static struct cipher_testvec aes_ctr_dec_tv_template[] = { - { /* From NIST Special Publication 800-38A, Appendix F.5 */ - .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" - "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", - .klen = 16, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x87\x4d\x61\x91\xb6\x20\xe3\x26" - "\x1b\xef\x68\x64\x99\x0d\xb6\xce" - "\x98\x06\xf6\x6b\x79\x70\xfd\xff" - "\x86\x17\x18\x7b\xb9\xff\xfd\xff" - "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e" - "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab" - "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1" - "\x79\x21\x70\xa0\xf3\x00\x9c\xee", - .ilen = 64, - .result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .rlen = 64, - }, { - .key = "\x8e\x73\xb0\xf7\xda\x0e\x64\x52" - "\xc8\x10\xf3\x2b\x80\x90\x79\xe5" - "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", - .klen = 24, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x1a\xbc\x93\x24\x17\x52\x1c\xa2" - "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b" - "\x09\x03\x39\xec\x0a\xa6\xfa\xef" - "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94" - "\x1e\x36\xb2\x6b\xd1\xeb\xc6\x70" - "\xd1\xbd\x1d\x66\x56\x20\xab\xf7" - "\x4f\x78\xa7\xf6\xd2\x98\x09\x58" - "\x5a\x97\xda\xec\x58\xc6\xb0\x50", - .ilen = 64, - .result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .rlen = 64, - }, { - .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe" - "\x2b\x73\xae\xf0\x85\x7d\x77\x81" - "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" - "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", - .klen = 32, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = 
"\x60\x1e\xc3\x13\x77\x57\x89\xa5" - "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28" - "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a" - "\xca\x84\xe9\x90\xca\xca\xf5\xc5" - "\x2b\x09\x30\xda\xa2\x3d\xe9\x4c" - "\xe8\x70\x17\xba\x2d\x84\x98\x8d" - "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6" - "\x13\xc2\xdd\x08\x45\x79\x41\xa6", - .ilen = 64, - .result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .rlen = 64, - } -}; - -static struct cipher_testvec aes_ctr_rfc3686_enc_tv_template[] = { { /* From RFC 3686 */ .key = "\xae\x68\x52\xf8\x12\x10\x67\xcc" "\x4b\xf7\xa5\x76\x55\x77\xf3\x9e" @@ -5289,7 +5114,7 @@ static struct cipher_testvec aes_ctr_rfc3686_enc_tv_template[] = { }, }; -static struct cipher_testvec aes_ctr_rfc3686_dec_tv_template[] = { +static struct cipher_testvec aes_ctr_dec_tv_template[] = { { /* From RFC 3686 */ .key = "\xae\x68\x52\xf8\x12\x10\x67\xcc" "\x4b\xf7\xa5\x76\x55\x77\xf3\x9e" @@ -6000,470 +5825,6 @@ static struct aead_testvec aes_ccm_dec_tv_template[] = { }, }; -/* - * rfc4309 refers to section 8 of rfc3610 for test vectors, but they all - * use a 13-byte nonce, we only support an 11-byte nonce. Similarly, all of - * Special Publication 800-38C's test vectors also use nonce lengths our - * implementation doesn't support. The following are taken from fips cavs - * fax files on hand at Red Hat. - * - * nb: actual key lengths are (klen - 3), the last 3 bytes are actually - * part of the nonce which combine w/the iv, but need to be input this way. - */ -static struct aead_testvec aes_ccm_rfc4309_enc_tv_template[] = { - { - .key = "\x83\xac\x54\x66\xc2\xeb\xe5\x05" - "\x2e\x01\xd1\xfc\x5d\x82\x66\x2e" - "\x96\xac\x59", - .klen = 19, - .iv = "\x30\x07\xa1\xe2\xa2\xc7\x55\x24", - .alen = 0, - .input = "\x19\xc8\x81\xf6\xe9\x86\xff\x93" - "\x0b\x78\x67\xe5\xbb\xb7\xfc\x6e" - "\x83\x77\xb3\xa6\x0c\x8c\x9f\x9c" - "\x35\x2e\xad\xe0\x62\xf9\x91\xa1", - .ilen = 32, - .result = "\xab\x6f\xe1\x69\x1d\x19\x99\xa8" - "\x92\xa0\xc4\x6f\x7e\xe2\x8b\xb1" - "\x70\xbb\x8c\xa6\x4c\x6e\x97\x8a" - "\x57\x2b\xbe\x5d\x98\xa6\xb1\x32" - "\xda\x24\xea\xd9\xa1\x39\x98\xfd" - "\xa4\xbe\xd9\xf2\x1a\x6d\x22\xa8", - .rlen = 48, - }, { - .key = "\x1e\x2c\x7e\x01\x41\x9a\xef\xc0" - "\x0d\x58\x96\x6e\x5c\xa2\x4b\xd3" - "\x4f\xa3\x19", - .klen = 19, - .iv = "\xd3\x01\x5a\xd8\x30\x60\x15\x56", - .assoc = "\xda\xe6\x28\x9c\x45\x2d\xfd\x63" - "\x5e\xda\x4c\xb6\xe6\xfc\xf9\xb7" - "\x0c\x56\xcb\xe4\xe0\x05\x7a\xe1" - "\x0a\x63\x09\x78\xbc\x2c\x55\xde", - .alen = 32, - .input = "\x87\xa3\x36\xfd\x96\xb3\x93\x78" - "\xa9\x28\x63\xba\x12\xa3\x14\x85" - "\x57\x1e\x06\xc9\x7b\x21\xef\x76" - "\x7f\x38\x7e\x8e\x29\xa4\x3e\x7e", - .ilen = 32, - .result = "\x8a\x1e\x11\xf0\x02\x6b\xe2\x19" - "\xfc\x70\xc4\x6d\x8e\xb7\x99\xab" - "\xc5\x4b\xa2\xac\xd3\xf3\x48\xff" - "\x3b\xb5\xce\x53\xef\xde\xbb\x02" - "\xa9\x86\x15\x6c\x13\xfe\xda\x0a" - "\x22\xb8\x29\x3d\xd8\x39\x9a\x23", - .rlen = 48, - }, { - .key = "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1" - "\xa3\xf0\xff\xdd\x4e\x4b\x12\x75" - "\x53\x14\x73\x66\x8d\x88\xf6\x80" - "\xa0\x20\x35", - .klen = 27, - .iv = "\x26\xf2\x21\x8d\x50\x20\xda\xe2", - .assoc = "\x5b\x9e\x13\x67\x02\x5e\xef\xc1" - "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47" - "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d" - "\xd8\x9e\x2b\x56\x10\x23\x56\xe7", - .alen = 32, - .ilen = 0, - .result = 
"\x36\xea\x7a\x70\x08\xdc\x6a\xbc" - "\xad\x0c\x7a\x63\xf6\x61\xfd\x9b", - .rlen = 16, - }, { - .key = "\x56\xdf\x5c\x8f\x26\x3f\x0e\x42" - "\xef\x7a\xd3\xce\xfc\x84\x60\x62" - "\xca\xb4\x40\xaf\x5f\xc9\xc9\x01" - "\xd6\x3c\x8c", - .klen = 27, - .iv = "\x86\x84\xb6\xcd\xef\x09\x2e\x94", - .assoc = "\x02\x65\x78\x3c\xe9\x21\x30\x91" - "\xb1\xb9\xda\x76\x9a\x78\x6d\x95" - "\xf2\x88\x32\xa3\xf2\x50\xcb\x4c" - "\xe3\x00\x73\x69\x84\x69\x87\x79", - .alen = 32, - .input = "\x9f\xd2\x02\x4b\x52\x49\x31\x3c" - "\x43\x69\x3a\x2d\x8e\x70\xad\x7e" - "\xe0\xe5\x46\x09\x80\x89\x13\xb2" - "\x8c\x8b\xd9\x3f\x86\xfb\xb5\x6b", - .ilen = 32, - .result = "\x39\xdf\x7c\x3c\x5a\x29\xb9\x62" - "\x5d\x51\xc2\x16\xd8\xbd\x06\x9f" - "\x9b\x6a\x09\x70\xc1\x51\x83\xc2" - "\x66\x88\x1d\x4f\x9a\xda\xe0\x1e" - "\xc7\x79\x11\x58\xe5\x6b\x20\x40" - "\x7a\xea\x46\x42\x8b\xe4\x6f\xe1", - .rlen = 48, - }, { - .key = "\xe0\x8d\x99\x71\x60\xd7\x97\x1a" - "\xbd\x01\x99\xd5\x8a\xdf\x71\x3a" - "\xd3\xdf\x24\x4b\x5e\x3d\x4b\x4e" - "\x30\x7a\xb9\xd8\x53\x0a\x5e\x2b" - "\x1e\x29\x91", - .klen = 35, - .iv = "\xad\x8e\xc1\x53\x0a\xcf\x2d\xbe", - .assoc = "\x19\xb6\x1f\x57\xc4\xf3\xf0\x8b" - "\x78\x2b\x94\x02\x29\x0f\x42\x27" - "\x6b\x75\xcb\x98\x34\x08\x7e\x79" - "\xe4\x3e\x49\x0d\x84\x8b\x22\x87", - .alen = 32, - .input = "\xe1\xd9\xd8\x13\xeb\x3a\x75\x3f" - "\x9d\xbd\x5f\x66\xbe\xdc\xbb\x66" - "\xbf\x17\x99\x62\x4a\x39\x27\x1f" - "\x1d\xdc\x24\xae\x19\x2f\x98\x4c", - .ilen = 32, - .result = "\x19\xb8\x61\x33\x45\x2b\x43\x96" - "\x6f\x51\xd0\x20\x30\x7d\x9b\xc6" - "\x26\x3d\xf8\xc9\x65\x16\xa8\x9f" - "\xf0\x62\x17\x34\xf2\x1e\x8d\x75" - "\x4e\x13\xcc\xc0\xc3\x2a\x54\x2d", - .rlen = 40, - }, { - .key = "\x7c\xc8\x18\x3b\x8d\x99\xe0\x7c" - "\x45\x41\xb8\xbd\x5c\xa7\xc2\x32" - "\x8a\xb8\x02\x59\xa4\xfe\xa9\x2c" - "\x09\x75\x9a\x9b\x3c\x9b\x27\x39" - "\xf9\xd9\x4e", - .klen = 35, - .iv = "\x63\xb5\x3d\x9d\x43\xf6\x1e\x50", - .assoc = "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b" - "\x13\x02\x01\x0c\x83\x4c\x96\x35" - "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94" - "\xb0\x39\x36\xe6\x8f\x57\xe0\x13", - .alen = 32, - .input = "\x3b\x6c\x29\x36\xb6\xef\x07\xa6" - "\x83\x72\x07\x4f\xcf\xfa\x66\x89" - "\x5f\xca\xb1\xba\xd5\x8f\x2c\x27" - "\x30\xdb\x75\x09\x93\xd4\x65\xe4", - .ilen = 32, - .result = "\xb0\x88\x5a\x33\xaa\xe5\xc7\x1d" - "\x85\x23\xc7\xc6\x2f\xf4\x1e\x3d" - "\xcc\x63\x44\x25\x07\x78\x4f\x9e" - "\x96\xb8\x88\xeb\xbc\x48\x1f\x06" - "\x39\xaf\x39\xac\xd8\x4a\x80\x39" - "\x7b\x72\x8a\xf7", - .rlen = 44, - }, { - .key = "\xab\xd0\xe9\x33\x07\x26\xe5\x83" - "\x8c\x76\x95\xd4\xb6\xdc\xf3\x46" - "\xf9\x8f\xad\xe3\x02\x13\x83\x77" - "\x3f\xb0\xf1\xa1\xa1\x22\x0f\x2b" - "\x24\xa7\x8b", - .klen = 35, - .iv = "\x07\xcb\xcc\x0e\xe6\x33\xbf\xf5", - .assoc = "\xd4\xdb\x30\x1d\x03\xfe\xfd\x5f" - "\x87\xd4\x8c\xb6\xb6\xf1\x7a\x5d" - "\xab\x90\x65\x8d\x8e\xca\x4d\x4f" - "\x16\x0c\x40\x90\x4b\xc7\x36\x73", - .alen = 32, - .input = "\xf5\xc6\x7d\x48\xc1\xb7\xe6\x92" - "\x97\x5a\xca\xc4\xa9\x6d\xf9\x3d" - "\x6c\xde\xbc\xf1\x90\xea\x6a\xb2" - "\x35\x86\x36\xaf\x5c\xfe\x4b\x3a", - .ilen = 32, - .result = "\x83\x6f\x40\x87\x72\xcf\xc1\x13" - "\xef\xbb\x80\x21\x04\x6c\x58\x09" - "\x07\x1b\xfc\xdf\xc0\x3f\x5b\xc7" - "\xe0\x79\xa8\x6e\x71\x7c\x3f\xcf" - "\x5c\xda\xb2\x33\xe5\x13\xe2\x0d" - "\x74\xd1\xef\xb5\x0f\x3a\xb5\xf8", - .rlen = 48, - }, -}; - -static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = { - { - .key = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1" - "\xff\x80\x2e\x48\x7d\x82\xf8\xb9" - "\xc6\xfb\x7d", - .klen = 19, - .iv = "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8", - 
.alen = 0, - .input = "\xd5\xe8\x93\x9f\xc7\x89\x2e\x2b", - .ilen = 8, - .result = "\x00", - .rlen = 0, - .novrfy = 1, - }, { - .key = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1" - "\xff\x80\x2e\x48\x7d\x82\xf8\xb9" - "\xaf\x94\x87", - .klen = 19, - .iv = "\x78\x35\x82\x81\x7f\x88\x94\x68", - .alen = 0, - .input = "\x41\x3c\xb8\x87\x73\xcb\xf3\xf3", - .ilen = 8, - .result = "\x00", - .rlen = 0, - }, { - .key = "\x61\x0e\x8c\xae\xe3\x23\xb6\x38" - "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8" - "\xc6\xfb\x7d", - .klen = 19, - .iv = "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8", - .assoc = "\xf3\x94\x87\x78\x35\x82\x81\x7f" - "\x88\x94\x68\xb1\x78\x6b\x2b\xd6" - "\x04\x1f\x4e\xed\x78\xd5\x33\x66" - "\xd8\x94\x99\x91\x81\x54\x62\x57", - .alen = 32, - .input = "\xf0\x7c\x29\x02\xae\x1c\x2f\x55" - "\xd0\xd1\x3d\x1a\xa3\x6d\xe4\x0a" - "\x86\xb0\x87\x6b\x62\x33\x8c\x34" - "\xce\xab\x57\xcc\x79\x0b\xe0\x6f" - "\x5c\x3e\x48\x1f\x6c\x46\xf7\x51" - "\x8b\x84\x83\x2a\xc1\x05\xb8\xc5", - .ilen = 48, - .result = "\x50\x82\x3e\x07\xe2\x1e\xb6\xfb" - "\x33\xe4\x73\xce\xd2\xfb\x95\x79" - "\xe8\xb4\xb5\x77\x11\x10\x62\x6f" - "\x6a\x82\xd1\x13\xec\xf5\xd0\x48", - .rlen = 32, - .novrfy = 1, - }, { - .key = "\x61\x0e\x8c\xae\xe3\x23\xb6\x38" - "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8" - "\x05\xe0\xc9", - .klen = 19, - .iv = "\x0f\xed\x34\xea\x97\xd4\x3b\xdf", - .assoc = "\x49\x5c\x50\x1f\x1d\x94\xcc\x81" - "\xba\xb7\xb6\x03\xaf\xa5\xc1\xa1" - "\xd8\x5c\x42\x68\xe0\x6c\xda\x89" - "\x05\xac\x56\xac\x1b\x2a\xd3\x86", - .alen = 32, - .input = "\x39\xbe\x7d\x15\x62\x77\xf3\x3c" - "\xad\x83\x52\x6d\x71\x03\x25\x1c" - "\xed\x81\x3a\x9a\x16\x7d\x19\x80" - "\x72\x04\x72\xd0\xf6\xff\x05\x0f" - "\xb7\x14\x30\x00\x32\x9e\xa0\xa6" - "\x9e\x5a\x18\xa1\xb8\xfe\xdb\xd3", - .ilen = 48, - .result = "\x75\x05\xbe\xc2\xd9\x1e\xde\x60" - "\x47\x3d\x8c\x7d\xbd\xb5\xd9\xb7" - "\xf2\xae\x61\x05\x8f\x82\x24\x3f" - "\x9c\x67\x91\xe1\x38\x4f\xe4\x0c", - .rlen = 32, - }, { - .key = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73" - "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3" - "\xa4\x48\x93\x39\x26\x71\x4a\xc6" - "\xee\x49\x83", - .klen = 27, - .iv = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e", - .assoc = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1" - "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64" - "\xa4\xf0\x13\x05\xd1\x77\x99\x67" - "\x11\xc4\xc6\xdb\x00\x56\x36\x61", - .alen = 32, - .input = "\x71\x99\xfa\xf4\x44\x12\x68\x9b", - .ilen = 8, - .result = "\x00", - .rlen = 0, - }, { - .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7" - "\x96\xe5\xc5\x68\xaa\x95\x35\xe0" - "\x29\xa0\xba\x9e\x48\x78\xd1\xba" - "\xee\x49\x83", - .klen = 27, - .iv = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e", - .assoc = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1" - "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64" - "\xa4\xf0\x13\x05\xd1\x77\x99\x67" - "\x11\xc4\xc6\xdb\x00\x56\x36\x61", - .alen = 32, - .input = "\xfb\xe5\x5d\x34\xbe\xe5\xe8\xe7" - "\x5a\xef\x2f\xbf\x1f\x7f\xd4\xb2" - "\x66\xca\x61\x1e\x96\x7a\x61\xb3" - "\x1c\x16\x45\x52\xba\x04\x9c\x9f" - "\xb1\xd2\x40\xbc\x52\x7c\x6f\xb1", - .ilen = 40, - .result = "\x85\x34\x66\x42\xc8\x92\x0f\x36" - "\x58\xe0\x6b\x91\x3c\x98\x5c\xbb" - "\x0a\x85\xcc\x02\xad\x7a\x96\xe9" - "\x65\x43\xa4\xc3\x0f\xdc\x55\x81", - .rlen = 32, - }, { - .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7" - "\x96\xe5\xc5\x68\xaa\x95\x35\xe0" - "\x29\xa0\xba\x9e\x48\x78\xd1\xba" - "\xd1\xfc\x57", - .klen = 27, - .iv = "\x9c\xfe\xb8\x9c\xad\x71\xaa\x1f", - .assoc = "\x86\x67\xa5\xa9\x14\x5f\x0d\xc6" - "\xff\x14\xc7\x44\xbf\x6c\x3a\xc3" - "\xff\xb6\x81\xbd\xe2\xd5\x06\xc7" - "\x3c\xa1\x52\x13\x03\x8a\x23\x3a", - .alen = 32, - .input = "\x3f\x66\xb0\x9d\xe5\x4b\x38\x00" 
- "\xc6\x0e\x6e\xe5\xd6\x98\xa6\x37" - "\x8c\x26\x33\xc6\xb2\xa2\x17\xfa" - "\x64\x19\xc0\x30\xd7\xfc\x14\x6b" - "\xe3\x33\xc2\x04\xb0\x37\xbe\x3f" - "\xa9\xb4\x2d\x68\x03\xa3\x44\xef", - .ilen = 48, - .result = "\x02\x87\x4d\x28\x80\x6e\xb2\xed" - "\x99\x2a\xa8\xca\x04\x25\x45\x90" - "\x1d\xdd\x5a\xd9\xe4\xdb\x9c\x9c" - "\x49\xe9\x01\xfe\xa7\x80\x6d\x6b", - .rlen = 32, - .novrfy = 1, - }, { - .key = "\xa4\x4b\x54\x29\x0a\xb8\x6d\x01" - "\x5b\x80\x2a\xcf\x25\xc4\xb7\x5c" - "\x20\x2c\xad\x30\xc2\x2b\x41\xfb" - "\x0e\x85\xbc\x33\xad\x0f\x2b\xff" - "\xee\x49\x83", - .klen = 35, - .iv = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e", - .alen = 0, - .input = "\x1f\xb8\x8f\xa3\xdd\x54\x00\xf2", - .ilen = 8, - .result = "\x00", - .rlen = 0, - }, { - .key = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73" - "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3" - "\xa4\x48\x93\x39\x26\x71\x4a\xc6" - "\xae\x8f\x11\x4c\xc2\x9c\x4a\xbb" - "\x85\x34\x66", - .klen = 35, - .iv = "\x42\xc8\x92\x0f\x36\x58\xe0\x6b", - .alen = 0, - .input = "\x48\x01\x5e\x02\x24\x04\x66\x47" - "\xa1\xea\x6f\xaf\xe8\xfc\xfb\xdd" - "\xa5\xa9\x87\x8d\x84\xee\x2e\x77" - "\xbb\x86\xb9\xf5\x5c\x6c\xff\xf6" - "\x72\xc3\x8e\xf7\x70\xb1\xb2\x07" - "\xbc\xa8\xa3\xbd\x83\x7c\x1d\x2a", - .ilen = 48, - .result = "\xdc\x56\xf2\x71\xb0\xb1\xa0\x6c" - "\xf0\x97\x3a\xfb\x6d\xe7\x32\x99" - "\x3e\xaf\x70\x5e\xb2\x4d\xea\x39" - "\x89\xd4\x75\x7a\x63\xb1\xda\x93", - .rlen = 32, - .novrfy = 1, - }, { - .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7" - "\x96\xe5\xc5\x68\xaa\x95\x35\xe0" - "\x29\xa0\xba\x9e\x48\x78\xd1\xba" - "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b" - "\xcf\x76\x3f", - .klen = 35, - .iv = "\xd9\x95\x75\x8f\x44\x89\x40\x7b", - .assoc = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88" - "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b" - "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b" - "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe", - .alen = 32, - .input = "\x48\x58\xd6\xf3\xad\x63\x58\xbf" - "\xae\xc7\x5e\xae\x83\x8f\x7b\xe4" - "\x78\x5c\x4c\x67\x71\x89\x94\xbf" - "\x47\xf1\x63\x7e\x1c\x59\xbd\xc5" - "\x7f\x44\x0a\x0c\x01\x18\x07\x92" - "\xe1\xd3\x51\xce\x32\x6d\x0c\x5b", - .ilen = 48, - .result = "\xc2\x54\xc8\xde\x78\x87\x77\x40" - "\x49\x71\xe4\xb7\xe7\xcb\x76\x61" - "\x0a\x41\xb9\xe9\xc0\x76\x54\xab" - "\x04\x49\x3b\x19\x93\x57\x25\x5d", - .rlen = 32, - }, -}; - -/* - * ANSI X9.31 Continuous Pseudo-Random Number Generator (AES mode) - * test vectors, taken from Appendix B.2.9 and B.2.10: - * http://csrc.nist.gov/groups/STM/cavp/documents/rng/RNGVS.pdf - * Only AES-128 is supported at this time. 
- */ -#define ANSI_CPRNG_AES_TEST_VECTORS 6 - -static struct cprng_testvec ansi_cprng_aes_tv_template[] = { - { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xf9", - .dtlen = 16, - .v = "\x80\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x59\x53\x1e\xd1\x3b\xb0\xc0\x55" - "\x84\x79\x66\x85\xc1\x2f\x76\x41", - .rlen = 16, - .loops = 1, - }, { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfa", - .dtlen = 16, - .v = "\xc0\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x7c\x22\x2c\xf4\xca\x8f\xa2\x4c" - "\x1c\x9c\xb6\x41\xa9\xf3\x22\x0d", - .rlen = 16, - .loops = 1, - }, { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfb", - .dtlen = 16, - .v = "\xe0\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x8a\xaa\x00\x39\x66\x67\x5b\xe5" - "\x29\x14\x28\x81\xa9\x4d\x4e\xc7", - .rlen = 16, - .loops = 1, - }, { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfc", - .dtlen = 16, - .v = "\xf0\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x88\xdd\xa4\x56\x30\x24\x23\xe5" - "\xf6\x9d\xa5\x7e\x7b\x95\xc7\x3a", - .rlen = 16, - .loops = 1, - }, { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfd", - .dtlen = 16, - .v = "\xf8\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x05\x25\x92\x46\x61\x79\xd2\xcb" - "\x78\xc4\x0b\x14\x0a\x5a\x9a\xc8", - .rlen = 16, - .loops = 1, - }, { /* Monte Carlo Test */ - .key = "\x9f\x5b\x51\x20\x0b\xf3\x34\xb5" - "\xd8\x2b\xe8\xc3\x72\x55\xc8\x48", - .klen = 16, - .dt = "\x63\x76\xbb\xe5\x29\x02\xba\x3b" - "\x67\xc9\x25\xfa\x70\x1f\x11\xac", - .dtlen = 16, - .v = "\x57\x2c\x8e\x76\x87\x26\x47\x97" - "\x7e\x74\xfb\xdd\xc4\x95\x01\xd1", - .vlen = 16, - .result = "\x48\xe9\xbd\x0d\x06\xee\x18\xfb" - "\xe4\x57\x90\xd5\xc3\xfc\x9b\x73", - .rlen = 16, - .loops = 10000, - }, -}; - /* Cast5 test vectors from RFC 2144 */ #define CAST5_ENC_TEST_VECTORS 3 #define CAST5_DEC_TEST_VECTORS 3 diff --git a/trunk/crypto/zlib.c b/trunk/crypto/zlib.c index c3015733c990..33609bab614e 100644 --- a/trunk/crypto/zlib.c +++ b/trunk/crypto/zlib.c @@ -165,15 +165,15 @@ static int zlib_compress_update(struct crypto_pcomp *tfm, return -EINVAL; } - ret = req->avail_out - stream->avail_out; pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); + req->avail_in - stream->avail_in, + req->avail_out - stream->avail_out); req->next_in = stream->next_in; req->avail_in = stream->avail_in; req->next_out = stream->next_out; req->avail_out = stream->avail_out; - return ret; + return 0; } static int zlib_compress_final(struct crypto_pcomp *tfm, @@ -195,15 +195,15 @@ static int zlib_compress_final(struct crypto_pcomp *tfm, return -EINVAL; } - ret = req->avail_out - 
stream->avail_out; pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); + req->avail_in - stream->avail_in, + req->avail_out - stream->avail_out); req->next_in = stream->next_in; req->avail_in = stream->avail_in; req->next_out = stream->next_out; req->avail_out = stream->avail_out; - return ret; + return 0; } @@ -280,15 +280,15 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm, return -EINVAL; } - ret = req->avail_out - stream->avail_out; pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); + req->avail_in - stream->avail_in, + req->avail_out - stream->avail_out); req->next_in = stream->next_in; req->avail_in = stream->avail_in; req->next_out = stream->next_out; req->avail_out = stream->avail_out; - return ret; + return 0; } static int zlib_decompress_final(struct crypto_pcomp *tfm, @@ -328,15 +328,15 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm, return -EINVAL; } - ret = req->avail_out - stream->avail_out; pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); + req->avail_in - stream->avail_in, + req->avail_out - stream->avail_out); req->next_in = stream->next_in; req->avail_in = stream->avail_in; req->next_out = stream->next_out; req->avail_out = stream->avail_out; - return ret; + return 0; } diff --git a/trunk/drivers/char/hw_random/Kconfig b/trunk/drivers/char/hw_random/Kconfig index 9c00440dcf86..5fab6470f4b2 100644 --- a/trunk/drivers/char/hw_random/Kconfig +++ b/trunk/drivers/char/hw_random/Kconfig @@ -88,7 +88,7 @@ config HW_RANDOM_N2RNG config HW_RANDOM_VIA tristate "VIA HW Random Number Generator support" - depends on HW_RANDOM && X86 + depends on HW_RANDOM && X86_32 default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number diff --git a/trunk/drivers/char/hw_random/omap-rng.c b/trunk/drivers/char/hw_random/omap-rng.c index 00dd3de1be51..538313f9e7ac 100644 --- a/trunk/drivers/char/hw_random/omap-rng.c +++ b/trunk/drivers/char/hw_random/omap-rng.c @@ -89,7 +89,7 @@ static struct hwrng omap_rng_ops = { .data_read = omap_rng_data_read, }; -static int __devinit omap_rng_probe(struct platform_device *pdev) +static int __init omap_rng_probe(struct platform_device *pdev) { struct resource *res, *mem; int ret; diff --git a/trunk/drivers/char/hw_random/timeriomem-rng.c b/trunk/drivers/char/hw_random/timeriomem-rng.c index a94e930575f2..dcd352ad0e7f 100644 --- a/trunk/drivers/char/hw_random/timeriomem-rng.c +++ b/trunk/drivers/char/hw_random/timeriomem-rng.c @@ -88,9 +88,9 @@ static struct hwrng timeriomem_rng_ops = { .priv = 0, }; -static int __devinit timeriomem_rng_probe(struct platform_device *pdev) +static int __init timeriomem_rng_probe(struct platform_device *pdev) { - struct resource *res; + struct resource *res, *mem; int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -98,12 +98,21 @@ static int __devinit timeriomem_rng_probe(struct platform_device *pdev) if (!res) return -ENOENT; + mem = request_mem_region(res->start, res->end - res->start + 1, + pdev->name); + if (mem == NULL) + return -EBUSY; + + dev_set_drvdata(&pdev->dev, mem); + timeriomem_rng_data = pdev->dev.platform_data; timeriomem_rng_data->address = ioremap(res->start, res->end - res->start + 1); - if (!timeriomem_rng_data->address) - return -EIO; + if 
(!timeriomem_rng_data->address) { + ret = -ENOMEM; + goto err_ioremap; + } if (timeriomem_rng_data->period != 0 && usecs_to_jiffies(timeriomem_rng_data->period) > 0) { @@ -116,7 +125,7 @@ static int __devinit timeriomem_rng_probe(struct platform_device *pdev) ret = hwrng_register(&timeriomem_rng_ops); if (ret) - goto failed; + goto err_register; dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n", timeriomem_rng_data->address, @@ -124,19 +133,24 @@ static int __devinit timeriomem_rng_probe(struct platform_device *pdev) return 0; -failed: +err_register: dev_err(&pdev->dev, "problem registering\n"); iounmap(timeriomem_rng_data->address); +err_ioremap: + release_resource(mem); return ret; } static int __devexit timeriomem_rng_remove(struct platform_device *pdev) { + struct resource *mem = dev_get_drvdata(&pdev->dev); + del_timer_sync(&timeriomem_rng_timer); hwrng_unregister(&timeriomem_rng_ops); iounmap(timeriomem_rng_data->address); + release_resource(mem); return 0; } diff --git a/trunk/drivers/char/hw_random/via-rng.c b/trunk/drivers/char/hw_random/via-rng.c index 794aacb715c1..4e9573c1d39e 100644 --- a/trunk/drivers/char/hw_random/via-rng.c +++ b/trunk/drivers/char/hw_random/via-rng.c @@ -132,19 +132,6 @@ static int via_rng_init(struct hwrng *rng) struct cpuinfo_x86 *c = &cpu_data(0); u32 lo, hi, old_lo; - /* VIA Nano CPUs don't have the MSR_VIA_RNG anymore. The RNG - * is always enabled if CPUID rng_en is set. There is no - * RNG configuration like it used to be the case in this - * register */ - if ((c->x86 == 6) && (c->x86_model >= 0x0f)) { - if (!cpu_has_xstore_enabled) { - printk(KERN_ERR PFX "can't enable hardware RNG " - "if XSTORE is not enabled\n"); - return -ENODEV; - } - return 0; - } - /* Control the RNG via MSR. Tread lightly and pay very close * close attention to values written, as the reserved fields * are documented to be "undefined and unpredictable"; but it @@ -218,5 +205,5 @@ static void __exit mod_exit(void) module_init(mod_init); module_exit(mod_exit); -MODULE_DESCRIPTION("H/W RNG driver for VIA CPU with PadLock"); +MODULE_DESCRIPTION("H/W RNG driver for VIA chipsets"); MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/crypto/Kconfig b/trunk/drivers/crypto/Kconfig index e748e55bd86b..01afd758072f 100644 --- a/trunk/drivers/crypto/Kconfig +++ b/trunk/drivers/crypto/Kconfig @@ -12,7 +12,7 @@ if CRYPTO_HW config CRYPTO_DEV_PADLOCK tristate "Support for VIA PadLock ACE" - depends on X86 && !UML + depends on X86_32 && !UML select CRYPTO_ALGAPI help Some VIA processors come with an integrated crypto engine diff --git a/trunk/drivers/crypto/hifn_795x.c b/trunk/drivers/crypto/hifn_795x.c index 5f753fc08730..2bef086fb342 100644 --- a/trunk/drivers/crypto/hifn_795x.c +++ b/trunk/drivers/crypto/hifn_795x.c @@ -2564,7 +2564,7 @@ static void hifn_tasklet_callback(unsigned long data) hifn_process_queue(dev); } -static int __devinit hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int err, i; struct hifn_device *dev; @@ -2696,7 +2696,7 @@ static int __devinit hifn_probe(struct pci_dev *pdev, const struct pci_device_id return err; } -static void __devexit hifn_remove(struct pci_dev *pdev) +static void hifn_remove(struct pci_dev *pdev) { int i; struct hifn_device *dev; @@ -2744,7 +2744,7 @@ static struct pci_driver hifn_pci_driver = { .remove = __devexit_p(hifn_remove), }; -static int __init hifn_init(void) +static int __devinit hifn_init(void) { unsigned int freq; int err; @@ -2789,7 +2789,7 
@@ static int __init hifn_init(void) return 0; } -static void __exit hifn_fini(void) +static void __devexit hifn_fini(void) { pci_unregister_driver(&hifn_pci_driver); diff --git a/trunk/drivers/crypto/padlock-aes.c b/trunk/drivers/crypto/padlock-aes.c index 87f92c39b5f0..856b3cc25583 100644 --- a/trunk/drivers/crypto/padlock-aes.c +++ b/trunk/drivers/crypto/padlock-aes.c @@ -154,11 +154,7 @@ static inline void padlock_reset_key(struct cword *cword) int cpu = raw_smp_processor_id(); if (cword != per_cpu(last_cword, cpu)) -#ifndef CONFIG_X86_64 asm volatile ("pushfl; popfl"); -#else - asm volatile ("pushfq; popfq"); -#endif } static inline void padlock_store_cword(struct cword *cword) @@ -212,19 +208,10 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, asm volatile ("test $1, %%cl;" "je 1f;" -#ifndef CONFIG_X86_64 "lea -1(%%ecx), %%eax;" "mov $1, %%ecx;" -#else - "lea -1(%%rcx), %%rax;" - "mov $1, %%rcx;" -#endif ".byte 0xf3,0x0f,0xa7,0xc8;" /* rep xcryptecb */ -#ifndef CONFIG_X86_64 "mov %%eax, %%ecx;" -#else - "mov %%rax, %%rcx;" -#endif "1:" ".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) diff --git a/trunk/drivers/crypto/talitos.c b/trunk/drivers/crypto/talitos.c index c70775fd3ce2..a3918c16b3db 100644 --- a/trunk/drivers/crypto/talitos.c +++ b/trunk/drivers/crypto/talitos.c @@ -44,8 +44,6 @@ #include #include #include -#include -#include #include "talitos.h" @@ -341,8 +339,7 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) status = error; dma_unmap_single(dev, request->dma_desc, - sizeof(struct talitos_desc), - DMA_BIDIRECTIONAL); + sizeof(struct talitos_desc), DMA_BIDIRECTIONAL); /* copy entries so we can call callback outside lock */ saved_req.desc = request->desc; @@ -416,8 +413,7 @@ static struct talitos_desc *current_desc(struct device *dev, int ch) /* * user diagnostics; report root cause of error based on execution unit status */ -static void report_eu_error(struct device *dev, int ch, - struct talitos_desc *desc) +static void report_eu_error(struct device *dev, int ch, struct talitos_desc *desc) { struct talitos_private *priv = dev_get_drvdata(dev); int i; @@ -688,8 +684,8 @@ struct talitos_ctx { unsigned int authsize; }; -static int aead_setauthsize(struct crypto_aead *authenc, - unsigned int authsize) +static int aead_authenc_setauthsize(struct crypto_aead *authenc, + unsigned int authsize) { struct talitos_ctx *ctx = crypto_aead_ctx(authenc); @@ -698,8 +694,8 @@ static int aead_setauthsize(struct crypto_aead *authenc, return 0; } -static int aead_setkey(struct crypto_aead *authenc, - const u8 *key, unsigned int keylen) +static int aead_authenc_setkey(struct crypto_aead *authenc, + const u8 *key, unsigned int keylen) { struct talitos_ctx *ctx = crypto_aead_ctx(authenc); struct rtattr *rta = (void *)key; @@ -744,7 +740,7 @@ static int aead_setkey(struct crypto_aead *authenc, } /* - * talitos_edesc - s/w-extended descriptor + * ipsec_esp_edesc - s/w-extended ipsec_esp descriptor * @src_nents: number of segments in input scatterlist * @dst_nents: number of segments in output scatterlist * @dma_len: length of dma mapped link_tbl space @@ -756,67 +752,17 @@ static int aead_setkey(struct crypto_aead *authenc, * is greater than 1, an integrity check value is concatenated to the end * of link_tbl data */ -struct talitos_edesc { +struct ipsec_esp_edesc { int src_nents; int dst_nents; - int src_is_chained; - int dst_is_chained; int dma_len; dma_addr_t dma_link_tbl; struct talitos_desc desc; 
struct talitos_ptr link_tbl[0]; }; -static int talitos_map_sg(struct device *dev, struct scatterlist *sg, - unsigned int nents, enum dma_data_direction dir, - int chained) -{ - if (unlikely(chained)) - while (sg) { - dma_map_sg(dev, sg, 1, dir); - sg = scatterwalk_sg_next(sg); - } - else - dma_map_sg(dev, sg, nents, dir); - return nents; -} - -static void talitos_unmap_sg_chain(struct device *dev, struct scatterlist *sg, - enum dma_data_direction dir) -{ - while (sg) { - dma_unmap_sg(dev, sg, 1, dir); - sg = scatterwalk_sg_next(sg); - } -} - -static void talitos_sg_unmap(struct device *dev, - struct talitos_edesc *edesc, - struct scatterlist *src, - struct scatterlist *dst) -{ - unsigned int src_nents = edesc->src_nents ? : 1; - unsigned int dst_nents = edesc->dst_nents ? : 1; - - if (src != dst) { - if (edesc->src_is_chained) - talitos_unmap_sg_chain(dev, src, DMA_TO_DEVICE); - else - dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - - if (edesc->dst_is_chained) - talitos_unmap_sg_chain(dev, dst, DMA_FROM_DEVICE); - else - dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); - } else - if (edesc->src_is_chained) - talitos_unmap_sg_chain(dev, src, DMA_BIDIRECTIONAL); - else - dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); -} - static void ipsec_esp_unmap(struct device *dev, - struct talitos_edesc *edesc, + struct ipsec_esp_edesc *edesc, struct aead_request *areq) { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], DMA_FROM_DEVICE); @@ -826,7 +772,15 @@ static void ipsec_esp_unmap(struct device *dev, dma_unmap_sg(dev, areq->assoc, 1, DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); + if (areq->src != areq->dst) { + dma_unmap_sg(dev, areq->src, edesc->src_nents ? : 1, + DMA_TO_DEVICE); + dma_unmap_sg(dev, areq->dst, edesc->dst_nents ? : 1, + DMA_FROM_DEVICE); + } else { + dma_unmap_sg(dev, areq->src, edesc->src_nents ? 
: 1, + DMA_BIDIRECTIONAL); + } if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, @@ -841,14 +795,13 @@ static void ipsec_esp_encrypt_done(struct device *dev, int err) { struct aead_request *areq = context; + struct ipsec_esp_edesc *edesc = + container_of(desc, struct ipsec_esp_edesc, desc); struct crypto_aead *authenc = crypto_aead_reqtfm(areq); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - struct talitos_edesc *edesc; struct scatterlist *sg; void *icvdata; - edesc = container_of(desc, struct talitos_edesc, desc); - ipsec_esp_unmap(dev, edesc, areq); /* copy the generated ICV to dst */ @@ -866,18 +819,17 @@ static void ipsec_esp_encrypt_done(struct device *dev, } static void ipsec_esp_decrypt_swauth_done(struct device *dev, - struct talitos_desc *desc, - void *context, int err) + struct talitos_desc *desc, void *context, + int err) { struct aead_request *req = context; + struct ipsec_esp_edesc *edesc = + container_of(desc, struct ipsec_esp_edesc, desc); struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - struct talitos_edesc *edesc; struct scatterlist *sg; void *icvdata; - edesc = container_of(desc, struct talitos_edesc, desc); - ipsec_esp_unmap(dev, edesc, req); if (!err) { @@ -899,20 +851,20 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, } static void ipsec_esp_decrypt_hwauth_done(struct device *dev, - struct talitos_desc *desc, - void *context, int err) + struct talitos_desc *desc, void *context, + int err) { struct aead_request *req = context; - struct talitos_edesc *edesc; - - edesc = container_of(desc, struct talitos_edesc, desc); + struct ipsec_esp_edesc *edesc = + container_of(desc, struct ipsec_esp_edesc, desc); ipsec_esp_unmap(dev, edesc, req); /* check ICV auth status */ - if (!err && ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) != - DESC_HDR_LO_ICCR1_PASS)) - err = -EBADMSG; + if (!err) + if ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) != + DESC_HDR_LO_ICCR1_PASS) + err = -EBADMSG; kfree(edesc); @@ -934,7 +886,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, link_tbl_ptr->j_extent = 0; link_tbl_ptr++; cryptlen -= sg_dma_len(sg); - sg = scatterwalk_sg_next(sg); + sg = sg_next(sg); } /* adjust (decrease) last one (or two) entry's len to cryptlen */ @@ -958,7 +910,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, /* * fill in and submit ipsec_esp descriptor */ -static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, +static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, u8 *giv, u64 seq, void (*callback) (struct device *dev, struct talitos_desc *desc, @@ -1000,31 +952,32 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, desc->ptr[4].len = cpu_to_be16(cryptlen); desc->ptr[4].j_extent = authsize; - sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE, - edesc->src_is_chained); + if (areq->src == areq->dst) + sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ? : 1, + DMA_BIDIRECTIONAL); + else + sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ? 
: 1, + DMA_TO_DEVICE); if (sg_count == 1) { desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src)); } else { sg_link_tbl_len = cryptlen; - if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) + if ((edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) && + (edesc->desc.hdr & DESC_HDR_MODE0_ENCRYPT) == 0) { sg_link_tbl_len = cryptlen + authsize; - + } sg_count = sg_to_link_tbl(areq->src, sg_count, sg_link_tbl_len, &edesc->link_tbl[0]); if (sg_count > 1) { desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; desc->ptr[4].ptr = cpu_to_be32(edesc->dma_link_tbl); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); + dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); } else { /* Only one segment now, so no link tbl needed */ - desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq-> - src)); + desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src)); } } @@ -1032,11 +985,10 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, desc->ptr[5].len = cpu_to_be16(cryptlen); desc->ptr[5].j_extent = authsize; - if (areq->src != areq->dst) - sg_count = talitos_map_sg(dev, areq->dst, - edesc->dst_nents ? : 1, - DMA_FROM_DEVICE, - edesc->dst_is_chained); + if (areq->src != areq->dst) { + sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1, + DMA_FROM_DEVICE); + } if (sg_count == 1) { desc->ptr[5].ptr = cpu_to_be32(sg_dma_address(areq->dst)); @@ -1081,55 +1033,49 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, return ret; } + /* * derive number of elements in scatterlist */ -static int sg_count(struct scatterlist *sg_list, int nbytes, int *chained) +static int sg_count(struct scatterlist *sg_list, int nbytes) { struct scatterlist *sg = sg_list; int sg_nents = 0; - *chained = 0; - while (nbytes > 0) { + while (nbytes) { sg_nents++; nbytes -= sg->length; - if (!sg_is_last(sg) && (sg + 1)->length == 0) - *chained = 1; - sg = scatterwalk_sg_next(sg); + sg = sg_next(sg); } return sg_nents; } /* - * allocate and map the extended descriptor + * allocate and map the ipsec_esp extended descriptor */ -static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, - struct scatterlist *src, - struct scatterlist *dst, - unsigned int cryptlen, - unsigned int authsize, - int icv_stashing, - u32 cryptoflags) +static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq, + int icv_stashing) { - struct talitos_edesc *edesc; + struct crypto_aead *authenc = crypto_aead_reqtfm(areq); + struct talitos_ctx *ctx = crypto_aead_ctx(authenc); + struct ipsec_esp_edesc *edesc; int src_nents, dst_nents, alloc_len, dma_len; - int src_chained, dst_chained = 0; - gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + gfp_t flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) { - dev_err(dev, "length exceeds h/w max limit\n"); + if (areq->cryptlen + ctx->authsize > TALITOS_MAX_DATA_LEN) { + dev_err(ctx->dev, "cryptlen exceeds h/w max limit\n"); return ERR_PTR(-EINVAL); } - src_nents = sg_count(src, cryptlen + authsize, &src_chained); + src_nents = sg_count(areq->src, areq->cryptlen + ctx->authsize); src_nents = (src_nents == 1) ? 0 : src_nents; - if (dst == src) { + if (areq->dst == areq->src) { dst_nents = src_nents; } else { - dst_nents = sg_count(dst, cryptlen + authsize, &dst_chained); + dst_nents = sg_count(areq->dst, areq->cryptlen + ctx->authsize); dst_nents = (dst_nents == 1) ? 
0 : dst_nents; } @@ -1138,52 +1084,39 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, * allowing for two separate entries for ICV and generated ICV (+ 2), * and the ICV data itself */ - alloc_len = sizeof(struct talitos_edesc); + alloc_len = sizeof(struct ipsec_esp_edesc); if (src_nents || dst_nents) { dma_len = (src_nents + dst_nents + 2) * - sizeof(struct talitos_ptr) + authsize; + sizeof(struct talitos_ptr) + ctx->authsize; alloc_len += dma_len; } else { dma_len = 0; - alloc_len += icv_stashing ? authsize : 0; + alloc_len += icv_stashing ? ctx->authsize : 0; } edesc = kmalloc(alloc_len, GFP_DMA | flags); if (!edesc) { - dev_err(dev, "could not allocate edescriptor\n"); + dev_err(ctx->dev, "could not allocate edescriptor\n"); return ERR_PTR(-ENOMEM); } edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; - edesc->src_is_chained = src_chained; - edesc->dst_is_chained = dst_chained; edesc->dma_len = dma_len; - edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0], + edesc->dma_link_tbl = dma_map_single(ctx->dev, &edesc->link_tbl[0], edesc->dma_len, DMA_BIDIRECTIONAL); return edesc; } -static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq, - int icv_stashing) -{ - struct crypto_aead *authenc = crypto_aead_reqtfm(areq); - struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - - return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, - areq->cryptlen, ctx->authsize, icv_stashing, - areq->base.flags); -} - -static int aead_encrypt(struct aead_request *req) +static int aead_authenc_encrypt(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - struct talitos_edesc *edesc; + struct ipsec_esp_edesc *edesc; /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, 0); + edesc = ipsec_esp_edesc_alloc(req, 0); if (IS_ERR(edesc)) return PTR_ERR(edesc); @@ -1193,67 +1126,70 @@ static int aead_encrypt(struct aead_request *req) return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_encrypt_done); } -static int aead_decrypt(struct aead_request *req) + + +static int aead_authenc_decrypt(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); unsigned int authsize = ctx->authsize; struct talitos_private *priv = dev_get_drvdata(ctx->dev); - struct talitos_edesc *edesc; + struct ipsec_esp_edesc *edesc; struct scatterlist *sg; void *icvdata; req->cryptlen -= authsize; /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, 1); + edesc = ipsec_esp_edesc_alloc(req, 1); if (IS_ERR(edesc)) return PTR_ERR(edesc); if ((priv->features & TALITOS_FTR_HW_AUTH_CHECK) && - ((!edesc->src_nents && !edesc->dst_nents) || - priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT)) { + (((!edesc->src_nents && !edesc->dst_nents) || + priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT))) { /* decrypt and check the ICV */ - edesc->desc.hdr = ctx->desc_hdr_template | - DESC_HDR_DIR_INBOUND | + edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND | DESC_HDR_MODE1_MDEU_CICV; /* reset integrity check result bits */ edesc->desc.hdr_lo = 0; - return ipsec_esp(edesc, req, NULL, 0, - ipsec_esp_decrypt_hwauth_done); + return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_hwauth_done); - } + } else { - /* Have to check the ICV with software */ - edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND; + /* Have to check the ICV with software */ - /* stash incoming ICV for 
later cmp with ICV generated by the h/w */ - if (edesc->dma_len) - icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 2]; - else - icvdata = &edesc->link_tbl[0]; + edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND; + + /* stash incoming ICV for later cmp with ICV generated by the h/w */ + if (edesc->dma_len) + icvdata = &edesc->link_tbl[edesc->src_nents + + edesc->dst_nents + 2]; + else + icvdata = &edesc->link_tbl[0]; - sg = sg_last(req->src, edesc->src_nents ? : 1); + sg = sg_last(req->src, edesc->src_nents ? : 1); - memcpy(icvdata, (char *)sg_virt(sg) + sg->length - ctx->authsize, - ctx->authsize); + memcpy(icvdata, (char *)sg_virt(sg) + sg->length - ctx->authsize, + ctx->authsize); - return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_swauth_done); + return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_swauth_done); + } } -static int aead_givencrypt(struct aead_givcrypt_request *req) +static int aead_authenc_givencrypt( + struct aead_givcrypt_request *req) { struct aead_request *areq = &req->areq; struct crypto_aead *authenc = crypto_aead_reqtfm(areq); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - struct talitos_edesc *edesc; + struct ipsec_esp_edesc *edesc; /* allocate extended descriptor */ - edesc = aead_edesc_alloc(areq, 0); + edesc = ipsec_esp_edesc_alloc(areq, 0); if (IS_ERR(edesc)) return PTR_ERR(edesc); @@ -1268,228 +1204,31 @@ static int aead_givencrypt(struct aead_givcrypt_request *req) ipsec_esp_encrypt_done); } -static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - struct ablkcipher_alg *alg = crypto_ablkcipher_alg(cipher); - - if (keylen > TALITOS_MAX_KEY_SIZE) - goto badkey; - - if (keylen < alg->min_keysize || keylen > alg->max_keysize) - goto badkey; - - memcpy(&ctx->key, key, keylen); - ctx->keylen = keylen; - - return 0; - -badkey: - crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; -} - -static void common_nonsnoop_unmap(struct device *dev, - struct talitos_edesc *edesc, - struct ablkcipher_request *areq) -{ - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); - - if (edesc->dma_len) - dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, - DMA_BIDIRECTIONAL); -} - -static void ablkcipher_done(struct device *dev, - struct talitos_desc *desc, void *context, - int err) -{ - struct ablkcipher_request *areq = context; - struct talitos_edesc *edesc; - - edesc = container_of(desc, struct talitos_edesc, desc); - - common_nonsnoop_unmap(dev, edesc, areq); - - kfree(edesc); - - areq->base.complete(&areq->base, err); -} - -static int common_nonsnoop(struct talitos_edesc *edesc, - struct ablkcipher_request *areq, - u8 *giv, - void (*callback) (struct device *dev, - struct talitos_desc *desc, - void *context, int error)) -{ - struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - struct device *dev = ctx->dev; - struct talitos_desc *desc = &edesc->desc; - unsigned int cryptlen = areq->nbytes; - unsigned int ivsize; - int sg_count, ret; - - /* first DWORD empty */ - desc->ptr[0].len = 0; - desc->ptr[0].ptr = 0; - desc->ptr[0].j_extent = 0; - - /* cipher iv */ - ivsize = crypto_ablkcipher_ivsize(cipher); - 
map_single_talitos_ptr(dev, &desc->ptr[1], ivsize, giv ?: areq->info, 0, - DMA_TO_DEVICE); - - /* cipher key */ - map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); - - /* - * cipher in - */ - desc->ptr[3].len = cpu_to_be16(cryptlen); - desc->ptr[3].j_extent = 0; - - sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE, - edesc->src_is_chained); - - if (sg_count == 1) { - desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq->src)); - } else { - sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen, - &edesc->link_tbl[0]); - if (sg_count > 1) { - desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP; - desc->ptr[3].ptr = cpu_to_be32(edesc->dma_link_tbl); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed */ - desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq-> - src)); - } - } - - /* cipher out */ - desc->ptr[4].len = cpu_to_be16(cryptlen); - desc->ptr[4].j_extent = 0; - - if (areq->src != areq->dst) - sg_count = talitos_map_sg(dev, areq->dst, - edesc->dst_nents ? : 1, - DMA_FROM_DEVICE, - edesc->dst_is_chained); - - if (sg_count == 1) { - desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->dst)); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; - desc->ptr[4].ptr = cpu_to_be32((struct talitos_ptr *) - edesc->dma_link_tbl + - edesc->src_nents + 1); - sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen, - link_tbl_ptr); - dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); - } - - /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0, - DMA_FROM_DEVICE); - - /* last DWORD empty */ - desc->ptr[6].len = 0; - desc->ptr[6].ptr = 0; - desc->ptr[6].j_extent = 0; - - ret = talitos_submit(dev, desc, callback, areq); - if (ret != -EINPROGRESS) { - common_nonsnoop_unmap(dev, edesc, areq); - kfree(edesc); - } - return ret; -} - -static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request * - areq) -{ - struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - - return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, areq->nbytes, - 0, 0, areq->base.flags); -} - -static int ablkcipher_encrypt(struct ablkcipher_request *areq) -{ - struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - struct talitos_edesc *edesc; - - /* allocate extended descriptor */ - edesc = ablkcipher_edesc_alloc(areq); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* set encrypt */ - edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_MODE0_ENCRYPT; - - return common_nonsnoop(edesc, areq, NULL, ablkcipher_done); -} - -static int ablkcipher_decrypt(struct ablkcipher_request *areq) -{ - struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - struct talitos_edesc *edesc; - - /* allocate extended descriptor */ - edesc = ablkcipher_edesc_alloc(areq); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND; - - return common_nonsnoop(edesc, areq, NULL, ablkcipher_done); -} - struct talitos_alg_template { - struct crypto_alg alg; + char name[CRYPTO_MAX_ALG_NAME]; + char 
driver_name[CRYPTO_MAX_ALG_NAME]; + unsigned int blocksize; + struct aead_alg aead; + struct device *dev; __be32 desc_hdr_template; }; static struct talitos_alg_template driver_algs[] = { - /* AEAD algorithms. These use a single-pass ipsec_esp descriptor */ + /* single-pass ipsec_esp descriptor */ { - .alg = { - .cra_name = "authenc(hmac(sha1),cbc(aes))", - .cra_driver_name = "authenc-hmac-sha1-cbc-aes-talitos", - .cra_blocksize = AES_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = SHA1_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(sha1),cbc(aes))", + .driver_name = "authenc-hmac-sha1-cbc-aes-talitos", + .blocksize = AES_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CBC | @@ -1499,23 +1238,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(sha1),cbc(des3_ede))", - .cra_driver_name = "authenc-hmac-sha1-cbc-3des-talitos", - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = DES3_EDE_BLOCK_SIZE, - .maxauthsize = SHA1_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(sha1),cbc(des3_ede))", + .driver_name = "authenc-hmac-sha1-cbc-3des-talitos", + .blocksize = DES3_EDE_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_DEU | DESC_HDR_MODE0_DEU_CBC | @@ -1526,23 +1261,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(sha256),cbc(aes))", - .cra_driver_name = "authenc-hmac-sha256-cbc-aes-talitos", - .cra_blocksize = AES_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = SHA256_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(sha256),cbc(aes))", + .driver_name = "authenc-hmac-sha256-cbc-aes-talitos", + .blocksize = AES_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CBC | @@ 
-1552,23 +1283,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(sha256),cbc(des3_ede))", - .cra_driver_name = "authenc-hmac-sha256-cbc-3des-talitos", - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = DES3_EDE_BLOCK_SIZE, - .maxauthsize = SHA256_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(sha256),cbc(des3_ede))", + .driver_name = "authenc-hmac-sha256-cbc-3des-talitos", + .blocksize = DES3_EDE_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_DEU | DESC_HDR_MODE0_DEU_CBC | @@ -1579,23 +1306,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(md5),cbc(aes))", - .cra_driver_name = "authenc-hmac-md5-cbc-aes-talitos", - .cra_blocksize = AES_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = MD5_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(md5),cbc(aes))", + .driver_name = "authenc-hmac-md5-cbc-aes-talitos", + .blocksize = AES_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CBC | @@ -1605,23 +1328,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_MD5_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(md5),cbc(des3_ede))", - .cra_driver_name = "authenc-hmac-md5-cbc-3des-talitos", - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = DES3_EDE_BLOCK_SIZE, - .maxauthsize = MD5_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(md5),cbc(des3_ede))", + .driver_name = "authenc-hmac-md5-cbc-3des-talitos", + .blocksize = DES3_EDE_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_DEU | DESC_HDR_MODE0_DEU_CBC | @@ -1630,52 +1349,6 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_INIT | DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, - }, - /* ABLKCIPHER 
algorithms. */ - { - .alg = { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-talitos", - .cra_blocksize = AES_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | - CRYPTO_ALG_ASYNC, - .cra_type = &crypto_ablkcipher_type, - .cra_ablkcipher = { - .setkey = ablkcipher_setkey, - .encrypt = ablkcipher_encrypt, - .decrypt = ablkcipher_decrypt, - .geniv = "eseqiv", - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - } - }, - .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | - DESC_HDR_SEL0_AESU | - DESC_HDR_MODE0_AESU_CBC, - }, - { - .alg = { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "cbc-3des-talitos", - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | - CRYPTO_ALG_ASYNC, - .cra_type = &crypto_ablkcipher_type, - .cra_ablkcipher = { - .setkey = ablkcipher_setkey, - .encrypt = ablkcipher_encrypt, - .decrypt = ablkcipher_decrypt, - .geniv = "eseqiv", - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - } - }, - .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | - DESC_HDR_SEL0_DEU | - DESC_HDR_MODE0_DEU_CBC | - DESC_HDR_MODE0_DEU_3DES, } }; @@ -1689,14 +1362,12 @@ struct talitos_crypto_alg { static int talitos_cra_init(struct crypto_tfm *tfm) { struct crypto_alg *alg = tfm->__crt_alg; - struct talitos_crypto_alg *talitos_alg; + struct talitos_crypto_alg *talitos_alg = + container_of(alg, struct talitos_crypto_alg, crypto_alg); struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); - talitos_alg = container_of(alg, struct talitos_crypto_alg, crypto_alg); - /* update context with ptr to dev */ ctx->dev = talitos_alg->dev; - /* copy descriptor header template value */ ctx->desc_hdr_template = talitos_alg->desc_hdr_template; @@ -1782,13 +1453,19 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, return ERR_PTR(-ENOMEM); alg = &t_alg->crypto_alg; - *alg = template->alg; + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + template->driver_name); alg->cra_module = THIS_MODULE; alg->cra_init = talitos_cra_init; alg->cra_priority = TALITOS_CRA_PRIORITY; + alg->cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; + alg->cra_blocksize = template->blocksize; alg->cra_alignmask = 0; + alg->cra_type = &crypto_aead_type; alg->cra_ctxsize = sizeof(struct talitos_ctx); + alg->cra_u.aead = template->aead; t_alg->desc_hdr_template = template->desc_hdr_template; t_alg->dev = dev; diff --git a/trunk/drivers/i2c/busses/Kconfig b/trunk/drivers/i2c/busses/Kconfig index f1c6ca7e2852..c8460fa9cfac 100644 --- a/trunk/drivers/i2c/busses/Kconfig +++ b/trunk/drivers/i2c/busses/Kconfig @@ -298,7 +298,7 @@ config I2C_BLACKFIN_TWI config I2C_BLACKFIN_TWI_CLK_KHZ int "Blackfin TWI I2C clock (kHz)" depends on I2C_BLACKFIN_TWI - range 10 400 + range 21 400 default 50 help The unit of the TWI clock is kHz. 
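The "range 21 400" bump above exists to keep the driver's CLKDIV arithmetic in
range: i2c-bfin-twi.c (next diff) derives an 8-bit clock-high/clock-low count
from a roughly 10 MHz internal TWI clock, so 5 * 1024 / freq_khz must stay
below 256 (at 20 kHz it is exactly 0x100 and the 8-bit field would wrap to
zero). A minimal standalone sketch of that arithmetic, not part of the patch
itself -- the helper name is illustrative, the constants are taken from the
driver code below:

	#include <assert.h>
	#include <stdio.h>

	/* Roughly the number of ~10 MHz internal-clock ticks per SCL
	 * half-period; the driver stores this value twice in the 16-bit
	 * CLKDIV register (clock-high byte and clock-low byte). */
	static unsigned int twi_clkhilow(unsigned int scl_khz)
	{
		return 5 * 1024 / scl_khz;
	}

	int main(void)
	{
		assert(twi_clkhilow(20) == 0x100);	/* overflows 8 bits */
		assert(twi_clkhilow(21) == 0xf3);	/* fits: hence "range 21 400" */
		printf("CLKDIV @ 21 kHz = 0x%04x\n",
		       (twi_clkhilow(21) << 8) | twi_clkhilow(21));
		return 0;
	}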
diff --git a/trunk/drivers/i2c/busses/i2c-bfin-twi.c b/trunk/drivers/i2c/busses/i2c-bfin-twi.c index fc548b3d002e..77cafb6ba923 100644 --- a/trunk/drivers/i2c/busses/i2c-bfin-twi.c +++ b/trunk/drivers/i2c/busses/i2c-bfin-twi.c @@ -614,6 +614,7 @@ static int i2c_bfin_twi_probe(struct platform_device *pdev) struct i2c_adapter *p_adap; struct resource *res; int rc; + unsigned int clkhilow; iface = kzalloc(sizeof(struct bfin_twi_iface), GFP_KERNEL); if (!iface) { @@ -675,10 +676,14 @@ static int i2c_bfin_twi_probe(struct platform_device *pdev) /* Set TWI internal clock as 10MHz */ write_CONTROL(iface, ((get_sclk() / 1024 / 1024 + 5) / 10) & 0x7F); + /* + * We will not end up with a CLKDIV=0 because no one will specify + * 20kHz SCL or less in Kconfig now. (5 * 1024 / 20 = 0x100) + */ + clkhilow = 5 * 1024 / CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ; + /* Set Twi interface clock as specified */ - write_CLKDIV(iface, ((5*1024 / CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ) - << 8) | ((5*1024 / CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ) - & 0xFF)); + write_CLKDIV(iface, (clkhilow << 8) | clkhilow); /* Enable TWI */ write_CONTROL(iface, read_CONTROL(iface) | TWI_ENA); diff --git a/trunk/fs/xfs/Kconfig b/trunk/fs/xfs/Kconfig index 480f28127f09..29228f5899cd 100644 --- a/trunk/fs/xfs/Kconfig +++ b/trunk/fs/xfs/Kconfig @@ -39,7 +39,6 @@ config XFS_QUOTA config XFS_POSIX_ACL bool "XFS POSIX ACL support" depends on XFS_FS - select FS_POSIX_ACL help POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. diff --git a/trunk/fs/xfs/Makefile b/trunk/fs/xfs/Makefile index 7a59daed1782..60f107e47fe9 100644 --- a/trunk/fs/xfs/Makefile +++ b/trunk/fs/xfs/Makefile @@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o endif xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o -xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o +xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o @@ -88,7 +88,8 @@ xfs-y += xfs_alloc.o \ xfs_utils.o \ xfs_vnodeops.o \ xfs_rw.o \ - xfs_dmops.o + xfs_dmops.o \ + xfs_qmops.o xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ xfs_dir2_trace.o diff --git a/trunk/fs/xfs/linux-2.6/xfs_acl.c b/trunk/fs/xfs/linux-2.6/xfs_acl.c deleted file mode 100644 index 1e9d1246eebc..000000000000 --- a/trunk/fs/xfs/linux-2.6/xfs_acl.c +++ /dev/null @@ -1,523 +0,0 @@ -/* - * Copyright (c) 2008, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_vnodeops.h" -#include -#include - - -#define XFS_ACL_NOT_CACHED ((void *)-1) - -/* - * Locking scheme: - * - all ACL updates are protected by inode->i_mutex, which is taken before - * calling into this file. 
- * - access and updates to the ip->i_acl and ip->i_default_acl pointers are - * protected by inode->i_lock. - */ - -STATIC struct posix_acl * -xfs_acl_from_disk(struct xfs_acl *aclp) -{ - struct posix_acl_entry *acl_e; - struct posix_acl *acl; - struct xfs_acl_entry *ace; - int count, i; - - count = be32_to_cpu(aclp->acl_cnt); - - acl = posix_acl_alloc(count, GFP_KERNEL); - if (!acl) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < count; i++) { - acl_e = &acl->a_entries[i]; - ace = &aclp->acl_entry[i]; - - /* - * The tag is 32 bits on disk and 16 bits in core. - * - * Because every access to it goes through the core - * format first this is not a problem. - */ - acl_e->e_tag = be32_to_cpu(ace->ae_tag); - acl_e->e_perm = be16_to_cpu(ace->ae_perm); - - switch (acl_e->e_tag) { - case ACL_USER: - case ACL_GROUP: - acl_e->e_id = be32_to_cpu(ace->ae_id); - break; - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - acl_e->e_id = ACL_UNDEFINED_ID; - break; - default: - goto fail; - } - } - return acl; - -fail: - posix_acl_release(acl); - return ERR_PTR(-EINVAL); -} - -STATIC void -xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) -{ - const struct posix_acl_entry *acl_e; - struct xfs_acl_entry *ace; - int i; - - aclp->acl_cnt = cpu_to_be32(acl->a_count); - for (i = 0; i < acl->a_count; i++) { - ace = &aclp->acl_entry[i]; - acl_e = &acl->a_entries[i]; - - ace->ae_tag = cpu_to_be32(acl_e->e_tag); - ace->ae_id = cpu_to_be32(acl_e->e_id); - ace->ae_perm = cpu_to_be16(acl_e->e_perm); - } -} - -/* - * Update the cached ACL pointer in the inode. - * - * Because we don't hold any locks while reading/writing the attribute - * from/to disk another thread could have raced and updated the cached - * ACL value before us. In that case we release the previous cached value - * and update it with our new value. - */ -STATIC void -xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - -struct posix_acl * -xfs_get_acl(struct inode *inode, int type) -{ - struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl = NULL, **p_acl; - struct xfs_acl *xfs_acl; - int len = sizeof(struct xfs_acl); - char *ea_name; - int error; - - switch (type) { - case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; - p_acl = &ip->i_acl; - break; - case ACL_TYPE_DEFAULT: - ea_name = SGI_ACL_DEFAULT; - p_acl = &ip->i_default_acl; - break; - default: - return ERR_PTR(-EINVAL); - } - - spin_lock(&inode->i_lock); - if (*p_acl != XFS_ACL_NOT_CACHED) - acl = posix_acl_dup(*p_acl); - spin_unlock(&inode->i_lock); - - /* - * If we have a cached ACLs value just return it, not need to - * go out to the disk. - */ - if (acl) - return acl; - - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); - if (!xfs_acl) - return ERR_PTR(-ENOMEM); - - error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT); - if (error) { - /* - * If the attribute doesn't exist make sure we have a negative - * cache entry, for any other error assume it is transient and - * leave the cache entry as XFS_ACL_NOT_CACHED. 
- */ - if (error == -ENOATTR) { - acl = NULL; - goto out_update_cache; - } - goto out; - } - - acl = xfs_acl_from_disk(xfs_acl); - if (IS_ERR(acl)) - goto out; - - out_update_cache: - xfs_update_cached_acl(inode, p_acl, acl); - out: - kfree(xfs_acl); - return acl; -} - -STATIC int -xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) -{ - struct xfs_inode *ip = XFS_I(inode); - struct posix_acl **p_acl; - char *ea_name; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - switch (type) { - case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; - p_acl = &ip->i_acl; - break; - case ACL_TYPE_DEFAULT: - if (!S_ISDIR(inode->i_mode)) - return acl ? -EACCES : 0; - ea_name = SGI_ACL_DEFAULT; - p_acl = &ip->i_default_acl; - break; - default: - return -EINVAL; - } - - if (acl) { - struct xfs_acl *xfs_acl; - int len; - - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); - if (!xfs_acl) - return -ENOMEM; - - xfs_acl_to_disk(xfs_acl, acl); - len = sizeof(struct xfs_acl) - - (sizeof(struct xfs_acl_entry) * - (XFS_ACL_MAX_ENTRIES - acl->a_count)); - - error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl, - len, ATTR_ROOT); - - kfree(xfs_acl); - } else { - /* - * A NULL ACL argument means we want to remove the ACL. - */ - error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); - - /* - * If the attribute didn't exist to start with that's fine. - */ - if (error == -ENOATTR) - error = 0; - } - - if (!error) - xfs_update_cached_acl(inode, p_acl, acl); - return error; -} - -int -xfs_check_acl(struct inode *inode, int mask) -{ - struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl; - int error = -EAGAIN; - - xfs_itrace_entry(ip); - - /* - * If there is no attribute fork no ACL exists on this inode and - * we can skip the whole exercise. - */ - if (!XFS_IFORK_Q(ip)) - return -EAGAIN; - - acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - } - - return error; -} - -static int -xfs_set_mode(struct inode *inode, mode_t mode) -{ - int error = 0; - - if (mode != inode->i_mode) { - struct iattr iattr; - - iattr.ia_valid = ATTR_MODE; - iattr.ia_mode = mode; - - error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); - } - - return error; -} - -static int -xfs_acl_exists(struct inode *inode, char *name) -{ - int len = sizeof(struct xfs_acl); - - return (xfs_attr_get(XFS_I(inode), name, NULL, &len, - ATTR_ROOT|ATTR_KERNOVAL) == 0); -} - -int -posix_acl_access_exists(struct inode *inode) -{ - return xfs_acl_exists(inode, SGI_ACL_FILE); -} - -int -posix_acl_default_exists(struct inode *inode) -{ - if (!S_ISDIR(inode->i_mode)) - return 0; - return xfs_acl_exists(inode, SGI_ACL_DEFAULT); -} - -/* - * No need for i_mutex because the inode is not yet exposed to the VFS. - */ -int -xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl) -{ - struct posix_acl *clone; - mode_t mode; - int error = 0, inherit = 0; - - if (S_ISDIR(inode->i_mode)) { - error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl); - if (error) - return error; - } - - clone = posix_acl_clone(default_acl, GFP_KERNEL); - if (!clone) - return -ENOMEM; - - mode = inode->i_mode; - error = posix_acl_create_masq(clone, &mode); - if (error < 0) - goto out_release_clone; - - /* - * If posix_acl_create_masq returns a positive value we need to - * inherit a permission that can't be represented using the Unix - * mode bits and we actually need to set an ACL. 
- */ - if (error > 0) - inherit = 1; - - error = xfs_set_mode(inode, mode); - if (error) - goto out_release_clone; - - if (inherit) - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); - - out_release_clone: - posix_acl_release(clone); - return error; -} - -int -xfs_acl_chmod(struct inode *inode) -{ - struct posix_acl *acl, *clone; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); - - posix_acl_release(clone); - return error; -} - -void -xfs_inode_init_acls(struct xfs_inode *ip) -{ - /* - * No need for locking, inode is not live yet. - */ - ip->i_acl = XFS_ACL_NOT_CACHED; - ip->i_default_acl = XFS_ACL_NOT_CACHED; -} - -void -xfs_inode_clear_acls(struct xfs_inode *ip) -{ - /* - * No need for locking here, the inode is not live anymore - * and just about to be freed. - */ - if (ip->i_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(ip->i_acl); - if (ip->i_default_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(ip->i_default_acl); -} - - -/* - * System xattr handlers. - * - * Currently Posix ACLs are the only system namespace extended attribute - * handlers supported by XFS, so we just implement the handlers here. - * If we ever support other system extended attributes this will need - * some refactoring. - */ - -static int -xfs_decode_acl(const char *name) -{ - if (strcmp(name, "posix_acl_access") == 0) - return ACL_TYPE_ACCESS; - else if (strcmp(name, "posix_acl_default") == 0) - return ACL_TYPE_DEFAULT; - return -EINVAL; -} - -static int -xfs_xattr_system_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - struct posix_acl *acl; - int type, error; - - type = xfs_decode_acl(name); - if (type < 0) - return type; - - acl = xfs_get_acl(inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - - error = posix_acl_to_xattr(acl, value, size); - posix_acl_release(acl); - - return error; -} - -static int -xfs_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - struct posix_acl *acl = NULL; - int error = 0, type; - - type = xfs_decode_acl(name); - if (type < 0) - return type; - if (flags & XATTR_CREATE) - return -EINVAL; - if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; - if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - - if (!value) - goto set_acl; - - acl = posix_acl_from_xattr(value, size); - if (!acl) { - /* - * acl_set_file(3) may request that we set default ACLs with - * zero length -- defend (gracefully) against that here. 
- */ - goto out; - } - if (IS_ERR(acl)) { - error = PTR_ERR(acl); - goto out; - } - - error = posix_acl_valid(acl); - if (error) - goto out_release; - - error = -EINVAL; - if (acl->a_count > XFS_ACL_MAX_ENTRIES) - goto out_release; - - if (type == ACL_TYPE_ACCESS) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); - - if (error <= 0) { - posix_acl_release(acl); - acl = NULL; - - if (error < 0) - return error; - } - - error = xfs_set_mode(inode, mode); - if (error) - goto out_release; - } - - set_acl: - error = xfs_set_acl(inode, type, acl); - out_release: - posix_acl_release(acl); - out: - return error; -} - -struct xattr_handler xfs_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = xfs_xattr_system_get, - .set = xfs_xattr_system_set, -}; diff --git a/trunk/fs/xfs/linux-2.6/xfs_ioctl.c b/trunk/fs/xfs/linux-2.6/xfs_ioctl.c index 5bb523d7f37e..34eaab608e6e 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/trunk/fs/xfs/linux-2.6/xfs_ioctl.c @@ -41,6 +41,7 @@ #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" @@ -898,8 +899,7 @@ xfs_ioctl_setattr( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; unsigned int lock_flags = 0; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *udqp = NULL, *gdqp = NULL; struct xfs_dquot *olddquot = NULL; int code; @@ -919,7 +919,7 @@ xfs_ioctl_setattr( * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { - code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, + code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid, ip->i_d.di_gid, fa->fsx_projid, XFS_QMOPT_PQUOTA, &udqp, &gdqp); if (code) @@ -954,11 +954,10 @@ xfs_ioctl_setattr( * Do a quota reservation only if projid is actually going to change. */ if (mask & FSX_PROJID) { - if (XFS_IS_QUOTA_RUNNING(mp) && - XFS_IS_PQUOTA_ON(mp) && + if (XFS_IS_PQUOTA_ON(mp) && ip->i_d.di_projid != fa->fsx_projid) { ASSERT(tp); - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, + code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ @@ -1060,8 +1059,8 @@ xfs_ioctl_setattr( * in the transaction. */ if (ip->i_d.di_projid != fa->fsx_projid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { - olddquot = xfs_qm_vop_chown(tp, ip, + if (XFS_IS_PQUOTA_ON(mp)) { + olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_projid = fa->fsx_projid; @@ -1107,9 +1106,9 @@ xfs_ioctl_setattr( /* * Release any dquot(s) the inode had kept before chown. 
*/ - xfs_qm_dqrele(olddquot); - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); + XFS_QM_DQRELE(mp, olddquot); + XFS_QM_DQRELE(mp, udqp); + XFS_QM_DQRELE(mp, gdqp); if (code) return code; @@ -1123,8 +1122,8 @@ xfs_ioctl_setattr( return 0; error_return: - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); + XFS_QM_DQRELE(mp, udqp); + XFS_QM_DQRELE(mp, gdqp); xfs_trans_cancel(tp, 0); if (lock_flags) xfs_iunlock(ip, lock_flags); diff --git a/trunk/fs/xfs/linux-2.6/xfs_iops.c b/trunk/fs/xfs/linux-2.6/xfs_iops.c index 58973bb46038..6075382336d7 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_iops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_iops.c @@ -17,7 +17,6 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_acl.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" @@ -52,7 +51,6 @@ #include <linux/capability.h> #include <linux/xattr.h> #include <linux/namei.h> -#include <linux/posix_acl.h> #include <linux/security.h> #include <linux/falloc.h> #include <linux/fiemap.h> @@ -204,8 +202,9 @@ xfs_vn_mknod( { struct inode *inode; struct xfs_inode *ip = NULL; - struct posix_acl *default_acl = NULL; + xfs_acl_t *default_acl = NULL; struct xfs_name name; + int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS; int error; /* @@ -220,15 +219,19 @@ xfs_vn_mknod( rdev = 0; } - if (IS_POSIXACL(dir)) { - default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(default_acl)) - return -PTR_ERR(default_acl); - - if (!default_acl) - mode &= ~current_umask(); + if (test_default_acl && test_default_acl(dir)) { + if (!_ACL_ALLOC(default_acl)) { + return -ENOMEM; + } + if (!_ACL_GET_DEFAULT(dir, default_acl)) { + _ACL_FREE(default_acl); + default_acl = NULL; + } } + if (IS_POSIXACL(dir) && !default_acl) + mode &= ~current_umask(); + xfs_dentry_to_name(&name, dentry); error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); if (unlikely(error)) @@ -241,10 +244,10 @@ xfs_vn_mknod( goto out_cleanup_inode; if (default_acl) { - error = -xfs_inherit_acl(inode, default_acl); + error = _ACL_INHERIT(inode, mode, default_acl); if (unlikely(error)) goto out_cleanup_inode; - posix_acl_release(default_acl); + _ACL_FREE(default_acl); } @@ -254,7 +257,8 @@ xfs_vn_mknod( out_cleanup_inode: xfs_cleanup_inode(dir, inode, dentry); out_free_acl: - posix_acl_release(default_acl); + if (default_acl) + _ACL_FREE(default_acl); return -error; } @@ -484,6 +488,26 @@ xfs_vn_put_link( kfree(s); } +#ifdef CONFIG_XFS_POSIX_ACL +STATIC int +xfs_check_acl( + struct inode *inode, + int mask) +{ + struct xfs_inode *ip = XFS_I(inode); + int error; + + xfs_itrace_entry(ip); + + if (XFS_IFORK_Q(ip)) { + error = xfs_acl_iaccess(ip, mask, NULL); + if (error != -1) + return -error; + } + + return -EAGAIN; +} + STATIC int xfs_vn_permission( struct inode *inode, @@ -491,6 +515,9 @@ xfs_vn_permission( { return generic_permission(inode, mask, xfs_check_acl); } +#else +#define xfs_vn_permission NULL +#endif STATIC int xfs_vn_getattr( diff --git a/trunk/fs/xfs/linux-2.6/xfs_lrw.c b/trunk/fs/xfs/linux-2.6/xfs_lrw.c index 7078974a6eee..9142192ccbe6 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_lrw.c +++ b/trunk/fs/xfs/linux-2.6/xfs_lrw.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_inode_item.h" #include "xfs_buf_item.h" diff --git a/trunk/fs/xfs/linux-2.6/xfs_quotaops.c b/trunk/fs/xfs/linux-2.6/xfs_quotaops.c index cb6e2cca214f..94d9a633d3d9 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_quotaops.c @@ -50,11 +50,9 @@ xfs_fs_quota_sync( { struct xfs_mount *mp = XFS_M(sb); - if (sb->s_flags & MS_RDONLY) - return -EROFS; if
(!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - return -xfs_sync_data(mp, 0); + return -xfs_sync_inodes(mp, SYNC_DELWRI); } STATIC int diff --git a/trunk/fs/xfs/linux-2.6/xfs_super.c b/trunk/fs/xfs/linux-2.6/xfs_super.c index 2e09efbca8db..08d6bd9a3947 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_super.c +++ b/trunk/fs/xfs/linux-2.6/xfs_super.c @@ -43,6 +43,7 @@ #include "xfs_itable.h" #include "xfs_fsops.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -404,14 +405,6 @@ xfs_parseargs( return EINVAL; } -#ifndef CONFIG_XFS_QUOTA - if (XFS_IS_QUOTA_RUNNING(mp)) { - cmn_err(CE_WARN, - "XFS: quota support not available in this kernel."); - return EINVAL; - } -#endif - if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { cmn_err(CE_WARN, @@ -1070,18 +1063,7 @@ xfs_fs_put_super( int unmount_event_flags = 0; xfs_syncd_stop(mp); - - if (!(sb->s_flags & MS_RDONLY)) { - /* - * XXX(hch): this should be SYNC_WAIT. - * - * Or more likely not needed at all because the VFS is already - * calling ->sync_fs after shutting down all filestem - * operations and just before calling ->put_super. - */ - xfs_sync_data(mp, 0); - xfs_sync_attr(mp, 0); - } + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); #ifdef HAVE_DMAPI if (mp->m_flags & XFS_MOUNT_DMAPI) { @@ -1116,6 +1098,7 @@ xfs_fs_put_super( xfs_freesb(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); + xfs_qmops_put(mp); xfs_dmops_put(mp); xfs_free_fsname(mp); kfree(mp); @@ -1175,7 +1158,6 @@ xfs_fs_statfs( { struct xfs_mount *mp = XFS_M(dentry->d_sb); xfs_sb_t *sbp = &mp->m_sb; - struct xfs_inode *ip = XFS_I(dentry->d_inode); __uint64_t fakeinos, id; xfs_extlen_t lsize; @@ -1204,10 +1186,7 @@ xfs_fs_statfs( statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); spin_unlock(&mp->m_sb_lock); - if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || - ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == - (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) - xfs_qm_statvfs(ip, statp); + XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp); return 0; } @@ -1415,13 +1394,16 @@ xfs_fs_fill_super( error = xfs_dmops_get(mp); if (error) goto out_free_fsname; + error = xfs_qmops_get(mp); + if (error) + goto out_put_dmops; if (silent) flags |= XFS_MFSI_QUIET; error = xfs_open_devices(mp); if (error) - goto out_put_dmops; + goto out_put_qmops; if (xfs_icsb_init_counters(mp)) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; @@ -1489,6 +1471,8 @@ xfs_fs_fill_super( out_destroy_counters: xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); + out_put_qmops: + xfs_qmops_put(mp); out_put_dmops: xfs_dmops_put(mp); out_free_fsname: @@ -1722,8 +1706,18 @@ xfs_init_zones(void) if (!xfs_ili_zone) goto out_destroy_inode_zone; +#ifdef CONFIG_XFS_POSIX_ACL + xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl"); + if (!xfs_acl_zone) + goto out_destroy_ili_zone; +#endif + return 0; +#ifdef CONFIG_XFS_POSIX_ACL + out_destroy_ili_zone: +#endif + kmem_zone_destroy(xfs_ili_zone); out_destroy_inode_zone: kmem_zone_destroy(xfs_inode_zone); out_destroy_efi_zone: @@ -1757,6 +1751,9 @@ xfs_init_zones(void) STATIC void xfs_destroy_zones(void) { +#ifdef CONFIG_XFS_POSIX_ACL + kmem_zone_destroy(xfs_acl_zone); +#endif kmem_zone_destroy(xfs_ili_zone); kmem_zone_destroy(xfs_inode_zone); kmem_zone_destroy(xfs_efi_zone); diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index b619d6b8ca43..f7ba76633c29 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c 
+++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -43,267 +43,166 @@ #include "xfs_buf_item.h" #include "xfs_inode_item.h" #include "xfs_rw.h" -#include "xfs_quota.h" #include <linux/kthread.h> #include <linux/freezer.h> - -STATIC xfs_inode_t * -xfs_inode_ag_lookup( - struct xfs_mount *mp, - struct xfs_perag *pag, - uint32_t *first_index, - int tag) +/* + * Sync all the inodes in the given AG according to the + * direction given by the flags. + */ +STATIC int +xfs_sync_inodes_ag( + xfs_mount_t *mp, + int ag, + int flags) { - int nr_found; - struct xfs_inode *ip; - - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - read_lock(&pag->pag_ici_lock); - if (tag == XFS_ICI_NO_TAG) { - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void **)&ip, *first_index, 1); - } else { - nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, - (void **)&ip, *first_index, 1, tag); - } - if (!nr_found) - goto unlock; + xfs_perag_t *pag = &mp->m_perag[ag]; + int nr_found; + uint32_t first_index = 0; + int error = 0; + int last_error = 0; - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - goto unlock; + do { + struct inode *inode; + xfs_inode_t *ip = NULL; + int lock_flags = XFS_ILOCK_SHARED; - return ip; + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void**)&ip, first_index, 1); -unlock: - read_unlock(&pag->pag_ici_lock); - return NULL; -} + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } -STATIC int -xfs_inode_ag_walk( - struct xfs_mount *mp, - xfs_agnumber_t ag, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags), - int flags, - int tag) -{ - struct xfs_perag *pag = &mp->m_perag[ag]; - uint32_t first_index; - int last_error = 0; - int skipped; + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } -restart: - skipped = 0; - first_index = 0; - do { - int error = 0; - xfs_inode_t *ip; + /* nothing to sync during shutdown */ + if (XFS_FORCED_SHUTDOWN(mp)) { + read_unlock(&pag->pag_ici_lock); + return 0; + } - ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); - if (!ip) - break; + /* + * If we can't get a reference on the inode, it must be + * in reclaim. Leave it for the reclaim code to flush. + */ + inode = VFS_I(ip); + if (!igrab(inode)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + read_unlock(&pag->pag_ici_lock); - error = execute(ip, pag, flags); - if (error == EAGAIN) { - skipped++; + /* avoid new or bad inodes */ + if (is_bad_inode(inode) || + xfs_iflags_test(ip, XFS_INEW)) { + IRELE(ip); continue; } + + /* + * If we have to flush data or wait for I/O completion + * we need to hold the iolock.
+ */ + if (flags & SYNC_DELWRI) { + if (VN_DIRTY(inode)) { + if (flags & SYNC_TRYLOCK) { + if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) + lock_flags |= XFS_IOLOCK_SHARED; + } else { + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock_flags |= XFS_IOLOCK_SHARED; + } + if (lock_flags & XFS_IOLOCK_SHARED) { + error = xfs_flush_pages(ip, 0, -1, + (flags & SYNC_WAIT) ? 0 + : XFS_B_ASYNC, + FI_NONE); + } + } + if (VN_CACHED(inode) && (flags & SYNC_IOWAIT)) + xfs_ioend_wait(ip); + } + xfs_ilock(ip, XFS_ILOCK_SHARED); + + if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { + if (flags & SYNC_WAIT) { + xfs_iflock(ip); + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + else + xfs_ifunlock(ip); + } else if (xfs_iflock_nowait(ip)) { + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + else + xfs_ifunlock(ip); + } + } + xfs_iput(ip, lock_flags); + if (error) last_error = error; /* * bail out if the filesystem is corrupted. */ if (error == EFSCORRUPTED) - break; + return XFS_ERROR(error); - } while (1); - - if (skipped) { - delay(1); - goto restart; - } + } while (nr_found); - xfs_put_perag(mp, pag); return last_error; } int -xfs_inode_ag_iterator( - struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags), - int flags, - int tag) -{ - int error = 0; - int last_error = 0; - xfs_agnumber_t ag; - - for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - if (!mp->m_perag[ag].pag_ici_init) - continue; - error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); - if (error) { - last_error = error; - if (error == EFSCORRUPTED) - break; - } - } - return XFS_ERROR(last_error); -} - -/* must be called with pag_ici_lock held and releases it */ -int -xfs_sync_inode_valid( - struct xfs_inode *ip, - struct xfs_perag *pag) -{ - struct inode *inode = VFS_I(ip); - - /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - read_unlock(&pag->pag_ici_lock); - return EFSCORRUPTED; - } - - /* - * If we can't get a reference on the inode, it must be in reclaim. - * Leave it for the reclaim code to flush. Also avoid inodes that - * haven't been fully initialised. - */ - if (!igrab(inode)) { - read_unlock(&pag->pag_ici_lock); - return ENOENT; - } - read_unlock(&pag->pag_ici_lock); - - if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { - IRELE(ip); - return ENOENT; - } - - return 0; -} - -STATIC int -xfs_sync_inode_data( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) +xfs_sync_inodes( + xfs_mount_t *mp, + int flags) { - struct inode *inode = VFS_I(ip); - struct address_space *mapping = inode->i_mapping; - int error = 0; - - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; - - if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - goto out_wait; - - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (flags & SYNC_TRYLOCK) - goto out_wait; - xfs_ilock(ip, XFS_IOLOCK_SHARED); - } + int error; + int last_error; + int i; + int lflags = XFS_LOG_FORCE; - error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? 
- 0 : XFS_B_ASYNC, FI_NONE); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; + error = 0; + last_error = 0; - out_wait: if (flags & SYNC_WAIT) - xfs_ioend_wait(ip); - IRELE(ip); - return error; -} - -STATIC int -xfs_sync_inode_attr( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) -{ - int error = 0; - - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; + lflags |= XFS_LOG_SYNC; - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_inode_clean(ip)) - goto out_unlock; - if (!xfs_iflock_nowait(ip)) { - if (!(flags & SYNC_WAIT)) - goto out_unlock; - xfs_iflock(ip); - } - - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - goto out_unlock; + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + error = xfs_sync_inodes_ag(mp, i, flags); + if (error) + last_error = error; + if (error == EFSCORRUPTED) + break; } + if (flags & SYNC_DELWRI) + xfs_log_force(mp, 0, lflags); - error = xfs_iflush(ip, (flags & SYNC_WAIT) ? - XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI); - - out_unlock: - xfs_iunlock(ip, XFS_ILOCK_SHARED); - IRELE(ip); - return error; -} - -/* - * Write out pagecache data for the whole filesystem. - */ -int -xfs_sync_data( - struct xfs_mount *mp, - int flags) -{ - int error; - - ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); - - error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG); - if (error) - return XFS_ERROR(error); - - xfs_log_force(mp, 0, - (flags & SYNC_WAIT) ? - XFS_LOG_FORCE | XFS_LOG_SYNC : - XFS_LOG_FORCE); - return 0; -} - -/* - * Write out inode metadata (attributes) for the whole filesystem. - */ -int -xfs_sync_attr( - struct xfs_mount *mp, - int flags) -{ - ASSERT((flags & ~SYNC_WAIT) == 0); - - return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG); + return XFS_ERROR(last_error); } STATIC int @@ -353,7 +252,7 @@ xfs_sync_fsdata( * If this is xfssyncd() then only sync the superblock if we can * lock it without sleeping and it is not pinned. */ - if (flags & SYNC_TRYLOCK) { + if (flags & SYNC_BDFLUSH) { ASSERT(!(flags & SYNC_WAIT)); bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); @@ -417,13 +316,13 @@ xfs_quiesce_data( int error; /* push non-blocking */ - xfs_sync_data(mp, 0); - xfs_qm_sync(mp, SYNC_TRYLOCK); + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH); + XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); xfs_filestream_flush(mp); /* push and block */ - xfs_sync_data(mp, SYNC_WAIT); - xfs_qm_sync(mp, SYNC_WAIT); + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); + XFS_QM_DQSYNC(mp, SYNC_WAIT); /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, 0); @@ -442,7 +341,7 @@ xfs_quiesce_fs( int count = 0, pincount; xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop @@ -451,7 +350,7 @@ xfs_quiesce_fs( * logged before we can write the unmount record. 
*/ do { - xfs_sync_attr(mp, SYNC_WAIT); + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { delay(50); @@ -534,8 +433,8 @@ xfs_flush_inodes_work( void *arg) { struct inode *inode = arg; - xfs_sync_data(mp, SYNC_TRYLOCK); - xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); + xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); + xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); iput(inode); } @@ -566,10 +465,10 @@ xfs_sync_worker( if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* dgc: errors ignored here */ - error = xfs_qm_sync(mp, SYNC_TRYLOCK); - error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); + error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); if (xfs_log_need_covered(mp)) error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); } @@ -670,7 +569,7 @@ xfs_reclaim_inode( xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); } - return -EAGAIN; + return 1; } __xfs_iflags_set(ip, XFS_IRECLAIM); spin_unlock(&ip->i_flags_lock); @@ -755,27 +654,101 @@ xfs_inode_clear_reclaim_tag( xfs_put_perag(mp, pag); } -STATIC int -xfs_reclaim_inode_now( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) + +STATIC void +xfs_reclaim_inodes_ag( + xfs_mount_t *mp, + int ag, + int noblock, + int mode) { - /* ignore if already under reclaim */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { + xfs_inode_t *ip = NULL; + xfs_perag_t *pag = &mp->m_perag[ag]; + int nr_found; + uint32_t first_index; + int skipped; + +restart: + first_index = 0; + skipped = 0; + do { + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, + (void**)&ip, first_index, 1, + XFS_ICI_RECLAIM_TAG); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* ignore if already under reclaim */ + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + + if (noblock) { + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + if (xfs_ipincount(ip) || + !xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + read_unlock(&pag->pag_ici_lock); + continue; + } + } read_unlock(&pag->pag_ici_lock); - return 0; + + /* + * hmmm - this is an inode already in reclaim. Do + * we even bother catching it here? 
+ */ + if (xfs_reclaim_inode(ip, noblock, mode)) + skipped++; + } while (nr_found); + + if (skipped) { + delay(1); + goto restart; } - read_unlock(&pag->pag_ici_lock); + return; - return xfs_reclaim_inode(ip, 0, flags); } int xfs_reclaim_inodes( xfs_mount_t *mp, + int noblock, int mode) { - return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, - XFS_ICI_RECLAIM_TAG); + int i; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + xfs_reclaim_inodes_ag(mp, i, noblock, mode); + } + return 0; } + + diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.h b/trunk/fs/xfs/linux-2.6/xfs_sync.h index 2a10301c99c7..308d5bf6dfbd 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.h +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.h @@ -29,14 +29,17 @@ typedef struct xfs_sync_work { struct completion *w_completion; } xfs_sync_work_t; -#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ -#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ +#define SYNC_ATTR 0x0001 /* sync attributes */ +#define SYNC_DELWRI 0x0002 /* look at delayed writes */ +#define SYNC_WAIT 0x0004 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ +#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ +#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); -int xfs_sync_attr(struct xfs_mount *mp, int flags); -int xfs_sync_data(struct xfs_mount *mp, int flags); +int xfs_sync_inodes(struct xfs_mount *mp, int flags); int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); @@ -45,16 +48,10 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); -int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); +int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inode *ip); - -int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); -int xfs_inode_ag_iterator(struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag); - #endif diff --git a/trunk/fs/xfs/linux-2.6/xfs_xattr.c b/trunk/fs/xfs/linux-2.6/xfs_xattr.c index 497c7fb75cc1..964621fde6ed 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_xattr.c +++ b/trunk/fs/xfs/linux-2.6/xfs_xattr.c @@ -29,6 +29,67 @@ #include +/* + * ACL handling. Should eventually be moved into xfs_acl.c + */ + +static int +xfs_decode_acl(const char *name) +{ + if (strcmp(name, "posix_acl_access") == 0) + return _ACL_TYPE_ACCESS; + else if (strcmp(name, "posix_acl_default") == 0) + return _ACL_TYPE_DEFAULT; + return -EINVAL; +} + +/* + * Get system extended attributes which at the moment only + * includes Posix ACLs. 
+ */ +static int +xfs_xattr_system_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + int acl; + + acl = xfs_decode_acl(name); + if (acl < 0) + return acl; + + return xfs_acl_vget(inode, buffer, size, acl); +} + +static int +xfs_xattr_system_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + int acl; + + acl = xfs_decode_acl(name); + if (acl < 0) + return acl; + if (flags & XATTR_CREATE) + return -EINVAL; + + if (!value) + return xfs_acl_vremove(inode, acl); + + return xfs_acl_vset(inode, (void *)value, size, acl); +} + +static struct xattr_handler xfs_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .get = xfs_xattr_system_get, + .set = xfs_xattr_system_set, +}; + + +/* + * Real xattr handling. The only difference between the namespaces is + * a flag passed to the low-level attr code. + */ + static int __xfs_xattr_get(struct inode *inode, const char *name, void *value, size_t size, int xflags) @@ -138,9 +199,7 @@ struct xattr_handler *xfs_xattr_handlers[] = { &xfs_xattr_user_handler, &xfs_xattr_trusted_handler, &xfs_xattr_security_handler, -#ifdef CONFIG_XFS_POSIX_ACL &xfs_xattr_system_handler, -#endif NULL }; @@ -251,7 +310,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) /* * Then add the two synthetic ACL attributes. */ - if (posix_acl_access_exists(inode)) { + if (xfs_acl_vhasacl_access(inode)) { error = list_one_attr(POSIX_ACL_XATTR_ACCESS, strlen(POSIX_ACL_XATTR_ACCESS) + 1, data, size, &context.count); @@ -259,7 +318,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) return error; } - if (posix_acl_default_exists(inode)) { + if (xfs_acl_vhasacl_default(inode)) { error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, strlen(POSIX_ACL_XATTR_DEFAULT) + 1, data, size, &context.count); diff --git a/trunk/fs/xfs/quota/xfs_dquot.c b/trunk/fs/xfs/quota/xfs_dquot.c index 2f3f2229eaaf..e4babcc63423 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.c +++ b/trunk/fs/xfs/quota/xfs_dquot.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -1193,9 +1194,7 @@ void xfs_qm_dqrele( xfs_dquot_t *dqp) { - if (!dqp) - return; - + ASSERT(dqp); xfs_dqtrace_entry(dqp, "DQRELE"); xfs_dqlock(dqp); diff --git a/trunk/fs/xfs/quota/xfs_dquot.h b/trunk/fs/xfs/quota/xfs_dquot.h index 6533ead9b889..de0f402ddb4c 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.h +++ b/trunk/fs/xfs/quota/xfs_dquot.h @@ -181,6 +181,7 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, xfs_dqid_t, uint, uint, xfs_dquot_t **); extern void xfs_qm_dqput(xfs_dquot_t *); +extern void xfs_qm_dqrele(xfs_dquot_t *); extern void xfs_dqlock(xfs_dquot_t *); extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); extern void xfs_dqunlock(xfs_dquot_t *); diff --git a/trunk/fs/xfs/quota/xfs_dquot_item.c b/trunk/fs/xfs/quota/xfs_dquot_item.c index d0d4a9a0bbd7..1728f6a7c4f5 100644 --- a/trunk/fs/xfs/quota/xfs_dquot_item.c +++ b/trunk/fs/xfs/quota/xfs_dquot_item.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" diff --git a/trunk/fs/xfs/quota/xfs_qm.c b/trunk/fs/xfs/quota/xfs_qm.c index 45b1bfef7388..5b6695049e00 100644 --- a/trunk/fs/xfs/quota/xfs_qm.c +++ b/trunk/fs/xfs/quota/xfs_qm.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include 
"xfs_bmap.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -286,13 +287,11 @@ xfs_qm_rele_quotafs_ref( * Just destroy the quotainfo structure. */ void -xfs_qm_unmount( - struct xfs_mount *mp) +xfs_qm_unmount_quotadestroy( + xfs_mount_t *mp) { - if (mp->m_quotainfo) { - xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); + if (mp->m_quotainfo) xfs_qm_destroy_quotainfo(mp); - } } @@ -386,13 +385,8 @@ xfs_qm_mount_quotas( if (error) { xfs_fs_cmn_err(CE_WARN, mp, "Failed to initialize disk quotas."); - return; } - -#ifdef QUOTADEBUG - if (XFS_IS_QUOTA_ON(mp)) - xfs_qm_internalqcheck(mp); -#endif + return; } /* @@ -780,11 +774,12 @@ xfs_qm_dqattach_grouphint( * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON * into account. * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. + * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL. * Inode may get unlocked and relocked in here, and the caller must deal with * the consequences. */ int -xfs_qm_dqattach_locked( +xfs_qm_dqattach( xfs_inode_t *ip, uint flags) { @@ -792,14 +787,17 @@ xfs_qm_dqattach_locked( uint nquotas = 0; int error = 0; - if (!XFS_IS_QUOTA_RUNNING(mp) || - !XFS_IS_QUOTA_ON(mp) || - !XFS_NOT_DQATTACHED(mp, ip) || - ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino) + if ((! XFS_IS_QUOTA_ON(mp)) || + (! XFS_NOT_DQATTACHED(mp, ip)) || + (ip->i_ino == mp->m_sb.sb_uquotino) || + (ip->i_ino == mp->m_sb.sb_gquotino)) return 0; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || + xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (! (flags & XFS_QMOPT_ILOCKED)) + xfs_ilock(ip, XFS_ILOCK_EXCL); if (XFS_IS_UQUOTA_ON(mp)) { error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, @@ -851,7 +849,8 @@ xfs_qm_dqattach_locked( xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); } - done: + done: + #ifdef QUOTADEBUG if (! error) { if (XFS_IS_UQUOTA_ON(mp)) @@ -859,22 +858,15 @@ xfs_qm_dqattach_locked( if (XFS_IS_OQUOTA_ON(mp)) ASSERT(ip->i_gdquot); } - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); #endif - return error; -} -int -xfs_qm_dqattach( - struct xfs_inode *ip, - uint flags) -{ - int error; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_qm_dqattach_locked(ip, flags); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (! (flags & XFS_QMOPT_ILOCKED)) + xfs_iunlock(ip, XFS_ILOCK_EXCL); +#ifdef QUOTADEBUG + else + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +#endif return error; } @@ -904,6 +896,11 @@ xfs_qm_dqdetach( } } +/* + * This is called to sync quotas. We can be told to use non-blocking + * semantics by either the SYNC_BDFLUSH flag or the absence of the + * SYNC_WAIT flag. + */ int xfs_qm_sync( xfs_mount_t *mp, @@ -912,13 +909,17 @@ xfs_qm_sync( int recl, restarts; xfs_dquot_t *dqp; uint flush_flags; + boolean_t nowait; int error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (! XFS_IS_QUOTA_ON(mp)) return 0; - flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI; restarts = 0; + /* + * We won't block unless we are asked to. + */ + nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0); again: xfs_qm_mplist_lock(mp); @@ -938,10 +939,18 @@ xfs_qm_sync( * don't 'seem' to be dirty. ie. don't acquire dqlock. * This is very similar to what xfs_sync does with inodes. */ - if (flags & SYNC_TRYLOCK) { - if (!XFS_DQ_IS_DIRTY(dqp)) + if (flags & SYNC_BDFLUSH) { + if (! 
XFS_DQ_IS_DIRTY(dqp)) continue; - if (!xfs_qm_dqlock_nowait(dqp)) + } + + if (nowait) { + /* + * Try to acquire the dquot lock. We are NOT out of + * lock order, but we just don't want to wait for this + * lock, unless somebody wanted us to. + */ + if (! xfs_qm_dqlock_nowait(dqp)) continue; } else { xfs_dqlock(dqp); @@ -958,7 +967,7 @@ xfs_qm_sync( /* XXX a sentinel would be better */ recl = XFS_QI_MPLRECLAIMS(mp); if (!xfs_dqflock_nowait(dqp)) { - if (flags & SYNC_TRYLOCK) { + if (nowait) { xfs_dqunlock(dqp); continue; } @@ -976,6 +985,7 @@ xfs_qm_sync( * Let go of the mplist lock. We don't want to hold it * across a disk write */ + flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC; xfs_qm_mplist_unlock(mp); xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH"); error = xfs_qm_dqflush(dqp, flush_flags); @@ -2309,20 +2319,20 @@ xfs_qm_write_sb_changes( */ int xfs_qm_vop_dqalloc( - struct xfs_inode *ip, - uid_t uid, - gid_t gid, - prid_t prid, - uint flags, - struct xfs_dquot **O_udqpp, - struct xfs_dquot **O_gdqpp) + xfs_mount_t *mp, + xfs_inode_t *ip, + uid_t uid, + gid_t gid, + prid_t prid, + uint flags, + xfs_dquot_t **O_udqpp, + xfs_dquot_t **O_gdqpp) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_dquot *uq, *gq; - int error; - uint lockflags; + int error; + xfs_dquot_t *uq, *gq; + uint lockflags; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; lockflags = XFS_ILOCK_EXCL; @@ -2336,8 +2346,8 @@ xfs_qm_vop_dqalloc( * if necessary. The dquot(s) will not be locked. */ if (XFS_NOT_DQATTACHED(mp, ip)) { - error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); - if (error) { + if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC | + XFS_QMOPT_ILOCKED))) { xfs_iunlock(ip, lockflags); return error; } @@ -2459,7 +2469,6 @@ xfs_qm_vop_chown( uint bfield = XFS_IS_REALTIME_INODE(ip) ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); @@ -2499,13 +2508,13 @@ xfs_qm_vop_chown_reserve( xfs_dquot_t *gdqp, uint flags) { - xfs_mount_t *mp = ip->i_mount; + int error; + xfs_mount_t *mp; uint delblks, blkflags, prjflags = 0; xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; - int error; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + mp = ip->i_mount; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); delblks = ip->i_delayed_blks; @@ -2573,23 +2582,28 @@ xfs_qm_vop_chown_reserve( int xfs_qm_vop_rename_dqattach( - struct xfs_inode **i_tab) + xfs_inode_t **i_tab) { - struct xfs_mount *mp = i_tab[0]->i_mount; - int i; + xfs_inode_t *ip; + int i; + int error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; + ip = i_tab[0]; - for (i = 0; (i < 4 && i_tab[i]); i++) { - struct xfs_inode *ip = i_tab[i]; - int error; + if (! XFS_IS_QUOTA_ON(ip->i_mount)) + return 0; + if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + } + for (i = 1; (i < 4 && i_tab[i]); i++) { /* * Watch out for duplicate entries in the table. 
*/ - if (i == 0 || ip != i_tab[i-1]) { - if (XFS_NOT_DQATTACHED(mp, ip)) { + if ((ip = i_tab[i]) != i_tab[i-1]) { + if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { error = xfs_qm_dqattach(ip, 0); if (error) return error; @@ -2600,19 +2614,17 @@ xfs_qm_vop_rename_dqattach( } void -xfs_qm_vop_create_dqattach( - struct xfs_trans *tp, - struct xfs_inode *ip, - struct xfs_dquot *udqp, - struct xfs_dquot *gdqp) +xfs_qm_vop_dqattach_and_dqmod_newinode( + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dquot_t *udqp, + xfs_dquot_t *gdqp) { - struct xfs_mount *mp = tp->t_mountp; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(tp->t_mountp)) return; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); if (udqp) { xfs_dqlock(udqp); @@ -2620,7 +2632,7 @@ xfs_qm_vop_create_dqattach( xfs_dqunlock(udqp); ASSERT(ip->i_udquot == NULL); ip->i_udquot = udqp; - ASSERT(XFS_IS_UQUOTA_ON(mp)); + ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp)); ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); } @@ -2630,8 +2642,8 @@ xfs_qm_vop_create_dqattach( xfs_dqunlock(gdqp); ASSERT(ip->i_gdquot == NULL); ip->i_gdquot = gdqp; - ASSERT(XFS_IS_OQUOTA_ON(mp)); - ASSERT((XFS_IS_GQUOTA_ON(mp) ? + ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp)); + ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ? ip->i_d.di_gid : ip->i_d.di_projid) == be32_to_cpu(gdqp->q_core.d_id)); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); diff --git a/trunk/fs/xfs/quota/xfs_qm.h b/trunk/fs/xfs/quota/xfs_qm.h index 495564b8af38..a371954cae1b 100644 --- a/trunk/fs/xfs/quota/xfs_qm.h +++ b/trunk/fs/xfs/quota/xfs_qm.h @@ -127,6 +127,8 @@ typedef struct xfs_quotainfo { } xfs_quotainfo_t; +extern xfs_dqtrxops_t xfs_trans_dquot_ops; + extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, xfs_dquot_t *, xfs_dquot_t *, long, long, uint); @@ -157,11 +159,17 @@ typedef struct xfs_dquot_acct { #define XFS_QM_RTBWARNLIMIT 5 extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); +extern void xfs_qm_mount_quotas(xfs_mount_t *); extern int xfs_qm_quotacheck(xfs_mount_t *); +extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); +extern void xfs_qm_unmount_quotas(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); +extern int xfs_qm_sync(xfs_mount_t *, int); /* dquot stuff */ extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); +extern int xfs_qm_dqattach(xfs_inode_t *, uint); +extern void xfs_qm_dqdetach(xfs_inode_t *); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); @@ -175,6 +183,19 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); +/* vop stuff */ +extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, + uid_t, gid_t, prid_t, uint, + xfs_dquot_t **, xfs_dquot_t **); +extern void xfs_qm_vop_dqattach_and_dqmod_newinode( + xfs_trans_t *, xfs_inode_t *, + xfs_dquot_t *, xfs_dquot_t *); +extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **); +extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *, + xfs_dquot_t **, xfs_dquot_t *); +extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *, + xfs_dquot_t *, xfs_dquot_t *, uint); + /* list stuff */ extern void xfs_qm_freelist_append(xfs_frlist_t *, 
xfs_dquot_t *); extern void xfs_qm_freelist_unlink(xfs_dquot_t *); diff --git a/trunk/fs/xfs/quota/xfs_qm_bhv.c b/trunk/fs/xfs/quota/xfs_qm_bhv.c index a5346630dfae..63037c689a4b 100644 --- a/trunk/fs/xfs/quota/xfs_qm_bhv.c +++ b/trunk/fs/xfs/quota/xfs_qm_bhv.c @@ -42,6 +42,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" @@ -83,7 +84,7 @@ xfs_fill_statvfs_from_dquot( * return a statvfs of the project, not the entire filesystem. * This makes such trees appear as if they are filesystems in themselves. */ -void +STATIC void xfs_qm_statvfs( xfs_inode_t *ip, struct kstatfs *statp) @@ -91,13 +92,20 @@ xfs_qm_statvfs( xfs_mount_t *mp = ip->i_mount; xfs_dquot_t *dqp; + if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || + !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == + (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + return; + if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { - xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); + xfs_disk_dquot_t *dp = &dqp->q_core; + + xfs_fill_statvfs_from_dquot(statp, dp); xfs_qm_dqput(dqp); } } -int +STATIC int xfs_qm_newmount( xfs_mount_t *mp, uint *needquotamount, @@ -106,6 +114,9 @@ xfs_qm_newmount( uint quotaondisk; uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; + *quotaflags = 0; + *needquotamount = B_FALSE; + quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); @@ -168,6 +179,66 @@ xfs_qm_newmount( return 0; } +STATIC int +xfs_qm_endmount( + xfs_mount_t *mp, + uint needquotamount, + uint quotaflags) +{ + if (needquotamount) { + ASSERT(mp->m_qflags == 0); + mp->m_qflags = quotaflags; + xfs_qm_mount_quotas(mp); + } + +#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) + if (! (XFS_IS_QUOTA_ON(mp))) + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); + else + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); +#endif + +#ifdef QUOTADEBUG + if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp)) + cmn_err(CE_WARN, "XFS: mount internalqcheck failed"); +#endif + + return 0; +} + +STATIC void +xfs_qm_dqrele_null( + xfs_dquot_t *dq) +{ + /* + * Called from XFS, where we always check first for a NULL dquot. 
+ */ + if (!dq) + return; + xfs_qm_dqrele(dq); +} + + +struct xfs_qmops xfs_qmcore_xfs = { + .xfs_qminit = xfs_qm_newmount, + .xfs_qmdone = xfs_qm_unmount_quotadestroy, + .xfs_qmmount = xfs_qm_endmount, + .xfs_qmunmount = xfs_qm_unmount_quotas, + .xfs_dqrele = xfs_qm_dqrele_null, + .xfs_dqattach = xfs_qm_dqattach, + .xfs_dqdetach = xfs_qm_dqdetach, + .xfs_dqpurgeall = xfs_qm_dqpurge_all, + .xfs_dqvopalloc = xfs_qm_vop_dqalloc, + .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode, + .xfs_dqvoprename = xfs_qm_vop_rename_dqattach, + .xfs_dqvopchown = xfs_qm_vop_chown, + .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve, + .xfs_dqstatvfs = xfs_qm_statvfs, + .xfs_dqsync = xfs_qm_sync, + .xfs_dqtrxops = &xfs_trans_dquot_ops, +}; +EXPORT_SYMBOL(xfs_qmcore_xfs); + void __init xfs_qm_init(void) { diff --git a/trunk/fs/xfs/quota/xfs_qm_stats.c b/trunk/fs/xfs/quota/xfs_qm_stats.c index 21b08c0396a1..709f5f545cf5 100644 --- a/trunk/fs/xfs/quota/xfs_qm_stats.c +++ b/trunk/fs/xfs/quota/xfs_qm_stats.c @@ -42,6 +42,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" diff --git a/trunk/fs/xfs/quota/xfs_qm_syscalls.c b/trunk/fs/xfs/quota/xfs_qm_syscalls.c index 4e4276b956e8..c7b66f6506ce 100644 --- a/trunk/fs/xfs/quota/xfs_qm_syscalls.c +++ b/trunk/fs/xfs/quota/xfs_qm_syscalls.c @@ -45,6 +45,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -846,55 +847,105 @@ xfs_qm_export_flags( } -STATIC int -xfs_dqrele_inode( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) +/* + * Release all the dquots on the inodes in an AG. + */ +STATIC void +xfs_qm_dqrele_inodes_ag( + xfs_mount_t *mp, + int ag, + uint flags) { - int error; + xfs_inode_t *ip = NULL; + xfs_perag_t *pag = &mp->m_perag[ag]; + int first_index = 0; + int nr_found; + + do { + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void**)&ip, first_index, 1); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* skip quota inodes */ + if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { + ASSERT(ip->i_udquot == NULL); + ASSERT(ip->i_gdquot == NULL); + read_unlock(&pag->pag_ici_lock); + continue; + } - /* skip quota inodes */ - if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) { - ASSERT(ip->i_udquot == NULL); - ASSERT(ip->i_gdquot == NULL); + /* + * If we can't get a reference on the inode, it must be + * in reclaim. Leave it for the reclaim code to flush. 
+ */ + if (!igrab(VFS_I(ip))) { + read_unlock(&pag->pag_ici_lock); + continue; + } read_unlock(&pag->pag_ici_lock); - return 0; - } - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; + /* avoid new inodes though we shouldn't find any here */ + if (xfs_iflags_test(ip, XFS_INEW)) { + IRELE(ip); + continue; + } - xfs_ilock(ip, XFS_ILOCK_EXCL); - if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { - xfs_qm_dqrele(ip->i_udquot); - ip->i_udquot = NULL; - } - if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { - xfs_qm_dqrele(ip->i_gdquot); - ip->i_gdquot = NULL; - } - xfs_iput(ip, XFS_ILOCK_EXCL); - IRELE(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && + ip->i_gdquot) { + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } + xfs_iput(ip, XFS_ILOCK_EXCL); - return 0; + } while (nr_found); } - /* * Go thru all the inodes in the file system, releasing their dquots. - * * Note that the mount structure gets modified to indicate that quotas are off - * AFTER this, in the case of quotaoff. + * AFTER this, in the case of quotaoff. This also gets called from + * xfs_rootumount. */ void xfs_qm_dqrele_all_inodes( struct xfs_mount *mp, uint flags) { + int i; + ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + xfs_qm_dqrele_inodes_ag(mp, i, flags); + } } /*------------------------------------------------------------------------*/ diff --git a/trunk/fs/xfs/quota/xfs_trans_dquot.c b/trunk/fs/xfs/quota/xfs_trans_dquot.c index 97ac9640be98..447173bcf96d 100644 --- a/trunk/fs/xfs/quota/xfs_trans_dquot.c +++ b/trunk/fs/xfs/quota/xfs_trans_dquot.c @@ -42,6 +42,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" @@ -110,7 +111,7 @@ xfs_trans_log_dquot( * Carry forward whatever is left of the quota blk reservation to * the spanky new transaction */ -void +STATIC void xfs_trans_dup_dqinfo( xfs_trans_t *otp, xfs_trans_t *ntp) @@ -166,17 +167,19 @@ xfs_trans_dup_dqinfo( /* * Wrap around mod_dquot to account for both user and group quotas. */ -void +STATIC void xfs_trans_mod_dquot_byino( xfs_trans_t *tp, xfs_inode_t *ip, uint field, long delta) { - xfs_mount_t *mp = tp->t_mountp; + xfs_mount_t *mp; + + ASSERT(tp); + mp = tp->t_mountp; - if (!XFS_IS_QUOTA_RUNNING(mp) || - !XFS_IS_QUOTA_ON(mp) || + if (!XFS_IS_QUOTA_ON(mp) || ip->i_ino == mp->m_sb.sb_uquotino || ip->i_ino == mp->m_sb.sb_gquotino) return; @@ -226,7 +229,6 @@ xfs_trans_mod_dquot( xfs_dqtrx_t *qtrx; ASSERT(tp); - ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); qtrx = NULL; if (tp->t_dqinfo == NULL) @@ -344,7 +346,7 @@ xfs_trans_dqlockedjoin( * Unreserve just the reservations done by this transaction. * dquot is still left locked at exit. */ -void +STATIC void xfs_trans_apply_dquot_deltas( xfs_trans_t *tp) { @@ -355,7 +357,7 @@ xfs_trans_apply_dquot_deltas( long totalbdelta; long totalrtbdelta; - if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY)) return; ASSERT(tp->t_dqinfo); @@ -529,7 +531,7 @@ xfs_trans_apply_dquot_deltas( * we simply throw those away, since that's the expected behavior * when a transaction is curtailed without a commit. 
*/ -void +STATIC void xfs_trans_unreserve_and_mod_dquots( xfs_trans_t *tp) { @@ -766,7 +768,7 @@ xfs_trans_reserve_quota_bydquots( { int resvd = 0, error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; if (tp && tp->t_dqinfo == NULL) @@ -809,17 +811,18 @@ xfs_trans_reserve_quota_bydquots( * This doesn't change the actual usage, just the reservation. * The inode sent in is locked. */ -int +STATIC int xfs_trans_reserve_quota_nblks( - struct xfs_trans *tp, - struct xfs_inode *ip, - long nblks, - long ninos, - uint flags) + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_inode_t *ip, + long nblks, + long ninos, + uint flags) { - struct xfs_mount *mp = ip->i_mount; + int error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; if (XFS_IS_PQUOTA_ON(mp)) flags |= XFS_QMOPT_ENOSPC; @@ -828,6 +831,7 @@ xfs_trans_reserve_quota_nblks( ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == XFS_TRANS_DQ_RES_RTBLKS || (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == @@ -836,9 +840,11 @@ xfs_trans_reserve_quota_nblks( /* * Reserve nblks against these dquots, with trans as the mediator. */ - return xfs_trans_reserve_quota_bydquots(tp, mp, - ip->i_udquot, ip->i_gdquot, - nblks, ninos, flags); + error = xfs_trans_reserve_quota_bydquots(tp, mp, + ip->i_udquot, ip->i_gdquot, + nblks, ninos, + flags); + return error; } /* @@ -889,15 +895,25 @@ STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { - tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); + (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); } -void +STATIC void xfs_trans_free_dqinfo( xfs_trans_t *tp) { if (!tp->t_dqinfo) return; - kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); - tp->t_dqinfo = NULL; + kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo); + (tp)->t_dqinfo = NULL; } + +xfs_dqtrxops_t xfs_trans_dquot_ops = { + .qo_dup_dqinfo = xfs_trans_dup_dqinfo, + .qo_free_dqinfo = xfs_trans_free_dqinfo, + .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino, + .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas, + .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks, + .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots, + .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots, +}; diff --git a/trunk/fs/xfs/xfs_acl.c b/trunk/fs/xfs/xfs_acl.c new file mode 100644 index 000000000000..a8cdd73999a4 --- /dev/null +++ b/trunk/fs/xfs/xfs_acl.c @@ -0,0 +1,874 @@ +/* + * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_inum.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_vnodeops.h" + +#include <linux/capability.h> +#include <linux/posix_acl_xattr.h> + +STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *); +STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); +STATIC void xfs_acl_get_endian(xfs_acl_t *); +STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); +STATIC int xfs_acl_invalid(xfs_acl_t *); +STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); +STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *); +STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *); +STATIC int xfs_acl_allow_set(struct inode *, int); + +kmem_zone_t *xfs_acl_zone; + + +/* + * Test for existence of access ACL attribute as efficiently as possible. + */ +int +xfs_acl_vhasacl_access( + struct inode *vp) +{ + int error; + + xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error); + return (error == 0); +} + +/* + * Test for existence of default ACL attribute as efficiently as possible. + */ +int +xfs_acl_vhasacl_default( + struct inode *vp) +{ + int error; + + if (!S_ISDIR(vp->i_mode)) + return 0; + xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); + return (error == 0); +} + +/* + * Convert from extended attribute representation to in-memory for XFS. + */ +STATIC int +posix_acl_xattr_to_xfs( + posix_acl_xattr_header *src, + size_t size, + xfs_acl_t *dest) +{ + posix_acl_xattr_entry *src_entry; + xfs_acl_entry_t *dest_entry; + int n; + + if (!src || !dest) + return EINVAL; + + if (size < sizeof(posix_acl_xattr_header)) + return EINVAL; + + if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) + return EOPNOTSUPP; + + memset(dest, 0, sizeof(xfs_acl_t)); + dest->acl_cnt = posix_acl_xattr_count(size); + if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES) + return EINVAL; + + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + if (!dest->acl_cnt) + return 0; + + src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src)); + dest_entry = &dest->acl_entry[0]; + + for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) { + dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm); + if (_ACL_PERM_INVALID(dest_entry->ae_perm)) + return EINVAL; + dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag); + switch(dest_entry->ae_tag) { + case ACL_USER: + case ACL_GROUP: + dest_entry->ae_id = le32_to_cpu(src_entry->e_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + dest_entry->ae_id = ACL_UNDEFINED_ID; + break; + default: + return EINVAL; + } + } + if (xfs_acl_invalid(dest)) + return EINVAL; + + return 0; +} + +/* + * Comparison function called from xfs_sort(). + * Primary key is ae_tag, secondary key is ae_id.
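+ * e.g. with the Linux tag values (ACL_USER_OBJ < ACL_USER < ACL_GROUP_OBJ + * < ACL_GROUP < ACL_MASK < ACL_OTHER) this yields the canonical + * u::, u:uid:, g::, g:gid:, m::, o:: ordering; ae_id only orders multiple + * u:uid:/g:gid: entries within one tag.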
+ */ +STATIC int +xfs_acl_entry_compare( + const void *va, + const void *vb) +{ + xfs_acl_entry_t *a = (xfs_acl_entry_t *)va, + *b = (xfs_acl_entry_t *)vb; + + if (a->ae_tag == b->ae_tag) + return (a->ae_id - b->ae_id); + return (a->ae_tag - b->ae_tag); +} + +/* + * Convert from in-memory XFS to extended attribute representation. + */ +STATIC int +posix_acl_xfs_to_xattr( + xfs_acl_t *src, + posix_acl_xattr_header *dest, + size_t size) +{ + int n; + size_t new_size = posix_acl_xattr_size(src->acl_cnt); + posix_acl_xattr_entry *dest_entry; + xfs_acl_entry_t *src_entry; + + if (size < new_size) + return -ERANGE; + + /* Need to sort src XFS ACL by */ + xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]), + xfs_acl_entry_compare); + + dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); + dest_entry = &dest->a_entries[0]; + src_entry = &src->acl_entry[0]; + for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) { + dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm); + if (_ACL_PERM_INVALID(src_entry->ae_perm)) + return -EINVAL; + dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag); + switch (src_entry->ae_tag) { + case ACL_USER: + case ACL_GROUP: + dest_entry->e_id = cpu_to_le32(src_entry->ae_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); + break; + default: + return -EINVAL; + } + } + return new_size; +} + +int +xfs_acl_vget( + struct inode *vp, + void *acl, + size_t size, + int kind) +{ + int error; + xfs_acl_t *xfs_acl = NULL; + posix_acl_xattr_header *ext_acl = acl; + int flags = 0; + + if(size) { + if (!(_ACL_ALLOC(xfs_acl))) { + error = ENOMEM; + goto out; + } + memset(xfs_acl, 0, sizeof(xfs_acl_t)); + } else + flags = ATTR_KERNOVAL; + + xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error); + if (error) + goto out; + + if (!size) { + error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES); + } else { + if (xfs_acl_invalid(xfs_acl)) { + error = EINVAL; + goto out; + } + if (kind == _ACL_TYPE_ACCESS) + xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl); + error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); + } +out: + if(xfs_acl) + _ACL_FREE(xfs_acl); + return -error; +} + +int +xfs_acl_vremove( + struct inode *vp, + int kind) +{ + int error; + + error = xfs_acl_allow_set(vp, kind); + if (!error) { + error = xfs_attr_remove(XFS_I(vp), + kind == _ACL_TYPE_DEFAULT? + SGI_ACL_DEFAULT: SGI_ACL_FILE, + ATTR_ROOT); + if (error == ENOATTR) + error = 0; /* 'scool */ + } + return -error; +} + +int +xfs_acl_vset( + struct inode *vp, + void *acl, + size_t size, + int kind) +{ + posix_acl_xattr_header *ext_acl = acl; + xfs_acl_t *xfs_acl; + int error; + int basicperms = 0; /* more than std unix perms? */ + + if (!acl) + return -EINVAL; + + if (!(_ACL_ALLOC(xfs_acl))) + return -ENOMEM; + + error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl); + if (error) { + _ACL_FREE(xfs_acl); + return -error; + } + if (!xfs_acl->acl_cnt) { + _ACL_FREE(xfs_acl); + return 0; + } + + error = xfs_acl_allow_set(vp, kind); + + /* Incoming ACL exists, set file mode based on its value */ + if (!error && kind == _ACL_TYPE_ACCESS) + error = xfs_acl_setmode(vp, xfs_acl, &basicperms); + + if (error) + goto out; + + /* + * If we have more than std unix permissions, set up the actual attr. + * Otherwise, delete any existing attr. This prevents us from + * having actual attrs for permissions that can be stored in the + * standard permission bits. 
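+ * e.g. an access ACL of exactly u::rw-, g::r--, o::r-- collapses into + * mode 0644 and leaves no attr behind, while a named entry such as + * u:100:rw- (and the m:: mask it requires) forces a real attr.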
+ */ + if (!basicperms) { + xfs_acl_set_attr(vp, xfs_acl, kind, &error); + } else { + error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); + } + +out: + _ACL_FREE(xfs_acl); + return -error; +} + +int +xfs_acl_iaccess( + xfs_inode_t *ip, + mode_t mode, + cred_t *cr) +{ + xfs_acl_t *acl; + int rval; + struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE}; + + if (!(_ACL_ALLOC(acl))) + return -1; + + /* If the file has no ACL return -1. */ + rval = sizeof(xfs_acl_t); + if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) { + _ACL_FREE(acl); + return -1; + } + xfs_acl_get_endian(acl); + + /* If the file has an empty ACL return -1. */ + if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) { + _ACL_FREE(acl); + return -1; + } + + /* Synchronize ACL with mode bits */ + xfs_acl_sync_mode(ip->i_d.di_mode, acl); + + rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr); + _ACL_FREE(acl); + return rval; +} + +STATIC int +xfs_acl_allow_set( + struct inode *vp, + int kind) +{ + if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) + return EPERM; + if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode)) + return ENOTDIR; + if (vp->i_sb->s_flags & MS_RDONLY) + return EROFS; + if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER)) + return EPERM; + return 0; +} + +/* + * Note: cr is only used here for the capability check if the ACL test fails. + * It is not used to find out the credentials uid or groups etc, as was + * done in IRIX. It is assumed that the uid and groups for the current + * thread are taken from "current" instead of the cr parameter. + */ +STATIC int +xfs_acl_access( + uid_t fuid, + gid_t fgid, + xfs_acl_t *fap, + mode_t md, + cred_t *cr) +{ + xfs_acl_entry_t matched; + int i, allows; + int maskallows = -1; /* true, but not 1, either */ + int seen_userobj = 0; + + matched.ae_tag = 0; /* Invalid type */ + matched.ae_perm = 0; + + for (i = 0; i < fap->acl_cnt; i++) { + /* + * Break out if we've got a user_obj entry or + * a user entry and the mask (and have processed USER_OBJ) + */ + if (matched.ae_tag == ACL_USER_OBJ) + break; + if (matched.ae_tag == ACL_USER) { + if (maskallows != -1 && seen_userobj) + break; + if (fap->acl_entry[i].ae_tag != ACL_MASK && + fap->acl_entry[i].ae_tag != ACL_USER_OBJ) + continue; + } + /* True if this entry allows the requested access */ + allows = ((fap->acl_entry[i].ae_perm & md) == md); + + switch (fap->acl_entry[i].ae_tag) { + case ACL_USER_OBJ: + seen_userobj = 1; + if (fuid != current_fsuid()) + continue; + matched.ae_tag = ACL_USER_OBJ; + matched.ae_perm = allows; + break; + case ACL_USER: + if (fap->acl_entry[i].ae_id != current_fsuid()) + continue; + matched.ae_tag = ACL_USER; + matched.ae_perm = allows; + break; + case ACL_GROUP_OBJ: + if ((matched.ae_tag == ACL_GROUP_OBJ || + matched.ae_tag == ACL_GROUP) && !allows) + continue; + if (!in_group_p(fgid)) + continue; + matched.ae_tag = ACL_GROUP_OBJ; + matched.ae_perm = allows; + break; + case ACL_GROUP: + if ((matched.ae_tag == ACL_GROUP_OBJ || + matched.ae_tag == ACL_GROUP) && !allows) + continue; + if (!in_group_p(fap->acl_entry[i].ae_id)) + continue; + matched.ae_tag = ACL_GROUP; + matched.ae_perm = allows; + break; + case ACL_MASK: + maskallows = allows; + break; + case ACL_OTHER: + if (matched.ae_tag != 0) + continue; + matched.ae_tag = ACL_OTHER; + matched.ae_perm = allows; + break; + } + } + /* + * First possibility is that no matched entry allows access. + * The capability to override DAC may exist, so check for it. 
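+ * e.g. a write request that matched a u:uid: entry whose ae_perm allows + * writing is still denied here when the m:: entry filtered writing out + * (maskallows == 0).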
+ */ + switch (matched.ae_tag) { + case ACL_OTHER: + case ACL_USER_OBJ: + if (matched.ae_perm) + return 0; + break; + case ACL_USER: + case ACL_GROUP_OBJ: + case ACL_GROUP: + if (maskallows && matched.ae_perm) + return 0; + break; + case 0: + break; + } + + /* EACCES tells generic_permission to check for capability overrides */ + return EACCES; +} + +/* + * ACL validity checker. + * This acl validation routine checks each ACL entry read in makes sense. + */ +STATIC int +xfs_acl_invalid( + xfs_acl_t *aclp) +{ + xfs_acl_entry_t *entry, *e; + int user = 0, group = 0, other = 0, mask = 0; + int mask_required = 0; + int i, j; + + if (!aclp) + goto acl_invalid; + + if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES) + goto acl_invalid; + + for (i = 0; i < aclp->acl_cnt; i++) { + entry = &aclp->acl_entry[i]; + switch (entry->ae_tag) { + case ACL_USER_OBJ: + if (user++) + goto acl_invalid; + break; + case ACL_GROUP_OBJ: + if (group++) + goto acl_invalid; + break; + case ACL_OTHER: + if (other++) + goto acl_invalid; + break; + case ACL_USER: + case ACL_GROUP: + for (j = i + 1; j < aclp->acl_cnt; j++) { + e = &aclp->acl_entry[j]; + if (e->ae_id == entry->ae_id && + e->ae_tag == entry->ae_tag) + goto acl_invalid; + } + mask_required++; + break; + case ACL_MASK: + if (mask++) + goto acl_invalid; + break; + default: + goto acl_invalid; + } + } + if (!user || !group || !other || (mask_required && !mask)) + goto acl_invalid; + else + return 0; +acl_invalid: + return EINVAL; +} + +/* + * Do ACL endian conversion. + */ +STATIC void +xfs_acl_get_endian( + xfs_acl_t *aclp) +{ + xfs_acl_entry_t *ace, *end; + + INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); + end = &aclp->acl_entry[0]+aclp->acl_cnt; + for (ace = &aclp->acl_entry[0]; ace < end; ace++) { + INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag); + INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id); + INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm); + } +} + +/* + * Get the ACL from the EA and do endian conversion. + */ +STATIC void +xfs_acl_get_attr( + struct inode *vp, + xfs_acl_t *aclp, + int kind, + int flags, + int *error) +{ + int len = sizeof(xfs_acl_t); + + ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); + flags |= ATTR_ROOT; + *error = xfs_attr_get(XFS_I(vp), + kind == _ACL_TYPE_ACCESS ? + SGI_ACL_FILE : SGI_ACL_DEFAULT, + (char *)aclp, &len, flags); + if (*error || (flags & ATTR_KERNOVAL)) + return; + xfs_acl_get_endian(aclp); +} + +/* + * Set the EA with the ACL and do endian conversion. + */ +STATIC void +xfs_acl_set_attr( + struct inode *vp, + xfs_acl_t *aclp, + int kind, + int *error) +{ + xfs_acl_entry_t *ace, *newace, *end; + xfs_acl_t *newacl; + int len; + + if (!(_ACL_ALLOC(newacl))) { + *error = ENOMEM; + return; + } + + len = sizeof(xfs_acl_t) - + (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt)); + end = &aclp->acl_entry[0]+aclp->acl_cnt; + for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0]; + ace < end; + ace++, newace++) { + INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag); + INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id); + INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); + } + INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); + *error = xfs_attr_set(XFS_I(vp), + kind == _ACL_TYPE_ACCESS ? + SGI_ACL_FILE: SGI_ACL_DEFAULT, + (char *)newacl, len, ATTR_ROOT); + _ACL_FREE(newacl); +} + +int +xfs_acl_vtoacl( + struct inode *vp, + xfs_acl_t *access_acl, + xfs_acl_t *default_acl) +{ + int error = 0; + + if (access_acl) { + /* + * Get the Access ACL and the mode. 
If either cannot + * be obtained for some reason, invalidate the access ACL. + */ + xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); + if (error) + access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; + else /* We have a good ACL and the file mode, synchronize. */ + xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl); + } + + if (default_acl) { + xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error); + if (error) + default_acl->acl_cnt = XFS_ACL_NOT_PRESENT; + } + return error; +} + +/* + * This function retrieves the parent directory's acl, processes it + * and lets the child inherit the acl(s) that it should. + */ +int +xfs_acl_inherit( + struct inode *vp, + mode_t mode, + xfs_acl_t *pdaclp) +{ + xfs_acl_t *cacl; + int error = 0; + int basicperms = 0; + + /* + * If the parent does not have a default ACL, or it's an + * invalid ACL, we're done. + */ + if (!vp) + return 0; + if (!pdaclp || xfs_acl_invalid(pdaclp)) + return 0; + + /* + * Copy the default ACL of the containing directory to + * the access ACL of the new file and use the mode that + * was passed in to set up the correct initial values for + * the u::,g::[m::], and o:: entries. This is what makes + * umask() "work" with ACL's. + */ + + if (!(_ACL_ALLOC(cacl))) + return ENOMEM; + + memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); + xfs_acl_filter_mode(mode, cacl); + error = xfs_acl_setmode(vp, cacl, &basicperms); + if (error) + goto out_error; + + /* + * Set the Default and Access ACL on the file. The mode is already + * set on the file, so we don't need to worry about that. + * + * If the new file is a directory, its default ACL is a copy of + * the containing directory's default ACL. + */ + if (S_ISDIR(vp->i_mode)) + xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); + if (!error && !basicperms) + xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); +out_error: + _ACL_FREE(cacl); + return error; +} + +/* + * Set up the correct mode on the file based on the supplied ACL. This + * makes sure that the mode on the file reflects the state of the + * u::,g::[m::], and o:: entries in the ACL. Since the mode is where + * the ACL is going to get the permissions for these entries, we must + * synchronize the mode whenever we set the ACL on a file. + */ +STATIC int +xfs_acl_setmode( + struct inode *vp, + xfs_acl_t *acl, + int *basicperms) +{ + struct iattr iattr; + xfs_acl_entry_t *ap; + xfs_acl_entry_t *gap = NULL; + int i, nomask = 1; + + *basicperms = 1; + + if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) + return 0; + + /* + * Copy the u::, g::, o::, and m:: bits from the ACL into the + * mode. The m:: bits take precedence over the g:: bits. 
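+ * e.g. u::rwx, g::rwx, m::r--, o::--- produces mode 0740: the owner + * bits come from u:: (7 << 6) while the group bits come from the mask + * (4 << 3), not from the rwx g:: entry.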
+ */ + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = XFS_I(vp)->i_d.di_mode; + iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); + ap = acl->acl_entry; + for (i = 0; i < acl->acl_cnt; ++i) { + switch (ap->ae_tag) { + case ACL_USER_OBJ: + iattr.ia_mode |= ap->ae_perm << 6; + break; + case ACL_GROUP_OBJ: + gap = ap; + break; + case ACL_MASK: /* more than just standard modes */ + nomask = 0; + iattr.ia_mode |= ap->ae_perm << 3; + *basicperms = 0; + break; + case ACL_OTHER: + iattr.ia_mode |= ap->ae_perm; + break; + default: /* more than just standard modes */ + *basicperms = 0; + break; + } + ap++; + } + + /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */ + if (gap && nomask) + iattr.ia_mode |= gap->ae_perm << 3; + + return xfs_setattr(XFS_I(vp), &iattr, 0); +} + +/* + * The permissions for the special ACL entries (u::, g::[m::], o::) are + * actually stored in the file mode (if there is both a group and a mask, + * the group is stored in the ACL entry and the mask is stored on the file). + * This allows the mode to remain automatically in sync with the ACL without + * the need for a call-back to the ACL system at every point where the mode + * could change. This function takes the permissions from the specified mode + * and places it in the supplied ACL. + * + * This implementation draws its validity from the fact that, when the ACL + * was assigned, the mode was copied from the ACL. + * If the mode did not change, therefore, the mode remains exactly what was + * taken from the special ACL entries at assignment. + * If a subsequent chmod() was done, the POSIX spec says that the change in + * mode must cause an update to the ACL seen at user level and used for + * access checks. Before and after a mode change, therefore, the file mode + * most accurately reflects what the special ACL entries should permit/deny. + * + * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly, + * the existing mode bits will override whatever is in the + * ACL. Similarly, if there is a pre-existing ACL that was + * never in sync with its mode (owing to a bug in 6.5 and + * before), it will now magically (or mystically) be + * synchronized. This could cause slight astonishment, but + * it is better than inconsistent permissions. + * + * The supplied ACL is a template that may contain any combination + * of special entries. These are treated as place holders when we fill + * out the ACL. This routine does not add or remove special entries, it + * simply unites each special entry with its associated set of permissions. + */ +STATIC void +xfs_acl_sync_mode( + mode_t mode, + xfs_acl_t *acl) +{ + int i, nomask = 1; + xfs_acl_entry_t *ap; + xfs_acl_entry_t *gap = NULL; + + /* + * Set ACL entries. POSIX1003.1eD16 requires that the MASK + * be set instead of the GROUP entry, if there is a MASK. + */ + for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { + switch (ap->ae_tag) { + case ACL_USER_OBJ: + ap->ae_perm = (mode >> 6) & 0x7; + break; + case ACL_GROUP_OBJ: + gap = ap; + break; + case ACL_MASK: + nomask = 0; + ap->ae_perm = (mode >> 3) & 0x7; + break; + case ACL_OTHER: + ap->ae_perm = mode & 0x7; + break; + default: + break; + } + } + /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ + if (gap && nomask) + gap->ae_perm = (mode >> 3) & 0x7; +} + +/* + * When inheriting an Access ACL from a directory Default ACL, + * the ACL bits are set to the intersection of the ACL default + * permission bits and the file permission bits in mode. 
If there + * are no permission bits on the file then we must not give them + * the ACL. This is what what makes umask() work with ACLs. + */ +STATIC void +xfs_acl_filter_mode( + mode_t mode, + xfs_acl_t *acl) +{ + int i, nomask = 1; + xfs_acl_entry_t *ap; + xfs_acl_entry_t *gap = NULL; + + /* + * Set ACL entries. POSIX1003.1eD16 requires that the MASK + * be merged with GROUP entry, if there is a MASK. + */ + for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { + switch (ap->ae_tag) { + case ACL_USER_OBJ: + ap->ae_perm &= (mode >> 6) & 0x7; + break; + case ACL_GROUP_OBJ: + gap = ap; + break; + case ACL_MASK: + nomask = 0; + ap->ae_perm &= (mode >> 3) & 0x7; + break; + case ACL_OTHER: + ap->ae_perm &= mode & 0x7; + break; + default: + break; + } + } + /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ + if (gap && nomask) + gap->ae_perm &= (mode >> 3) & 0x7; +} diff --git a/trunk/fs/xfs/xfs_acl.h b/trunk/fs/xfs/xfs_acl.h index 63dc1f2efad5..642f1db4def4 100644 --- a/trunk/fs/xfs/xfs_acl.h +++ b/trunk/fs/xfs/xfs_acl.h @@ -18,48 +18,81 @@ #ifndef __XFS_ACL_H__ #define __XFS_ACL_H__ -struct inode; -struct posix_acl; -struct xfs_inode; +/* + * Access Control Lists + */ +typedef __uint16_t xfs_acl_perm_t; +typedef __int32_t xfs_acl_tag_t; +typedef __int32_t xfs_acl_id_t; #define XFS_ACL_MAX_ENTRIES 25 #define XFS_ACL_NOT_PRESENT (-1) -/* On-disk XFS access control list structure */ -struct xfs_acl { - __be32 acl_cnt; - struct xfs_acl_entry { - __be32 ae_tag; - __be32 ae_id; - __be16 ae_perm; - } acl_entry[XFS_ACL_MAX_ENTRIES]; -}; +typedef struct xfs_acl_entry { + xfs_acl_tag_t ae_tag; + xfs_acl_id_t ae_id; + xfs_acl_perm_t ae_perm; +} xfs_acl_entry_t; + +typedef struct xfs_acl { + __int32_t acl_cnt; + xfs_acl_entry_t acl_entry[XFS_ACL_MAX_ENTRIES]; +} xfs_acl_t; /* On-disk XFS extended attribute names */ -#define SGI_ACL_FILE "SGI_ACL_FILE" -#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" +#define SGI_ACL_FILE "SGI_ACL_FILE" +#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) +#define _ACL_TYPE_ACCESS 1 +#define _ACL_TYPE_DEFAULT 2 + #ifdef CONFIG_XFS_POSIX_ACL -extern int xfs_check_acl(struct inode *inode, int mask); -extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); -extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); -extern int xfs_acl_chmod(struct inode *inode); -extern void xfs_inode_init_acls(struct xfs_inode *ip); -extern void xfs_inode_clear_acls(struct xfs_inode *ip); -extern int posix_acl_access_exists(struct inode *inode); -extern int posix_acl_default_exists(struct inode *inode); -extern struct xattr_handler xfs_xattr_system_handler; +struct vattr; +struct xfs_inode; + +extern struct kmem_zone *xfs_acl_zone; +#define xfs_acl_zone_init(zone, name) \ + (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) +#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) + +extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *); +extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); +extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *); +extern int xfs_acl_vhasacl_access(struct inode *); +extern int xfs_acl_vhasacl_default(struct inode *); +extern int xfs_acl_vset(struct inode *, void *, size_t, int); +extern int xfs_acl_vget(struct inode *, void *, size_t, int); +extern int xfs_acl_vremove(struct inode *, int); + +#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) + +#define 
_ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d)) +#define _ACL_GET_ACCESS(pv,pa) (xfs_acl_vtoacl(pv,pa,NULL) == 0) +#define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0) +#define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access +#define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default + +#define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP)) +#define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0) + #else -# define xfs_check_acl NULL -# define xfs_get_acl(inode, type) NULL -# define xfs_inherit_acl(inode, default_acl) 0 -# define xfs_acl_chmod(inode) 0 -# define xfs_inode_init_acls(ip) -# define xfs_inode_clear_acls(ip) -# define posix_acl_access_exists(inode) 0 -# define posix_acl_default_exists(inode) 0 -#endif /* CONFIG_XFS_POSIX_ACL */ +#define xfs_acl_zone_init(zone,name) +#define xfs_acl_zone_destroy(zone) +#define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP) +#define xfs_acl_vget(v,p,sz,t) (-EOPNOTSUPP) +#define xfs_acl_vremove(v,t) (-EOPNOTSUPP) +#define xfs_acl_vhasacl_access(v) (0) +#define xfs_acl_vhasacl_default(v) (0) +#define _ACL_ALLOC(a) (1) /* successfully allocate nothing */ +#define _ACL_FREE(a) ((void)0) +#define _ACL_INHERIT(c,m,d) (0) +#define _ACL_GET_ACCESS(pv,pa) (0) +#define _ACL_GET_DEFAULT(pv,pd) (0) +#define _ACL_ACCESS_EXISTS (NULL) +#define _ACL_DEFAULT_EXISTS (NULL) +#endif + #endif /* __XFS_ACL_H__ */ diff --git a/trunk/fs/xfs/xfs_ag.h b/trunk/fs/xfs/xfs_ag.h index f24b50b68d03..c8641f713caa 100644 --- a/trunk/fs/xfs/xfs_ag.h +++ b/trunk/fs/xfs/xfs_ag.h @@ -212,8 +212,6 @@ typedef struct xfs_perag /* * tags for inode radix tree */ -#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup - in xfs_inode_ag_iterator */ #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) diff --git a/trunk/fs/xfs/xfs_arch.h b/trunk/fs/xfs/xfs_arch.h index 0902249354a0..53d5e70d1360 100644 --- a/trunk/fs/xfs/xfs_arch.h +++ b/trunk/fs/xfs/xfs_arch.h @@ -73,6 +73,28 @@ static inline void be64_add_cpu(__be64 *a, __s64 b) #endif /* __KERNEL__ */ +/* do we need conversion? */ +#define ARCH_NOCONVERT 1 +#ifdef XFS_NATIVE_HOST +# define ARCH_CONVERT ARCH_NOCONVERT +#else +# define ARCH_CONVERT 0 +#endif + +/* generic swapping macros */ + +#ifndef HAVE_SWABMACROS +#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var)))) +#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var)))) +#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var)))) +#endif + +#define INT_SWAP(type, var) \ + ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \ + ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \ + ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \ + (var)))) + /* * get and set integers from potentially unaligned locations */ @@ -85,6 +107,16 @@ static inline void be64_add_cpu(__be64 *a, __s64 b) ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ } +/* does not return a value */ +#define INT_SET(reference,arch,valueref) \ + (__builtin_constant_p(valueref) ? \ + (void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \ + (void)( \ + ((reference) = (valueref)), \ + ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \ + ) \ + ) + /* * In directories inode numbers are stored as unaligned arrays of unsigned * 8bit integers on disk. 
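The INT_SWAP/INT_SET macros added back above byte-swap an on-disk field in place whenever the host byte order differs from the on-disk format, and collapse to a plain assignment on a big-endian (XFS_NATIVE_HOST) build where ARCH_CONVERT equals ARCH_NOCONVERT. A rough standalone sketch of that round trip, using a hand-rolled swab32() as a stand-in for the kernel's __swab32() and an arbitrary acl_cnt value:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's __swab32(); assumes a little-endian host. */
static uint32_t swab32(uint32_t x)
{
	return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
	       ((x & 0x00ff0000u) >> 8) | ((x & 0xff000000u) >> 24);
}

int main(void)
{
	uint32_t acl_cnt = 3;	/* host-order value, e.g. xfs_acl.acl_cnt */

	/* What INT_SET() stores when ARCH_CONVERT != ARCH_NOCONVERT. */
	uint32_t ondisk = swab32(acl_cnt);

	printf("host %#010x -> disk %#010x -> host %#010x\n",
	       acl_cnt, ondisk, swab32(ondisk));
	return 0;
}

Applying the swap twice recovers the original value, which is why xfs_acl_get_endian() and xfs_acl_set_attr() can use the same INT_SET() pattern for both conversion directions.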
diff --git a/trunk/fs/xfs/xfs_attr.c b/trunk/fs/xfs/xfs_attr.c index db15feb906ff..5fde1654b430 100644 --- a/trunk/fs/xfs/xfs_attr.c +++ b/trunk/fs/xfs/xfs_attr.c @@ -45,6 +45,7 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" +#include "xfs_acl.h" #include "xfs_rw.h" #include "xfs_vnodeops.h" @@ -248,9 +249,8 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, /* * Attach the dquots to the inode. */ - error = xfs_qm_dqattach(dp, 0); - if (error) - return error; + if ((error = XFS_QM_DQATTACH(mp, dp, 0))) + return (error); /* * If the inode doesn't have an attribute fork, add one. @@ -311,7 +311,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, } xfs_ilock(dp, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { @@ -501,9 +501,8 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) /* * Attach the dquots to the inode. */ - error = xfs_qm_dqattach(dp, 0); - if (error) - return error; + if ((error = XFS_QM_DQATTACH(mp, dp, 0))) + return (error); /* * Start our first transaction of the day. diff --git a/trunk/fs/xfs/xfs_bmap.c b/trunk/fs/xfs/xfs_bmap.c index 7928b9983c1d..ca7c6005a487 100644 --- a/trunk/fs/xfs/xfs_bmap.c +++ b/trunk/fs/xfs/xfs_bmap.c @@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc( * Adjust the disk quota also. This was reserved * earlier. */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT, (long) ralen); } else { @@ -2995,7 +2995,7 @@ xfs_bmap_btalloc( * Adjust the disk quota also. This was reserved * earlier. */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, (long) args.len); @@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents( return error; xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); ip->i_d.di_nblocks--; - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); if (cur->bc_bufs[0] == cbp) cur->bc_bufs[0] = NULL; @@ -3386,7 +3386,7 @@ xfs_bmap_del_extent( * Adjust quota data. */ if (qfield) - xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks); /* * Account for change in delayed indirect blocks. @@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree( *firstblock = cur->bc_private.b.firstblock = args.fsbno; cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); /* * Fill in the child block. @@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents( XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); XFS_IFORK_NEXT_SET(ip, whichfork, 1); ip->i_d.di_nblocks = 1; - xfs_trans_mod_dquot_byino(tp, ip, + XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); flags |= xfs_ilog_fext(whichfork); } else { @@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork( XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT))) goto error0; xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ? 
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { @@ -4983,11 +4983,10 @@ xfs_bmapi( * adjusted later. We return if we haven't * allocated blocks already inside this loop. */ - error = xfs_trans_reserve_quota_nblks( - NULL, ip, (long)alen, 0, + if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS( + mp, NULL, ip, (long)alen, 0, rt ? XFS_QMOPT_RES_RTBLKS : - XFS_QMOPT_RES_REGBLKS); - if (error) { + XFS_QMOPT_RES_REGBLKS))) { if (n == 0) { *nmap = 0; ASSERT(cur == NULL); @@ -5036,8 +5035,8 @@ xfs_bmapi( if (XFS_IS_QUOTA_ON(mp)) /* unreserve the blocks now */ (void) - xfs_trans_unreserve_quota_nblks( - NULL, ip, + XFS_TRANS_UNRESERVE_QUOTA_NBLKS( + mp, NULL, ip, (long)alen, 0, rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); @@ -5692,14 +5691,14 @@ xfs_bunmapi( do_div(rtexts, mp->m_sb.sb_rextsize); xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, (int64_t)rtexts, rsvd); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, + (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, + NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)del.br_blockcount, rsvd); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, + (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, + NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); } ip->i_delayed_blks -= del.br_blockcount; @@ -6086,7 +6085,6 @@ xfs_getbmap( break; } - kmem_free(out); return error; } diff --git a/trunk/fs/xfs/xfs_bmap_btree.c b/trunk/fs/xfs/xfs_bmap_btree.c index 5c1ade06578e..0760d352586f 100644 --- a/trunk/fs/xfs/xfs_bmap_btree.c +++ b/trunk/fs/xfs/xfs_bmap_btree.c @@ -590,7 +590,7 @@ xfs_bmbt_alloc_block( cur->bc_private.b.allocated++; cur->bc_private.b.ip->i_d.di_nblocks++; xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip, + XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, XFS_TRANS_DQ_BCOUNT, 1L); new->l = cpu_to_be64(args.fsbno); @@ -618,7 +618,7 @@ xfs_bmbt_free_block( ip->i_d.di_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, bp); return 0; } diff --git a/trunk/fs/xfs/xfs_filestream.c b/trunk/fs/xfs/xfs_filestream.c index edf8bdf4141f..6c87c8f304ef 100644 --- a/trunk/fs/xfs/xfs_filestream.c +++ b/trunk/fs/xfs/xfs_filestream.c @@ -542,8 +542,10 @@ xfs_filestream_associate( * waiting for the lock because someone else is waiting on the lock we * hold and we cannot drop that as we are in a transaction here. * - * Lucky for us, this inversion is not a problem because it's a - * directory inode that we are trying to lock here. + * Lucky for us, this inversion is rarely a problem because it's a + * directory inode that we are trying to lock here and that means the + * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is + * used. i.e. freeze, remount-ro, quotasync or unmount. 
* * So, if we can't get the iolock without sleeping then just give up */ diff --git a/trunk/fs/xfs/xfs_fs.h b/trunk/fs/xfs/xfs_fs.h index c4ea51b55dce..f7c06fac8229 100644 --- a/trunk/fs/xfs/xfs_fs.h +++ b/trunk/fs/xfs/xfs_fs.h @@ -239,13 +239,10 @@ typedef struct xfs_fsop_resblks { * Minimum and maximum sizes need for growth checks */ #define XFS_MIN_AG_BLOCKS 64 -#define XFS_MIN_LOG_BLOCKS 512ULL -#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) -#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) - -/* keep the maximum size under 2^31 by a small amount */ -#define XFS_MAX_LOG_BYTES \ - ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) +#define XFS_MIN_LOG_BLOCKS 512 +#define XFS_MAX_LOG_BLOCKS (64 * 1024) +#define XFS_MIN_LOG_BYTES (256 * 1024) +#define XFS_MAX_LOG_BYTES (128 * 1024 * 1024) /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index 76c540f719e4..89b81eedce6a 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -18,7 +18,6 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" -#include "xfs_acl.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" @@ -83,7 +82,6 @@ xfs_inode_alloc( memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); ip->i_size = 0; ip->i_new_size = 0; - xfs_inode_init_acls(ip); /* * Initialize inode's trace buffers. @@ -502,7 +500,10 @@ xfs_ireclaim( * ilock one but will still hold the iolock. */ xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_qm_dqdetach(ip); + /* + * Release dquots (and their references) if any. + */ + XFS_QM_DQDETACH(ip->i_mount, ip); xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); switch (ip->i_d.di_mode & S_IFMT) { @@ -560,7 +561,6 @@ xfs_ireclaim( ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - xfs_inode_clear_acls(ip); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c index 1f22d65fed0a..123b20c8cbf2 100644 --- a/trunk/fs/xfs/xfs_inode.c +++ b/trunk/fs/xfs/xfs_inode.c @@ -49,6 +49,7 @@ #include "xfs_utils.h" #include "xfs_dir2_trace.h" #include "xfs_quota.h" +#include "xfs_acl.h" #include "xfs_filestream.h" #include "xfs_vnodeops.h" diff --git a/trunk/fs/xfs/xfs_inode.h b/trunk/fs/xfs/xfs_inode.h index 77016702938b..f879c1bc4b96 100644 --- a/trunk/fs/xfs/xfs_inode.h +++ b/trunk/fs/xfs/xfs_inode.h @@ -18,7 +18,6 @@ #ifndef __XFS_INODE_H__ #define __XFS_INODE_H__ -struct posix_acl; struct xfs_dinode; struct xfs_inode; @@ -273,11 +272,6 @@ typedef struct xfs_inode { /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ -#ifdef CONFIG_XFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif - /* Trace buffers per inode. */ #ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ diff --git a/trunk/fs/xfs/xfs_iomap.c b/trunk/fs/xfs/xfs_iomap.c index 67ae5555a30a..5aaa2d7ec155 100644 --- a/trunk/fs/xfs/xfs_iomap.c +++ b/trunk/fs/xfs/xfs_iomap.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -384,7 +385,7 @@ xfs_iomap_write_direct( * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. 
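 * (With the qmops table this is expressed by passing XFS_QMOPT_ILOCKED * to XFS_QM_DQATTACH rather than calling a separate * xfs_qm_dqattach_locked() entry point.)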
*/ - error = xfs_qm_dqattach_locked(ip, 0); + error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED); if (error) return XFS_ERROR(error); @@ -443,7 +444,8 @@ xfs_iomap_write_direct( if (error) goto error_out; - error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, + qblocks, 0, quota_flag); if (error) goto error1; @@ -493,7 +495,7 @@ xfs_iomap_write_direct( error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); - xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); + XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -580,7 +582,7 @@ xfs_iomap_write_delay( * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. */ - error = xfs_qm_dqattach_locked(ip, 0); + error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); if (error) return XFS_ERROR(error); @@ -682,8 +684,7 @@ xfs_iomap_write_allocate( /* * Make sure that the dquots are there. */ - error = xfs_qm_dqattach(ip, 0); - if (error) + if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/trunk/fs/xfs/xfs_log_recover.c b/trunk/fs/xfs/xfs_log_recover.c index 47da2fb45377..7ba450116d4f 100644 --- a/trunk/fs/xfs/xfs_log_recover.c +++ b/trunk/fs/xfs/xfs_log_recover.c @@ -1975,30 +1975,16 @@ xlog_recover_do_reg_buffer( error = 0; if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { - if (item->ri_buf[i].i_addr == NULL) { - cmn_err(CE_ALERT, - "XFS: NULL dquot in %s.", __func__); - goto next; - } - if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) { - cmn_err(CE_ALERT, - "XFS: dquot too small (%d) in %s.", - item->ri_buf[i].i_len, __func__); - goto next; - } error = xfs_qm_dqcheck((xfs_disk_dquot_t *) item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); - if (error) - goto next; } - - memcpy(xfs_buf_offset(bp, - (uint)bit << XFS_BLI_SHIFT), /* dest */ - item->ri_buf[i].i_addr, /* source */ - nbits<<XFS_BLI_SHIFT); /* length */ + memcpy(xfs_buf_offset(bp, + (uint)bit << XFS_BLI_SHIFT), /* dest */ + item->ri_buf[i].i_addr, /* source */ + nbits<<XFS_BLI_SHIFT); /* length */ @@ xlog_recover_do_dquot_trans( recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; - - if (item->ri_buf[1].i_addr == NULL) { - cmn_err(CE_ALERT, - "XFS: NULL dquot in %s.", __func__); - return XFS_ERROR(EIO); - } - if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) { - cmn_err(CE_ALERT, - "XFS: dquot too small (%d) in %s.", - item->ri_buf[1].i_len, __func__); - return XFS_ERROR(EIO); - } - + ASSERT(recddq); /* * This type of quotas was turned off, so ignore this record. */ diff --git a/trunk/fs/xfs/xfs_mount.c b/trunk/fs/xfs/xfs_mount.c index 5c6f092659c1..65a99725d0cc 100644 --- a/trunk/fs/xfs/xfs_mount.c +++ b/trunk/fs/xfs/xfs_mount.c @@ -959,53 +959,6 @@ xfs_check_sizes(xfs_mount_t *mp) return 0; } -/* - * Clear the quotaflags in memory and in the superblock. - */ -int -xfs_mount_reset_sbqflags( - struct xfs_mount *mp) -{ - int error; - struct xfs_trans *tp; - - mp->m_qflags = 0; - - /* - * It is OK to look at sb_qflags here in mount path, - * without m_sb_lock.
- */ - if (mp->m_sb.sb_qflags == 0) - return 0; - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = 0; - spin_unlock(&mp->m_sb_lock); - - /* - * If the fs is readonly, let the incore superblock run - * with quotas off but don't flush the update out to disk - */ - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; - -#ifdef QUOTADEBUG - xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); -#endif - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); - if (error) { - xfs_trans_cancel(tp, 0); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_mount_reset_sbqflags: Superblock update failed!"); - return error; - } - - xfs_mod_sb(tp, XFS_SB_QFLAGS); - return xfs_trans_commit(tp, 0); -} - /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -1023,8 +976,7 @@ xfs_mountfs( xfs_sb_t *sbp = &(mp->m_sb); xfs_inode_t *rip; __uint64_t resblks; - uint quotamount = 0; - uint quotaflags = 0; + uint quotamount, quotaflags; int error = 0; xfs_mount_common(mp, sbp); @@ -1258,28 +1210,9 @@ xfs_mountfs( /* * Initialise the XFS quota management subsystem for this mount */ - if (XFS_IS_QUOTA_RUNNING(mp)) { - error = xfs_qm_newmount(mp, "amount, "aflags); - if (error) - goto out_rtunmount; - } else { - ASSERT(!XFS_IS_QUOTA_ON(mp)); - - /* - * If a file system had quotas running earlier, but decided to - * mount without -o uquota/pquota/gquota options, revoke the - * quotachecked license. - */ - if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { - cmn_err(CE_NOTE, - "XFS: resetting qflags for filesystem %s", - mp->m_fsname); - - error = xfs_mount_reset_sbqflags(mp); - if (error) - return error; - } - } + error = XFS_QM_INIT(mp, "amount, "aflags); + if (error) + goto out_rtunmount; /* * Finish recovering the file system. This part needed to be @@ -1295,19 +1228,9 @@ xfs_mountfs( /* * Complete the quota initialisation, post-log-replay component. */ - if (quotamount) { - ASSERT(mp->m_qflags == 0); - mp->m_qflags = quotaflags; - - xfs_qm_mount_quotas(mp); - } - -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) - if (XFS_IS_QUOTA_ON(mp)) - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); - else - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); -#endif + error = XFS_QM_MOUNT(mp, quotamount, quotaflags); + if (error) + goto out_rtunmount; /* * Now we are mounted, reserve a small amount of unused space for @@ -1356,7 +1279,12 @@ xfs_unmountfs( __uint64_t resblks; int error; - xfs_qm_unmount_quotas(mp); + /* + * Release dquot that rootinode, rbmino and rsumino might be holding, + * and release the quota inodes. + */ + XFS_QM_UNMOUNT(mp); + xfs_rtunmount_inodes(mp); IRELE(mp->m_rootip); @@ -1371,9 +1299,12 @@ xfs_unmountfs( * need to force the log first. 
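 * (Reclaiming the inodes below also drops any dquot references they * still hold, after which XFS_QM_DQPURGEALL can tear down the * remaining dquots.)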
*/ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC); + + XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); - xfs_qm_unmount(mp); + if (mp->m_quotainfo) + XFS_QM_DONE(mp); /* * Flush out the log synchronously so that we know for sure diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index a5122382afde..d6a64392f983 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -64,8 +64,6 @@ struct xfs_swapext; struct xfs_mru_cache; struct xfs_nameops; struct xfs_ail; -struct xfs_quotainfo; - /* * Prototypes and functions for the Data Migration subsystem. @@ -109,6 +107,86 @@ typedef struct xfs_dmops { (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) +/* + * Prototypes and functions for the Quota Management subsystem. + */ + +struct xfs_dquot; +struct xfs_dqtrxops; +struct xfs_quotainfo; + +typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); +typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); +typedef void (*xfs_qmunmount_t)(struct xfs_mount *); +typedef void (*xfs_qmdone_t)(struct xfs_mount *); +typedef void (*xfs_dqrele_t)(struct xfs_dquot *); +typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); +typedef void (*xfs_dqdetach_t)(struct xfs_inode *); +typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint); +typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *, + struct xfs_inode *, uid_t, gid_t, prid_t, uint, + struct xfs_dquot **, struct xfs_dquot **); +typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot *, struct xfs_dquot *); +typedef int (*xfs_dqvoprename_t)(struct xfs_inode **); +typedef struct xfs_dquot * (*xfs_dqvopchown_t)( + struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot **, struct xfs_dquot *); +typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot *, struct xfs_dquot *, uint); +typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *); +typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); + +typedef struct xfs_qmops { + xfs_qminit_t xfs_qminit; + xfs_qmdone_t xfs_qmdone; + xfs_qmmount_t xfs_qmmount; + xfs_qmunmount_t xfs_qmunmount; + xfs_dqrele_t xfs_dqrele; + xfs_dqattach_t xfs_dqattach; + xfs_dqdetach_t xfs_dqdetach; + xfs_dqpurgeall_t xfs_dqpurgeall; + xfs_dqvopalloc_t xfs_dqvopalloc; + xfs_dqvopcreate_t xfs_dqvopcreate; + xfs_dqvoprename_t xfs_dqvoprename; + xfs_dqvopchown_t xfs_dqvopchown; + xfs_dqvopchownresv_t xfs_dqvopchownresv; + xfs_dqstatvfs_t xfs_dqstatvfs; + xfs_dqsync_t xfs_dqsync; + struct xfs_dqtrxops *xfs_dqtrxops; +} xfs_qmops_t; + +#define XFS_QM_INIT(mp, mnt, fl) \ + (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) +#define XFS_QM_MOUNT(mp, mnt, fl) \ + (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl) +#define XFS_QM_UNMOUNT(mp) \ + (*(mp)->m_qm_ops->xfs_qmunmount)(mp) +#define XFS_QM_DONE(mp) \ + (*(mp)->m_qm_ops->xfs_qmdone)(mp) +#define XFS_QM_DQRELE(mp, dq) \ + (*(mp)->m_qm_ops->xfs_dqrele)(dq) +#define XFS_QM_DQATTACH(mp, ip, fl) \ + (*(mp)->m_qm_ops->xfs_dqattach)(ip, fl) +#define XFS_QM_DQDETACH(mp, ip) \ + (*(mp)->m_qm_ops->xfs_dqdetach)(ip) +#define XFS_QM_DQPURGEALL(mp, fl) \ + (*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl) +#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \ + (*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2) +#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \ + (*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2) 
+#define XFS_QM_DQVOPRENAME(mp, ip) \ + (*(mp)->m_qm_ops->xfs_dqvoprename)(ip) +#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \ + (*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq) +#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \ + (*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl) +#define XFS_QM_DQSTATVFS(ip, statp) \ + (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp) +#define XFS_QM_DQSYNC(mp, flags) \ + (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags) + #ifdef HAVE_PERCPU_SB /* @@ -432,6 +510,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dmops_get(struct xfs_mount *); extern void xfs_dmops_put(struct xfs_mount *); +extern int xfs_qmops_get(struct xfs_mount *); +extern void xfs_qmops_put(struct xfs_mount *); extern struct xfs_dmops xfs_dmcore_xfs; diff --git a/trunk/fs/xfs/xfs_qmops.c b/trunk/fs/xfs/xfs_qmops.c new file mode 100644 index 000000000000..e101790ea8e7 --- /dev/null +++ b/trunk/fs/xfs/xfs_qmops.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_quota.h" +#include "xfs_error.h" + + +STATIC struct xfs_dquot * +xfs_dqvopchown_default( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_dquot **dqp, + struct xfs_dquot *dq) +{ + return NULL; +} + +/* + * Clear the quotaflags in memory and in the superblock. + */ +int +xfs_mount_reset_sbqflags(xfs_mount_t *mp) +{ + int error; + xfs_trans_t *tp; + + mp->m_qflags = 0; + /* + * It is OK to look at sb_qflags here in mount path, + * without m_sb_lock. 
+ */ + if (mp->m_sb.sb_qflags == 0) + return 0; + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = 0; + spin_unlock(&mp->m_sb_lock); + + /* + * if the fs is readonly, let the incore superblock run + * with quotas off but don't flush the update out to disk + */ + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; +#ifdef QUOTADEBUG + xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); +#endif + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + xfs_fs_cmn_err(CE_ALERT, mp, + "xfs_mount_reset_sbqflags: Superblock update failed!"); + return error; + } + xfs_mod_sb(tp, XFS_SB_QFLAGS); + error = xfs_trans_commit(tp, 0); + return error; +} + +STATIC int +xfs_noquota_init( + xfs_mount_t *mp, + uint *needquotamount, + uint *quotaflags) +{ + int error = 0; + + *quotaflags = 0; + *needquotamount = B_FALSE; + + ASSERT(!XFS_IS_QUOTA_ON(mp)); + + /* + * If a file system had quotas running earlier, but decided to + * mount without -o uquota/pquota/gquota options, revoke the + * quotachecked license. + */ + if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { + cmn_err(CE_NOTE, + "XFS resetting qflags for filesystem %s", + mp->m_fsname); + + error = xfs_mount_reset_sbqflags(mp); + } + return error; +} + +static struct xfs_qmops xfs_qmcore_stub = { + .xfs_qminit = (xfs_qminit_t) xfs_noquota_init, + .xfs_qmdone = (xfs_qmdone_t) fs_noerr, + .xfs_qmmount = (xfs_qmmount_t) fs_noerr, + .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, + .xfs_dqrele = (xfs_dqrele_t) fs_noerr, + .xfs_dqattach = (xfs_dqattach_t) fs_noerr, + .xfs_dqdetach = (xfs_dqdetach_t) fs_noerr, + .xfs_dqpurgeall = (xfs_dqpurgeall_t) fs_noerr, + .xfs_dqvopalloc = (xfs_dqvopalloc_t) fs_noerr, + .xfs_dqvopcreate = (xfs_dqvopcreate_t) fs_noerr, + .xfs_dqvoprename = (xfs_dqvoprename_t) fs_noerr, + .xfs_dqvopchown = xfs_dqvopchown_default, + .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr, + .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval, + .xfs_dqsync = (xfs_dqsync_t) fs_noerr, +}; + +int +xfs_qmops_get(struct xfs_mount *mp) +{ + if (XFS_IS_QUOTA_RUNNING(mp)) { +#ifdef CONFIG_XFS_QUOTA + mp->m_qm_ops = &xfs_qmcore_xfs; +#else + cmn_err(CE_WARN, + "XFS: qouta support not available in this kernel."); + return EINVAL; +#endif + } else { + mp->m_qm_ops = &xfs_qmcore_stub; + } + + return 0; +} + +void +xfs_qmops_put(struct xfs_mount *mp) +{ +} diff --git a/trunk/fs/xfs/xfs_quota.h b/trunk/fs/xfs/xfs_quota.h index 3ec91ac74c2a..f5d1202dde25 100644 --- a/trunk/fs/xfs/xfs_quota.h +++ b/trunk/fs/xfs/xfs_quota.h @@ -197,6 +197,7 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */ #define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */ #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ +#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */ #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ @@ -301,79 +302,69 @@ typedef struct xfs_dqtrx { long qt_delrtb_delta; /* delayed RT blk count changes */ } xfs_dqtrx_t; -#ifdef CONFIG_XFS_QUOTA -extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *); -extern void xfs_trans_free_dqinfo(struct xfs_trans *); -extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, - uint, long); -extern void 
xfs_trans_apply_dquot_deltas(struct xfs_trans *); -extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); -extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, - struct xfs_inode *, long, long, uint); -extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, - struct xfs_mount *, struct xfs_dquot *, - struct xfs_dquot *, long, long, uint); - -extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, - struct xfs_dquot **, struct xfs_dquot **); -extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *); -extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); -extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, - struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); -extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *, uint); -extern int xfs_qm_dqattach(struct xfs_inode *, uint); -extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); -extern void xfs_qm_dqdetach(struct xfs_inode *); -extern void xfs_qm_dqrele(struct xfs_dquot *); -extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); -extern int xfs_qm_sync(struct xfs_mount *, int); -extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); -extern void xfs_qm_mount_quotas(struct xfs_mount *); -extern void xfs_qm_unmount(struct xfs_mount *); -extern void xfs_qm_unmount_quotas(struct xfs_mount *); - -#else -static inline int -xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, - uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) -{ - *udqp = NULL; - *gdqp = NULL; - return 0; -} -#define xfs_trans_dup_dqinfo(tp, tp2) -#define xfs_trans_free_dqinfo(tp) -#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) -#define xfs_trans_apply_dquot_deltas(tp) -#define xfs_trans_unreserve_and_mod_dquots(tp) -#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0) -#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0) -#define xfs_qm_vop_create_dqattach(tp, ip, u, g) -#define xfs_qm_vop_rename_dqattach(it) (0) -#define xfs_qm_vop_chown(tp, ip, old, new) (NULL) -#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0) -#define xfs_qm_dqattach(ip, fl) (0) -#define xfs_qm_dqattach_locked(ip, fl) (0) -#define xfs_qm_dqdetach(ip) -#define xfs_qm_dqrele(d) -#define xfs_qm_statvfs(ip, s) -#define xfs_qm_sync(mp, fl) (0) -#define xfs_qm_newmount(mp, a, b) (0) -#define xfs_qm_mount_quotas(mp) -#define xfs_qm_unmount(mp) -#define xfs_qm_unmount_quotas(mp) (0) -#endif /* CONFIG_XFS_QUOTA */ - -#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ - xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) -#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ - xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ +/* + * Dquot transaction functions, used if quota is enabled. 
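+ * Each op is reached through the XFS_DQTRXOP()/XFS_DQTRXOP_VOID() + * dispatch macros below, which degrade to 0 or a no-op when the + * mount's m_qm_ops->xfs_dqtrxops pointer is NULL.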
+ */ +typedef void (*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *); +typedef void (*qo_mod_dquot_byino_t)(struct xfs_trans *, + struct xfs_inode *, uint, long); +typedef void (*qo_free_dqinfo_t)(struct xfs_trans *); +typedef void (*qo_apply_dquot_deltas_t)(struct xfs_trans *); +typedef void (*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *); +typedef int (*qo_reserve_quota_nblks_t)( + struct xfs_trans *, struct xfs_mount *, + struct xfs_inode *, long, long, uint); +typedef int (*qo_reserve_quota_bydquots_t)( + struct xfs_trans *, struct xfs_mount *, + struct xfs_dquot *, struct xfs_dquot *, + long, long, uint); +typedef struct xfs_dqtrxops { + qo_dup_dqinfo_t qo_dup_dqinfo; + qo_free_dqinfo_t qo_free_dqinfo; + qo_mod_dquot_byino_t qo_mod_dquot_byino; + qo_apply_dquot_deltas_t qo_apply_dquot_deltas; + qo_reserve_quota_nblks_t qo_reserve_quota_nblks; + qo_reserve_quota_bydquots_t qo_reserve_quota_bydquots; + qo_unreserve_and_mod_dquots_t qo_unreserve_and_mod_dquots; +} xfs_dqtrxops_t; + +#define XFS_DQTRXOP(mp, tp, op, args...) \ + ((mp)->m_qm_ops->xfs_dqtrxops ? \ + ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0) + +#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \ + ((mp)->m_qm_ops->xfs_dqtrxops ? \ + ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0) + +#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \ + XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp) +#define XFS_TRANS_FREE_DQINFO(mp, tp) \ + XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo) +#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \ + XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta) +#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \ + XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas) +#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \ + XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl) +#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \ + XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl) +#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \ + XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots) + +#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \ + XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags) +#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ + XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \ + f | XFS_QMOPT_RES_REGBLKS) +#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ + XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \ f | XFS_QMOPT_RES_REGBLKS) extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); extern int xfs_mount_reset_sbqflags(struct xfs_mount *); +extern struct xfs_qmops xfs_qmcore_xfs; + #endif /* __KERNEL__ */ + #endif /* __XFS_QUOTA_H__ */ diff --git a/trunk/fs/xfs/xfs_rename.c b/trunk/fs/xfs/xfs_rename.c index b81deea0ce19..58f85e9cd11d 100644 --- a/trunk/fs/xfs/xfs_rename.c +++ b/trunk/fs/xfs/xfs_rename.c @@ -166,8 +166,7 @@ xfs_rename( /* * Attach the dquots to the inodes */ - error = xfs_qm_vop_rename_dqattach(inodes); - if (error) { + if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); goto std_return; } diff --git a/trunk/fs/xfs/xfs_rw.c b/trunk/fs/xfs/xfs_rw.c index fea68615ed23..36f3a21c54d2 100644 --- a/trunk/fs/xfs/xfs_rw.c +++ b/trunk/fs/xfs/xfs_rw.c @@ -41,6 +41,7 @@ #include "xfs_ialloc.h" #include "xfs_attr.h" #include "xfs_bmap.h" +#include "xfs_acl.h" #include "xfs_error.h" #include "xfs_buf_item.h" #include "xfs_rw.h" diff --git 
diff --git a/trunk/fs/xfs/xfs_trans.c b/trunk/fs/xfs/xfs_trans.c
index 66b849358e62..bcc39d358ad3 100644
--- a/trunk/fs/xfs/xfs_trans.c
+++ b/trunk/fs/xfs/xfs_trans.c
@@ -297,7 +297,7 @@ xfs_trans_dup(
 	tp->t_rtx_res = tp->t_rtx_res_used;
 	ntp->t_pflags = tp->t_pflags;
 
-	xfs_trans_dup_dqinfo(tp, ntp);
+	XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
 
 	atomic_inc(&tp->t_mountp->m_active_trans);
 	return ntp;
@@ -829,7 +829,7 @@ _xfs_trans_commit(
 	 * means is that we have some (non-persistent) quota
 	 * reservations that need to be unreserved.
 	 */
-	xfs_trans_unreserve_and_mod_dquots(tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
 	if (tp->t_ticket) {
 		commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
@@ -848,9 +848,10 @@ _xfs_trans_commit(
 	/*
 	 * If we need to update the superblock, then do it now.
 	 */
-	if (tp->t_flags & XFS_TRANS_SB_DIRTY)
+	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
 		xfs_trans_apply_sb_deltas(tp);
-	xfs_trans_apply_dquot_deltas(tp);
+	}
+	XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp);
 
 	/*
 	 * Ask each log item how many log_vector entries it will
@@ -1055,7 +1056,7 @@ xfs_trans_uncommit(
 	}
 
 	xfs_trans_unreserve_and_mod_sb(tp);
-	xfs_trans_unreserve_and_mod_dquots(tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
 
 	xfs_trans_free_items(tp, flags);
 	xfs_trans_free_busy(tp);
@@ -1180,7 +1181,7 @@ xfs_trans_cancel(
 	}
 #endif
 	xfs_trans_unreserve_and_mod_sb(tp);
-	xfs_trans_unreserve_and_mod_dquots(tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
 
 	if (tp->t_ticket) {
 		if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1210,7 +1211,7 @@ xfs_trans_free(
 	xfs_trans_t	*tp)
 {
 	atomic_dec(&tp->t_mountp->m_active_trans);
-	xfs_trans_free_dqinfo(tp);
+	XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
 	kmem_zone_free(xfs_trans_zone, tp);
 }
diff --git a/trunk/fs/xfs/xfs_utils.c b/trunk/fs/xfs/xfs_utils.c
index 4d88616bde91..79b9e5ea5359 100644
--- a/trunk/fs/xfs/xfs_utils.c
+++ b/trunk/fs/xfs/xfs_utils.c
@@ -166,7 +166,7 @@ xfs_dir_ialloc(
 		xfs_buf_relse(ialloc_context);
 		if (dqinfo) {
 			tp->t_dqinfo = dqinfo;
-			xfs_trans_free_dqinfo(tp);
+			XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
 		}
 		*tpp = ntp;
 		*ipp = NULL;
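[Editor's note: besides swapping in the XFS_QM_*/XFS_TRANS_* macros, the xfs_vnodeops.c hunks that follow repeatedly fold the two-statement error check back into the assignment-in-condition form this code used before. A small sketch contrasting the two idioms; demo_dqattach() is an illustrative stand-in for the quota call, not an XFS function.]

#include <stdio.h>

/* Illustrative stand-in for a quota attach call; returns 0 or an errno. */
static int demo_dqattach(int fail)
{
	return fail ? 22 : 0;	/* 22 == EINVAL, purely for the demo */
}

int main(void)
{
	int error;

	/* Two-statement form, as the tree had before this patch: */
	error = demo_dqattach(0);
	if (error)
		return error;

	/*
	 * Assignment-in-condition form restored by the hunks below; the
	 * doubled parentheses tell the compiler the assignment is intended,
	 * silencing the "assignment used as truth value" warning.
	 */
	if ((error = demo_dqattach(0)))
		return error;

	printf("both forms succeeded\n");
	return 0;
}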
diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c
index c4eca5ed5dab..19cf90a9c762 100644
--- a/trunk/fs/xfs/xfs_vnodeops.c
+++ b/trunk/fs/xfs/xfs_vnodeops.c
@@ -42,7 +42,6 @@
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_rw.h"
 #include "xfs_error.h"
@@ -119,7 +118,7 @@ xfs_setattr(
 	 */
 	ASSERT(udqp == NULL);
 	ASSERT(gdqp == NULL);
-	code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
+	code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
 				  qflags, &udqp, &gdqp);
 	if (code)
 		return code;
@@ -181,11 +180,10 @@ xfs_setattr(
 		 * Do a quota reservation only if uid/gid is actually
 		 * going to change.
 		 */
-		if (XFS_IS_QUOTA_RUNNING(mp) &&
-		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
 			ASSERT(tp);
-			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
 						capable(CAP_FOWNER) ?
 						XFS_QMOPT_FORCE_RES : 0);
 			if (code)	/* out of quota */
@@ -219,7 +217,7 @@ xfs_setattr(
 	/*
 	 * Make sure that the dquots are attached to the inode.
 	 */
-	code = xfs_qm_dqattach_locked(ip, 0);
+	code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
 	if (code)
 		goto error_return;
@@ -353,21 +351,21 @@ xfs_setattr(
 	 * in the transaction.
 	 */
 	if (iuid != uid) {
-		if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+		if (XFS_IS_UQUOTA_ON(mp)) {
 			ASSERT(mask & ATTR_UID);
 			ASSERT(udqp);
-			olddquot1 = xfs_qm_vop_chown(tp, ip,
+			olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
 						&ip->i_udquot, udqp);
 		}
 		ip->i_d.di_uid = uid;
 		inode->i_uid = uid;
 	}
 	if (igid != gid) {
-		if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+		if (XFS_IS_GQUOTA_ON(mp)) {
 			ASSERT(!XFS_IS_PQUOTA_ON(mp));
 			ASSERT(mask & ATTR_GID);
 			ASSERT(gdqp);
-			olddquot2 = xfs_qm_vop_chown(tp, ip,
+			olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
 						&ip->i_gdquot, gdqp);
 		}
 		ip->i_d.di_gid = gid;
@@ -463,25 +461,13 @@ xfs_setattr(
 	/*
 	 * Release any dquot(s) the inode had kept before chown.
 	 */
-	xfs_qm_dqrele(olddquot1);
-	xfs_qm_dqrele(olddquot2);
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, olddquot1);
+	XFS_QM_DQRELE(mp, olddquot2);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
-	if (code)
+	if (code) {
 		return code;
-
-	/*
-	 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
-	 *	     update.  We could avoid this with linked transactions
-	 *	     and passing down the transaction pointer all the way
-	 *	     to attr_set.  No previous user of the generic
-	 *	     Posix ACL code seems to care about this issue either.
-	 */
-	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
-		code = -xfs_acl_chmod(inode);
-		if (code)
-			return XFS_ERROR(code);
 	}
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
@@ -496,8 +482,8 @@ xfs_setattr(
 	commit_flags |= XFS_TRANS_ABORT;
 	/* FALLTHROUGH */
  error_return:
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 	if (tp) {
 		xfs_trans_cancel(tp, commit_flags);
 	}
@@ -753,8 +739,7 @@ xfs_free_eofblocks(
 	/*
 	 * Attach the dquots to the inode up front.
 	 */
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
 	/*
@@ -1196,8 +1181,7 @@ xfs_inactive(
 
 	ASSERT(ip->i_d.di_nlink == 0);
 
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return VN_INACTIVE_CACHE;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1323,7 +1307,7 @@ xfs_inactive(
 	/*
 	 * Credit the quota account(s). The inode is gone.
 	 */
-	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
+	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
 
 	/*
 	 * Just ignore errors at this point.  There is nothing we can
@@ -1339,11 +1323,11 @@ xfs_inactive(
 		xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
 			"xfs_trans_commit() returned error %d", error);
 	}
-
 	/*
 	 * Release the dquots held by inode, if any.
 	 */
-	xfs_qm_dqdetach(ip);
+	XFS_QM_DQDETACH(mp, ip);
+
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 
  out:
@@ -1443,7 +1427,8 @@ xfs_create(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
+	error = XFS_QM_DQVOPALLOC(mp, dp,
+			current_fsuid(), current_fsgid(), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -1504,7 +1489,7 @@ xfs_create(
 	/*
 	 * Reserve disk quota and the inode.
 	 */
-	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
+	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
 	if (error)
 		goto out_trans_cancel;
@@ -1576,7 +1561,7 @@ xfs_create(
 	 * These ids of the inode couldn't have changed since the new
 	 * inode has been locked ever since it was created.
 	 */
-	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
+	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
 
 	/*
 	 * xfs_trans_commit normally decrements the vnode ref count
@@ -1595,8 +1580,8 @@ xfs_create(
 		goto out_dqrele;
 	}
 
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	*ipp = ip;
 
@@ -1617,8 +1602,8 @@ xfs_create(
  out_trans_cancel:
 	xfs_trans_cancel(tp, cancel_flags);
  out_dqrele:
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	if (unlock_dp_on_error)
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -1852,11 +1837,11 @@ xfs_remove(
 		return error;
 	}
 
-	error = xfs_qm_dqattach(dp, 0);
+	error = XFS_QM_DQATTACH(mp, dp, 0);
 	if (error)
 		goto std_return;
 
-	error = xfs_qm_dqattach(ip, 0);
+	error = XFS_QM_DQATTACH(mp, ip, 0);
 	if (error)
 		goto std_return;
 
@@ -2043,11 +2028,11 @@ xfs_link(
 
 	/* Return through std_return after this point. */
 
-	error = xfs_qm_dqattach(sip, 0);
+	error = XFS_QM_DQATTACH(mp, sip, 0);
 	if (error)
 		goto std_return;
 
-	error = xfs_qm_dqattach(tdp, 0);
+	error = XFS_QM_DQATTACH(mp, tdp, 0);
 	if (error)
 		goto std_return;
 
@@ -2220,7 +2205,8 @@ xfs_symlink(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
+	error = XFS_QM_DQVOPALLOC(mp, dp,
+			current_fsuid(), current_fsgid(), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -2262,7 +2248,7 @@ xfs_symlink(
 	/*
 	 * Reserve disk quota : blocks and inode.
 	 */
-	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
+	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
 	if (error)
 		goto error_return;
@@ -2302,7 +2288,7 @@ xfs_symlink(
 	/*
 	 * Also attach the dquot(s) to it, if applicable.
 	 */
-	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
+	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
 
 	if (resblks)
 		resblks -= XFS_IALLOC_SPACE_RES(mp);
@@ -2390,8 +2376,8 @@ xfs_symlink(
 		goto error2;
 	}
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	/* Fall through to std_return with error = 0 or errno from
 	 * xfs_trans_commit	*/
@@ -2415,8 +2401,8 @@ xfs_symlink(
 	cancel_flags |= XFS_TRANS_ABORT;
  error_return:
 	xfs_trans_cancel(tp, cancel_flags);
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	if (unlock_dp_on_error)
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -2555,8 +2541,7 @@ xfs_alloc_file_space(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
 	if (len <= 0)
@@ -2643,8 +2628,8 @@ xfs_alloc_file_space(
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
-						      0, quota_flag);
+		error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+						      qblocks, 0, quota_flag);
 		if (error)
 			goto error1;
 
@@ -2703,7 +2688,7 @@ xfs_alloc_file_space(
 error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
-	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
+	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
 
 error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2842,8 +2827,7 @@ xfs_free_file_space(
 
 	xfs_itrace_entry(ip);
 
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
 	error = 0;
@@ -2969,9 +2953,9 @@ xfs_free_file_space(
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_trans_reserve_quota(tp, mp,
-				ip->i_udquot, ip->i_gdquot,
-				resblks, 0, XFS_QMOPT_RES_REGBLKS);
+		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
+				ip->i_udquot, ip->i_gdquot, resblks, 0,
+				XFS_QMOPT_RES_REGBLKS);
 		if (error)
 			goto error1;
 
diff --git a/trunk/fs/xfs/xfs_vnodeops.h b/trunk/fs/xfs/xfs_vnodeops.h
index a9e102de71a1..04373c6c61ff 100644
--- a/trunk/fs/xfs/xfs_vnodeops.h
+++ b/trunk/fs/xfs/xfs_vnodeops.h
@@ -18,7 +18,6 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
 #define XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
 #define XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if operation would block */
 #define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
-#define XFS_ATTR_NOACL		0x08	/* Don't call xfs_acl_chmod */
 
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_fsync(struct xfs_inode *ip);
diff --git a/trunk/kernel/timer.c b/trunk/kernel/timer.c
index faf2db897de4..c01e568935ea 100644
--- a/trunk/kernel/timer.c
+++ b/trunk/kernel/timer.c
@@ -757,7 +757,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
 		wake_up_idle_cpu(cpu);
 	spin_unlock_irqrestore(&base->lock, flags);
 }
-EXPORT_SYMBOL_GPL(add_timer_on);
 
 /**
  * del_timer - deactive a timer.