From d96050750b6509c94e89bc011badfc54c1639dcf Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 18 May 2009 08:14:41 -0400
Subject: [PATCH]
--- yaml ---
r: 148939
b: refs/heads/master
c: 9528d1c7a541b481a0e80301dc8d545848104023
h: refs/heads/master
i:
  148937: d7769787e0cf5aafbd6e923dc17bd579de94f9a3
  148935: 6439d0497b2232baba24d303423cadc9d20158ab
v: v3
---
 [refs]                                        | 2 +-
 trunk/Documentation/Changes                   | 15 -
 trunk/Documentation/SubmittingPatches         | 76 +-
 .../development-process/5.Posting             | 31 +-
 .../feature-removal-schedule.txt              | 10 -
 trunk/Documentation/filesystems/debugfs.txt   | 158 --
 .../Documentation/x86/x86_64/boot-options.txt | 44 +-
 trunk/Documentation/x86/x86_64/machinecheck   | 8 +-
 trunk/arch/x86/Kconfig                        | 45 +-
 trunk/arch/x86/crypto/Makefile                | 2 -
 trunk/arch/x86/crypto/aesni-intel_glue.c      | 267 +--
 trunk/arch/x86/crypto/fpu.c                   | 166 --
 trunk/arch/x86/include/asm/entry_arch.h       | 11 +-
 trunk/arch/x86/include/asm/hardirq.h          | 2 +-
 trunk/arch/x86/include/asm/hw_irq.h           | 2 -
 trunk/arch/x86/include/asm/irq_vectors.h      | 17 +-
 trunk/arch/x86/include/asm/mce.h              | 88 +-
 trunk/arch/x86/include/asm/msr-index.h        | 7 -
 trunk/arch/x86/kernel/apic/apic.c             | 4 +-
 trunk/arch/x86/kernel/apic/nmi.c              | 2 +-
 trunk/arch/x86/kernel/cpu/mcheck/Makefile     | 10 +-
 trunk/arch/x86/kernel/cpu/mcheck/k7.c         | 42 +-
 trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c | 127 --
 .../arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 -
 .../arch/x86/kernel/cpu/mcheck/mce-severity.c | 218 --
 trunk/arch/x86/kernel/cpu/mcheck/mce.c        | 1964 -----------------
 trunk/arch/x86/kernel/cpu/mcheck/mce.h        | 26 +-
 trunk/arch/x86/kernel/cpu/mcheck/mce_32.c     | 76 +
 trunk/arch/x86/kernel/cpu/mcheck/mce_64.c     | 1188 ++++++++++
 trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 203 +-
 trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c  | 74 -
 .../arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 65 +-
 trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c  | 57 +-
 trunk/arch/x86/kernel/cpu/mcheck/p4.c         | 86 +-
 trunk/arch/x86/kernel/cpu/mcheck/p5.c         | 48 +-
 trunk/arch/x86/kernel/cpu/mcheck/p6.c         | 26 +-
 .../arch/x86/kernel/cpu/mcheck/therm_throt.c  | 73 +-
 trunk/arch/x86/kernel/cpu/mcheck/threshold.c  | 2 +-
 trunk/arch/x86/kernel/cpu/mcheck/winchip.c    | 17 +-
 trunk/arch/x86/kernel/entry_64.S              | 11 +-
 trunk/arch/x86/kernel/irq.c                   | 19 +-
 trunk/arch/x86/kernel/irqinit.c               | 3 -
 trunk/arch/x86/kernel/signal.c                | 6 +-
 trunk/arch/x86/kernel/smp.c                   | 28 +-
 trunk/arch/x86/kernel/traps.c                 | 6 +-
 trunk/crypto/Kconfig                          | 10 -
 trunk/crypto/algboss.c                        | 18 +-
 trunk/crypto/api.c                            | 14 +-
 trunk/crypto/cryptd.c                         | 14 +-
 trunk/crypto/internal.h                       | 3 +
 trunk/crypto/pcompress.c                      | 1 -
 trunk/crypto/tcrypt.c                         | 183 +-
 trunk/crypto/testmgr.c                        | 470 +---
 trunk/crypto/testmgr.h                        | 645 +-----
 trunk/crypto/zlib.c                           | 24 +-
 trunk/drivers/char/hw_random/Kconfig          | 2 +-
 trunk/drivers/char/hw_random/omap-rng.c       | 2 +-
 trunk/drivers/char/hw_random/timeriomem-rng.c | 26 +-
 trunk/drivers/char/hw_random/via-rng.c        | 15 +-
 trunk/drivers/crypto/Kconfig                  | 2 +-
 trunk/drivers/crypto/hifn_795x.c              | 8 +-
 trunk/drivers/crypto/padlock-aes.c            | 13 -
 trunk/drivers/crypto/talitos.c                | 713 ++----
 trunk/drivers/i2c/busses/Kconfig              | 2 +-
 trunk/drivers/i2c/busses/i2c-bfin-twi.c       | 11 +-
 trunk/fs/xfs/Kconfig                          | 1 -
 trunk/fs/xfs/Makefile                         | 5 +-
 trunk/fs/xfs/linux-2.6/xfs_acl.c              | 523 -----
 trunk/fs/xfs/linux-2.6/xfs_ioctl.c            | 25 +-
 trunk/fs/xfs/linux-2.6/xfs_iops.c             | 53 +-
 trunk/fs/xfs/linux-2.6/xfs_lrw.c              | 1 +
 trunk/fs/xfs/linux-2.6/xfs_quotaops.c         | 4 +-
 trunk/fs/xfs/linux-2.6/xfs_super.c            | 49 +-
 trunk/fs/xfs/linux-2.6/xfs_sync.c             | 479 ++--
 trunk/fs/xfs/linux-2.6/xfs_sync.h             | 19 +-
 trunk/fs/xfs/linux-2.6/xfs_xattr.c            | 67 +-
 trunk/fs/xfs/quota/xfs_dquot.c                | 5 +-
 trunk/fs/xfs/quota/xfs_dquot.h                | 1 +
 trunk/fs/xfs/quota/xfs_dquot_item.c           | 1 +
 trunk/fs/xfs/quota/xfs_qm.c                   | 168 +-
 trunk/fs/xfs/quota/xfs_qm.h                   | 21 +
 trunk/fs/xfs/quota/xfs_qm_bhv.c               | 77 +-
 trunk/fs/xfs/quota/xfs_qm_stats.c             | 1 +
 trunk/fs/xfs/quota/xfs_qm_syscalls.c          | 113 +-
 trunk/fs/xfs/quota/xfs_trans_dquot.c          | 66 +-
 trunk/fs/xfs/xfs_acl.c                        | 874 ++++++++
 trunk/fs/xfs/xfs_acl.h                        | 97 +-
 trunk/fs/xfs/xfs_ag.h                         | 2 -
 trunk/fs/xfs/xfs_arch.h                       | 32 +
 trunk/fs/xfs/xfs_attr.c                       | 13 +-
 trunk/fs/xfs/xfs_bmap.c                       | 34 +-
 trunk/fs/xfs/xfs_bmap_btree.c                 | 4 +-
 trunk/fs/xfs/xfs_filestream.c                 | 6 +-
 trunk/fs/xfs/xfs_fs.h                         | 11 +-
 trunk/fs/xfs/xfs_iget.c                       | 8 +-
 trunk/fs/xfs/xfs_inode.c                      | 1 +
 trunk/fs/xfs/xfs_inode.h                      | 6 -
 trunk/fs/xfs/xfs_iomap.c                      | 13 +-
 trunk/fs/xfs/xfs_log_recover.c                | 38 +-
 trunk/fs/xfs/xfs_mount.c                      | 105 +-
 trunk/fs/xfs/xfs_mount.h                      | 84 +-
 trunk/fs/xfs/xfs_qmops.c                      | 152 ++
 trunk/fs/xfs/xfs_quota.h                      | 129 +-
 trunk/fs/xfs/xfs_rename.c                     | 3 +-
 trunk/fs/xfs/xfs_rw.c                         | 1 +
 trunk/fs/xfs/xfs_trans.c                      | 15 +-
 trunk/fs/xfs/xfs_utils.c                      | 2 +-
 trunk/fs/xfs/xfs_vnodeops.c                   | 114 +-
 trunk/fs/xfs/xfs_vnodeops.h                   | 1 -
 trunk/kernel/timer.c                          | 1 -
 110 files changed, 4237 insertions(+), 6711 deletions(-)
 delete mode 100644 trunk/Documentation/filesystems/debugfs.txt
 delete mode 100644 trunk/arch/x86/crypto/fpu.c
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce-internal.h
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce-severity.c
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce.c
 create mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce_32.c
 create mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce_64.c
 delete mode 100644 trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c
 delete mode 100644 trunk/fs/xfs/linux-2.6/xfs_acl.c
 create mode 100644 trunk/fs/xfs/xfs_qmops.c

diff --git a/[refs] b/[refs]
index 121480d33758..0f8bd7197cdc 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: a2ee2981ae2a7046b10980feae9f4ab813877106
+refs/heads/master: 9528d1c7a541b481a0e80301dc8d545848104023
diff --git a/trunk/Documentation/Changes b/trunk/Documentation/Changes
index d21b3b5aa543..b95082be4d5e 100644
--- a/trunk/Documentation/Changes
+++ b/trunk/Documentation/Changes
@@ -48,7 +48,6 @@ o procps 3.2.0 # ps --version
 o oprofile 0.9 # oprofiled --version
 o udev 081 # udevinfo -V
 o grub 0.93 # grub --version
-o mcelog 0.6
 
 Kernel compilation
 ==================
@@ -277,16 +276,6 @@ before running exportfs or mountd. It is recommended that all NFS
 services be protected from the internet-at-large by a firewall where
 that is possible.
 
-mcelog
-------
-
-In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility
-as a regular cronjob similar to the x86-64 kernel to process and log
-machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check
-events are errors reported by the CPU. Processing them is strongly encouraged.
-All x86-64 kernels since 2.6.4 require the mcelog utility to
-process machine checks.
-
 
 Getting updated software
 ========================
 
@@ -376,10 +365,6 @@ FUSE
 ----
 o <http://sourceforge.net/projects/fuse>
 
-mcelog
-------
-o
-
 Networking
 **********
 
diff --git a/trunk/Documentation/SubmittingPatches b/trunk/Documentation/SubmittingPatches
index 6c456835c1fd..f309d3c6221c 100644
--- a/trunk/Documentation/SubmittingPatches
+++ b/trunk/Documentation/SubmittingPatches
@@ -91,10 +91,6 @@ Be as specific as possible. The WORST descriptions possible include
 things like "update driver X", "bug fix for driver X", or "this patch
 includes updates for subsystem X. Please apply."
 
-The maintainer will thank you if you write your patch description in a
-form which can be easily pulled into Linux's source code management
-system, git, as a "commit log". See #15, below.
-
 If your description starts to get long, that's a sign that you probably
 need to split up your patch. See #3, next.
 
@@ -409,14 +405,7 @@ person it names. This tag documents that potentially interested
 parties have been included in the discussion
 
-14) Using Reported-by:, Tested-by: and Reviewed-by:
-
-If this patch fixes a problem reported by somebody else, consider adding a
-Reported-by: tag to credit the reporter for their contribution. Please
-note that this tag should not be added without the reporter's permission,
-especially if the problem was not reported in a public forum. That said,
-if we diligently credit our bug reporters, they will, hopefully, be
-inspired to help us again in the future.
+14) Using Tested-by: and Reviewed-by:
 
 A Tested-by: tag indicates that the patch has been successfully tested (in
 some environment) by the person named. This tag informs maintainers that
@@ -455,7 +444,7 @@ offer a Reviewed-by tag for a patch. This tag serves to give credit to
 reviewers and to inform maintainers of the degree of review which has been
 done on the patch. Reviewed-by: tags, when supplied by reviewers known to
 understand the subject area and to perform thorough reviews, will normally
-increase the likelihood of your patch getting into the kernel.
+increase the liklihood of your patch getting into the kernel.
 
 15) The canonical patch format
 
@@ -496,33 +485,12 @@ phrase" should not be a filename. Do not use the same "summary
 phrase" for every patch in a whole patch series (where a "patch
 series" is an ordered sequence of multiple, related patches).
 
-Bear in mind that the "summary phrase" of your email becomes a
-globally-unique identifier for that patch. It propagates all the way
-into the git changelog. The "summary phrase" may later be used in
-developer discussions which refer to the patch. People will want to
-google for the "summary phrase" to read discussion regarding that
-patch. It will also be the only thing that people may quickly see
-when, two or three months later, they are going through perhaps
-thousands of patches using tools such as "gitk" or "git log
---oneline".
-
-For these reasons, the "summary" must be no more than 70-75
-characters, and it must describe both what the patch changes, as well
-as why the patch might be necessary. It is challenging to be both
-succinct and descriptive, but that is what a well-written summary
-should do.
-
-The "summary phrase" may be prefixed by tags enclosed in square
-brackets: "Subject: [PATCH tag] <summary phrase>". The tags are not
-considered part of the summary phrase, but describe how the patch
-should be treated. Common tags might include a version descriptor if
-the multiple versions of the patch have been sent out in response to
-comments (i.e., "v1, v2, v3"), or "RFC" to indicate a request for
-comments. If there are four patches in a patch series the individual
-patches may be numbered like this: 1/4, 2/4, 3/4, 4/4. This assures
-that developers understand the order in which the patches should be
-applied and that they have reviewed or applied all of the patches in
-the patch series.
+Bear in mind that the "summary phrase" of your email becomes
+a globally-unique identifier for that patch. It propagates
+all the way into the git changelog. The "summary phrase" may
+later be used in developer discussions which refer to the patch.
+People will want to google for the "summary phrase" to read
+discussion regarding that patch.
 
 A couple of example Subjects:
 
@@ -542,31 +510,19 @@ the patch author in the changelog. The explanation
 body will be committed to the permanent source changelog, so should
 make sense to a competent reader who has long since forgotten the
 immediate details of the discussion that might
-have led to this patch. Including symptoms of the failure which the
-patch addresses (kernel log messages, oops messages, etc.) is
-especially useful for people who might be searching the commit logs
-looking for the applicable patch. If a patch fixes a compile failure,
-it may not be necessary to include _all_ of the compile failures; just
-enough that it is likely that someone searching for the patch can find
-it. As in the "summary phrase", it is important to be both succinct as
-well as descriptive.
+have led to this patch.
 
 The "---" marker line serves the essential purpose of marking for patch
 handling tools where the changelog message ends.
 
 One good use for the additional comments after the "---" marker is for
-a diffstat, to show what files have changed, and the number of
-inserted and deleted lines per file. A diffstat is especially useful
-on bigger patches. Other comments relevant only to the moment or the
-maintainer, not suitable for the permanent changelog, should also go
-here. A good example of such comments might be "patch changelogs"
-which describe what has changed between the v1 and v2 version of the
-patch.
-
-If you are going to include a diffstat after the "---" marker, please
-use diffstat options "-p 1 -w 70" so that filenames are listed from
-the top of the kernel source tree and don't use too much horizontal
-space (easily fit in 80 columns, maybe with some indentation).
+a diffstat, to show what files have changed, and the number of inserted
+and deleted lines per file. A diffstat is especially useful on bigger
+patches. Other comments relevant only to the moment or the maintainer,
+not suitable for the permanent changelog, should also go here.
+Use diffstat options "-p 1 -w 70" so that filenames are listed from the
+top of the kernel source tree and don't use too much horizontal space
+(easily fit in 80 columns, maybe with some indentation).
 
 See more details on the proper patch format in the following
 references.
diff --git a/trunk/Documentation/development-process/5.Posting b/trunk/Documentation/development-process/5.Posting
index f622c1e9f0f9..dd48132a74dd 100644
--- a/trunk/Documentation/development-process/5.Posting
+++ b/trunk/Documentation/development-process/5.Posting
@@ -119,7 +119,7 @@ which takes quite a bit of time and thought after the "real work" has
 been done. When done properly, though, it is time well spent.
 
-5.4: PATCH FORMATTING AND CHANGELOGS
+5.4: PATCH FORMATTING
 
 So now you have a perfect series of patches for posting, but the work is
 not done quite yet. Each patch needs to be formatted into a message which
@@ -146,33 +146,8 @@ that end, each patch will be composed of the following:
 
 - One or more tag lines, with, at a minimum, one Signed-off-by: line from
   the author of the patch. Tags will be described in more detail below.
 
-The items above, together, form the changelog for the patch. Writing good
-changelogs is a crucial but often-neglected art; it's worth spending
-another moment discussing this issue. When writing a changelog, you should
-bear in mind that a number of different people will be reading your words.
-These include subsystem maintainers and reviewers who need to decide
-whether the patch should be included, distributors and other maintainers
-trying to decide whether a patch should be backported to other kernels, bug
-hunters wondering whether the patch is responsible for a problem they are
-chasing, users who want to know how the kernel has changed, and more. A
-good changelog conveys the needed information to all of these people in the
-most direct and concise way possible.
-
-To that end, the summary line should describe the effects of and motivation
-for the change as well as possible given the one-line constraint. The
-detailed description can then amplify on those topics and provide any
-needed additional information. If the patch fixes a bug, cite the commit
-which introduced the bug if possible. If a problem is associated with
-specific log or compiler output, include that output to help others
-searching for a solution to the same problem. If the change is meant to
-support other changes coming in later patch, say so. If internal APIs are
-changed, detail those changes and how other developers should respond. In
-general, the more you can put yourself into the shoes of everybody who will
-be reading your changelog, the better that changelog (and the kernel as a
-whole) will be.
-
-Needless to say, the changelog should be the text used when committing the
-change to a revision control system. It will be followed by:
+The above three items should, normally, be the text used when committing
+the change to a revision control system. They are followed by:
 
 - The patch itself, in the unified ("-u") patch format. Using the "-p"
   option to diff will associate function names with changes, making the
diff --git a/trunk/Documentation/feature-removal-schedule.txt b/trunk/Documentation/feature-removal-schedule.txt
index ec9ef5d0d7b3..de491a3e2313 100644
--- a/trunk/Documentation/feature-removal-schedule.txt
+++ b/trunk/Documentation/feature-removal-schedule.txt
@@ -437,13 +437,3 @@ Why:	Superseded by tdfxfb. I2C/DDC support used to live in a separate
 	driver but this caused driver conflicts.
 Who:	Jean Delvare <khali@linux-fr.org>
 	Krzysztof Helt <krzysztof.h1@wp.pl>
-
-----------------------------
-
-What:	CONFIG_X86_OLD_MCE
-When:	2.6.32
-Why:	Remove the old legacy 32bit machine check code. This has been
-	superseded by the newer machine check code from the 64bit port,
-	but the old version has been kept around for easier testing. Note this
-	doesn't impact the old P5 and WinChip machine check handlers.
-Who:	Andi Kleen <andi@firstfloor.org>
diff --git a/trunk/Documentation/filesystems/debugfs.txt b/trunk/Documentation/filesystems/debugfs.txt
deleted file mode 100644
index ed52af60c2d8..000000000000
--- a/trunk/Documentation/filesystems/debugfs.txt
+++ /dev/null
@@ -1,158 +0,0 @@
-Copyright 2009 Jonathan Corbet <corbet@lwn.net>
-
-Debugfs exists as a simple way for kernel developers to make information
-available to user space. Unlike /proc, which is only meant for information
-about a process, or sysfs, which has strict one-value-per-file rules,
-debugfs has no rules at all. Developers can put any information they want
-there. The debugfs filesystem is also intended to not serve as a stable
-ABI to user space; in theory, there are no stability constraints placed on
-files exported there. The real world is not always so simple, though [1];
-even debugfs interfaces are best designed with the idea that they will need
-to be maintained forever.
-
-Debugfs is typically mounted with a command like:
-
-    mount -t debugfs none /sys/kernel/debug
-
-(Or an equivalent /etc/fstab line).
-
-Note that the debugfs API is exported GPL-only to modules.
-
-Code using debugfs should include <linux/debugfs.h>. Then, the first order
-of business will be to create at least one directory to hold a set of
-debugfs files:
-
-    struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
-
-This call, if successful, will make a directory called name underneath the
-indicated parent directory. If parent is NULL, the directory will be
-created in the debugfs root. On success, the return value is a struct
-dentry pointer which can be used to create files in the directory (and to
-clean it up at the end). A NULL return value indicates that something went
-wrong. If ERR_PTR(-ENODEV) is returned, that is an indication that the
-kernel has been built without debugfs support and none of the functions
-described below will work.
-
-The most general way to create a file within a debugfs directory is with:
-
-    struct dentry *debugfs_create_file(const char *name, mode_t mode,
-				       struct dentry *parent, void *data,
-				       const struct file_operations *fops);
-
-Here, name is the name of the file to create, mode describes the access
-permissions the file should have, parent indicates the directory which
-should hold the file, data will be stored in the i_private field of the
-resulting inode structure, and fops is a set of file operations which
-implement the file's behavior. At a minimum, the read() and/or write()
-operations should be provided; others can be included as needed. Again,
-the return value will be a dentry pointer to the created file, NULL for
-error, or ERR_PTR(-ENODEV) if debugfs support is missing.
-
-In a number of cases, the creation of a set of file operations is not
-actually necessary; the debugfs code provides a number of helper functions
-for simple situations. Files containing a single integer value can be
-created with any of:
-
-    struct dentry *debugfs_create_u8(const char *name, mode_t mode,
-				     struct dentry *parent, u8 *value);
-    struct dentry *debugfs_create_u16(const char *name, mode_t mode,
-				      struct dentry *parent, u16 *value);
-    struct dentry *debugfs_create_u32(const char *name, mode_t mode,
-				      struct dentry *parent, u32 *value);
-    struct dentry *debugfs_create_u64(const char *name, mode_t mode,
-				      struct dentry *parent, u64 *value);
-
-These files support both reading and writing the given value; if a specific
-file should not be written to, simply set the mode bits accordingly.
The -values in these files are in decimal; if hexadecimal is more appropriate, -the following functions can be used instead: - - struct dentry *debugfs_create_x8(const char *name, mode_t mode, - struct dentry *parent, u8 *value); - struct dentry *debugfs_create_x16(const char *name, mode_t mode, - struct dentry *parent, u16 *value); - struct dentry *debugfs_create_x32(const char *name, mode_t mode, - struct dentry *parent, u32 *value); - -Note that there is no debugfs_create_x64(). - -These functions are useful as long as the developer knows the size of the -value to be exported. Some types can have different widths on different -architectures, though, complicating the situation somewhat. There is a -function meant to help out in one special case: - - struct dentry *debugfs_create_size_t(const char *name, mode_t mode, - struct dentry *parent, - size_t *value); - -As might be expected, this function will create a debugfs file to represent -a variable of type size_t. - -Boolean values can be placed in debugfs with: - - struct dentry *debugfs_create_bool(const char *name, mode_t mode, - struct dentry *parent, u32 *value); - -A read on the resulting file will yield either Y (for non-zero values) or -N, followed by a newline. If written to, it will accept either upper- or -lower-case values, or 1 or 0. Any other input will be silently ignored. - -Finally, a block of arbitrary binary data can be exported with: - - struct debugfs_blob_wrapper { - void *data; - unsigned long size; - }; - - struct dentry *debugfs_create_blob(const char *name, mode_t mode, - struct dentry *parent, - struct debugfs_blob_wrapper *blob); - -A read of this file will return the data pointed to by the -debugfs_blob_wrapper structure. Some drivers use "blobs" as a simple way -to return several lines of (static) formatted text output. This function -can be used to export binary information, but there does not appear to be -any code which does so in the mainline. Note that all files created with -debugfs_create_blob() are read-only. - -There are a couple of other directory-oriented helper functions: - - struct dentry *debugfs_rename(struct dentry *old_dir, - struct dentry *old_dentry, - struct dentry *new_dir, - const char *new_name); - - struct dentry *debugfs_create_symlink(const char *name, - struct dentry *parent, - const char *target); - -A call to debugfs_rename() will give a new name to an existing debugfs -file, possibly in a different directory. The new_name must not exist prior -to the call; the return value is old_dentry with updated information. -Symbolic links can be created with debugfs_create_symlink(). - -There is one important thing that all debugfs users must take into account: -there is no automatic cleanup of any directories created in debugfs. If a -module is unloaded without explicitly removing debugfs entries, the result -will be a lot of stale pointers and no end of highly antisocial behavior. -So all debugfs users - at least those which can be built as modules - must -be prepared to remove all files and directories they create there. A file -can be removed with: - - void debugfs_remove(struct dentry *dentry); - -The dentry value can be NULL, in which case nothing will be removed. - -Once upon a time, debugfs users were required to remember the dentry -pointer for every debugfs file they created so that all files could be -cleaned up. 
We live in more civilized times now, though, and debugfs users -can call: - - void debugfs_remove_recursive(struct dentry *dentry); - -If this function is passed a pointer for the dentry corresponding to the -top-level directory, the entire hierarchy below that directory will be -removed. - -Notes: - [1] http://lwn.net/Articles/309298/ diff --git a/trunk/Documentation/x86/x86_64/boot-options.txt b/trunk/Documentation/x86/x86_64/boot-options.txt index 29a6ff8bc7d3..2db5893d6c97 100644 --- a/trunk/Documentation/x86/x86_64/boot-options.txt +++ b/trunk/Documentation/x86/x86_64/boot-options.txt @@ -5,51 +5,21 @@ only the AMD64 specific ones are listed here. Machine check - Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables. - - mce=off - Disable machine check - mce=no_cmci - Disable CMCI(Corrected Machine Check Interrupt) that - Intel processor supports. Usually this disablement is - not recommended, but it might be handy if your hardware - is misbehaving. - Note that you'll get more problems without CMCI than with - due to the shared banks, i.e. you might get duplicated - error logs. - mce=dont_log_ce - Don't make logs for corrected errors. All events reported - as corrected are silently cleared by OS. - This option will be useful if you have no interest in any - of corrected errors. - mce=ignore_ce - Disable features for corrected errors, e.g. polling timer - and CMCI. All events reported as corrected are not cleared - by OS and remained in its error banks. - Usually this disablement is not recommended, however if - there is an agent checking/clearing corrected errors - (e.g. BIOS or hardware monitoring applications), conflicting - with OS's error handling, and you cannot deactivate the agent, - then this option will be a help. - mce=bootlog - Enable logging of machine checks left over from booting. - Disabled by default on AMD because some BIOS leave bogus ones. - If your BIOS doesn't do that it's a good idea to enable though - to make sure you log even machine check events that result - in a reboot. On Intel systems it is enabled by default. + mce=off disable machine check + mce=bootlog Enable logging of machine checks left over from booting. + Disabled by default on AMD because some BIOS leave bogus ones. + If your BIOS doesn't do that it's a good idea to enable though + to make sure you log even machine check events that result + in a reboot. On Intel systems it is enabled by default. mce=nobootlog Disable boot machine check logging. - mce=tolerancelevel[,monarchtimeout] (number,number) - tolerance levels: + mce=tolerancelevel (number) 0: always panic on uncorrected errors, log corrected errors 1: panic or SIGBUS on uncorrected errors, log corrected errors 2: SIGBUS or log uncorrected errors, log corrected errors 3: never panic or SIGBUS, log all errors (for testing only) Default is 1 Can be also set using sysfs which is preferable. - monarchtimeout: - Sets the time in us to wait for other CPUs on machine checks. 0 - to disable. nomce (for compatibility with i386): same as mce=off diff --git a/trunk/Documentation/x86/x86_64/machinecheck b/trunk/Documentation/x86/x86_64/machinecheck index b1fb30273286..a05e58e7b159 100644 --- a/trunk/Documentation/x86/x86_64/machinecheck +++ b/trunk/Documentation/x86/x86_64/machinecheck @@ -41,9 +41,7 @@ check_interval the polling interval. When the poller stops finding MCEs, it triggers an exponential backoff (poll less often) on the polling interval. The check_interval variable is both the initial and - maximum polling interval. 
0 means no polling for corrected machine - check errors (but some corrected errors might be still reported - in other ways) + maximum polling interval. tolerant Tolerance level. When a machine check exception occurs for a non @@ -69,10 +67,6 @@ trigger Program to run when a machine check event is detected. This is an alternative to running mcelog regularly from cron and allows to detect events faster. -monarch_timeout - How long to wait for the other CPUs to machine check too on a - exception. 0 to disable waiting for other CPUs. - Unit: us TBD document entries for AMD threshold interrupt configuration diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 356d2ec8e2fb..68f5578fe38e 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -789,26 +789,10 @@ config X86_MCE to disable it. MCE support simply ignores non-MCE processors like the 386 and 486, so nearly everyone can say Y here. -config X86_OLD_MCE - depends on X86_32 && X86_MCE - bool "Use legacy machine check code (will go away)" - default n - select X86_ANCIENT_MCE - ---help--- - Use the old i386 machine check code. This is merely intended for - testing in a transition period. Try this if you run into any machine - check related software problems, but report the problem to - linux-kernel. When in doubt say no. - -config X86_NEW_MCE - depends on X86_MCE - bool - default y if (!X86_OLD_MCE && X86_32) || X86_64 - config X86_MCE_INTEL def_bool y prompt "Intel MCE features" - depends on X86_NEW_MCE && X86_LOCAL_APIC + depends on X86_64 && X86_MCE && X86_LOCAL_APIC ---help--- Additional support for intel specific MCE features such as the thermal monitor. @@ -816,36 +800,19 @@ config X86_MCE_INTEL config X86_MCE_AMD def_bool y prompt "AMD MCE features" - depends on X86_NEW_MCE && X86_LOCAL_APIC + depends on X86_64 && X86_MCE && X86_LOCAL_APIC ---help--- Additional support for AMD specific MCE features such as the DRAM Error Threshold. -config X86_ANCIENT_MCE - def_bool n - depends on X86_32 - prompt "Support for old Pentium 5 / WinChip machine checks" - ---help--- - Include support for machine check handling on old Pentium 5 or WinChip - systems. These typically need to be enabled explicitely on the command - line. - config X86_MCE_THRESHOLD depends on X86_MCE_AMD || X86_MCE_INTEL bool default y -config X86_MCE_INJECT - depends on X86_NEW_MCE - tristate "Machine check injector support" - ---help--- - Provide support for injecting machine checks for testing purposes. - If you don't know what a machine check is and you don't do kernel - QA it is safe to say n. - config X86_MCE_NONFATAL tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" - depends on X86_OLD_MCE + depends on X86_32 && X86_MCE ---help--- Enabling this feature starts a timer that triggers every 5 seconds which will look at the machine check registers to see if anything happened. @@ -858,15 +825,11 @@ config X86_MCE_NONFATAL config X86_MCE_P4THERMAL bool "check for P4 thermal throttling interrupt." - depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP) + depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) ---help--- Enabling this feature will cause a message to be printed when the P4 enters thermal throttling. 
-config X86_THERMAL_VECTOR - def_bool y - depends on X86_MCE_P4THERMAL || X86_MCE_INTEL - config VM86 bool "Enable VM86 support" if EMBEDDED default y diff --git a/trunk/arch/x86/crypto/Makefile b/trunk/arch/x86/crypto/Makefile index cfb0010fa940..ebe7deedd5b4 100644 --- a/trunk/arch/x86/crypto/Makefile +++ b/trunk/arch/x86/crypto/Makefile @@ -2,8 +2,6 @@ # Arch-specific CryptoAPI modules. # -obj-$(CONFIG_CRYPTO_FPU) += fpu.o - obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o diff --git a/trunk/arch/x86/crypto/aesni-intel_glue.c b/trunk/arch/x86/crypto/aesni-intel_glue.c index 4e663398f77f..02af0af65497 100644 --- a/trunk/arch/x86/crypto/aesni-intel_glue.c +++ b/trunk/arch/x86/crypto/aesni-intel_glue.c @@ -21,22 +21,6 @@ #include #include -#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) -#define HAS_CTR -#endif - -#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) -#define HAS_LRW -#endif - -#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) -#define HAS_PCBC -#endif - -#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) -#define HAS_XTS -#endif - struct async_aes_ctx { struct cryptd_ablkcipher *cryptd_tfm; }; @@ -153,41 +137,6 @@ static struct crypto_alg aesni_alg = { } }; -static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - aesni_enc(ctx, dst, src); -} - -static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - aesni_dec(ctx, dst, src); -} - -static struct crypto_alg __aesni_alg = { - .cra_name = "__aes-aesni", - .cra_driver_name = "__driver-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = __aes_encrypt, - .cia_decrypt = __aes_decrypt - } - } -}; - static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -328,16 +277,8 @@ static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int key_len) { struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; - int err; - crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) - & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, key_len); - crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) - & CRYPTO_TFM_RES_MASK); - return err; + return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len); } static int ablk_encrypt(struct ablkcipher_request *req) @@ -470,163 +411,6 @@ static struct crypto_alg ablk_cbc_alg = { }, }; -#ifdef HAS_CTR -static int ablk_ctr_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_ctr_alg = { - .cra_name = "ctr(aes)", - 
.cra_driver_name = "ctr-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), - .cra_init = ablk_ctr_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - .geniv = "chainiv", - }, - }, -}; -#endif - -#ifdef HAS_LRW -static int ablk_lrw_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_lrw_alg = { - .cra_name = "lrw(aes)", - .cra_driver_name = "lrw-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), - .cra_init = ablk_lrw_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; -#endif - -#ifdef HAS_PCBC -static int ablk_pcbc_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_pcbc_alg = { - .cra_name = "pcbc(aes)", - .cra_driver_name = "pcbc-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list), - .cra_init = ablk_pcbc_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; -#endif - -#ifdef HAS_XTS -static int ablk_xts_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_xts_alg = { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), - .cra_init = ablk_xts_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * 
AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; -#endif - static int __init aesni_init(void) { int err; @@ -637,8 +421,6 @@ static int __init aesni_init(void) } if ((err = crypto_register_alg(&aesni_alg))) goto aes_err; - if ((err = crypto_register_alg(&__aesni_alg))) - goto __aes_err; if ((err = crypto_register_alg(&blk_ecb_alg))) goto blk_ecb_err; if ((err = crypto_register_alg(&blk_cbc_alg))) @@ -647,41 +429,9 @@ static int __init aesni_init(void) goto ablk_ecb_err; if ((err = crypto_register_alg(&ablk_cbc_alg))) goto ablk_cbc_err; -#ifdef HAS_CTR - if ((err = crypto_register_alg(&ablk_ctr_alg))) - goto ablk_ctr_err; -#endif -#ifdef HAS_LRW - if ((err = crypto_register_alg(&ablk_lrw_alg))) - goto ablk_lrw_err; -#endif -#ifdef HAS_PCBC - if ((err = crypto_register_alg(&ablk_pcbc_alg))) - goto ablk_pcbc_err; -#endif -#ifdef HAS_XTS - if ((err = crypto_register_alg(&ablk_xts_alg))) - goto ablk_xts_err; -#endif return err; -#ifdef HAS_XTS -ablk_xts_err: -#endif -#ifdef HAS_PCBC - crypto_unregister_alg(&ablk_pcbc_alg); -ablk_pcbc_err: -#endif -#ifdef HAS_LRW - crypto_unregister_alg(&ablk_lrw_alg); -ablk_lrw_err: -#endif -#ifdef HAS_CTR - crypto_unregister_alg(&ablk_ctr_alg); -ablk_ctr_err: -#endif - crypto_unregister_alg(&ablk_cbc_alg); ablk_cbc_err: crypto_unregister_alg(&ablk_ecb_alg); ablk_ecb_err: @@ -689,8 +439,6 @@ static int __init aesni_init(void) blk_cbc_err: crypto_unregister_alg(&blk_ecb_alg); blk_ecb_err: - crypto_unregister_alg(&__aesni_alg); -__aes_err: crypto_unregister_alg(&aesni_alg); aes_err: return err; @@ -698,23 +446,10 @@ static int __init aesni_init(void) static void __exit aesni_exit(void) { -#ifdef HAS_XTS - crypto_unregister_alg(&ablk_xts_alg); -#endif -#ifdef HAS_PCBC - crypto_unregister_alg(&ablk_pcbc_alg); -#endif -#ifdef HAS_LRW - crypto_unregister_alg(&ablk_lrw_alg); -#endif -#ifdef HAS_CTR - crypto_unregister_alg(&ablk_ctr_alg); -#endif crypto_unregister_alg(&ablk_cbc_alg); crypto_unregister_alg(&ablk_ecb_alg); crypto_unregister_alg(&blk_cbc_alg); crypto_unregister_alg(&blk_ecb_alg); - crypto_unregister_alg(&__aesni_alg); crypto_unregister_alg(&aesni_alg); } diff --git a/trunk/arch/x86/crypto/fpu.c b/trunk/arch/x86/crypto/fpu.c deleted file mode 100644 index 5f9781a3815f..000000000000 --- a/trunk/arch/x86/crypto/fpu.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * FPU: Wrapper for blkcipher touching fpu - * - * Copyright (c) Intel Corp. - * Author: Huang Ying - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- *
- */
-
-#include <crypto/algapi.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/i387.h>
-
-struct crypto_fpu_ctx {
-	struct crypto_blkcipher *child;
-};
-
-static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
-			     unsigned int keylen)
-{
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
-	struct crypto_blkcipher *child = ctx->child;
-	int err;
-
-	crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				   CRYPTO_TFM_REQ_MASK);
-	err = crypto_blkcipher_setkey(child, key, keylen);
-	crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
-			     CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
-{
-	int err;
-	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-	struct crypto_blkcipher *child = ctx->child;
-	struct blkcipher_desc desc = {
-		.tfm = child,
-		.info = desc_in->info,
-		.flags = desc_in->flags,
-	};
-
-	kernel_fpu_begin();
-	err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
-	kernel_fpu_end();
-	return err;
-}
-
-static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
-{
-	int err;
-	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-	struct crypto_blkcipher *child = ctx->child;
-	struct blkcipher_desc desc = {
-		.tfm = child,
-		.info = desc_in->info,
-		.flags = desc_in->flags,
-	};
-
-	kernel_fpu_begin();
-	err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
-	kernel_fpu_end();
-	return err;
-}
-
-static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
-{
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_blkcipher *cipher;
-
-	cipher = crypto_spawn_blkcipher(spawn);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
-
-	ctx->child = cipher;
-	return 0;
-}
-
-static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-	crypto_free_blkcipher(ctx->child);
-}
-
-static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
-{
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
-	int err;
-
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
-	if (err)
-		return ERR_PTR(err);
-
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
-
-	inst = crypto_alloc_instance("fpu", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
-
-	inst->alg.cra_flags = alg->cra_flags;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = alg->cra_type;
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
-	inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
-	inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
-	inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
-	inst->alg.cra_init = crypto_fpu_init_tfm;
-	inst->alg.cra_exit = crypto_fpu_exit_tfm;
-	inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
-	inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
-	inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
-
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
-
-static void crypto_fpu_free(struct crypto_instance *inst)
-{
-
crypto_drop_spawn(crypto_instance_ctx(inst)); - kfree(inst); -} - -static struct crypto_template crypto_fpu_tmpl = { - .name = "fpu", - .alloc = crypto_fpu_alloc, - .free = crypto_fpu_free, - .module = THIS_MODULE, -}; - -static int __init crypto_fpu_module_init(void) -{ - return crypto_register_template(&crypto_fpu_tmpl); -} - -static void __exit crypto_fpu_module_exit(void) -{ - crypto_unregister_template(&crypto_fpu_tmpl); -} - -module_init(crypto_fpu_module_init); -module_exit(crypto_fpu_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("FPU block cipher wrapper"); diff --git a/trunk/arch/x86/include/asm/entry_arch.h b/trunk/arch/x86/include/asm/entry_arch.h index ff8cbfa07851..d750a10ccad6 100644 --- a/trunk/arch/x86/include/asm/entry_arch.h +++ b/trunk/arch/x86/include/asm/entry_arch.h @@ -14,7 +14,6 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) -BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, smp_invalidate_interrupt) @@ -53,16 +52,8 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif -#ifdef CONFIG_X86_THERMAL_VECTOR +#ifdef CONFIG_X86_MCE_P4THERMAL BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #endif -#ifdef CONFIG_X86_MCE_THRESHOLD -BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_NEW_MCE -BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR) -#endif - #endif diff --git a/trunk/arch/x86/include/asm/hardirq.h b/trunk/arch/x86/include/asm/hardirq.h index 82e3e8f01043..9ebc5c255032 100644 --- a/trunk/arch/x86/include/asm/hardirq.h +++ b/trunk/arch/x86/include/asm/hardirq.h @@ -22,7 +22,7 @@ typedef struct { #endif #ifdef CONFIG_X86_MCE unsigned int irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +# ifdef CONFIG_X86_64 unsigned int irq_threshold_count; # endif #endif diff --git a/trunk/arch/x86/include/asm/hw_irq.h b/trunk/arch/x86/include/asm/hw_irq.h index ba180d93b08c..6df45f639666 100644 --- a/trunk/arch/x86/include/asm/hw_irq.h +++ b/trunk/arch/x86/include/asm/hw_irq.h @@ -34,7 +34,6 @@ extern void perf_pending_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); extern void reschedule_interrupt(void); -extern void mce_self_interrupt(void); extern void invalidate_interrupt(void); extern void invalidate_interrupt0(void); @@ -47,7 +46,6 @@ extern void invalidate_interrupt6(void); extern void invalidate_interrupt7(void); extern void irq_move_cleanup_interrupt(void); -extern void reboot_interrupt(void); extern void threshold_interrupt(void); extern void call_function_interrupt(void); diff --git a/trunk/arch/x86/include/asm/irq_vectors.h b/trunk/arch/x86/include/asm/irq_vectors.h index 5b21f0ec3df2..e997be98c9b9 100644 --- a/trunk/arch/x86/include/asm/irq_vectors.h +++ b/trunk/arch/x86/include/asm/irq_vectors.h @@ -25,7 +25,6 @@ */ #define NMI_VECTOR 0x02 -#define MCE_VECTOR 0x12 /* * IDT vectors usable for external interrupt sources start @@ -88,8 +87,13 @@ #define CALL_FUNCTION_VECTOR 0xfc #define CALL_FUNCTION_SINGLE_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -#define REBOOT_VECTOR 0xf8 + +#ifdef CONFIG_X86_32 +/* 0xf8 - 0xf9 : free */ +#else +# define THRESHOLD_APIC_VECTOR 0xf9 +# 
define UV_BAU_MESSAGE 0xf8 +#endif /* f0-f7 used for spreading out TLB flushes: */ #define INVALIDATE_TLB_VECTOR_END 0xf7 @@ -113,13 +117,6 @@ */ #define LOCAL_PENDING_VECTOR 0xec -#define UV_BAU_MESSAGE 0xec - -/* - * Self IPI vector for machine checks - */ -#define MCE_SELF_VECTOR 0xeb - /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority diff --git a/trunk/arch/x86/include/asm/mce.h b/trunk/arch/x86/include/asm/mce.h index 540a466e50f5..4f8c199584e7 100644 --- a/trunk/arch/x86/include/asm/mce.h +++ b/trunk/arch/x86/include/asm/mce.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_MCE_H #define _ASM_X86_MCE_H +#ifdef __x86_64__ + #include #include @@ -8,35 +10,21 @@ * Machine Check support for x86 */ -#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ -#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ -#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ -#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ -#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ -#define MCG_EXT_CNT_SHIFT 16 -#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) -#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ - -#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ -#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ -#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ - -#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ -#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ -#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ -#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ -#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ -#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ -#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ -#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ -#define MCI_STATUS_AR (1ULL<<55) /* Action required */ - -/* MISC register defines */ -#define MCM_ADDR_SEGOFF 0 /* segment offset */ -#define MCM_ADDR_LINEAR 1 /* linear address */ -#define MCM_ADDR_PHYS 2 /* physical address */ -#define MCM_ADDR_MEM 3 /* memory address */ -#define MCM_ADDR_GENERIC 7 /* generic */ +#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ +#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ +#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ + +#define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */ + +#define MCI_STATUS_VAL (1UL<<63) /* valid error */ +#define MCI_STATUS_OVER (1UL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1UL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1UL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1UL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1UL<<58) /* addr reg. valid */ +#define MCI_STATUS_PCC (1UL<<57) /* processor context corrupt */ /* Fields are zero when not available */ struct mce { @@ -46,19 +34,13 @@ struct mce { __u64 mcgstatus; __u64 ip; __u64 tsc; /* cpu time stamp counter */ - __u64 time; /* wall time_t when error was detected */ - __u8 cpuvendor; /* cpu vendor as encoded in system.h */ - __u8 pad1; - __u16 pad2; - __u32 cpuid; /* CPUID 1 EAX */ + __u64 res1; /* for future extension */ + __u64 res2; /* dito. 
*/ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ - __u8 cpu; /* cpu number; obsolete; use extcpu now */ + __u8 cpu; /* cpu that raised the error */ __u8 finished; /* entry is valid */ - __u32 extcpu; /* linux cpu number that detected the error */ - __u32 socketid; /* CPU socket ID */ - __u32 apicid; /* CPU initial apic ID */ - __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ + __u32 pad; }; /* @@ -75,7 +57,7 @@ struct mce_log { unsigned len; /* = MCE_LOG_LEN */ unsigned next; unsigned flags; - unsigned recordlen; /* length of struct mce */ + unsigned pad0; struct mce entry[MCE_LOG_LEN]; }; @@ -100,16 +82,19 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) +#endif /* __x86_64__ */ + #ifdef __KERNEL__ +#ifdef CONFIG_X86_32 extern int mce_disabled; +#else /* CONFIG_X86_32 */ #include -#include void mce_setup(struct mce *m); void mce_log(struct mce *m); -DECLARE_PER_CPU(struct sys_device, mce_dev); +DECLARE_PER_CPU(struct sys_device, device_mce); extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); /* @@ -119,8 +104,6 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) #ifdef CONFIG_X86_MCE_INTEL -extern int mce_cmci_disabled; -extern int mce_ignore_ce; void mce_intel_feature_init(struct cpuinfo_x86 *c); void cmci_clear(void); void cmci_reenable(void); @@ -140,16 +123,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c); static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif -int mce_available(struct cpuinfo_x86 *c); - -DECLARE_PER_CPU(unsigned, mce_exception_count); -DECLARE_PER_CPU(unsigned, mce_poll_count); +extern int mce_available(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); extern atomic_t mce_entry; -void do_machine_check(struct pt_regs *, long); +extern void do_machine_check(struct pt_regs *, long); typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); @@ -159,16 +139,14 @@ enum mcp_flags { MCP_UC = (1 << 1), /* log uncorrected errors */ MCP_DONTLOG = (1 << 2), /* only clear, don't log */ }; -void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); +extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); -int mce_notify_irq(void); -void mce_notify_process(void); +extern int mce_notify_user(void); -DECLARE_PER_CPU(struct mce, injectm); -extern struct file_operations mce_chrdev_ops; +#endif /* !CONFIG_X86_32 */ #ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); +extern void mcheck_init(struct cpuinfo_x86 *c); #else #define mcheck_init(c) do { } while (0) #endif diff --git a/trunk/arch/x86/include/asm/msr-index.h b/trunk/arch/x86/include/asm/msr-index.h index 1692fb5050e3..4d58d04fca83 100644 --- a/trunk/arch/x86/include/asm/msr-index.h +++ b/trunk/arch/x86/include/asm/msr-index.h @@ -207,14 +207,7 @@ #define MSR_IA32_THERM_CONTROL 0x0000019a #define MSR_IA32_THERM_INTERRUPT 0x0000019b - -#define THERM_INT_LOW_ENABLE (1 << 0) -#define THERM_INT_HIGH_ENABLE (1 << 1) - #define MSR_IA32_THERM_STATUS 0x0000019c - -#define THERM_STATUS_PROCHOT (1 << 0) - #define MSR_IA32_MISC_ENABLE 0x000001a0 /* MISC_ENABLE bits: architectural */ diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c index 8c7c042ecad1..076d3881f3da 100644 --- a/trunk/arch/x86/kernel/apic/apic.c +++ b/trunk/arch/x86/kernel/apic/apic.c @@ -899,7 +899,7 @@ 
void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_THERMAL_VECTOR +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -2017,7 +2017,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_THERMAL_VECTOR +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif diff --git a/trunk/arch/x86/kernel/apic/nmi.c b/trunk/arch/x86/kernel/apic/nmi.c index b3025b43b63a..a691302dc3ff 100644 --- a/trunk/arch/x86/kernel/apic/nmi.c +++ b/trunk/arch/x86/kernel/apic/nmi.c @@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu) static inline int mce_in_progress(void) { -#if defined(CONFIG_X86_NEW_MCE) +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) return atomic_read(&mce_entry) > 0; #endif return 0; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/Makefile b/trunk/arch/x86/kernel/cpu/mcheck/Makefile index 45004faf67ea..b2f89829bbe8 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/Makefile +++ b/trunk/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,11 +1,7 @@ -obj-y = mce.o therm_throt.o +obj-y = mce_$(BITS).o therm_throt.o -obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o -obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o -obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o -obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o -obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o +obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o +obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o -obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o diff --git a/trunk/arch/x86/kernel/cpu/mcheck/k7.c b/trunk/arch/x86/kernel/cpu/mcheck/k7.c index 89e510424152..dd3af6e7b39a 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/k7.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/k7.c @@ -2,10 +2,11 @@ * Athlon specific Machine Check Exception Reporting * (C) Copyright 2002 Dave Jones */ -#include -#include -#include + #include +#include +#include +#include #include #include @@ -14,12 +15,12 @@ #include "mce.h" -/* Machine Check Handler For AMD Athlon/Duron: */ +/* Machine Check Handler For AMD Athlon/Duron */ static void k7_machine_check(struct pt_regs *regs, long error_code) { + int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; - int recover = 1; int i; rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); @@ -31,19 +32,15 @@ static void k7_machine_check(struct pt_regs *regs, long error_code) for (i = 1; i < nr_mce_banks; i++) { rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); - if (high & (1<<31)) { + if (high&(1<<31)) { char misc[20]; char addr[24]; - - misc[0] = '\0'; - addr[0] = '\0'; - + misc[0] = addr[0] = '\0'; if (high & (1<<29)) recover |= 1; if (high & (1<<25)) recover |= 2; high &= ~(1<<31); - if (high & (1<<27)) { rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); snprintf(misc, 20, "[%08x%08x]", ahigh, alow); @@ -52,31 +49,27 @@ static void k7_machine_check(struct pt_regs *regs, long error_code) rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); snprintf(addr, 24, " at %08x%08x", ahigh, alow); } - printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", smp_processor_id(), i, high, low, misc, addr); - - 
/* Clear it: */ + /* Clear it */ wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); - /* Serialize: */ + /* Serialize */ wmb(); add_taint(TAINT_MACHINE_CHECK); } } - if (recover & 2) + if (recover&2) panic("CPU context corrupt"); - if (recover & 1) + if (recover&1) panic("Unable to continue"); - printk(KERN_EMERG "Attempting to continue.\n"); - mcgstl &= ~(1<<2); wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } -/* AMD K7 machine check is Intel like: */ +/* AMD K7 machine check is Intel like */ void amd_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; @@ -86,26 +79,21 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) return; machine_check_vector = k7_machine_check; - /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); printk(KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<8)) /* Control register present ? */ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; - /* - * Clear status for MC index 0 separately, we don't touch CTL, - * as some K7 Athlons cause spurious MCEs when its enabled: - */ + /* Clear status for MC index 0 separately, we don't touch CTL, + * as some K7 Athlons cause spurious MCEs when its enabled. */ if (boot_cpu_data.x86 == 6) { wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); i = 1; } else i = 0; - for (; i < nr_mce_banks; i++) { wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c deleted file mode 100644 index a3a235a53f09..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Machine check injection support. - * Copyright 2008 Intel Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Authors: - * Andi Kleen - * Ying Huang - */ -#include -#include -#include -#include -#include -#include -#include -#include - -/* Update fake mce registers on current CPU. 
*/ -static void inject_mce(struct mce *m) -{ - struct mce *i = &per_cpu(injectm, m->extcpu); - - /* Make sure noone reads partially written injectm */ - i->finished = 0; - mb(); - m->finished = 0; - /* First set the fields after finished */ - i->extcpu = m->extcpu; - mb(); - /* Now write record in order, finished last (except above) */ - memcpy(i, m, sizeof(struct mce)); - /* Finally activate it */ - mb(); - i->finished = 1; -} - -struct delayed_mce { - struct timer_list timer; - struct mce m; -}; - -/* Inject mce on current CPU */ -static void raise_mce(unsigned long data) -{ - struct delayed_mce *dm = (struct delayed_mce *)data; - struct mce *m = &dm->m; - int cpu = m->extcpu; - - inject_mce(m); - if (m->status & MCI_STATUS_UC) { - struct pt_regs regs; - memset(®s, 0, sizeof(struct pt_regs)); - regs.ip = m->ip; - regs.cs = m->cs; - printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); - do_machine_check(®s, 0); - printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); - } else { - mce_banks_t b; - memset(&b, 0xff, sizeof(mce_banks_t)); - printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); - machine_check_poll(0, &b); - mce_notify_irq(); - printk(KERN_INFO "Finished machine check poll on CPU %d\n", - cpu); - } - kfree(dm); -} - -/* Error injection interface */ -static ssize_t mce_write(struct file *filp, const char __user *ubuf, - size_t usize, loff_t *off) -{ - struct delayed_mce *dm; - struct mce m; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - /* - * There are some cases where real MSR reads could slip - * through. - */ - if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) - return -EIO; - - if ((unsigned long)usize > sizeof(struct mce)) - usize = sizeof(struct mce); - if (copy_from_user(&m, ubuf, usize)) - return -EFAULT; - - if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) - return -EINVAL; - - dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); - if (!dm) - return -ENOMEM; - - /* - * Need to give user space some time to set everything up, - * so do it a jiffie or two later everywhere. - * Should we use a hrtimer here for better synchronization? - */ - memcpy(&dm->m, &m, sizeof(struct mce)); - setup_timer(&dm->timer, raise_mce, (unsigned long)dm); - dm->timer.expires = jiffies + 2; - add_timer_on(&dm->timer, m.extcpu); - return usize; -} - -static int inject_init(void) -{ - printk(KERN_INFO "Machine check injector initialized\n"); - mce_chrdev_ops.write = mce_write; - return 0; -} - -module_init(inject_init); -/* - * Cannot tolerate unloading currently because we cannot - * guarantee all openers of mce_chrdev will get a reference to us. - */ -MODULE_LICENSE("GPL"); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-internal.h b/trunk/arch/x86/kernel/cpu/mcheck/mce-internal.h deleted file mode 100644 index 54dcb8ff12e5..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ /dev/null @@ -1,15 +0,0 @@ -#include - -enum severity_level { - MCE_NO_SEVERITY, - MCE_KEEP_SEVERITY, - MCE_SOME_SEVERITY, - MCE_AO_SEVERITY, - MCE_UC_SEVERITY, - MCE_AR_SEVERITY, - MCE_PANIC_SEVERITY, -}; - -int mce_severity(struct mce *a, int tolerant, char **msg); - -extern int mce_ser; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-severity.c b/trunk/arch/x86/kernel/cpu/mcheck/mce-severity.c deleted file mode 100644 index ff0807f97056..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * MCE grading rules. - * Copyright 2008, 2009 Intel Corporation. 
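/*
 * The deleted inject_mce() above publishes a record with a "finished"
 * flag and explicit barriers: clear the flag, copy the payload, then
 * set the flag last, so a racing reader either sees finished == 0 or a
 * fully written record. A minimal single-writer sketch of that publish
 * protocol using C11 atomics (the kernel uses mb() instead):
 */
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

struct record {
	atomic_int finished;	/* 1 only when payload is complete */
	char payload[32];	/* stand-in for the struct mce fields */
};

static struct record slot;

static void publish(const char *msg)
{
	atomic_store(&slot.finished, 0);	/* invalidate old contents */
	strncpy(slot.payload, msg, sizeof(slot.payload) - 1);
	atomic_store(&slot.finished, 1);	/* activate, strictly last */
}

static int consume(char *buf, size_t len)
{
	if (!atomic_load(&slot.finished))
		return 0;			/* not ready; retry later */
	strncpy(buf, slot.payload, len - 1);
	return 1;
}

int main(void)
{
	char buf[32] = "";

	publish("fake mce record");
	if (consume(buf, sizeof(buf)))
		printf("consumed: %s\n", buf);
	return 0;
}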
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Author: Andi Kleen - */ -#include -#include -#include -#include -#include - -#include "mce-internal.h" - -/* - * Grade an mce by severity. In general the most severe ones are processed - * first. Since there are quite a lot of combinations test the bits in a - * table-driven way. The rules are simply processed in order, first - * match wins. - * - * Note this is only used for machine check exceptions, the corrected - * errors use much simpler rules. The exceptions still check for the corrected - * errors, but only to leave them alone for the CMCI handler (except for - * panic situations) - */ - -enum context { IN_KERNEL = 1, IN_USER = 2 }; -enum ser { SER_REQUIRED = 1, NO_SER = 2 }; - -static struct severity { - u64 mask; - u64 result; - unsigned char sev; - unsigned char mcgmask; - unsigned char mcgres; - unsigned char ser; - unsigned char context; - unsigned char covered; - char *msg; -} severities[] = { -#define KERNEL .context = IN_KERNEL -#define USER .context = IN_USER -#define SER .ser = SER_REQUIRED -#define NOSER .ser = NO_SER -#define SEV(s) .sev = MCE_ ## s ## _SEVERITY -#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } -#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } -#define MCGMASK(x, res, s, m, r...) \ - { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } -#define MASK(x, y, s, m, r...) \ - { .mask = x, .result = y, SEV(s), .msg = m, ## r } -#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) -#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) -#define MCACOD 0xffff - - BITCLR(MCI_STATUS_VAL, NO, "Invalid"), - BITCLR(MCI_STATUS_EN, NO, "Not enabled"), - BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), - /* When MCIP is not set something is very confused */ - MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), - /* Neither return not error IP -- no chance to recover -> PANIC */ - MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, - "Neither restart nor error IP"), - MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", - KERNEL), - BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), - MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, - "Spurious not enabled", SER), - - /* ignore OVER for UCNA */ - MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, - "Uncorrected no action required", SER), - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, - "Illegal combination (UCNA with AR=1)", SER), - MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), - - /* AR add known MCACODs here */ - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, - "Action required with lost events", SER), - MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, - "Action required; unknown MCACOD", SER), - - /* known AO MCACODs: */ - MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, - "Action optional: memory scrubbing error", SER), - MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, - "Action optional: last level cache writeback error", SER), - - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, - "Action optional unknown MCACOD", SER), - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, - "Action optional with lost events", SER), - BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed 
uncorrected"), - BITSET(MCI_STATUS_UC, UC, "Uncorrected"), - BITSET(0, SOME, "No match") /* always matches. keep at end */ -}; - -/* - * If the EIPV bit is set, it means the saved IP is the - * instruction which caused the MCE. - */ -static int error_context(struct mce *m) -{ - if (m->mcgstatus & MCG_STATUS_EIPV) - return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; - /* Unknown, assume kernel */ - return IN_KERNEL; -} - -int mce_severity(struct mce *a, int tolerant, char **msg) -{ - enum context ctx = error_context(a); - struct severity *s; - - for (s = severities;; s++) { - if ((a->status & s->mask) != s->result) - continue; - if ((a->mcgstatus & s->mcgmask) != s->mcgres) - continue; - if (s->ser == SER_REQUIRED && !mce_ser) - continue; - if (s->ser == NO_SER && mce_ser) - continue; - if (s->context && ctx != s->context) - continue; - if (msg) - *msg = s->msg; - s->covered = 1; - if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { - if (panic_on_oops || tolerant < 1) - return MCE_PANIC_SEVERITY; - } - return s->sev; - } -} - -static void *s_start(struct seq_file *f, loff_t *pos) -{ - if (*pos >= ARRAY_SIZE(severities)) - return NULL; - return &severities[*pos]; -} - -static void *s_next(struct seq_file *f, void *data, loff_t *pos) -{ - if (++(*pos) >= ARRAY_SIZE(severities)) - return NULL; - return &severities[*pos]; -} - -static void s_stop(struct seq_file *f, void *data) -{ -} - -static int s_show(struct seq_file *f, void *data) -{ - struct severity *ser = data; - seq_printf(f, "%d\t%s\n", ser->covered, ser->msg); - return 0; -} - -static const struct seq_operations severities_seq_ops = { - .start = s_start, - .next = s_next, - .stop = s_stop, - .show = s_show, -}; - -static int severities_coverage_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &severities_seq_ops); -} - -static ssize_t severities_coverage_write(struct file *file, - const char __user *ubuf, - size_t count, loff_t *ppos) -{ - int i; - for (i = 0; i < ARRAY_SIZE(severities); i++) - severities[i].covered = 0; - return count; -} - -static const struct file_operations severities_coverage_fops = { - .open = severities_coverage_open, - .release = seq_release, - .read = seq_read, - .write = severities_coverage_write, -}; - -static int __init severities_debugfs_init(void) -{ - struct dentry *dmce = NULL, *fseverities_coverage = NULL; - - dmce = debugfs_create_dir("mce", NULL); - if (dmce == NULL) - goto err_out; - fseverities_coverage = debugfs_create_file("severities-coverage", - 0444, dmce, NULL, - &severities_coverage_fops); - if (fseverities_coverage == NULL) - goto err_out; - - return 0; - -err_out: - if (fseverities_coverage) - debugfs_remove(fseverities_coverage); - if (dmce) - debugfs_remove(dmce); - return -ENOMEM; -} -late_initcall(severities_debugfs_init); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce.c b/trunk/arch/x86/kernel/cpu/mcheck/mce.c deleted file mode 100644 index fabba15e4558..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce.c +++ /dev/null @@ -1,1964 +0,0 @@ -/* - * Machine check handler. - * - * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. - * Rest from unknown author(s). - * 2004 Andi Kleen. Rewrote most of it. 
- * Copyright 2008 Intel Corporation - * Author: Andi Kleen - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "mce-internal.h" -#include "mce.h" - -/* Handle unconfigured int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs *regs, long error_code) -{ - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", - smp_processor_id()); -} - -/* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; - -int mce_disabled; - -#ifdef CONFIG_X86_NEW_MCE - -#define MISC_MCELOG_MINOR 227 - -#define SPINUNIT 100 /* 100ns */ - -atomic_t mce_entry; - -DEFINE_PER_CPU(unsigned, mce_exception_count); - -/* - * Tolerant levels: - * 0: always panic on uncorrected errors, log corrected errors - * 1: panic or SIGBUS on uncorrected errors, log corrected errors - * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors - * 3: never panic or SIGBUS, log all errors (for testing only) - */ -static int tolerant = 1; -static int banks; -static u64 *bank; -static unsigned long notify_user; -static int rip_msr; -static int mce_bootlog = -1; -static int monarch_timeout = -1; -static int mce_panic_timeout; -static int mce_dont_log_ce; -int mce_cmci_disabled; -int mce_ignore_ce; -int mce_ser; - -static char trigger[128]; -static char *trigger_argv[2] = { trigger, NULL }; - -static unsigned long dont_init_banks; - -static DECLARE_WAIT_QUEUE_HEAD(mce_wait); -static DEFINE_PER_CPU(struct mce, mces_seen); -static int cpu_missing; - - -/* MCA banks polled by the period polling timer for corrected events */ -DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { - [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL -}; - -static inline int skip_bank_init(int i) -{ - return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); -} - -static DEFINE_PER_CPU(struct work_struct, mce_work); - -/* Do initial initialization of a struct mce */ -void mce_setup(struct mce *m) -{ - memset(m, 0, sizeof(struct mce)); - m->cpu = m->extcpu = smp_processor_id(); - rdtscll(m->tsc); - /* We hope get_seconds stays lockless */ - m->time = get_seconds(); - m->cpuvendor = boot_cpu_data.x86_vendor; - m->cpuid = cpuid_eax(1); -#ifdef CONFIG_SMP - m->socketid = cpu_data(m->extcpu).phys_proc_id; -#endif - m->apicid = cpu_data(m->extcpu).initial_apicid; - rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); -} - -DEFINE_PER_CPU(struct mce, injectm); -EXPORT_PER_CPU_SYMBOL_GPL(injectm); - -/* - * Lockless MCE logging infrastructure. - * This avoids deadlocks on printk locks without having to break locks. Also - * separate MCEs from kernel messages to avoid bogus bug reports. - */ - -static struct mce_log mcelog = { - .signature = MCE_LOG_SIGNATURE, - .len = MCE_LOG_LEN, - .recordlen = sizeof(struct mce), -}; - -void mce_log(struct mce *mce) -{ - unsigned next, entry; - - mce->finished = 0; - wmb(); - for (;;) { - entry = rcu_dereference(mcelog.next); - for (;;) { - /* - * When the buffer fills up discard new entries. - * Assume that the earlier errors are the more - * interesting ones: - */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, - (unsigned long *)&mcelog.flags); - return; - } - /* Old left over entry. 
Skip: */ - if (mcelog.entry[entry].finished) { - entry++; - continue; - } - break; - } - smp_rmb(); - next = entry + 1; - if (cmpxchg(&mcelog.next, entry, next) == entry) - break; - } - memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); - wmb(); - mcelog.entry[entry].finished = 1; - wmb(); - - mce->finished = 1; - set_bit(0, ¬ify_user); -} - -static void print_mce(struct mce *m) -{ - printk(KERN_EMERG - "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", - m->extcpu, m->mcgstatus, m->bank, m->status); - if (m->ip) { - printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); - if (m->cs == __KERNEL_CS) - print_symbol("{%s}", m->ip); - printk("\n"); - } - printk(KERN_EMERG "TSC %llx ", m->tsc); - if (m->addr) - printk("ADDR %llx ", m->addr); - if (m->misc) - printk("MISC %llx ", m->misc); - printk("\n"); - printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, - m->apicid); -} - -static void print_mce_head(void) -{ - printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); -} - -static void print_mce_tail(void) -{ - printk(KERN_EMERG "This is not a software problem!\n" - KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); -} - -#define PANIC_TIMEOUT 5 /* 5 seconds */ - -static atomic_t mce_paniced; - -/* Panic in progress. Enable interrupts and wait for final IPI */ -static void wait_for_panic(void) -{ - long timeout = PANIC_TIMEOUT*USEC_PER_SEC; - preempt_disable(); - local_irq_enable(); - while (timeout-- > 0) - udelay(1); - if (panic_timeout == 0) - panic_timeout = mce_panic_timeout; - panic("Panicing machine check CPU died"); -} - -static void mce_panic(char *msg, struct mce *final, char *exp) -{ - int i; - - /* - * Make sure only one CPU runs in machine check panic - */ - if (atomic_add_return(1, &mce_paniced) > 1) - wait_for_panic(); - barrier(); - - bust_spinlocks(1); - console_verbose(); - print_mce_head(); - /* First print corrected ones that are still unlogged */ - for (i = 0; i < MCE_LOG_LEN; i++) { - struct mce *m = &mcelog.entry[i]; - if (!(m->status & MCI_STATUS_VAL)) - continue; - if (!(m->status & MCI_STATUS_UC)) - print_mce(m); - } - /* Now print uncorrected but with the final one last */ - for (i = 0; i < MCE_LOG_LEN; i++) { - struct mce *m = &mcelog.entry[i]; - if (!(m->status & MCI_STATUS_VAL)) - continue; - if (!(m->status & MCI_STATUS_UC)) - continue; - if (!final || memcmp(m, final, sizeof(struct mce))) - print_mce(m); - } - if (final) - print_mce(final); - if (cpu_missing) - printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); - print_mce_tail(); - if (exp) - printk(KERN_EMERG "Machine check: %s\n", exp); - if (panic_timeout == 0) - panic_timeout = mce_panic_timeout; - panic(msg); -} - -/* Support code for software error injection */ - -static int msr_to_offset(u32 msr) -{ - unsigned bank = __get_cpu_var(injectm.bank); - if (msr == rip_msr) - return offsetof(struct mce, ip); - if (msr == MSR_IA32_MC0_STATUS + bank*4) - return offsetof(struct mce, status); - if (msr == MSR_IA32_MC0_ADDR + bank*4) - return offsetof(struct mce, addr); - if (msr == MSR_IA32_MC0_MISC + bank*4) - return offsetof(struct mce, misc); - if (msr == MSR_IA32_MCG_STATUS) - return offsetof(struct mce, mcgstatus); - return -1; -} - -/* MSR access wrappers used for error injection */ -static u64 mce_rdmsrl(u32 msr) -{ - u64 v; - if (__get_cpu_var(injectm).finished) { - int offset = msr_to_offset(msr); - if (offset < 0) - 
return 0; - return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); - } - rdmsrl(msr, v); - return v; -} - -static void mce_wrmsrl(u32 msr, u64 v) -{ - if (__get_cpu_var(injectm).finished) { - int offset = msr_to_offset(msr); - if (offset >= 0) - *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; - return; - } - wrmsrl(msr, v); -} - -/* - * Simple lockless ring to communicate PFNs from the exception handler with the - * process context work function. This is vastly simplified because there's - * only a single reader and a single writer. - */ -#define MCE_RING_SIZE 16 /* we use one entry less */ - -struct mce_ring { - unsigned short start; - unsigned short end; - unsigned long ring[MCE_RING_SIZE]; -}; -static DEFINE_PER_CPU(struct mce_ring, mce_ring); - -/* Runs with CPU affinity in workqueue */ -static int mce_ring_empty(void) -{ - struct mce_ring *r = &__get_cpu_var(mce_ring); - - return r->start == r->end; -} - -static int mce_ring_get(unsigned long *pfn) -{ - struct mce_ring *r; - int ret = 0; - - *pfn = 0; - get_cpu(); - r = &__get_cpu_var(mce_ring); - if (r->start == r->end) - goto out; - *pfn = r->ring[r->start]; - r->start = (r->start + 1) % MCE_RING_SIZE; - ret = 1; -out: - put_cpu(); - return ret; -} - -/* Always runs in MCE context with preempt off */ -static int mce_ring_add(unsigned long pfn) -{ - struct mce_ring *r = &__get_cpu_var(mce_ring); - unsigned next; - - next = (r->end + 1) % MCE_RING_SIZE; - if (next == r->start) - return -1; - r->ring[r->end] = pfn; - wmb(); - r->end = next; - return 0; -} - -int mce_available(struct cpuinfo_x86 *c) -{ - if (mce_disabled) - return 0; - return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); -} - -static void mce_schedule_work(void) -{ - if (!mce_ring_empty()) { - struct work_struct *work = &__get_cpu_var(mce_work); - if (!work_pending(work)) - schedule_work(work); - } -} - -/* - * Get the address of the instruction at the time of the machine check - * error. - */ -static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) -{ - - if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { - m->ip = regs->ip; - m->cs = regs->cs; - } else { - m->ip = 0; - m->cs = 0; - } - if (rip_msr) - m->ip = mce_rdmsrl(rip_msr); -} - -#ifdef CONFIG_X86_LOCAL_APIC -/* - * Called after interrupts have been reenabled again - * when a MCE happened during an interrupts off region - * in the kernel. - */ -asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) -{ - ack_APIC_irq(); - exit_idle(); - irq_enter(); - mce_notify_irq(); - mce_schedule_work(); - irq_exit(); -} -#endif - -static void mce_report_event(struct pt_regs *regs) -{ - if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { - mce_notify_irq(); - /* - * Triggering the work queue here is just an insurance - * policy in case the syscall exit notify handler - * doesn't run soon enough or ends up running on the - * wrong CPU (can happen when audit sleeps) - */ - mce_schedule_work(); - return; - } - -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Without APIC do not notify. The event will be picked - * up eventually. - */ - if (!cpu_has_apic) - return; - - /* - * When interrupts are disabled we cannot use - * kernel services safely. Trigger an self interrupt - * through the APIC to instead do the notification - * after interrupts are reenabled again. - */ - apic->send_IPI_self(MCE_SELF_VECTOR); - - /* - * Wait for idle afterwards again so that we don't leave the - * APIC in a non idle state because the normal APIC writes - * cannot exclude us. 
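/*
 * The mce_ring above is a classic single-producer/single-consumer ring
 * that deliberately keeps one slot empty: "next == start" means full
 * and "start == end" means empty, so producer and consumer never write
 * the same index. A self-contained userspace sketch of the same scheme
 * (single-threaded here; the kernel version adds wmb() so the payload
 * is visible before the published end index):
 */
#include <stdio.h>

#define RING_SIZE 16			/* usable capacity is RING_SIZE - 1 */

struct ring {
	unsigned short start, end;
	unsigned long slot[RING_SIZE];
};

static int ring_add(struct ring *r, unsigned long v)	/* producer only */
{
	unsigned next = (r->end + 1) % RING_SIZE;

	if (next == r->start)
		return -1;	/* full: drop, as the MCE path does */
	r->slot[r->end] = v;
	r->end = next;		/* publish after the payload write */
	return 0;
}

static int ring_get(struct ring *r, unsigned long *v)	/* consumer only */
{
	if (r->start == r->end)
		return 0;	/* empty */
	*v = r->slot[r->start];
	r->start = (r->start + 1) % RING_SIZE;
	return 1;
}

int main(void)
{
	struct ring r = { 0, 0, { 0 } };
	unsigned long v;

	ring_add(&r, 0x1234);	/* e.g. a page frame number */
	while (ring_get(&r, &v))
		printf("pfn %#lx\n", v);
	return 0;
}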
- */ - apic_wait_icr_idle(); -#endif -} - -DEFINE_PER_CPU(unsigned, mce_poll_count); - -/* - * Poll for corrected events or events that happened before reset. - * Those are just logged through /dev/mcelog. - * - * This is executed in standard interrupt context. - * - * Note: spec recommends to panic for fatal unsignalled - * errors here. However this would be quite problematic -- - * we would need to reimplement the Monarch handling and - * it would mess up the exclusion between exception handler - * and poll hander -- * so we skip this for now. - * These cases should not happen anyways, or only when the CPU - * is already totally * confused. In this case it's likely it will - * not fully execute the machine check handler either. - */ -void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) -{ - struct mce m; - int i; - - __get_cpu_var(mce_poll_count)++; - - mce_setup(&m); - - m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); - for (i = 0; i < banks; i++) { - if (!bank[i] || !test_bit(i, *b)) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - m.tsc = 0; - - barrier(); - m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); - if (!(m.status & MCI_STATUS_VAL)) - continue; - - /* - * Uncorrected or signalled events are handled by the exception - * handler when it is enabled, so don't process those here. - * - * TBD do the same check for MCI_STATUS_EN here? - */ - if (!(flags & MCP_UC) && - (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) - continue; - - if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); - if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); - - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; - /* - * Don't get the IP here because it's unlikely to - * have anything to do with the actual error location. - */ - if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { - mce_log(&m); - add_taint(TAINT_MACHINE_CHECK); - } - - /* - * Clear state for this bank. - */ - mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } - - /* - * Don't clear MCG_STATUS here because it's only defined for - * exceptions. - */ - - sync_core(); -} -EXPORT_SYMBOL_GPL(machine_check_poll); - -/* - * Do a quick check if any of the events requires a panic. - * This decides if we keep the events around or clear them. - */ -static int mce_no_way_out(struct mce *m, char **msg) -{ - int i; - - for (i = 0; i < banks; i++) { - m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); - if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) - return 1; - } - return 0; -} - -/* - * Variable to establish order between CPUs while scanning. - * Each CPU spins initially until executing is equal its number. - */ -static atomic_t mce_executing; - -/* - * Defines order of CPUs on entry. First CPU becomes Monarch. - */ -static atomic_t mce_callin; - -/* - * Check if a timeout waiting for other CPUs happened. - */ -static int mce_timed_out(u64 *t) -{ - /* - * The others already did panic for some reason. - * Bail out like in a timeout. - * rmb() to tell the compiler that system_state - * might have been modified by someone else. - */ - rmb(); - if (atomic_read(&mce_paniced)) - wait_for_panic(); - if (!monarch_timeout) - goto out; - if ((s64)*t < SPINUNIT) { - /* CHECKME: Make panic default for 1 too? */ - if (tolerant < 1) - mce_panic("Timeout synchronizing machine check over CPUs", - NULL, NULL); - cpu_missing = 1; - return 1; - } - *t -= SPINUNIT; -out: - touch_nmi_watchdog(); - return 0; -} - -/* - * The Monarch's reign. 
The Monarch is the CPU who entered - * the machine check handler first. It waits for the others to - * raise the exception too and then grades them. When any - * error is fatal panic. Only then let the others continue. - * - * The other CPUs entering the MCE handler will be controlled by the - * Monarch. They are called Subjects. - * - * This way we prevent any potential data corruption in a unrecoverable case - * and also makes sure always all CPU's errors are examined. - * - * Also this detects the case of an machine check event coming from outer - * space (not detected by any CPUs) In this case some external agent wants - * us to shut down, so panic too. - * - * The other CPUs might still decide to panic if the handler happens - * in a unrecoverable place, but in this case the system is in a semi-stable - * state and won't corrupt anything by itself. It's ok to let the others - * continue for a bit first. - * - * All the spin loops have timeouts; when a timeout happens a CPU - * typically elects itself to be Monarch. - */ -static void mce_reign(void) -{ - int cpu; - struct mce *m = NULL; - int global_worst = 0; - char *msg = NULL; - char *nmsg = NULL; - - /* - * This CPU is the Monarch and the other CPUs have run - * through their handlers. - * Grade the severity of the errors of all the CPUs. - */ - for_each_possible_cpu(cpu) { - int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, - &nmsg); - if (severity > global_worst) { - msg = nmsg; - global_worst = severity; - m = &per_cpu(mces_seen, cpu); - } - } - - /* - * Cannot recover? Panic here then. - * This dumps all the mces in the log buffer and stops the - * other CPUs. - */ - if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) - mce_panic("Fatal Machine check", m, msg); - - /* - * For UC somewhere we let the CPU who detects it handle it. - * Also must let continue the others, otherwise the handling - * CPU could deadlock on a lock. - */ - - /* - * No machine check event found. Must be some external - * source or one CPU is hung. Panic. - */ - if (!m && tolerant < 3) - mce_panic("Machine check from unknown source", NULL, NULL); - - /* - * Now clear all the mces_seen so that they don't reappear on - * the next mce. - */ - for_each_possible_cpu(cpu) - memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); -} - -static atomic_t global_nwo; - -/* - * Start of Monarch synchronization. This waits until all CPUs have - * entered the exception handler and then determines if any of them - * saw a fatal event that requires panic. Then it executes them - * in the entry order. - * TBD double check parallel CPU hotunplug - */ -static int mce_start(int no_way_out, int *order) -{ - int nwo; - int cpus = num_online_cpus(); - u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; - - if (!timeout) { - *order = -1; - return no_way_out; - } - - atomic_add(no_way_out, &global_nwo); - - /* - * Wait for everyone. - */ - while (atomic_read(&mce_callin) != cpus) { - if (mce_timed_out(&timeout)) { - atomic_set(&global_nwo, 0); - *order = -1; - return no_way_out; - } - ndelay(SPINUNIT); - } - - /* - * Cache the global no_way_out state. - */ - nwo = atomic_read(&global_nwo); - - /* - * Monarch starts executing now, the others wait. - */ - if (*order == 1) { - atomic_set(&mce_executing, 1); - return nwo; - } - - /* - * Now start the scanning loop one by one - * in the original callin order. - * This way when there are any shared banks it will - * be only seen by one CPU before cleared, avoiding duplicates. 
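/*
 * A minimal pthread model of the Monarch election in mce_start() above:
 * every CPU atomically takes a ticket from "callin"; ticket 1 becomes
 * Monarch, the rest are Subjects that spin until "executing" reaches
 * their ticket, giving the strict entry-order bank scan the comment
 * describes. (No timeouts here; the real code spins under
 * mce_timed_out() and can elect a new Monarch.) Build with -pthread.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int callin, executing;

static void *mce_entry_point(void *arg)
{
	int order = atomic_fetch_add(&callin, 1) + 1;	/* 1..NCPUS */

	(void)arg;
	if (order == 1)
		atomic_store(&executing, 1);	/* Monarch scans first */
	else
		while (atomic_load(&executing) < order)
			;			/* Subject waits its turn */

	printf("cpu with ticket %d scans its banks\n", order);
	atomic_fetch_add(&executing, 1);	/* let the next one run */
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];
	int i;

	for (i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, mce_entry_point, NULL);
	for (i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}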
- */ - while (atomic_read(&mce_executing) < *order) { - if (mce_timed_out(&timeout)) { - atomic_set(&global_nwo, 0); - *order = -1; - return no_way_out; - } - ndelay(SPINUNIT); - } - return nwo; -} - -/* - * Synchronize between CPUs after main scanning loop. - * This invokes the bulk of the Monarch processing. - */ -static int mce_end(int order) -{ - int ret = -1; - u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; - - if (!timeout) - goto reset; - if (order < 0) - goto reset; - - /* - * Allow others to run. - */ - atomic_inc(&mce_executing); - - if (order == 1) { - /* CHECKME: Can this race with a parallel hotplug? */ - int cpus = num_online_cpus(); - - /* - * Monarch: Wait for everyone to go through their scanning - * loops. - */ - while (atomic_read(&mce_executing) <= cpus) { - if (mce_timed_out(&timeout)) - goto reset; - ndelay(SPINUNIT); - } - - mce_reign(); - barrier(); - ret = 0; - } else { - /* - * Subject: Wait for Monarch to finish. - */ - while (atomic_read(&mce_executing) != 0) { - if (mce_timed_out(&timeout)) - goto reset; - ndelay(SPINUNIT); - } - - /* - * Don't reset anything. That's done by the Monarch. - */ - return 0; - } - - /* - * Reset all global state. - */ -reset: - atomic_set(&global_nwo, 0); - atomic_set(&mce_callin, 0); - barrier(); - - /* - * Let others run again. - */ - atomic_set(&mce_executing, 0); - return ret; -} - -/* - * Check if the address reported by the CPU is in a format we can parse. - * It would be possible to add code for most other cases, but all would - * be somewhat complicated (e.g. segment offset would require an instruction - * parser). So only support physical addresses upto page granuality for now. - */ -static int mce_usable_address(struct mce *m) -{ - if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) - return 0; - if ((m->misc & 0x3f) > PAGE_SHIFT) - return 0; - if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS) - return 0; - return 1; -} - -static void mce_clear_state(unsigned long *toclear) -{ - int i; - - for (i = 0; i < banks; i++) { - if (test_bit(i, toclear)) - mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } -} - -/* - * The actual machine check handler. This only handles real - * exceptions when something got corrupted coming in through int 18. - * - * This is executed in NMI context not subject to normal locking rules. This - * implies that most kernel services cannot be safely used. Don't even - * think about putting a printk in there! - * - * On Intel systems this is entered on all CPUs in parallel through - * MCE broadcast. However some CPUs might be broken beyond repair, - * so be always careful when synchronizing with others. - */ -void do_machine_check(struct pt_regs *regs, long error_code) -{ - struct mce m, *final; - int i; - int worst = 0; - int severity; - /* - * Establish sequential order between the CPUs entering the machine - * check handler. - */ - int order; - - /* - * If no_way_out gets set, there is no safe way to recover from this - * MCE. If tolerant is cranked up, we'll try anyway. - */ - int no_way_out = 0; - /* - * If kill_it gets set, there might be a way to recover from this - * error. 
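/*
 * The mce_usable_address() test above decodes the MISC register layout:
 * bits 5:0 give the least significant valid address bit (granularity)
 * and bits 8:6 the address mode. A small standalone decoder with the
 * same field arithmetic; the MCM_ADDR_PHYS value is an assumption
 * restated here for the demo.
 */
#include <stdint.h>
#include <stdio.h>

#define MCM_ADDR_PHYS 2		/* assumed: physical-address mode */
#define PAGE_SHIFT    12

static int usable_address(uint64_t misc)
{
	unsigned lsb  = misc & 0x3f;	  /* address granularity in bits */
	unsigned mode = (misc >> 6) & 7;  /* how to interpret the address */

	return lsb <= PAGE_SHIFT && mode == MCM_ADDR_PHYS;
}

int main(void)
{
	/* page-granular physical address: recoverable */
	printf("%d\n", usable_address((MCM_ADDR_PHYS << 6) | PAGE_SHIFT));
	/* other modes (e.g. segment offset) are rejected */
	printf("%d\n", usable_address((5ull << 6) | PAGE_SHIFT));
	return 0;
}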
- */ - int kill_it = 0; - DECLARE_BITMAP(toclear, MAX_NR_BANKS); - char *msg = "Unknown"; - - atomic_inc(&mce_entry); - - __get_cpu_var(mce_exception_count)++; - - if (notify_die(DIE_NMI, "machine check", regs, error_code, - 18, SIGKILL) == NOTIFY_STOP) - goto out; - if (!banks) - goto out; - - order = atomic_add_return(1, &mce_callin); - mce_setup(&m); - - m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); - no_way_out = mce_no_way_out(&m, &msg); - - final = &__get_cpu_var(mces_seen); - *final = m; - - barrier(); - - /* - * When no restart IP must always kill or panic. - */ - if (!(m.mcgstatus & MCG_STATUS_RIPV)) - kill_it = 1; - - /* - * Go through all the banks in exclusion of the other CPUs. - * This way we don't report duplicated events on shared banks - * because the first one to see it will clear it. - */ - no_way_out = mce_start(no_way_out, &order); - for (i = 0; i < banks; i++) { - __clear_bit(i, toclear); - if (!bank[i]) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - - m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); - if ((m.status & MCI_STATUS_VAL) == 0) - continue; - - /* - * Non uncorrected or non signaled errors are handled by - * machine_check_poll. Leave them alone, unless this panics. - */ - if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && - !no_way_out) - continue; - - /* - * Set taint even when machine check was not enabled. - */ - add_taint(TAINT_MACHINE_CHECK); - - severity = mce_severity(&m, tolerant, NULL); - - /* - * When machine check was for corrected handler don't touch, - * unless we're panicing. - */ - if (severity == MCE_KEEP_SEVERITY && !no_way_out) - continue; - __set_bit(i, toclear); - if (severity == MCE_NO_SEVERITY) { - /* - * Machine check event was not enabled. Clear, but - * ignore. - */ - continue; - } - - /* - * Kill on action required. - */ - if (severity == MCE_AR_SEVERITY) - kill_it = 1; - - if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); - if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); - - /* - * Action optional error. Queue address for later processing. - * When the ring overflows we just ignore the AO error. - * RED-PEN add some logging mechanism when - * usable_address or mce_add_ring fails. - * RED-PEN don't ignore overflow for tolerant == 0 - */ - if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) - mce_ring_add(m.addr >> PAGE_SHIFT); - - mce_get_rip(&m, regs); - mce_log(&m); - - if (severity > worst) { - *final = m; - worst = severity; - } - } - - if (!no_way_out) - mce_clear_state(toclear); - - /* - * Do most of the synchronization with other CPUs. - * When there's any problem use only local no_way_out state. - */ - if (mce_end(order) < 0) - no_way_out = worst >= MCE_PANIC_SEVERITY; - - /* - * If we have decided that we just CAN'T continue, and the user - * has not set tolerant to an insane level, give up and die. - * - * This is mainly used in the case when the system doesn't - * support MCE broadcasting or it has been disabled. - */ - if (no_way_out && tolerant < 3) - mce_panic("Fatal machine check on current CPU", final, msg); - - /* - * If the error seems to be unrecoverable, something should be - * done. Try to kill as little as possible. If we can kill just - * one task, do that. If the user has set the tolerance very - * high, don't try to do anything at all. 
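/*
 * A side-effect-free sketch of the exit policy the surrounding handler
 * code implements, keyed by the "tolerant" levels documented at the top
 * of the file: panic when there is no way out, SIGBUS when a kill might
 * suffice, and at tolerant == 3 never do either.
 */
#include <stdio.h>

static const char *mce_policy(int no_way_out, int kill_it, int tolerant)
{
	if (no_way_out && tolerant < 3)
		return "panic";			/* cannot safely continue */
	if (kill_it && tolerant < 3)
		return "SIGBUS to current task";
	return "log and continue";		/* tolerant==3: testing only */
}

int main(void)
{
	printf("%s\n", mce_policy(1, 1, 1));	/* panic */
	printf("%s\n", mce_policy(0, 1, 2));	/* SIGBUS */
	printf("%s\n", mce_policy(1, 1, 3));	/* log and continue */
	return 0;
}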
- */ - - if (kill_it && tolerant < 3) - force_sig(SIGBUS, current); - - /* notify userspace ASAP */ - set_thread_flag(TIF_MCE_NOTIFY); - - if (worst > 0) - mce_report_event(regs); - mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); -out: - atomic_dec(&mce_entry); - sync_core(); -} -EXPORT_SYMBOL_GPL(do_machine_check); - -/* dummy to break dependency. actual code is in mm/memory-failure.c */ -void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) -{ - printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); -} - -/* - * Called after mce notification in process context. This code - * is allowed to sleep. Call the high level VM handler to process - * any corrupted pages. - * Assume that the work queue code only calls this one at a time - * per CPU. - * Note we don't disable preemption, so this code might run on the wrong - * CPU. In this case the event is picked up by the scheduled work queue. - * This is merely a fast path to expedite processing in some common - * cases. - */ -void mce_notify_process(void) -{ - unsigned long pfn; - mce_notify_irq(); - while (mce_ring_get(&pfn)) - memory_failure(pfn, MCE_VECTOR); -} - -static void mce_process_work(struct work_struct *dummy) -{ - mce_notify_process(); -} - -#ifdef CONFIG_X86_MCE_INTEL -/*** - * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog - * @cpu: The CPU on which the event occurred. - * @status: Event status information - * - * This function should be called by the thermal interrupt after the - * event has been processed and the decision was made to log the event - * further. - * - * The status parameter will be saved to the 'status' field of 'struct mce' - * and historically has been the register value of the - * MSR_IA32_THERMAL_STATUS (Intel) msr. - */ -void mce_log_therm_throt_event(__u64 status) -{ - struct mce m; - - mce_setup(&m); - m.bank = MCE_THERMAL_BANK; - m.status = status; - mce_log(&m); -} -#endif /* CONFIG_X86_MCE_INTEL */ - -/* - * Periodic polling timer for "silent" machine check errors. If the - * poller finds an MCE, poll 2x faster. When the poller finds no more - * errors, poll 2x slower (up to check_interval seconds). - */ -static int check_interval = 5 * 60; /* 5 minutes */ - -static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct timer_list, mce_timer); - -static void mcheck_timer(unsigned long data) -{ - struct timer_list *t = &per_cpu(mce_timer, data); - int *n; - - WARN_ON(smp_processor_id() != data); - - if (mce_available(¤t_cpu_data)) { - machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); - } - - /* - * Alert userspace if needed. If we logged an MCE, reduce the - * polling interval, otherwise increase the polling interval. - */ - n = &__get_cpu_var(next_interval); - if (mce_notify_irq()) - *n = max(*n/2, HZ/100); - else - *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); - - t->expires = jiffies + *n; - add_timer(t); -} - -static void mce_do_trigger(struct work_struct *work) -{ - call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); -} - -static DECLARE_WORK(mce_trigger_work, mce_do_trigger); - -/* - * Notify the user(s) about new machine check events. - * Can be called from interrupt context, but not from machine check/NMI - * context. 
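/*
 * The polling timer above adapts its period: halve it (down to a floor)
 * whenever an event was found, double it (up to the configured ceiling)
 * when the poll came back clean. A sketch of that feedback loop,
 * assuming HZ = 100 and the 5-minute check_interval default from the
 * code (round_jiffies_relative() is omitted for brevity):
 */
#include <stdio.h>

#define HZ 100				/* assumed tick rate for the demo */
static const int ceiling = 5 * 60 * HZ;	/* check_interval * HZ */
static const int floor_j = HZ / 100;	/* from max(n/2, HZ/100) */

static int next_interval(int n, int found_event)
{
	if (found_event)
		return n / 2 > floor_j ? n / 2 : floor_j;
	return n * 2 < ceiling ? n * 2 : ceiling;
}

int main(void)
{
	int n = ceiling;

	n = next_interval(n, 1);	/* storm: poll twice as fast */
	n = next_interval(n, 1);
	printf("after two events: %d jiffies\n", n);
	n = next_interval(n, 0);	/* quiet again: back off */
	printf("after a clean poll: %d jiffies\n", n);
	return 0;
}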
- */ -int mce_notify_irq(void) -{ - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - - clear_thread_flag(TIF_MCE_NOTIFY); - - if (test_and_clear_bit(0, ¬ify_user)) { - wake_up_interruptible(&mce_wait); - - /* - * There is no risk of missing notifications because - * work_pending is always cleared before the function is - * executed. - */ - if (trigger[0] && !work_pending(&mce_trigger_work)) - schedule_work(&mce_trigger_work); - - if (__ratelimit(&ratelimit)) - printk(KERN_INFO "Machine check events logged\n"); - - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(mce_notify_irq); - -/* - * Initialize Machine Checks for a CPU. - */ -static int mce_cap_init(void) -{ - unsigned b; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - - b = cap & MCG_BANKCNT_MASK; - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - printk(KERN_WARNING - "MCE: Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); - b = MAX_NR_BANKS; - } - - /* Don't support asymmetric configurations today */ - WARN_ON(banks != 0 && b != banks); - banks = b; - if (!bank) { - bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); - if (!bank) - return -ENOMEM; - memset(bank, 0xff, banks * sizeof(u64)); - } - - /* Use accurate RIP reporting if available. */ - if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) - rip_msr = MSR_IA32_MCG_EIP; - - if (cap & MCG_SER_P) - mce_ser = 1; - - return 0; -} - -static void mce_init(void) -{ - mce_banks_t all_banks; - u64 cap; - int i; - - /* - * Log the machine checks left over from the previous reset. - */ - bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); - - set_in_cr4(X86_CR4_MCE); - - rdmsrl(MSR_IA32_MCG_CAP, cap); - if (cap & MCG_CTL_P) - wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - - for (i = 0; i < banks; i++) { - if (skip_bank_init(i)) - continue; - wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } -} - -/* Add per CPU specific workarounds here */ -static void mce_cpu_quirks(struct cpuinfo_x86 *c) -{ - /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && banks > 4) { - /* - * disable GART TBL walk error reporting, which - * trips off incorrectly with the IOMMU & 3ware - * & Cerberus: - */ - clear_bit(10, (unsigned long *)&bank[4]); - } - if (c->x86 <= 17 && mce_bootlog < 0) { - /* - * Lots of broken BIOS around that don't clear them - * by default and leave crap in there. Don't log: - */ - mce_bootlog = 0; - } - /* - * Various K7s with broken bank 0 around. Always disable - * by default. - */ - if (c->x86 == 6) - bank[0] = 0; - } - - if (c->x86_vendor == X86_VENDOR_INTEL) { - /* - * SDM documents that on family 6 bank 0 should not be written - * because it aliases to another special BIOS controlled - * register. - * But it's not aliased anymore on model 0x1a+ - * Don't ignore bank 0 completely because there could be a - * valid event later, merely don't write CTL0. - */ - - if (c->x86 == 6 && c->x86_model < 0x1A) - __set_bit(0, &dont_init_banks); - - /* - * All newer Intel systems support MCE broadcasting. Enable - * synchronization with a one second timeout. 
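/*
 * mce_cap_init() above unpacks MCG_CAP once at boot. A standalone
 * decoder with the same fields; the bit positions (bank count in bits
 * 7:0, CTL_P bit 8, EXT_P bit 9, extended-register count in bits 23:16,
 * SER_P bit 24) are restated here as assumptions, and the sample
 * register value is invented.
 */
#include <stdint.h>
#include <stdio.h>

#define MCG_BANKCNT_MASK 0xffull
#define MCG_CTL_P        (1ull << 8)
#define MCG_EXT_P        (1ull << 9)
#define MCG_SER_P        (1ull << 24)
#define MCG_EXT_CNT(c)   (((c) >> 16) & 0xff)

int main(void)
{
	uint64_t cap = 0x1090306;	/* invented sample register value */

	printf("banks: %llu\n", (unsigned long long)(cap & MCG_BANKCNT_MASK));
	printf("MCG_CTL present: %d\n", !!(cap & MCG_CTL_P));
	printf("extended regs: %llu\n",
	       (unsigned long long)(cap & MCG_EXT_P ? MCG_EXT_CNT(cap) : 0));
	printf("software error recovery: %d\n", !!(cap & MCG_SER_P));
	return 0;
}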
- */ - if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && - monarch_timeout < 0) - monarch_timeout = USEC_PER_SEC; - } - if (monarch_timeout < 0) - monarch_timeout = 0; - if (mce_bootlog != 0) - mce_panic_timeout = 30; -} - -static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) -{ - if (c->x86 != 5) - return; - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - if (mce_p5_enabled()) - intel_p5_mcheck_init(c); - break; - case X86_VENDOR_CENTAUR: - winchip_mcheck_init(c); - break; - } -} - -static void mce_cpu_features(struct cpuinfo_x86 *c) -{ - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - mce_intel_feature_init(c); - break; - case X86_VENDOR_AMD: - mce_amd_feature_init(c); - break; - default: - break; - } -} - -static void mce_init_timer(void) -{ - struct timer_list *t = &__get_cpu_var(mce_timer); - int *n = &__get_cpu_var(next_interval); - - if (mce_ignore_ce) - return; - - *n = check_interval * HZ; - if (!*n) - return; - setup_timer(t, mcheck_timer, smp_processor_id()); - t->expires = round_jiffies(jiffies + *n); - add_timer(t); -} - -/* - * Called for each booted CPU to set up machine checks. - * Must be called with preempt off: - */ -void __cpuinit mcheck_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled) - return; - - mce_ancient_init(c); - - if (!mce_available(c)) - return; - - if (mce_cap_init() < 0) { - mce_disabled = 1; - return; - } - mce_cpu_quirks(c); - - machine_check_vector = do_machine_check; - - mce_init(); - mce_cpu_features(c); - mce_init_timer(); - INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); -} - -/* - * Character device to read and clear the MCE log. - */ - -static DEFINE_SPINLOCK(mce_state_lock); -static int open_count; /* #times opened */ -static int open_exclu; /* already open exclusive? */ - -static int mce_open(struct inode *inode, struct file *file) -{ - spin_lock(&mce_state_lock); - - if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { - spin_unlock(&mce_state_lock); - - return -EBUSY; - } - - if (file->f_flags & O_EXCL) - open_exclu = 1; - open_count++; - - spin_unlock(&mce_state_lock); - - return nonseekable_open(inode, file); -} - -static int mce_release(struct inode *inode, struct file *file) -{ - spin_lock(&mce_state_lock); - - open_count--; - open_exclu = 0; - - spin_unlock(&mce_state_lock); - - return 0; -} - -static void collect_tscs(void *data) -{ - unsigned long *cpu_tsc = (unsigned long *)data; - - rdtscll(cpu_tsc[smp_processor_id()]); -} - -static DEFINE_MUTEX(mce_read_mutex); - -static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, - loff_t *off) -{ - char __user *buf = ubuf; - unsigned long *cpu_tsc; - unsigned prev, next; - int i, err; - - cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); - if (!cpu_tsc) - return -ENOMEM; - - mutex_lock(&mce_read_mutex); - next = rcu_dereference(mcelog.next); - - /* Only supports full reads right now */ - if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { - mutex_unlock(&mce_read_mutex); - kfree(cpu_tsc); - - return -EINVAL; - } - - err = 0; - prev = 0; - do { - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - - while (!mcelog.entry[i].finished) { - if (time_after_eq(jiffies, start + 2)) { - memset(mcelog.entry + i, 0, - sizeof(struct mce)); - goto timeout; - } - cpu_relax(); - } - smp_rmb(); - err |= copy_to_user(buf, mcelog.entry + i, - sizeof(struct mce)); - buf += sizeof(struct mce); -timeout: - ; - } - - memset(mcelog.entry + prev, 0, - (next - prev) * sizeof(struct mce)); - prev = next; - next = 
cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); - - synchronize_sched(); - - /* - * Collect entries that were still getting written before the - * synchronize. - */ - on_each_cpu(collect_tscs, cpu_tsc, 1); - - for (i = next; i < MCE_LOG_LEN; i++) { - if (mcelog.entry[i].finished && - mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { - err |= copy_to_user(buf, mcelog.entry+i, - sizeof(struct mce)); - smp_rmb(); - buf += sizeof(struct mce); - memset(&mcelog.entry[i], 0, sizeof(struct mce)); - } - } - mutex_unlock(&mce_read_mutex); - kfree(cpu_tsc); - - return err ? -EFAULT : buf - ubuf; -} - -static unsigned int mce_poll(struct file *file, poll_table *wait) -{ - poll_wait(file, &mce_wait, wait); - if (rcu_dereference(mcelog.next)) - return POLLIN | POLLRDNORM; - return 0; -} - -static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) -{ - int __user *p = (int __user *)arg; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - switch (cmd) { - case MCE_GET_RECORD_LEN: - return put_user(sizeof(struct mce), p); - case MCE_GET_LOG_LEN: - return put_user(MCE_LOG_LEN, p); - case MCE_GETCLEAR_FLAGS: { - unsigned flags; - - do { - flags = mcelog.flags; - } while (cmpxchg(&mcelog.flags, flags, 0) != flags); - - return put_user(flags, p); - } - default: - return -ENOTTY; - } -} - -/* Modified in mce-inject.c, so not static or const */ -struct file_operations mce_chrdev_ops = { - .open = mce_open, - .release = mce_release, - .read = mce_read, - .poll = mce_poll, - .unlocked_ioctl = mce_ioctl, -}; -EXPORT_SYMBOL_GPL(mce_chrdev_ops); - -static struct miscdevice mce_log_device = { - MISC_MCELOG_MINOR, - "mcelog", - &mce_chrdev_ops, -}; - -/* - * mce=off Disables machine check - * mce=no_cmci Disables CMCI - * mce=dont_log_ce Clears corrected events silently, no log created for CEs. - * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. - * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) - * monarchtimeout is how long to wait for other CPUs on machine - * check, or 0 to not wait - * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. - * mce=nobootlog Don't log MCEs from before booting. - */ -static int __init mcheck_enable(char *str) -{ - if (*str == 0) - enable_p5_mce(); - if (*str == '=') - str++; - if (!strcmp(str, "off")) - mce_disabled = 1; - else if (!strcmp(str, "no_cmci")) - mce_cmci_disabled = 1; - else if (!strcmp(str, "dont_log_ce")) - mce_dont_log_ce = 1; - else if (!strcmp(str, "ignore_ce")) - mce_ignore_ce = 1; - else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) - mce_bootlog = (str[0] == 'b'); - else if (isdigit(str[0])) { - get_option(&str, &tolerant); - if (*str == ',') { - ++str; - get_option(&str, &monarch_timeout); - } - } else { - printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", - str); - return 0; - } - return 1; -} -__setup("mce", mcheck_enable); - -/* - * Sysfs support - */ - -/* - * Disable machine checks on suspend and shutdown. We can't really handle - * them later. - */ -static int mce_disable(void) -{ - int i; - - for (i = 0; i < banks; i++) { - if (!skip_bank_init(i)) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); - } - return 0; -} - -static int mce_suspend(struct sys_device *dev, pm_message_t state) -{ - return mce_disable(); -} - -static int mce_shutdown(struct sys_device *dev) -{ - return mce_disable(); -} - -/* - * On resume clear all MCE state. Don't want to see leftovers from the BIOS. 
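/*
 * The mce= parser above accepts either keywords or
 * "TOLERANCELEVEL[,monarchtimeout]". A standalone sketch of just the
 * numeric branch, to make the accepted grammar concrete (the keyword
 * side effects are omitted, and strtol stands in for get_option):
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int tolerant = 1, monarch_timeout = -1;

static int parse_mce(const char *str)
{
	char *end;

	if (!strcmp(str, "off") || !strcmp(str, "no_cmci") ||
	    !strcmp(str, "dont_log_ce") || !strcmp(str, "ignore_ce") ||
	    !strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		return 1;		/* keyword forms, handled elsewhere */

	tolerant = (int)strtol(str, &end, 10);
	if (end == str)
		return 0;		/* not a number: argument ignored */
	if (*end == ',')
		monarch_timeout = (int)strtol(end + 1, NULL, 10);
	return 1;
}

int main(void)
{
	parse_mce("2,30");	/* as in "mce=2,30" on the command line */
	printf("tolerant=%d monarch_timeout=%dus\n", tolerant, monarch_timeout);
	return 0;
}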
- * Only one CPU is active at this time, the others get re-added later using - * CPU hotplug: - */ -static int mce_resume(struct sys_device *dev) -{ - mce_init(); - mce_cpu_features(¤t_cpu_data); - - return 0; -} - -static void mce_cpu_restart(void *data) -{ - del_timer_sync(&__get_cpu_var(mce_timer)); - if (mce_available(¤t_cpu_data)) - mce_init(); - mce_init_timer(); -} - -/* Reinit MCEs after user configuration changes */ -static void mce_restart(void) -{ - on_each_cpu(mce_cpu_restart, NULL, 1); -} - -static struct sysdev_class mce_sysclass = { - .suspend = mce_suspend, - .shutdown = mce_shutdown, - .resume = mce_resume, - .name = "machinecheck", -}; - -DEFINE_PER_CPU(struct sys_device, mce_dev); - -__cpuinitdata -void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); - -static struct sysdev_attribute *bank_attrs; - -static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, - char *buf) -{ - u64 b = bank[attr - bank_attrs]; - - return sprintf(buf, "%llx\n", b); -} - -static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf, size_t size) -{ - u64 new; - - if (strict_strtoull(buf, 0, &new) < 0) - return -EINVAL; - - bank[attr - bank_attrs] = new; - mce_restart(); - - return size; -} - -static ssize_t -show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) -{ - strcpy(buf, trigger); - strcat(buf, "\n"); - return strlen(trigger) + 1; -} - -static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf, size_t siz) -{ - char *p; - int len; - - strncpy(trigger, buf, sizeof(trigger)); - trigger[sizeof(trigger)-1] = 0; - len = strlen(trigger); - p = strchr(trigger, '\n'); - - if (*p) - *p = 0; - - return len; -} - -static ssize_t store_int_with_restart(struct sys_device *s, - struct sysdev_attribute *attr, - const char *buf, size_t size) -{ - ssize_t ret = sysdev_store_int(s, attr, buf, size); - mce_restart(); - return ret; -} - -static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); -static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); -static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); - -static struct sysdev_ext_attribute attr_check_interval = { - _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, - store_int_with_restart), - &check_interval -}; - -static struct sysdev_attribute *mce_attrs[] = { - &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, - &attr_monarch_timeout.attr, - NULL -}; - -static cpumask_var_t mce_dev_initialized; - -/* Per cpu sysdev init. 
All of the cpus still share the same ctrl bank: */ -static __cpuinit int mce_create_device(unsigned int cpu) -{ - int err; - int i; - - if (!mce_available(&boot_cpu_data)) - return -EIO; - - memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject)); - per_cpu(mce_dev, cpu).id = cpu; - per_cpu(mce_dev, cpu).cls = &mce_sysclass; - - err = sysdev_register(&per_cpu(mce_dev, cpu)); - if (err) - return err; - - for (i = 0; mce_attrs[i]; i++) { - err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); - if (err) - goto error; - } - for (i = 0; i < banks; i++) { - err = sysdev_create_file(&per_cpu(mce_dev, cpu), - &bank_attrs[i]); - if (err) - goto error2; - } - cpumask_set_cpu(cpu, mce_dev_initialized); - - return 0; -error2: - while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); -error: - while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); - - sysdev_unregister(&per_cpu(mce_dev, cpu)); - - return err; -} - -static __cpuinit void mce_remove_device(unsigned int cpu) -{ - int i; - - if (!cpumask_test_cpu(cpu, mce_dev_initialized)) - return; - - for (i = 0; mce_attrs[i]; i++) - sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); - - for (i = 0; i < banks; i++) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); - - sysdev_unregister(&per_cpu(mce_dev, cpu)); - cpumask_clear_cpu(cpu, mce_dev_initialized); -} - -/* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) -{ - unsigned long action = *(unsigned long *)h; - int i; - - if (!mce_available(¤t_cpu_data)) - return; - if (!(action & CPU_TASKS_FROZEN)) - cmci_clear(); - for (i = 0; i < banks; i++) { - if (!skip_bank_init(i)) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); - } -} - -static void mce_reenable_cpu(void *h) -{ - unsigned long action = *(unsigned long *)h; - int i; - - if (!mce_available(¤t_cpu_data)) - return; - - if (!(action & CPU_TASKS_FROZEN)) - cmci_reenable(); - for (i = 0; i < banks; i++) { - if (!skip_bank_init(i)) - wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); - } -} - -/* Get notified when a cpu comes on/off. Be hotplug friendly. 
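/*
 * mce_create_device() above uses the kernel's usual unwind idiom: on
 * failure at step N, walk back through steps N-1..0 before returning,
 * so nothing registered is leaked. A generic illustration of that
 * shape; the numbered resources are stand-ins for the sysdev files.
 */
#include <stdio.h>

static int setup_one(int i)
{
	printf("setup %d\n", i);
	return i == 3 ? -1 : 0;		/* simulate failure at step 3 */
}

static void teardown_one(int i)
{
	printf("teardown %d\n", i);
}

static int setup_all(int n)
{
	int i, err = 0;

	for (i = 0; i < n; i++) {
		err = setup_one(i);
		if (err)
			goto unwind;
	}
	return 0;
unwind:
	while (--i >= 0)	/* release only what was acquired */
		teardown_one(i);
	return err;
}

int main(void)
{
	return setup_all(5) ? 1 : 0;
}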
*/ -static int __cpuinit -mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - mce_create_device(cpu); - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - mce_remove_device(cpu); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); - smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies(jiffies + - __get_cpu_var(next_interval)); - add_timer_on(t, cpu); - smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - break; - case CPU_POST_DEAD: - /* intentionally ignoring frozen here */ - cmci_rediscover(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block mce_cpu_notifier __cpuinitdata = { - .notifier_call = mce_cpu_callback, -}; - -static __init int mce_init_banks(void) -{ - int i; - - bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, - GFP_KERNEL); - if (!bank_attrs) - return -ENOMEM; - - for (i = 0; i < banks; i++) { - struct sysdev_attribute *a = &bank_attrs[i]; - - a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); - if (!a->attr.name) - goto nomem; - - a->attr.mode = 0644; - a->show = show_bank; - a->store = set_bank; - } - return 0; - -nomem: - while (--i >= 0) - kfree(bank_attrs[i].attr.name); - kfree(bank_attrs); - bank_attrs = NULL; - - return -ENOMEM; -} - -static __init int mce_init_device(void) -{ - int err; - int i = 0; - - if (!mce_available(&boot_cpu_data)) - return -EIO; - - alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); - - err = mce_init_banks(); - if (err) - return err; - - err = sysdev_class_register(&mce_sysclass); - if (err) - return err; - - for_each_online_cpu(i) { - err = mce_create_device(i); - if (err) - return err; - } - - register_hotcpu_notifier(&mce_cpu_notifier); - misc_register(&mce_log_device); - - return err; -} - -device_initcall(mce_init_device); - -#else /* CONFIG_X86_OLD_MCE: */ - -int nr_mce_banks; -EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ - -/* This has to be run for each processor */ -void mcheck_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled == 1) - return; - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - amd_mcheck_init(c); - break; - - case X86_VENDOR_INTEL: - if (c->x86 == 5) - intel_p5_mcheck_init(c); - if (c->x86 == 6) - intel_p6_mcheck_init(c); - if (c->x86 == 15) - intel_p4_mcheck_init(c); - break; - - case X86_VENDOR_CENTAUR: - if (c->x86 == 5) - winchip_mcheck_init(c); - break; - - default: - break; - } - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); -} - -static int __init mcheck_enable(char *str) -{ - mce_disabled = -1; - return 1; -} - -__setup("mce", mcheck_enable); - -#endif /* CONFIG_X86_OLD_MCE */ - -/* - * Old style boot options parsing. Only for compatibility. 
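/*
 * The hotplug callback above is a plain switch over notifier actions,
 * with DOWN_FAILED undoing exactly what DOWN_PREPARE did. A compact
 * model of that state machine, folding the FROZEN variants onto their
 * base events the way the code pairs its case labels:
 */
#include <stdio.h>

enum action { CPU_ONLINE, CPU_DEAD, CPU_DOWN_PREPARE, CPU_DOWN_FAILED };

static void cpu_event(enum action a, int cpu)
{
	switch (a) {
	case CPU_ONLINE:	/* create sysfs device, tell threshold code */
		printf("cpu%d: create mce device\n", cpu);
		break;
	case CPU_DEAD:		/* tear the device back down */
		printf("cpu%d: remove mce device\n", cpu);
		break;
	case CPU_DOWN_PREPARE:	/* quiesce: stop the timer, mask banks */
		printf("cpu%d: disable polling and banks\n", cpu);
		break;
	case CPU_DOWN_FAILED:	/* offline aborted: undo the quiesce */
		printf("cpu%d: re-enable polling and banks\n", cpu);
		break;
	}
}

int main(void)
{
	cpu_event(CPU_DOWN_PREPARE, 1);
	cpu_event(CPU_DOWN_FAILED, 1);	/* must mirror DOWN_PREPARE exactly */
	return 0;
}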
- */ -static int __init mcheck_disable(char *str) -{ - mce_disabled = 1; - return 1; -} -__setup("nomce", mcheck_disable); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce.h b/trunk/arch/x86/kernel/cpu/mcheck/mce.h index 84a552b458c8..ae9f628838f1 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce.h +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce.h @@ -1,38 +1,14 @@ #include #include -#ifdef CONFIG_X86_OLD_MCE void amd_mcheck_init(struct cpuinfo_x86 *c); void intel_p4_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); -#endif - -#ifdef CONFIG_X86_ANCIENT_MCE void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void intel_p6_mcheck_init(struct cpuinfo_x86 *c); void winchip_mcheck_init(struct cpuinfo_x86 *c); -extern int mce_p5_enable; -static inline int mce_p5_enabled(void) { return mce_p5_enable; } -static inline void enable_p5_mce(void) { mce_p5_enable = 1; } -#else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline int mce_p5_enabled(void) { return 0; } -static inline void enable_p5_mce(void) { } -#endif /* Call the installed machine check handler for this CPU setup. */ extern void (*machine_check_vector)(struct pt_regs *, long error_code); -#ifdef CONFIG_X86_OLD_MCE - extern int nr_mce_banks; -void intel_set_thermal_handler(void); - -#else - -static inline void intel_set_thermal_handler(void) { } - -#endif - -void intel_init_thermal(struct cpuinfo_x86 *c); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c new file mode 100644 index 000000000000..3552119b091d --- /dev/null +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -0,0 +1,76 @@ +/* + * mce.c - x86 Machine Check Exception Reporting + * (c) 2002 Alan Cox , Dave Jones + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +int mce_disabled; +int nr_mce_banks; + +EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ + +/* Handle unconfigured int18 (should never happen) */ +static void unexpected_machine_check(struct pt_regs *regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; + +/* This has to be run for each processor */ +void mcheck_init(struct cpuinfo_x86 *c) +{ + if (mce_disabled == 1) + return; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + if (c->x86 == 5) + intel_p5_mcheck_init(c); + if (c->x86 == 6) + intel_p6_mcheck_init(c); + if (c->x86 == 15) + intel_p4_mcheck_init(c); + break; + + case X86_VENDOR_CENTAUR: + if (c->x86 == 5) + winchip_mcheck_init(c); + break; + + default: + break; + } +} + +static int __init mcheck_disable(char *str) +{ + mce_disabled = 1; + return 1; +} + +static int __init mcheck_enable(char *str) +{ + mce_disabled = -1; + return 1; +} + +__setup("nomce", mcheck_disable); +__setup("mce", mcheck_enable); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c new file mode 100644 index 000000000000..289cc4815028 --- /dev/null +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -0,0 +1,1188 @@ +/* + * Machine check handler. + * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. + * Rest from unknown author(s). + * 2004 Andi Kleen. 
Rewrote most of it. + * Copyright 2008 Intel Corporation + * Author: Andi Kleen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MISC_MCELOG_MINOR 227 + +atomic_t mce_entry; + +static int mce_dont_init; + +/* + * Tolerant levels: + * 0: always panic on uncorrected errors, log corrected errors + * 1: panic or SIGBUS on uncorrected errors, log corrected errors + * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors + * 3: never panic or SIGBUS, log all errors (for testing only) + */ +static int tolerant = 1; +static int banks; +static u64 *bank; +static unsigned long notify_user; +static int rip_msr; +static int mce_bootlog = -1; +static atomic_t mce_events; + +static char trigger[128]; +static char *trigger_argv[2] = { trigger, NULL }; + +static DECLARE_WAIT_QUEUE_HEAD(mce_wait); + +/* MCA banks polled by the period polling timer for corrected events */ +DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { + [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL +}; + +/* Do initial initialization of a struct mce */ +void mce_setup(struct mce *m) +{ + memset(m, 0, sizeof(struct mce)); + m->cpu = smp_processor_id(); + rdtscll(m->tsc); +} + +/* + * Lockless MCE logging infrastructure. + * This avoids deadlocks on printk locks without having to break locks. Also + * separate MCEs from kernel messages to avoid bogus bug reports. + */ + +static struct mce_log mcelog = { + MCE_LOG_SIGNATURE, + MCE_LOG_LEN, +}; + +void mce_log(struct mce *mce) +{ + unsigned next, entry; + atomic_inc(&mce_events); + mce->finished = 0; + wmb(); + for (;;) { + entry = rcu_dereference(mcelog.next); + for (;;) { + /* When the buffer fills up discard new entries. Assume + that the earlier errors are the more interesting. */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); + return; + } + /* Old left over entry. Skip. */ + if (mcelog.entry[entry].finished) { + entry++; + continue; + } + break; + } + smp_rmb(); + next = entry + 1; + if (cmpxchg(&mcelog.next, entry, next) == entry) + break; + } + memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); + wmb(); + mcelog.entry[entry].finished = 1; + wmb(); + + set_bit(0, ¬ify_user); +} + +static void print_mce(struct mce *m) +{ + printk(KERN_EMERG "\n" + KERN_EMERG "HARDWARE ERROR\n" + KERN_EMERG + "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + m->cpu, m->mcgstatus, m->bank, m->status); + if (m->ip) { + printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" 
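mce_log() reserves a slot in the fixed ring without taking any lock: a cmpxchg on mcelog.next claims the next free entry, the record is copied in, and only then is the entry published to readers through the finished flag. A simplified user-space analog with C11 atomics (it deliberately omits the kernel version's skipping of stale unfinished slots and the RCU accessors):

/* Lock-free slot reservation in the style of mce_log(): racing
 * writers CAS the ring cursor forward, then publish via "finished".
 * Illustrative sketch only, not kernel code. */
#include <stdatomic.h>
#include <stdio.h>

#define LOG_LEN 32

struct entry { _Atomic int finished; int payload; };
static struct entry ring[LOG_LEN];
static _Atomic unsigned next;

static int log_event(int payload)
{
	unsigned slot;
	do {
		slot = atomic_load(&next);
		if (slot >= LOG_LEN)
			return -1;	/* full: drop, keep the older entries */
	} while (!atomic_compare_exchange_weak(&next, &slot, slot + 1));
	ring[slot].payload = payload;	/* the slot is exclusively ours now */
	atomic_store(&ring[slot].finished, 1);	/* publish to readers */
	return 0;
}

int main(void)
{
	log_event(42);
	printf("next=%u finished=%d\n", atomic_load(&next),
	       atomic_load(&ring[0].finished));
	return 0;
}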
: "", + m->cs, m->ip); + if (m->cs == __KERNEL_CS) + print_symbol("{%s}", m->ip); + printk("\n"); + } + printk(KERN_EMERG "TSC %llx ", m->tsc); + if (m->addr) + printk("ADDR %llx ", m->addr); + if (m->misc) + printk("MISC %llx ", m->misc); + printk("\n"); + printk(KERN_EMERG "This is not a software problem!\n"); + printk(KERN_EMERG "Run through mcelog --ascii to decode " + "and contact your hardware vendor\n"); +} + +static void mce_panic(char *msg, struct mce *backup, unsigned long start) +{ + int i; + + oops_begin(); + for (i = 0; i < MCE_LOG_LEN; i++) { + unsigned long tsc = mcelog.entry[i].tsc; + + if (time_before(tsc, start)) + continue; + print_mce(&mcelog.entry[i]); + if (backup && mcelog.entry[i].tsc == backup->tsc) + backup = NULL; + } + if (backup) + print_mce(backup); + panic(msg); +} + +int mce_available(struct cpuinfo_x86 *c) +{ + if (mce_dont_init) + return 0; + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); +} + +static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) +{ + if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { + m->ip = regs->ip; + m->cs = regs->cs; + } else { + m->ip = 0; + m->cs = 0; + } + if (rip_msr) { + /* Assume the RIP in the MSR is exact. Is this true? */ + m->mcgstatus |= MCG_STATUS_EIPV; + rdmsrl(rip_msr, m->ip); + m->cs = 0; + } +} + +/* + * Poll for corrected events or events that happened before reset. + * Those are just logged through /dev/mcelog. + * + * This is executed in standard interrupt context. + */ +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) +{ + struct mce m; + int i; + + mce_setup(&m); + + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + for (i = 0; i < banks; i++) { + if (!bank[i] || !test_bit(i, *b)) + continue; + + m.misc = 0; + m.addr = 0; + m.bank = i; + m.tsc = 0; + + barrier(); + rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + if (!(m.status & MCI_STATUS_VAL)) + continue; + + /* + * Uncorrected events are handled by the exception handler + * when it is enabled. But when the exception is disabled log + * everything. + * + * TBD do the same check for MCI_STATUS_EN here? + */ + if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) + continue; + + if (m.status & MCI_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + if (m.status & MCI_STATUS_ADDRV) + rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + + if (!(flags & MCP_TIMESTAMP)) + m.tsc = 0; + /* + * Don't get the IP here because it's unlikely to + * have anything to do with the actual error location. + */ + if (!(flags & MCP_DONTLOG)) { + mce_log(&m); + add_taint(TAINT_MACHINE_CHECK); + } + + /* + * Clear state for this bank. + */ + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + } + + /* + * Don't clear MCG_STATUS here because it's only defined for + * exceptions. + */ +} + +/* + * The actual machine check handler. This only handles real + * exceptions when something got corrupted coming in through int 18. + * + * This is executed in NMI context not subject to normal locking rules. This + * implies that most kernel services cannot be safely used. Don't even + * think about putting a printk in there! + */ +void do_machine_check(struct pt_regs * regs, long error_code) +{ + struct mce m, panicm; + u64 mcestart = 0; + int i; + int panicm_found = 0; + /* + * If no_way_out gets set, there is no safe way to recover from this + * MCE. If tolerant is cranked up, we'll try anyway. + */ + int no_way_out = 0; + /* + * If kill_it gets set, there might be a way to recover from this + * error. 
+ */ + int kill_it = 0; + DECLARE_BITMAP(toclear, MAX_NR_BANKS); + + atomic_inc(&mce_entry); + + if (notify_die(DIE_NMI, "machine check", regs, error_code, + 18, SIGKILL) == NOTIFY_STOP) + goto out2; + if (!banks) + goto out2; + + mce_setup(&m); + + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + /* if the restart IP is not valid, we're done for */ + if (!(m.mcgstatus & MCG_STATUS_RIPV)) + no_way_out = 1; + + rdtscll(mcestart); + barrier(); + + for (i = 0; i < banks; i++) { + __clear_bit(i, toclear); + if (!bank[i]) + continue; + + m.misc = 0; + m.addr = 0; + m.bank = i; + + rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + if ((m.status & MCI_STATUS_VAL) == 0) + continue; + + /* + * Non uncorrected errors are handled by machine_check_poll + * Leave them alone. + */ + if ((m.status & MCI_STATUS_UC) == 0) + continue; + + /* + * Set taint even when machine check was not enabled. + */ + add_taint(TAINT_MACHINE_CHECK); + + __set_bit(i, toclear); + + if (m.status & MCI_STATUS_EN) { + /* if PCC was set, there's no way out */ + no_way_out |= !!(m.status & MCI_STATUS_PCC); + /* + * If this error was uncorrectable and there was + * an overflow, we're in trouble. If no overflow, + * we might get away with just killing a task. + */ + if (m.status & MCI_STATUS_UC) { + if (tolerant < 1 || m.status & MCI_STATUS_OVER) + no_way_out = 1; + kill_it = 1; + } + } else { + /* + * Machine check event was not enabled. Clear, but + * ignore. + */ + continue; + } + + if (m.status & MCI_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + if (m.status & MCI_STATUS_ADDRV) + rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + + mce_get_rip(&m, regs); + mce_log(&m); + + /* Did this bank cause the exception? */ + /* Assume that the bank with uncorrectable errors did it, + and that there is only a single one. */ + if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { + panicm = m; + panicm_found = 1; + } + } + + /* If we didn't find an uncorrectable error, pick + the last one (shouldn't happen, just being safe). */ + if (!panicm_found) + panicm = m; + + /* + * If we have decided that we just CAN'T continue, and the user + * has not set tolerant to an insane level, give up and die. + */ + if (no_way_out && tolerant < 3) + mce_panic("Machine check", &panicm, mcestart); + + /* + * If the error seems to be unrecoverable, something should be + * done. Try to kill as little as possible. If we can kill just + * one task, do that. If the user has set the tolerance very + * high, don't try to do anything at all. + */ + if (kill_it && tolerant < 3) { + int user_space = 0; + + /* + * If the EIPV bit is set, it means the saved IP is the + * instruction which caused the MCE. + */ + if (m.mcgstatus & MCG_STATUS_EIPV) + user_space = panicm.ip && (panicm.cs & 3); + + /* + * If we know that the error was in user space, send a + * SIGBUS. Otherwise, panic if tolerance is low. + * + * force_sig() takes an awful lot of locks and has a slight + * risk of deadlocking. 
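The handler above boils down to two verdicts derived per bank: no_way_out (invalid restart IP, PCC set, or an uncorrectable overflow at low tolerance) forces a panic, while kill_it marks errors where killing the faulting task may suffice. A condensed restatement of that decision table, assuming the faulting context is user space when a signal is considered:

/* One-bank version of do_machine_check()'s severity logic.
 * Sketch: the kernel folds this across all banks and also checks
 * panic_on_oops and the user/kernel split before signalling. */
#include <stdint.h>
#include <stdio.h>

#define MCI_STATUS_VAL  (1ULL << 63)
#define MCI_STATUS_OVER (1ULL << 62)
#define MCI_STATUS_UC   (1ULL << 61)
#define MCI_STATUS_EN   (1ULL << 60)
#define MCI_STATUS_PCC  (1ULL << 57)

enum action { ACT_LOG, ACT_SIGBUS, ACT_PANIC };

static enum action classify(uint64_t status, int tolerant, int ripv)
{
	int no_way_out = !ripv;		/* cannot restart: fatal */
	int kill_it = 0;

	if ((status & (MCI_STATUS_VAL | MCI_STATUS_EN)) !=
	    (MCI_STATUS_VAL | MCI_STATUS_EN))
		return ACT_LOG;		/* invalid or not enabled */
	if (status & MCI_STATUS_PCC)
		no_way_out = 1;		/* processor context corrupt */
	if (status & MCI_STATUS_UC) {
		if (tolerant < 1 || (status & MCI_STATUS_OVER))
			no_way_out = 1;
		kill_it = 1;
	}
	if (no_way_out && tolerant < 3)
		return ACT_PANIC;
	if (kill_it && tolerant < 3)
		return ACT_SIGBUS;	/* user-space context assumed */
	return ACT_LOG;
}

int main(void)
{
	uint64_t s = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_UC;
	printf("%d\n", classify(s, 1, 1)); /* UC but restartable: SIGBUS */
	return 0;
}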
+
+#ifdef CONFIG_X86_MCE_INTEL
+/***
+ * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
+ * @cpu: The CPU on which the event occurred.
+ * @status: Event status information
+ *
+ * This function should be called by the thermal interrupt after the
+ * event has been processed and the decision was made to log the event
+ * further.
+ *
+ * The status parameter will be saved to the 'status' field of 'struct mce'
+ * and historically has been the register value of the
+ * MSR_IA32_THERMAL_STATUS (Intel) msr.
+ */
+void mce_log_therm_throt_event(__u64 status)
+{
+	struct mce m;
+
+	mce_setup(&m);
+	m.bank = MCE_THERMAL_BANK;
+	m.status = status;
+	mce_log(&m);
+}
+#endif /* CONFIG_X86_MCE_INTEL */
+
+/*
+ * Periodic polling timer for "silent" machine check errors.  If the
+ * poller finds an MCE, poll 2x faster.  When the poller finds no more
+ * errors, poll 2x slower (up to check_interval seconds).
+ */
+
+static int check_interval = 5 * 60; /* 5 minutes */
+static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
+static void mcheck_timer(unsigned long);
+static DEFINE_PER_CPU(struct timer_list, mce_timer);
+
+static void mcheck_timer(unsigned long data)
+{
+	struct timer_list *t = &per_cpu(mce_timer, data);
+	int *n;
+
+	WARN_ON(smp_processor_id() != data);
+
+	if (mce_available(&current_cpu_data))
+		machine_check_poll(MCP_TIMESTAMP,
+				&__get_cpu_var(mce_poll_banks));
+
+	/*
+	 * Alert userspace if needed.  If we logged an MCE, reduce the
+	 * polling interval, otherwise increase the polling interval.
+	 */
+	n = &__get_cpu_var(next_interval);
+	if (mce_notify_user()) {
+		*n = max(*n/2, HZ/100);
+	} else {
+		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+	}
+
+	t->expires = jiffies + *n;
+	add_timer(t);
+}
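The interval adaptation above is exponential in both directions: halve on activity with a floor of HZ/100, double when quiet up to check_interval. The same arithmetic in isolation, with HZ=1000 assumed for the sketch:

/* mcheck_timer()'s adaptive poll interval, extracted.  HZ and the
 * jiffies rounding are simplified away. */
#include <stdio.h>

#define HZ 1000
#define CHECK_INTERVAL (5 * 60 * HZ)	/* 5 minutes in jiffies */

static int next_interval(int n, int logged_event)
{
	if (logged_event)
		return n / 2 > HZ / 100 ? n / 2 : HZ / 100;
	return 2 * n < CHECK_INTERVAL ? 2 * n : CHECK_INTERVAL;
}

int main(void)
{
	int n = CHECK_INTERVAL;
	for (int i = 0; i < 5; i++)
		printf("burst -> %d jiffies\n", n = next_interval(n, 1));
	for (int i = 0; i < 5; i++)
		printf("quiet -> %d jiffies\n", n = next_interval(n, 0));
	return 0;
}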
+
+static void mce_do_trigger(struct work_struct *work)
+{
+	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+}
+
+static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
+
+/*
+ * Notify the user(s) about new machine check events.
+ * Can be called from interrupt context, but not from machine check/NMI
+ * context.
+ */
+int mce_notify_user(void)
+{
+	/* Not more than two messages every minute */
+	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
+
+	clear_thread_flag(TIF_MCE_NOTIFY);
+	if (test_and_clear_bit(0, &notify_user)) {
+		wake_up_interruptible(&mce_wait);
+
+		/*
+		 * There is no risk of missing notifications because
+		 * work_pending is always cleared before the function is
+		 * executed.
+		 */
+		if (trigger[0] && !work_pending(&mce_trigger_work))
+			schedule_work(&mce_trigger_work);
+
+		if (__ratelimit(&ratelimit))
+			printk(KERN_INFO "Machine check events logged\n");
+
+		return 1;
+	}
+	return 0;
+}
+
+/* see if the idle task needs to notify userspace */
+static int
+mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
+{
+	/* IDLE_END should be safe - interrupts are back on */
+	if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
+		mce_notify_user();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_idle_notifier = {
+	.notifier_call = mce_idle_callback,
+};
+
+static __init int periodic_mcheck_init(void)
+{
+	idle_notifier_register(&mce_idle_notifier);
+	return 0;
+}
+__initcall(periodic_mcheck_init);
+
+/*
+ * Initialize Machine Checks for a CPU.
+ */
+static int mce_cap_init(void)
+{
+	u64 cap;
+	unsigned b;
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	b = cap & 0xff;
+	if (b > MAX_NR_BANKS) {
+		printk(KERN_WARNING
+		       "MCE: Using only %u machine check banks out of %u\n",
+			MAX_NR_BANKS, b);
+		b = MAX_NR_BANKS;
+	}
+
+	/* Don't support asymmetric configurations today */
+	WARN_ON(banks != 0 && b != banks);
+	banks = b;
+	if (!bank) {
+		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
+		if (!bank)
+			return -ENOMEM;
+		memset(bank, 0xff, banks * sizeof(u64));
+	}
+
+	/* Use accurate RIP reporting if available. */
+	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
+		rip_msr = MSR_IA32_MCG_EIP;
+
+	return 0;
+}
+
+static void mce_init(void *dummy)
+{
+	u64 cap;
+	int i;
+	mce_banks_t all_banks;
+
+	/*
+	 * Log the machine checks left over from the previous reset.
+	 */
+	bitmap_fill(all_banks, MAX_NR_BANKS);
+	machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
+
+	set_in_cr4(X86_CR4_MCE);
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	if (cap & MCG_CTL_P)
+		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+	for (i = 0; i < banks; i++) {
+		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+	}
+}
+
+/* Add per CPU specific workarounds here */
+static void mce_cpu_quirks(struct cpuinfo_x86 *c)
+{
+	/* This should be disabled by the BIOS, but isn't always */
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		if (c->x86 == 15 && banks > 4)
+			/* disable GART TBL walk error reporting, which trips off
+			   incorrectly with the IOMMU & 3ware & Cerberus. */
+			clear_bit(10, (unsigned long *)&bank[4]);
+		if (c->x86 <= 17 && mce_bootlog < 0)
+			/* Lots of broken BIOS around that don't clear them
+			   by default and leave crap in there. Don't log. */
+			mce_bootlog = 0;
+	}
+}
+
+static void mce_cpu_features(struct cpuinfo_x86 *c)
+{
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		mce_intel_feature_init(c);
+		break;
+	case X86_VENDOR_AMD:
+		mce_amd_feature_init(c);
+		break;
+	default:
+		break;
+	}
+}
+
+static void mce_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	int *n = &__get_cpu_var(next_interval);
+
+	*n = check_interval * HZ;
+	if (!*n)
+		return;
+	setup_timer(t, mcheck_timer, smp_processor_id());
+	t->expires = round_jiffies(jiffies + *n);
+	add_timer(t);
+}
+
+/*
+ * Called for each booted CPU to set up machine checks.
+ * Must be called with preempt off.
+ */
+void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+{
+	if (!mce_available(c))
+		return;
+
+	if (mce_cap_init() < 0) {
+		mce_dont_init = 1;
+		return;
+	}
+	mce_cpu_quirks(c);
+
+	mce_init(NULL);
+	mce_cpu_features(c);
+	mce_init_timer();
+}
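mce_cap_init() trusts only a few fields of MCG_CAP: the bank count in bits 0-7, MCG_CTL presence in bit 8, extended-register presence in bit 9 and the extended-register count in bits 16-23 (the accurate RIP MSR is used only when at least 9 extended registers exist). A small decoder for that layout, fed a made-up sample value:

/* MCG_CAP field decoder mirroring mce_cap_init()'s checks.
 * The cap value below is a fabricated example, not real hardware. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cap = 0x0000000000090306ULL; /* sample: 6 banks, ext_cnt 9 */
	unsigned banks = cap & 0xff;
	int has_ctl = !!(cap & (1 << 8));
	int has_ext = !!(cap & (1 << 9));
	unsigned ext_cnt = (cap >> 16) & 0xff;

	printf("banks=%u MCG_CTL=%d ext=%d ext_cnt=%u rip_msr=%s\n",
	       banks, has_ctl, has_ext, ext_cnt,
	       (has_ext && ext_cnt >= 9) ? "usable" : "no");
	return 0;
}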
+
+/*
+ * Character device to read and clear the MCE log.
+ */
+
+static DEFINE_SPINLOCK(mce_state_lock);
+static int open_count;	/* #times opened */
+static int open_exclu;	/* already open exclusive? */
+
+static int mce_open(struct inode *inode, struct file *file)
+{
+	lock_kernel();
+	spin_lock(&mce_state_lock);
+
+	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
+		spin_unlock(&mce_state_lock);
+		unlock_kernel();
+		return -EBUSY;
+	}
+
+	if (file->f_flags & O_EXCL)
+		open_exclu = 1;
+	open_count++;
+
+	spin_unlock(&mce_state_lock);
+	unlock_kernel();
+
+	return nonseekable_open(inode, file);
+}
+
+static int mce_release(struct inode *inode, struct file *file)
+{
+	spin_lock(&mce_state_lock);
+
+	open_count--;
+	open_exclu = 0;
+
+	spin_unlock(&mce_state_lock);
+
+	return 0;
+}
+
+static void collect_tscs(void *data)
+{
+	unsigned long *cpu_tsc = (unsigned long *)data;
+
+	rdtscll(cpu_tsc[smp_processor_id()]);
+}
+
+static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
+			loff_t *off)
+{
+	unsigned long *cpu_tsc;
+	static DEFINE_MUTEX(mce_read_mutex);
+	unsigned prev, next;
+	char __user *buf = ubuf;
+	int i, err;
+
+	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
+	if (!cpu_tsc)
+		return -ENOMEM;
+
+	mutex_lock(&mce_read_mutex);
+	next = rcu_dereference(mcelog.next);
+
+	/* Only supports full reads right now */
+	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
+		mutex_unlock(&mce_read_mutex);
+		kfree(cpu_tsc);
+		return -EINVAL;
+	}
+
+	err = 0;
+	prev = 0;
+	do {
+		for (i = prev; i < next; i++) {
+			unsigned long start = jiffies;
+
+			while (!mcelog.entry[i].finished) {
+				if (time_after_eq(jiffies, start + 2)) {
+					memset(mcelog.entry + i, 0,
+					       sizeof(struct mce));
+					goto timeout;
+				}
+				cpu_relax();
+			}
+			smp_rmb();
+			err |= copy_to_user(buf, mcelog.entry + i,
+					    sizeof(struct mce));
+			buf += sizeof(struct mce);
+timeout:
+			;
+		}
+
+		memset(mcelog.entry + prev, 0,
+		       (next - prev) * sizeof(struct mce));
+		prev = next;
+		next = cmpxchg(&mcelog.next, prev, 0);
+	} while (next != prev);
+
+	synchronize_sched();
+
+	/*
+	 * Collect entries that were still getting written before the
+	 * synchronize.
+	 */
+	on_each_cpu(collect_tscs, cpu_tsc, 1);
+	for (i = next; i < MCE_LOG_LEN; i++) {
+		if (mcelog.entry[i].finished &&
+		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
+			err |= copy_to_user(buf, mcelog.entry+i,
+					    sizeof(struct mce));
+			smp_rmb();
+			buf += sizeof(struct mce);
+			memset(&mcelog.entry[i], 0, sizeof(struct mce));
+		}
+	}
+	mutex_unlock(&mce_read_mutex);
+	kfree(cpu_tsc);
+	return err ? -EFAULT : buf - ubuf;
+}
+
+static unsigned int mce_poll(struct file *file, poll_table *wait)
+{
+	poll_wait(file, &mce_wait, wait);
+	if (rcu_dereference(mcelog.next))
+		return POLLIN | POLLRDNORM;
+	return 0;
+}
+
+static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	int __user *p = (int __user *)arg;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	switch (cmd) {
+	case MCE_GET_RECORD_LEN:
+		return put_user(sizeof(struct mce), p);
+	case MCE_GET_LOG_LEN:
+		return put_user(MCE_LOG_LEN, p);
+	case MCE_GETCLEAR_FLAGS: {
+		unsigned flags;
+
+		do {
+			flags = mcelog.flags;
+		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+		return put_user(flags, p);
+	}
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations mce_chrdev_ops = {
+	.open = mce_open,
+	.release = mce_release,
+	.read = mce_read,
+	.poll = mce_poll,
+	.unlocked_ioctl = mce_ioctl,
+};
+
+static struct miscdevice mce_log_device = {
+	MISC_MCELOG_MINOR,
+	"mcelog",
+	&mce_chrdev_ops,
+};
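Note the contract mce_read() imposes on consumers: only whole-buffer reads from offset 0 are honored, so a client should size its buffer from the two length ioctls first. A sketch of such a consumer; the ioctl numbers here follow the historical asm/mce.h definitions and should be treated as an assumption, verify them against your kernel headers:

/* Minimal /dev/mcelog consumer.  Assumes the mcelog misc device is
 * registered and the ioctl encodings below match asm/mce.h. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
#define MCE_GET_LOG_LEN    _IOR('M', 2, int)

int main(void)
{
	int fd = open("/dev/mcelog", O_RDONLY);
	int rec = 0, len = 0;
	if (fd < 0) { perror("mcelog"); return 1; }
	ioctl(fd, MCE_GET_RECORD_LEN, &rec);
	ioctl(fd, MCE_GET_LOG_LEN, &len);
	char *buf = malloc((size_t)rec * len);
	/* partial reads return -EINVAL, so always ask for the full log */
	ssize_t n = read(fd, buf, (size_t)rec * len);
	if (n >= 0 && rec > 0)
		printf("%zd bytes = %zd records of %d bytes\n", n, n / rec, rec);
	free(buf);
	close(fd);
	return 0;
}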
+
+/*
+ * Old style boot options parsing. Only for compatibility.
+ */
+static int __init mcheck_disable(char *str)
+{
+	mce_dont_init = 1;
+	return 1;
+}
+
+/* mce=off disables machine check.
+   mce=TOLERANCELEVEL (number, see above)
+   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+   mce=nobootlog Don't log MCEs from before booting. */
+static int __init mcheck_enable(char *str)
+{
+	if (!strcmp(str, "off"))
+		mce_dont_init = 1;
+	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
+		mce_bootlog = str[0] == 'b';
+	else if (isdigit(str[0]))
+		get_option(&str, &tolerant);
+	else
+		printk("mce= argument %s ignored. Please use /sys", str);
+	return 1;
+}
+
+__setup("nomce", mcheck_disable);
+__setup("mce=", mcheck_enable);
+
+/*
+ * Sysfs support
+ */
+
+/*
+ * Disable machine checks on suspend and shutdown. We can't really handle
+ * them later.
+ */
+static int mce_disable(void)
+{
+	int i;
+
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+	return 0;
+}
+
+static int mce_suspend(struct sys_device *dev, pm_message_t state)
+{
+	return mce_disable();
+}
+
+static int mce_shutdown(struct sys_device *dev)
+{
+	return mce_disable();
+}
+
+/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
+   Only one CPU is active at this time, the others get readded later using
+   CPU hotplug. */
+static int mce_resume(struct sys_device *dev)
+{
+	mce_init(NULL);
+	mce_cpu_features(&current_cpu_data);
+	return 0;
+}
+
+static void mce_cpu_restart(void *data)
+{
+	del_timer_sync(&__get_cpu_var(mce_timer));
+	if (mce_available(&current_cpu_data))
+		mce_init(NULL);
+	mce_init_timer();
+}
+
+/* Reinit MCEs after user configuration changes */
+static void mce_restart(void)
+{
+	on_each_cpu(mce_cpu_restart, NULL, 1);
+}
+
+static struct sysdev_class mce_sysclass = {
+	.suspend = mce_suspend,
+	.shutdown = mce_shutdown,
+	.resume = mce_resume,
+	.name = "machinecheck",
+};
+
+DEFINE_PER_CPU(struct sys_device, device_mce);
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
+
+/* Why are there no generic functions for this? */
+#define ACCESSOR(name, var, start) \
+	static ssize_t show_ ## name(struct sys_device *s,		\
+				     struct sysdev_attribute *attr,	\
+				     char *buf) {			\
+		return sprintf(buf, "%lx\n", (unsigned long)var);	\
+	}								\
+	static ssize_t set_ ## name(struct sys_device *s,		\
+				    struct sysdev_attribute *attr,	\
+				    const char *buf, size_t siz) {	\
+		char *end;						\
+		unsigned long new = simple_strtoul(buf, &end, 0);	\
+		if (end == buf) return -EINVAL;				\
+		var = new;						\
+		start;							\
+		return end-buf;						\
+	}								\
+	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
+
+static struct sysdev_attribute *bank_attrs;
+
+static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			 char *buf)
+{
+	u64 b = bank[attr - bank_attrs];
+	return sprintf(buf, "%llx\n", b);
+}
+
+static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			const char *buf, size_t siz)
+{
+	char *end;
+	u64 new = simple_strtoull(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	bank[attr - bank_attrs] = new;
+	mce_restart();
+	return end-buf;
+}
+
+static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+			    char *buf)
+{
+	strcpy(buf, trigger);
+	strcat(buf, "\n");
+	return strlen(trigger) + 1;
+}
+
+static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+			   const char *buf, size_t siz)
+{
+	char *p;
+	int len;
+	strncpy(trigger, buf, sizeof(trigger));
+	trigger[sizeof(trigger)-1] = 0;
+	len = strlen(trigger);
+	p = strchr(trigger, '\n');
+	if (*p) *p = 0;
+	return len;
+}
+
+static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
+static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
+ACCESSOR(check_interval, check_interval, mce_restart())
+static struct sysdev_attribute *mce_attributes[] = {
+	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
+	NULL
+};
+
+static cpumask_var_t mce_device_initialized;
+
+/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
+static __cpuinit int mce_create_device(unsigned int cpu)
+{
+	int err;
+	int i;
+
+	if (!mce_available(&boot_cpu_data))
+		return -EIO;
+
+	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
+	per_cpu(device_mce, cpu).id = cpu;
+	per_cpu(device_mce, cpu).cls = &mce_sysclass;
+
+	err = sysdev_register(&per_cpu(device_mce, cpu));
+	if (err)
+		return err;
+
+	for (i = 0; mce_attributes[i]; i++) {
+		err = sysdev_create_file(&per_cpu(device_mce, cpu),
+					 mce_attributes[i]);
+		if (err)
+			goto error;
+	}
+	for (i = 0; i < banks; i++) {
+		err = sysdev_create_file(&per_cpu(device_mce, cpu),
+					 &bank_attrs[i]);
+		if (err)
+			goto error2;
+	}
+	cpumask_set_cpu(cpu, mce_device_initialized);
+
+	return 0;
+error2:
+	while (--i >= 0) {
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+				   &bank_attrs[i]);
+	}
+error:
+	while (--i >= 0) {
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+				   mce_attributes[i]);
+	}
+	sysdev_unregister(&per_cpu(device_mce, cpu));
+
+	return err;
+}
+
+static __cpuinit void mce_remove_device(unsigned int cpu)
+{
+	int i;
+
+	if (!cpumask_test_cpu(cpu, mce_device_initialized))
+		return;
+
+	for (i = 0; mce_attributes[i]; i++)
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+				   mce_attributes[i]);
+	for (i = 0; i < banks; i++)
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+				   &bank_attrs[i]);
+	sysdev_unregister(&per_cpu(device_mce, cpu));
+	cpumask_clear_cpu(cpu, mce_device_initialized);
+}
+
+/* Make sure there are no machine checks on offlined CPUs. */
+static void mce_disable_cpu(void *h)
+{
+	int i;
+	unsigned long action = *(unsigned long *)h;
+
+	if (!mce_available(&current_cpu_data))
+		return;
+	if (!(action & CPU_TASKS_FROZEN))
+		cmci_clear();
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+}
+
+static void mce_reenable_cpu(void *h)
+{
+	int i;
+	unsigned long action = *(unsigned long *)h;
+
+	if (!mce_available(&current_cpu_data))
+		return;
+	if (!(action & CPU_TASKS_FROZEN))
+		cmci_reenable();
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+}
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
+				      unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct timer_list *t = &per_cpu(mce_timer, cpu);
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		mce_create_device(cpu);
+		if (threshold_cpu_callback)
+			threshold_cpu_callback(action, cpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		if (threshold_cpu_callback)
+			threshold_cpu_callback(action, cpu);
+		mce_remove_device(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		del_timer_sync(t);
+		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		break;
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+		t->expires = round_jiffies(jiffies +
+						__get_cpu_var(next_interval));
+		add_timer_on(t, cpu);
+		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		break;
+	case CPU_POST_DEAD:
+		/* intentionally ignoring frozen here */
+		cmci_rediscover(cpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_cpu_notifier __cpuinitdata = {
+	.notifier_call = mce_cpu_callback,
+};
+
+static __init int mce_init_banks(void)
+{
+	int i;
+
+	bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
+			     GFP_KERNEL);
+	if (!bank_attrs)
+		return -ENOMEM;
+
+	for (i = 0; i < banks; i++) {
+		struct sysdev_attribute *a = &bank_attrs[i];
+		a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
+		if (!a->attr.name)
+			goto nomem;
+		a->attr.mode = 0644;
+		a->show = show_bank;
+		a->store = set_bank;
+	}
+	return 0;
+
+nomem:
+	while (--i >= 0)
+		kfree(bank_attrs[i].attr.name);
+	kfree(bank_attrs);
+	bank_attrs = NULL;
+	return -ENOMEM;
+}
+
+static __init int mce_init_device(void)
+{
+	int err;
+	int i = 0;
+
+	if (!mce_available(&boot_cpu_data))
+		return -EIO;
+
+	zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+
+	err = mce_init_banks();
+	if (err)
+		return err;
+
+	err = sysdev_class_register(&mce_sysclass);
+	if (err)
+		return err;
+
+	for_each_online_cpu(i) {
+		err = mce_create_device(i);
+		if (err)
+			return err;
+	}
+
+	register_hotcpu_notifier(&mce_cpu_notifier);
+	misc_register(&mce_log_device);
+	return err;
+}
+
+device_initcall(mce_init_device);
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index ddae21620bda..56dde9c4bc96 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -13,22 +13,22 @@
  *
  *  All MC4_MISCi registers are shared between multi-cores
  */
+
+#include
+#include
+#include
 #include
-#include
 #include
-#include
-#include
-#include
+#include
 #include
-#include
-#include
-#include
 #include
-
+#include
+#include
 #include
-#include
 #include
 #include
+#include
+#include
 
 #define PFX               "mce_threshold: "
 #define VERSION           "version 1.1.1"
@@ -48,26 +48,26 @@
 #define MCG_XBLK_ADDR      0xC0000400
 
 struct threshold_block {
-	unsigned int block;
-	unsigned int bank;
-	unsigned int cpu;
-	u32 address;
-	u16 interrupt_enable;
-	u16 threshold_limit;
-	struct kobject kobj;
-	struct list_head miscj;
+	unsigned int		block;
+	unsigned int		bank;
+	unsigned int		cpu;
+	u32			address;
+	u16			interrupt_enable;
+	u16			threshold_limit;
+	struct kobject		kobj;
+	struct list_head	miscj;
 };
 
 /* defaults used early on boot */
 static struct threshold_block threshold_defaults = {
-	.interrupt_enable = 0,
-	.threshold_limit = THRESHOLD_MAX,
+	.interrupt_enable	= 0,
+	.threshold_limit	= THRESHOLD_MAX,
 };
 
 struct threshold_bank {
-	struct kobject *kobj;
-	struct threshold_block *blocks;
-	cpumask_var_t cpus;
+	struct kobject		*kobj;
+	struct threshold_block	*blocks;
+	cpumask_var_t		cpus;
 };
 static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
@@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void);
  */
 struct thresh_restart {
-	struct threshold_block *b;
-	int reset;
-	u16 old_limit;
+	struct threshold_block	*b;
+	int			reset;
+	u16			old_limit;
 };
 
 /* must be called with correct cpu affinity */
@@ -110,7 +110,6 @@ static void threshold_restart_bank(void *_tr)
 	} else if (tr->old_limit) {	/* change limit w/o reset */
 		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
 		    (tr->old_limit - tr->b->threshold_limit);
-
 		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
@@ -126,11 +125,11 @@
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
+	unsigned int bank, block;
 	unsigned int cpu = smp_processor_id();
+	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
-	unsigned int bank, block;
 	struct thresh_restart tr;
-	u8 lvt_off;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -141,7 +140,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 				if (!address)
 					break;
 				address += MCG_XBLK_ADDR;
-			} else
+			}
+			else
 				++address;
 
 			if (rdmsr_safe(address, &low, &high))
@@ -193,9 +193,9 @@
  */
 static void amd_threshold_interrupt(void)
 {
-	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
 	struct mce m;
+	u32 low = 0, high = 0, address = 0;
 
 	mce_setup(&m);
 
@@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void)
 		if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			if (block == 0) {
+			if (block == 0)
 				address = MSR_IA32_MC0_MISC + bank * 4;
-			} else if (block == 1) {
+			else if (block == 1) {
 				address = (low & MASK_BLKPTR_LO) >> 21;
 				if (!address)
 					break;
 				address += MCG_XBLK_ADDR;
-			} else {
-				++address;
 			}
+			else
+				++address;
 
 			if (rdmsr_safe(address, &low, &high))
 				break;
@@ -229,10 +229,8 @@ static void amd_threshold_interrupt(void)
 			    (high & MASK_LOCKED_HI))
 				continue;
 
-			/*
-			 * Log the machine check that caused the threshold
-			 * event.
-			 */
+			/* Log the machine check that caused the threshold
+			   event. */
 			machine_check_poll(MCP_TIMESTAMP,
 					   &__get_cpu_var(mce_poll_banks));
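Both loops above walk the same MSR chain: block 0 of a bank lives at MC<bank>_MISC, block 1 is located through the BLKPTR field in block 0's high/low pair, and subsequent blocks are at consecutive MSR addresses. The address computation in isolation; MASK_BLKPTR_LO = 0xFF000000 is assumed here to match the usual definition in this driver, and the inputs are samples:

/* The threshold-block MSR address walk, extracted from the loops in
 * mce_amd_feature_init()/amd_threshold_interrupt().  Sketch only. */
#include <stdint.h>
#include <stdio.h>

#define MSR_IA32_MC0_MISC 0x403
#define MCG_XBLK_ADDR     0xC0000400u
#define MASK_BLKPTR_LO    0xFF000000u	/* assumed definition */

static uint32_t next_block_addr(unsigned bank, unsigned block,
				uint32_t prev_addr, uint32_t prev_low)
{
	if (block == 0)
		return MSR_IA32_MC0_MISC + bank * 4;
	if (block == 1) {
		uint32_t off = (prev_low & MASK_BLKPTR_LO) >> 21;
		return off ? MCG_XBLK_ADDR + off : 0;	/* 0 = chain ends */
	}
	return prev_addr + 1;	/* later blocks are consecutive */
}

int main(void)
{
	uint32_t low = 0x01000000;	/* sample BLKPTR value */
	uint32_t a0 = next_block_addr(4, 0, 0, 0);
	uint32_t a1 = next_block_addr(4, 1, a0, low);
	printf("block0=%#x block1=%#x block2=%#x\n", (unsigned)a0,
	       (unsigned)a1, (unsigned)next_block_addr(4, 2, a1, 0));
	return 0;
}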
@@ -256,52 +254,48 @@
 
 struct threshold_attr {
 	struct attribute attr;
-	ssize_t (*show) (struct threshold_block *, char *);
-	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
+	ssize_t(*show) (struct threshold_block *, char *);
+	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 
-#define SHOW_FIELDS(name)						\
-static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
-{									\
-	return sprintf(buf, "%lx\n", (unsigned long) b->name);		\
+#define SHOW_FIELDS(name)                                           \
+static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
+{                                                                   \
+	return sprintf(buf, "%lx\n", (unsigned long) b->name);      \
 }
 SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(threshold_limit)
 
-static ssize_t
-store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
+static ssize_t store_interrupt_enable(struct threshold_block *b,
+				      const char *buf, size_t count)
 {
+	char *end;
 	struct thresh_restart tr;
-	unsigned long new;
-
-	if (strict_strtoul(buf, 0, &new) < 0)
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
 		return -EINVAL;
-
 	b->interrupt_enable = !!new;
 
-	tr.b		= b;
-	tr.reset	= 0;
-	tr.old_limit	= 0;
-
+	tr.b = b;
+	tr.reset = 0;
+	tr.old_limit = 0;
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
-	return size;
+	return end - buf;
 }
 
-static ssize_t
-store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
+static ssize_t store_threshold_limit(struct threshold_block *b,
+				     const char *buf, size_t count)
 {
+	char *end;
 	struct thresh_restart tr;
-	unsigned long new;
-
-	if (strict_strtoul(buf, 0, &new) < 0)
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
 		return -EINVAL;
-
 	if (new > THRESHOLD_MAX)
 		new = THRESHOLD_MAX;
 	if (new < 1)
 		new = 1;
-
 	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
 	tr.b = b;
@@ -309,12 +303,12 @@
 
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
-	return size;
+	return end - buf;
 }
 
 struct threshold_block_cross_cpu {
-	struct threshold_block *tb;
-	long retval;
+	struct threshold_block	*tb;
+	long			retval;
 };
 
 static void local_error_count_handler(void *_tbcc)
@@ -344,13 +338,16 @@ static ssize_t store_error_count(struct threshold_block *b,
 	return 1;
 }
 
-#define RW_ATTR(val)							\
-static struct threshold_attr val = {					\
-	.attr	= {.name = __stringify(val), .mode = 0644 },		\
-	.show	= show_## val,						\
-	.store	= store_## val,						\
+#define THRESHOLD_ATTR(_name,_mode,_show,_store) {            \
+	.attr = {.name = __stringify(_name), .mode = _mode }, \
+	.show = _show,                                        \
+	.store = _store,                                      \
 };
 
+#define RW_ATTR(name)                                           \
+static struct threshold_attr name =                             \
+	THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
+
 RW_ATTR(interrupt_enable);
 RW_ATTR(threshold_limit);
 RW_ATTR(error_count);
@@ -362,17 +359,15 @@ static struct attribute *default_attrs[] = {
 	NULL
 };
 
-#define to_block(k)	container_of(k, struct threshold_block, kobj)
-#define to_attr(a)	container_of(a, struct threshold_attr, attr)
+#define to_block(k) container_of(k, struct threshold_block, kobj)
+#define to_attr(a) container_of(a, struct threshold_attr, attr)
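The store hooks just reverted to simple_strtoul share one parse-and-clamp pattern: reject an empty parse via the end pointer, then clamp the new limit into [1, THRESHOLD_MAX]. Restated with standard strtoul; THRESHOLD_MAX = 0xFFF is an assumption of the sketch:

/* Parse-and-clamp in the style of store_threshold_limit().
 * Returns bytes consumed (like the sysfs hook) or -1 on bad input. */
#include <stdio.h>
#include <stdlib.h>

#define THRESHOLD_MAX 0xFFFUL	/* assumed counter width */

static long parse_limit(const char *buf, unsigned long *out)
{
	char *end;
	unsigned long new = strtoul(buf, &end, 0);

	if (end == buf)
		return -1;	/* -EINVAL in the kernel version */
	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new == 0)
		new = 1;
	*out = new;
	return end - buf;
}

int main(void)
{
	unsigned long v;
	printf("%ld -> %lu\n", parse_limit("99999", &v), v);	/* clamped */
	printf("%ld\n", parse_limit("junk", &v));		/* rejected */
	return 0;
}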
 
 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
-
 	ret = a->show ? a->show(b, buf) : -EIO;
-
 	return ret;
 }
 
@@ -382,20 +377,18 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
-
 	ret = a->store ? a->store(b, buf, count) : -EIO;
-
 	return ret;
 }
 
 static struct sysfs_ops threshold_ops = {
-	.show			= show,
-	.store			= store,
+	.show = show,
+	.store = store,
 };
 
 static struct kobj_type threshold_ktype = {
-	.sysfs_ops		= &threshold_ops,
-	.default_attrs		= default_attrs,
+	.sysfs_ops = &threshold_ops,
+	.default_attrs = default_attrs,
 };
 
 static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
@@ -403,9 +396,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 					       unsigned int block,
 					       u32 address)
 {
-	struct threshold_block *b = NULL;
-	u32 low, high;
 	int err;
+	u32 low, high;
+	struct threshold_block *b = NULL;
 
 	if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
 		return 0;
@@ -428,21 +421,20 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 	if (!b)
 		return -ENOMEM;
 
-	b->block		= block;
-	b->bank			= bank;
-	b->cpu			= cpu;
-	b->address		= address;
-	b->interrupt_enable	= 0;
-	b->threshold_limit	= THRESHOLD_MAX;
+	b->block = block;
+	b->bank = bank;
+	b->cpu = cpu;
+	b->address = address;
+	b->interrupt_enable = 0;
+	b->threshold_limit = THRESHOLD_MAX;
 
 	INIT_LIST_HEAD(&b->miscj);
 
-	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
+	if (per_cpu(threshold_banks, cpu)[bank]->blocks)
 		list_add(&b->miscj,
 			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
-	} else {
+	else
 		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
-	}
 
 	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
 				   per_cpu(threshold_banks, cpu)[bank]->kobj,
@@ -455,9 +447,8 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 		if (!address)
 			return 0;
 		address += MCG_XBLK_ADDR;
-	} else {
+	} else
 		++address;
-	}
 
 	err = allocate_threshold_blocks(cpu, bank, ++block, address);
 	if (err)
@@ -509,14 +500,13 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (!b)
 			goto out;
 
-		err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
+		err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
 
 		cpumask_copy(b->cpus, cpu_core_mask(cpu));
 		per_cpu(threshold_banks, cpu)[bank] = b;
-
 		goto out;
 	}
 #endif
@@ -532,7 +522,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		goto out;
 	}
 
-	b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
+	b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
 	if (!b->kobj)
 		goto out_free;
 
@@ -552,7 +542,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (i == cpu)
 			continue;
 
-		err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
+		err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
@@ -615,13 +605,15 @@ static void deallocate_threshold_block(unsigned int cpu,
 
 static void threshold_remove_bank(unsigned int cpu, int bank)
 {
+	int i = 0;
 	struct threshold_bank *b;
 	char name[32];
-	int i = 0;
 
 	b = per_cpu(threshold_banks, cpu)[bank];
+
 	if (!b)
 		return;
+
 	if (!b->blocks)
 		goto free_out;
 
@@ -630,9 +622,8 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 #ifdef CONFIG_SMP
 	/* sibling symlink */
 	if (shared_bank[bank] && b->blocks->cpu != cpu) {
-		sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
+		sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
 		per_cpu(threshold_banks, cpu)[bank] = NULL;
-
 		return;
 	}
 #endif
@@ -642,7 +633,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 		if (i == cpu)
 			continue;
 
-		sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
+		sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
 		per_cpu(threshold_banks, i)[bank] = NULL;
 	}
 
@@ -668,9 +659,12 @@ static void threshold_remove_device(unsigned int cpu)
 }
 
 /* get notified when a cpu comes on/off */
-static void __cpuinit
-amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
+static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
+						    unsigned int cpu)
 {
+	if (cpu >= NR_CPUS)
+		return;
+
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
@@ -692,12 +686,11 @@ static __init int threshold_init_device(void)
 	/* to hit CPUs online before the notifier is up */
 	for_each_online_cpu(lcpu) {
 		int err = threshold_create_device(lcpu);
-
 		if (err)
 			return err;
 	}
 	threshold_cpu_callback = amd_64_threshold_cpu_callback;
-
 	return 0;
 }
+
 device_initcall(threshold_init_device);
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c
deleted file mode 100644
index 2b011d2d8579..000000000000
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Common code for Intel machine checks
- */
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-#include "mce.h"
-
-void intel_init_thermal(struct cpuinfo_x86 *c)
-{
-	unsigned int cpu = smp_processor_id();
-	int tm2 = 0;
-	u32 l, h;
-
-	/* Thermal monitoring depends on ACPI and clock modulation*/
-	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
-		return;
-
-	/*
-	 * First check if its enabled already, in which case there might
-	 * be some SMM goo which handles it, so we can't even put a handler
-	 * since it might be delivered via SMI already:
-	 */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
-	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
-		return;
-	}
-
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
-		tm2 = 1;
-
-	/* Check whether a vector already exists */
-	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
-		       cpu, (h & APIC_VECTOR_MASK));
-		return;
-	}
-
-	/* We'll mask the thermal vector in the lapic till we're ready: */
-	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
-	apic_write(APIC_LVTTHMR, h);
-
-	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT,
-	      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
-
-	intel_set_thermal_handler();
-
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
-	/* Unmask the thermal vector: */
-	l = apic_read(APIC_LVTTHMR);
-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-
-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
-	       cpu, tm2 ? "TM2" : "TM1");
-
-	/* enable thermal throttle processing */
-	atomic_set(&therm_throt_en, 1);
-}
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index f2ef6952c400..65a0fceedcd7 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -16,8 +16,6 @@
 #include
 #include
 
-#include "mce.h"
-
 asmlinkage void smp_thermal_interrupt(void)
 {
 	__u64 msr_val;
@@ -28,13 +26,67 @@ asmlinkage void smp_thermal_interrupt(void)
 	irq_enter();
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
+	if (therm_throt_process(msr_val & 1))
 		mce_log_therm_throt_event(msr_val);
 
 	inc_irq_stat(irq_thermal_count);
 	irq_exit();
 }
 
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int tm2 = 0;
+	unsigned int cpu = smp_processor_id();
+
+	if (!cpu_has(c, X86_FEATURE_ACPI))
+		return;
+
+	if (!cpu_has(c, X86_FEATURE_ACC))
+		return;
+
+	/* first check if TM1 is already enabled by the BIOS, in which
+	 * case there might be some SMM goo which handles it, so we can't even
+	 * put a handler since it might be delivered via SMI already.
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+		return;
+	}
+
+	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
+		tm2 = 1;
+
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal LVT vector (%#x) already "
+		       "installed\n", cpu, (h & APIC_VECTOR_MASK));
+		return;
+	}
+
+	h = THERMAL_APIC_VECTOR;
+	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);
+	apic_write(APIC_LVTTHMR, h);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
+
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+	       cpu, tm2 ? "TM2" : "TM1");
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
+	return;
+}
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -56,9 +108,6 @@ static int cmci_supported(int *banks)
 {
 	u64 cap;
 
-	if (mce_cmci_disabled || mce_ignore_ce)
-		return 0;
-
 	/*
 	 * Vendor check is not strictly needed, but the initial
 	 * initialization is vendor keyed and this
@@ -82,7 +131,7 @@ static int cmci_supported(int *banks)
 static void intel_threshold_interrupt(void)
 {
 	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
-	mce_notify_irq();
+	mce_notify_user();
 }
 
 static void print_update(char *type, int *hdr, int num)
@@ -198,7 +247,7 @@ void cmci_rediscover(int dying)
 		return;
 	cpumask_copy(old, &current->cpus_allowed);
 
-	for_each_online_cpu(cpu) {
+	for_each_online_cpu (cpu) {
 		if (cpu == dying)
 			continue;
 		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c b/trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 70b710420f74..a74af128efc9 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -6,14 +6,15 @@
  * This file contains routines to check for non-fatal MCEs every 15s
  *
  */
-#include
-#include
-#include
-#include
-#include
-#include
+
 #include
+#include
+#include
+#include
+#include
+#include
 #include
+#include
 #include
 #include
@@ -21,9 +22,9 @@
 
 #include "mce.h"
 
-static int firstbank;
+static int firstbank;
 
-#define MCE_RATE	(15*HZ)	/* timer rate is 15s */
+#define MCE_RATE	15*HZ	/* timer rate is 15s */
 
 static void mce_checkregs(void *info)
 {
@@ -33,24 +34,23 @@ static void mce_checkregs(void *info)
 	for (i = firstbank; i < nr_mce_banks; i++) {
 		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
 
-		if (!(high & (1<<31)))
-			continue;
-
-		printk(KERN_INFO "MCE: The hardware reports a non fatal, "
-			"correctable incident occurred on CPU %d.\n",
+		if (high & (1<<31)) {
+			printk(KERN_INFO "MCE: The hardware reports a non "
+				"fatal, correctable incident occurred on "
+				"CPU %d.\n",
 				smp_processor_id());
-
-		printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
-
-		/*
-		 * Scrub the error so we don't pick it up in MCE_RATE
-		 * seconds time:
-		 */
-		wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-
-		/* Serialize: */
-		wmb();
-		add_taint(TAINT_MACHINE_CHECK);
+			printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
+
+			/*
+			 * Scrub the error so we don't pick it up in MCE_RATE
+			 * seconds time.
+			 */
+			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
 	}
 }
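mce_checkregs() keys off bit 31 of the status high word, which is bit 63 (the VAL flag) of the full 64-bit MCi_STATUS register. A decoder for the architectural status layout, fed a sample value:

/* MCi_STATUS field decoder; the input is a fabricated example. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t s = 0xB200000000000000ULL;	/* sample: VAL|UC|EN set */
	printf("VAL=%d OVER=%d UC=%d EN=%d MISCV=%d ADDRV=%d PCC=%d\n",
	       (int)(s >> 63 & 1), (int)(s >> 62 & 1), (int)(s >> 61 & 1),
	       (int)(s >> 60 & 1), (int)(s >> 59 & 1), (int)(s >> 58 & 1),
	       (int)(s >> 57 & 1));
	printf("MCA error code=0x%04x\n", (unsigned)(s & 0xffff));
	return 0;
}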
@@ -77,17 +77,16 @@ static int __init init_nonfatal_mce_checker(void)
 
 	/* Some Athlons misbehave when we frob bank 0 */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-	    boot_cpu_data.x86 == 6)
-		firstbank = 1;
+		boot_cpu_data.x86 == 6)
+			firstbank = 1;
 	else
-		firstbank = 0;
+			firstbank = 0;
 
 	/*
 	 * Check for non-fatal errors every MCE_RATE s
 	 */
 	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
 	printk(KERN_INFO "Machine check exception polling timer started.\n");
-
 	return 0;
 }
 module_init(init_nonfatal_mce_checker);
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/p4.c b/trunk/arch/x86/kernel/cpu/mcheck/p4.c
index 82cee108a2d3..f53bdcbaf382 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/p4.c
@@ -2,17 +2,18 @@
  * P4 specific Machine Check Exception Reporting
  */
-#include
-#include
-#include
 #include
+#include
+#include
+#include
 #include
-#include
 #include
 #include
-#include
 #include
+#include
+
+#include
 
 #include "mce.h"
 
@@ -35,7 +36,6 @@ static int mce_num_extended_msrs;
 
 #ifdef CONFIG_X86_MCE_P4THERMAL
-
 static void unexpected_thermal_interrupt(struct pt_regs *regs)
 {
 	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
@@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
 	add_taint(TAINT_MACHINE_CHECK);
 }
 
-/* P4/Xeon Thermal transition interrupt handler: */
+/* P4/Xeon Thermal transition interrupt handler */
 static void intel_thermal_interrupt(struct pt_regs *regs)
 {
 	__u64 msr_val;
@@ -51,12 +51,11 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
 	ack_APIC_irq();
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
+	therm_throt_process(msr_val & 0x1);
 }
 
-/* Thermal interrupt handler for this CPU setup: */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
-						unexpected_thermal_interrupt;
+/* Thermal interrupt handler for this CPU setup */
+static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
 
 void smp_thermal_interrupt(struct pt_regs *regs)
 {
@@ -66,15 +65,67 @@ void smp_thermal_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
-void intel_set_thermal_handler(void)
+/* P4/Xeon Thermal regulation detect and init */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
 {
+	u32 l, h;
+	unsigned int cpu = smp_processor_id();
+
+	/* Thermal monitoring */
+	if (!cpu_has(c, X86_FEATURE_ACPI))
+		return;	/* -ENODEV */
+
+	/* Clock modulation */
+	if (!cpu_has(c, X86_FEATURE_ACC))
+		return;	/* -ENODEV */
+
+	/* first check if its enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already -zwanem.
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
+				cpu);
+		return;	/* -EBUSY */
+	}
+
+	/* check whether a vector already exists, temporarily masked? */
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
+				"installed\n",
+			cpu, (h & APIC_VECTOR_MASK));
+		return;	/* -EBUSY */
+	}
+
+	/* The temperature transition interrupt handler setup */
+	h = THERMAL_APIC_VECTOR;		/* our delivery vector */
+	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
+	apic_write(APIC_LVTTHMR, h);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+	/* ok we're good to go... */
 	vendor_thermal_interrupt = intel_thermal_interrupt;
-}
 
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
+	return;
+}
 #endif /* CONFIG_X86_MCE_P4THERMAL */
 
+
 /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 {
 	u32 h;
 
@@ -92,9 +143,9 @@ static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 
 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
+	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
-	int recover = 1;
 	int i;
 
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -106,9 +157,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	if (mce_num_extended_msrs > 0) {
 		struct intel_mce_extended_msrs dbg;
-
 		intel_get_extended_msrs(&dbg);
-
 		printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
 			"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
 			"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
@@ -122,7 +171,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-
 			misc[0] = addr[0] = '\0';
 			if (high & (1<<29))
 				recover |= 1;
@@ -148,7 +196,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		panic("Unable to continue");
 
 	printk(KERN_EMERG "Attempting to continue.\n");
-
 	/*
 	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 	 * recoverable/continuable.This will allow BIOS to look at the MSRs
@@ -170,6 +217,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
+
 void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/p5.c b/trunk/arch/x86/kernel/cpu/mcheck/p5.c
index 015f481ab1b0..c9f77ea69edc 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/p5.c
@@ -2,10 +2,11 @@
  * P5 specific Machine Check Exception Reporting
  * (C) Copyright 2002 Alan Cox
  */
-#include
-#include
-#include
+
 #include
+#include
+#include
+#include
 #include
 #include
@@ -14,58 +15,39 @@
 
 #include "mce.h"
 
-/* By default disabled */
-int mce_p5_enable;
-
-/* Machine check handler for Pentium class Intel CPUs: */
+/* Machine check handler for Pentium class Intel */
 static void pentium_machine_check(struct pt_regs *regs, long error_code)
 {
 	u32 loaddr, hi, lotype;
-
 	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
 	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
-
-	printk(KERN_EMERG
-		"CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
-		smp_processor_id(), loaddr, lotype);
-
-	if (lotype & (1<<5)) {
-		printk(KERN_EMERG
-			"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
-			smp_processor_id());
-	}
-
+	printk(KERN_EMERG "CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
+	if (lotype&(1<<5))
+		printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
 	add_taint(TAINT_MACHINE_CHECK);
 }
 
-/* Set up machine check reporting for processors with Intel style MCE: */
+/* Set up machine check reporting for processors with Intel style MCE */
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 
-	/* Check for MCE support: */
+	/*Check for MCE support */
 	if (!cpu_has(c, X86_FEATURE_MCE))
 		return;
 
-#ifdef CONFIG_X86_OLD_MCE
-	/* Default P5 to off as its often misconnected: */
+	/* Default P5 to off as its often misconnected */
 	if (mce_disabled != -1)
 		return;
-#endif
-
 	machine_check_vector = pentium_machine_check;
-	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
-	/* Read registers before enabling: */
+	/* Read registers before enabling */
 	rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
 	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
-	printk(KERN_INFO
-	       "Intel old style machine check architecture supported.\n");
+	printk(KERN_INFO "Intel old style machine check architecture supported.\n");
 
-	/* Enable MCE: */
+	/* Enable MCE */
 	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO
-	       "Intel old style machine check reporting enabled on CPU#%d.\n",
-	       smp_processor_id());
+	printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
 }
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/p6.c b/trunk/arch/x86/kernel/cpu/mcheck/p6.c
index 43c24e667457..2ac52d7b434b 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/p6.c
@@ -2,10 +2,11 @@
  * P6 specific Machine Check Exception Reporting
  * (C) Copyright 2002 Alan Cox
 */
-#include
-#include
-#include
+
 #include
+#include
+#include
+#include
 #include
 #include
@@ -17,9 +18,9 @@
 /* Machine Check Handler For PII/PIII */
 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
+	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
-	int recover = 1;
 	int i;
 
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -34,16 +35,12 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-
-			misc[0] = '\0';
-			addr[0] = '\0';
-
+			misc[0] = addr[0] = '\0';
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
 			high &= ~(1<<31);
-
 			if (high & (1<<27)) {
 				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
 				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -52,7 +49,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
 				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
-
 			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
 		}
@@ -67,17 +63,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	/*
 	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 	 * recoverable/continuable.This will allow BIOS to look at the MSRs
-	 * for errors if the OS could not log the error:
+	 * for errors if the OS could not log the error.
 	 */
 	for (i = 0; i < nr_mce_banks; i++) {
 		unsigned int msr;
-
 		msr = MSR_IA32_MC0_STATUS+i*4;
 		rdmsr(msr, low, high);
 		if (high & (1<<31)) {
-			/* Clear it: */
+			/* Clear it */
 			wrmsr(msr, 0UL, 0UL);
-			/* Serialize: */
+			/* Serialize */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
 		}
@@ -86,7 +81,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
-/* Set up machine check reporting for processors with Intel style MCE: */
+/* Set up machine check reporting for processors with Intel style MCE */
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -102,7 +97,6 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 
 	/* Ok machine check is available */
 	machine_check_vector = intel_machine_check;
-	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
 	printk(KERN_INFO "Intel machine check architecture supported.\n");
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 7b1ae2e20ba5..d5ae2243f0b9 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -1,7 +1,7 @@
 /*
+ *
  * Thermal throttle event support code (such as syslog messaging and rate
  * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
- *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
@@ -13,43 +13,43 @@
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
-#include
-#include
+
 #include
 #include
 #include
-
+#include
+#include
+#include
 #include
 
 /* How long to wait between reporting thermal events */
-#define CHECK_INTERVAL		(300 * HZ)
+#define CHECK_INTERVAL              (300 * HZ)
 
 static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-
-atomic_t therm_throt_en	= ATOMIC_INIT(0);
+atomic_t therm_throt_en = ATOMIC_INIT(0);
 
 #ifdef CONFIG_SYSFS
-#define define_therm_throt_sysdev_one_ro(_name)				\
-	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
-
-#define define_therm_throt_sysdev_show_func(name)			\
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,	\
-					struct sysdev_attribute *attr,	\
-					      char *buf)		\
-{									\
-	unsigned int cpu = dev->id;					\
-	ssize_t ret;							\
-									\
-	preempt_disable();	/* CPU hotplug */			\
-	if (cpu_online(cpu))						\
-		ret = sprintf(buf, "%lu\n",				\
-			      per_cpu(thermal_throttle_##name, cpu));	\
-	else								\
-		ret = 0;						\
-	preempt_enable();						\
-									\
-	return ret;							\
+#define define_therm_throt_sysdev_one_ro(_name)                              \
+	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
+
+#define define_therm_throt_sysdev_show_func(name)                            \
+static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,        \
+					struct sysdev_attribute *attr,	      \
+					      char *buf)                      \
+{                                                                             \
+	unsigned int cpu = dev->id;                                           \
+	ssize_t ret;                                                          \
+                                                                              \
+	preempt_disable();              /* CPU hotplug */                     \
+	if (cpu_online(cpu))                                                  \
+		ret = sprintf(buf, "%lu\n",                                   \
+			      per_cpu(thermal_throttle_##name, cpu));         \
+	else                                                                  \
+		ret = 0;                                                      \
+	preempt_enable();                                                     \
+                                                                              \
+	return ret;                                                           \
}
 
 define_therm_throt_sysdev_show_func(count);
@@ -61,8 +61,8 @@ static struct attribute *thermal_throttle_attrs[] = {
 };
 
 static struct attribute_group thermal_throttle_attr_group = {
-	.attrs	= thermal_throttle_attrs,
-	.name	= "thermal_throttle"
+	.attrs = thermal_throttle_attrs,
+	.name = "thermal_throttle"
 };
 #endif /* CONFIG_SYSFS */
@@
-110,11 +110,10 @@ int therm_throt_process(int curr) } #ifdef CONFIG_SYSFS -/* Add/Remove thermal_throttle interface for CPU device: */ +/* Add/Remove thermal_throttle interface for CPU device */ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) { - return sysfs_create_group(&sys_dev->kobj, - &thermal_throttle_attr_group); + return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); } static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) @@ -122,21 +121,19 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); } -/* Mutex protecting device creation against CPU hotplug: */ +/* Mutex protecting device creation against CPU hotplug */ static DEFINE_MUTEX(therm_cpu_lock); /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int -thermal_throttle_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; struct sys_device *sys_dev; int err = 0; sys_dev = get_cpu_sysdev(cpu); - switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: diff --git a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c b/trunk/arch/x86/kernel/cpu/mcheck/threshold.c index d746df2909c9..23ee9e730f78 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/threshold.c @@ -17,7 +17,7 @@ static void default_threshold_interrupt(void) void (*mce_threshold_vector)(void) = default_threshold_interrupt; -asmlinkage void smp_threshold_interrupt(void) +asmlinkage void mce_threshold_interrupt(void) { exit_idle(); irq_enter(); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/winchip.c b/trunk/arch/x86/kernel/cpu/mcheck/winchip.c index 81b02487090b..2a043d89811d 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/winchip.c @@ -2,10 +2,11 @@ * IDT Winchip specific Machine Check Exception Reporting * (C) Copyright 2002 Alan Cox */ -#include -#include -#include + #include +#include +#include +#include #include #include @@ -13,7 +14,7 @@ #include "mce.h" -/* Machine check handler for WinChip C6: */ +/* Machine check handler for WinChip C6 */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); @@ -24,18 +25,12 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) void winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; - machine_check_vector = winchip_machine_check; - /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); - rdmsr(MSR_IDT_FCR1, lo, hi); lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ lo &= ~(1<<4); /* Enable MCE */ wrmsr(MSR_IDT_FCR1, lo, hi); - set_in_cr4(X86_CR4_MCE); - - printk(KERN_INFO - "Winchip machine check reporting enabled on CPU#0.\n"); + printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); } diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index de74f0a3e0ed..a4742a340d8d 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -963,8 +963,6 @@ END(\sym) #ifdef CONFIG_SMP apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt -apicinterrupt REBOOT_VECTOR \ - reboot_interrupt smp_reboot_interrupt #endif #ifdef CONFIG_X86_UV 
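Aside: the therm_throt.c hunks above register a sysfs attribute group named "thermal_throttle" with a read-only per-CPU "count" attribute. A minimal userspace sketch for reading that counter follows; the exact /sys path is an assumption derived from the sysdev registration above, so verify it against the running kernel:

    /* Read the per-CPU thermal throttle event counter exported by the
     * "thermal_throttle" attribute group registered in therm_throt.c.
     * The path below is assumed from the sysdev layout; adjust as needed. */
    #include <stdio.h>

    int main(void)
    {
        const char *path =
            "/sys/devices/system/cpu/cpu0/thermal_throttle/count";
        unsigned long count;
        FILE *f = fopen(path, "r");

        if (!f) {
            perror(path);
            return 1;
        }
        if (fscanf(f, "%lu", &count) == 1)
            printf("CPU0 thermal throttle events: %lu\n", count);
        fclose(f);
        return 0;
    }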
@@ -996,15 +994,10 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ #endif apicinterrupt THRESHOLD_APIC_VECTOR \ - threshold_interrupt smp_threshold_interrupt + threshold_interrupt mce_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ thermal_interrupt smp_thermal_interrupt -#ifdef CONFIG_X86_MCE -apicinterrupt MCE_SELF_VECTOR \ - mce_self_interrupt smp_mce_self_interrupt -#endif - #ifdef CONFIG_SMP apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ call_function_single_interrupt smp_call_function_single_interrupt @@ -1386,7 +1379,7 @@ errorentry xen_stack_segment do_stack_segment errorentry general_protection do_general_protection errorentry page_fault do_page_fault #ifdef CONFIG_X86_MCE -paranoidzeroentry machine_check *machine_check_vector(%rip) +paranoidzeroentry machine_check do_machine_check #endif /* diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index b0cdde6932f5..38287b5f116e 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -12,7 +12,6 @@ #include #include #include -#include #include atomic_t irq_err_count; @@ -97,22 +96,12 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); -# ifdef CONFIG_X86_MCE_THRESHOLD +# ifdef CONFIG_X86_64 seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); # endif -#endif -#ifdef CONFIG_X86_NEW_MCE - seq_printf(p, "%*s: ", prec, "MCE"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); - seq_printf(p, " Machine check exceptions\n"); - seq_printf(p, "%*s: ", prec, "MCP"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); - seq_printf(p, " Machine check polls\n"); #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) @@ -196,13 +185,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #endif #ifdef CONFIG_X86_MCE sum += irq_stats(cpu)->irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +# ifdef CONFIG_X86_64 sum += irq_stats(cpu)->irq_threshold_count; # endif -#endif -#ifdef CONFIG_X86_NEW_MCE - sum += per_cpu(mce_exception_count, cpu); - sum += per_cpu(mce_poll_count, cpu); #endif return sum; } diff --git a/trunk/arch/x86/kernel/irqinit.c b/trunk/arch/x86/kernel/irqinit.c index 696f0e475c2d..267c6624c77f 100644 --- a/trunk/arch/x86/kernel/irqinit.c +++ b/trunk/arch/x86/kernel/irqinit.c @@ -173,9 +173,6 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); - - /* IPI used for rebooting/stopping */ - alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); #endif #endif /* CONFIG_SMP */ } diff --git a/trunk/arch/x86/kernel/signal.c b/trunk/arch/x86/kernel/signal.c index 4c578751e94e..0a813b17b172 100644 --- a/trunk/arch/x86/kernel/signal.c +++ b/trunk/arch/x86/kernel/signal.c @@ -24,11 +24,11 @@ #include #include #include -#include #ifdef CONFIG_X86_64 #include #include +#include #endif /* CONFIG_X86_64 */ #include @@ -856,10 +856,10 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { -#ifdef CONFIG_X86_NEW_MCE +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) /* notify userspace of pending MCEs */ if 
(thread_info_flags & _TIF_MCE_NOTIFY) - mce_notify_process(); + mce_notify_user(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ /* deal with pending signal delivery */ diff --git a/trunk/arch/x86/kernel/smp.c b/trunk/arch/x86/kernel/smp.c index ec1de97600e7..28f5fb495a66 100644 --- a/trunk/arch/x86/kernel/smp.c +++ b/trunk/arch/x86/kernel/smp.c @@ -150,40 +150,14 @@ void native_send_call_func_ipi(const struct cpumask *mask) * this function calls the 'stop' function on all other CPUs in the system. */ -asmlinkage void smp_reboot_interrupt(void) -{ - ack_APIC_irq(); - irq_enter(); - stop_this_cpu(NULL); - irq_exit(); -} - static void native_smp_send_stop(void) { unsigned long flags; - unsigned long wait; if (reboot_force) return; - /* - * Use an own vector here because smp_call_function - * does lots of things not suitable in a panic situation. - * On most systems we could also use an NMI here, - * but there are a few systems around where NMI - * is problematic so stay with an non NMI for now - * (this implies we cannot stop CPUs spinning with irq off - * currently) - */ - if (num_online_cpus() > 1) { - apic->send_IPI_allbutself(REBOOT_VECTOR); - - /* Don't wait longer than a second */ - wait = USEC_PER_SEC; - while (num_online_cpus() > 1 && wait--) - udelay(1); - } - + smp_call_function(stop_this_cpu, NULL, 0); local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index 1e1e27b7d438..07d60c870ce2 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -798,15 +798,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) return new_kesp; } -#endif - +#else asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) { } -asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) +asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) { } +#endif /* * 'math_state_restore()' saves the current math information in the diff --git a/trunk/crypto/Kconfig b/trunk/crypto/Kconfig index 4dfdd03e708f..74d0e622a515 100644 --- a/trunk/crypto/Kconfig +++ b/trunk/crypto/Kconfig @@ -241,11 +241,6 @@ config CRYPTO_XTS key size 256, 384 or 512 bits. This implementation currently can't handle a sectorsize which is not a multiple of 16 bytes. -config CRYPTO_FPU - tristate - select CRYPTO_BLKCIPHER - select CRYPTO_MANAGER - comment "Hash modes" config CRYPTO_HMAC @@ -491,7 +486,6 @@ config CRYPTO_AES_NI_INTEL select CRYPTO_AES_X86_64 select CRYPTO_CRYPTD select CRYPTO_ALGAPI - select CRYPTO_FPU help Use Intel AES-NI instructions for AES algorithm. @@ -511,10 +505,6 @@ config CRYPTO_AES_NI_INTEL See for more information. - In addition to AES cipher algorithm support, the - acceleration for some popular block cipher mode is supported - too, including ECB, CBC, CTR, LRW, PCBC, XTS. 
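Aside: the traps.c hunk above keeps weak no-op definitions of smp_thermal_interrupt() and mce_threshold_interrupt() so that configurations without the corresponding MCE code still link, while a strong definition elsewhere overrides them. A self-contained sketch of that weak-symbol pattern, using a hypothetical do_event() handler and the GCC/Clang __attribute__((weak)) extension:

    /* Weak-symbol fallback, as used for the interrupt stubs above.
     * If no other translation unit defines do_event(), this no-op is
     * linked in; a strong definition elsewhere silently wins. */
    #include <stdio.h>

    void __attribute__((weak)) do_event(void)
    {
        /* default: nothing to do */
    }

    int main(void)
    {
        do_event();   /* calls the weak stub unless overridden */
        printf("do_event() returned\n");
        return 0;
    }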
- config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" select CRYPTO_ALGAPI diff --git a/trunk/crypto/algboss.c b/trunk/crypto/algboss.c index 9908dd830c26..6906f92aeac0 100644 --- a/trunk/crypto/algboss.c +++ b/trunk/crypto/algboss.c @@ -280,13 +280,29 @@ static struct notifier_block cryptomgr_notifier = { static int __init cryptomgr_init(void) { - return crypto_register_notifier(&cryptomgr_notifier); + int err; + + err = testmgr_init(); + if (err) + return err; + + err = crypto_register_notifier(&cryptomgr_notifier); + if (err) + goto free_testmgr; + + return 0; + +free_testmgr: + testmgr_exit(); + return err; } static void __exit cryptomgr_exit(void) { int err = crypto_unregister_notifier(&cryptomgr_notifier); BUG_ON(err); + + testmgr_exit(); } subsys_initcall(cryptomgr_init); diff --git a/trunk/crypto/api.c b/trunk/crypto/api.c index d5944f92b416..fd2545decb28 100644 --- a/trunk/crypto/api.c +++ b/trunk/crypto/api.c @@ -217,11 +217,14 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) alg = crypto_alg_lookup(name, type, mask); if (!alg) { - request_module("%s", name); + char tmp[CRYPTO_MAX_ALG_NAME]; + + request_module(name); if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask & - CRYPTO_ALG_NEED_FALLBACK)) - request_module("%s-all", name); + CRYPTO_ALG_NEED_FALLBACK) && + snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp)) + request_module(tmp); alg = crypto_alg_lookup(name, type, mask); } @@ -577,17 +580,20 @@ EXPORT_SYMBOL_GPL(crypto_alloc_tfm); void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm) { struct crypto_alg *alg; + int size; if (unlikely(!mem)) return; alg = tfm->__crt_alg; + size = ksize(mem); if (!tfm->exit && alg->cra_exit) alg->cra_exit(tfm); crypto_exit_ops(tfm); crypto_mod_put(alg); - kzfree(mem); + memset(mem, 0, size); + kfree(mem); } EXPORT_SYMBOL_GPL(crypto_destroy_tfm); diff --git a/trunk/crypto/cryptd.c b/trunk/crypto/cryptd.c index ae5fa99d5d36..d14b22658d7a 100644 --- a/trunk/crypto/cryptd.c +++ b/trunk/crypto/cryptd.c @@ -586,24 +586,20 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; - struct crypto_tfm *tfm; + struct crypto_ablkcipher *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) return ERR_PTR(-EINVAL); - type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); - type |= CRYPTO_ALG_TYPE_BLKCIPHER; - mask &= ~CRYPTO_ALG_TYPE_MASK; - mask |= (CRYPTO_ALG_GENIV | CRYPTO_ALG_TYPE_BLKCIPHER_MASK); - tfm = crypto_alloc_base(cryptd_alg_name, type, mask); + tfm = crypto_alloc_ablkcipher(cryptd_alg_name, type, mask); if (IS_ERR(tfm)) return ERR_CAST(tfm); - if (tfm->__crt_alg->cra_module != THIS_MODULE) { - crypto_free_tfm(tfm); + if (crypto_ablkcipher_tfm(tfm)->__crt_alg->cra_module != THIS_MODULE) { + crypto_free_ablkcipher(tfm); return ERR_PTR(-EINVAL); } - return __cryptd_ablkcipher_cast(__crypto_ablkcipher_cast(tfm)); + return __cryptd_ablkcipher_cast(tfm); } EXPORT_SYMBOL_GPL(cryptd_alloc_ablkcipher); diff --git a/trunk/crypto/internal.h b/trunk/crypto/internal.h index 113579a82dff..fc76e1f37fc3 100644 --- a/trunk/crypto/internal.h +++ b/trunk/crypto/internal.h @@ -121,6 +121,9 @@ int crypto_register_notifier(struct notifier_block *nb); int crypto_unregister_notifier(struct notifier_block *nb); int crypto_probing_notify(unsigned long val, void *v); +int __init testmgr_init(void); +void testmgr_exit(void); + static inline void crypto_alg_put(struct crypto_alg *alg) { if 
(atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) diff --git a/trunk/crypto/pcompress.c b/trunk/crypto/pcompress.c index bcadc03726b7..ca9a4af91efe 100644 --- a/trunk/crypto/pcompress.c +++ b/trunk/crypto/pcompress.c @@ -26,7 +26,6 @@ #include #include -#include #include "internal.h" diff --git a/trunk/crypto/tcrypt.c b/trunk/crypto/tcrypt.c index d59ba5079d14..c3c9124209a1 100644 --- a/trunk/crypto/tcrypt.c +++ b/trunk/crypto/tcrypt.c @@ -27,7 +27,6 @@ #include #include #include "tcrypt.h" -#include "internal.h" /* * Need slab memory for testing (size in number of pages). @@ -397,16 +396,16 @@ static void test_hash_speed(const char *algo, unsigned int sec, struct scatterlist sg[TVMEMSIZE]; struct crypto_hash *tfm; struct hash_desc desc; - static char output[1024]; + char output[1024]; int i; int ret; - printk(KERN_INFO "\ntesting speed of %s\n", algo); + printk("\ntesting speed of %s\n", algo); tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { - printk(KERN_ERR "failed to load transform for %s: %ld\n", algo, + printk("failed to load transform for %s: %ld\n", algo, PTR_ERR(tfm)); return; } @@ -415,7 +414,7 @@ static void test_hash_speed(const char *algo, unsigned int sec, desc.flags = 0; if (crypto_hash_digestsize(tfm) > sizeof(output)) { - printk(KERN_ERR "digestsize(%u) > outputbuffer(%zu)\n", + printk("digestsize(%u) > outputbuffer(%zu)\n", crypto_hash_digestsize(tfm), sizeof(output)); goto out; } @@ -428,14 +427,12 @@ static void test_hash_speed(const char *algo, unsigned int sec, for (i = 0; speed[i].blen != 0; i++) { if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { - printk(KERN_ERR - "template (%u) too big for tvmem (%lu)\n", + printk("template (%u) too big for tvmem (%lu)\n", speed[i].blen, TVMEMSIZE * PAGE_SIZE); goto out; } - printk(KERN_INFO "test%3u " - "(%5u byte blocks,%5u bytes per update,%4u updates): ", + printk("test%3u (%5u byte blocks,%5u bytes per update,%4u updates): ", i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); if (sec) @@ -446,7 +443,7 @@ static void test_hash_speed(const char *algo, unsigned int sec, speed[i].plen, output); if (ret) { - printk(KERN_ERR "hashing failed ret=%d\n", ret); + printk("hashing failed ret=%d\n", ret); break; } } @@ -469,255 +466,239 @@ static void test_available(void) static inline int tcrypt_test(const char *alg) { - int ret; - - ret = alg_test(alg, alg, 0, 0); - /* non-fips algs return -EINVAL in fips mode */ - if (fips_enabled && ret == -EINVAL) - ret = 0; - return ret; + return alg_test(alg, alg, 0, 0); } -static int do_test(int m) +static void do_test(int m) { int i; - int ret = 0; switch (m) { case 0: for (i = 1; i < 200; i++) - ret += do_test(i); + do_test(i); break; case 1: - ret += tcrypt_test("md5"); + tcrypt_test("md5"); break; case 2: - ret += tcrypt_test("sha1"); + tcrypt_test("sha1"); break; case 3: - ret += tcrypt_test("ecb(des)"); - ret += tcrypt_test("cbc(des)"); + tcrypt_test("ecb(des)"); + tcrypt_test("cbc(des)"); break; case 4: - ret += tcrypt_test("ecb(des3_ede)"); - ret += tcrypt_test("cbc(des3_ede)"); + tcrypt_test("ecb(des3_ede)"); + tcrypt_test("cbc(des3_ede)"); break; case 5: - ret += tcrypt_test("md4"); + tcrypt_test("md4"); break; case 6: - ret += tcrypt_test("sha256"); + tcrypt_test("sha256"); break; case 7: - ret += tcrypt_test("ecb(blowfish)"); - ret += tcrypt_test("cbc(blowfish)"); + tcrypt_test("ecb(blowfish)"); + tcrypt_test("cbc(blowfish)"); break; case 8: - ret += tcrypt_test("ecb(twofish)"); - ret += tcrypt_test("cbc(twofish)"); + 
tcrypt_test("ecb(twofish)"); + tcrypt_test("cbc(twofish)"); break; case 9: - ret += tcrypt_test("ecb(serpent)"); + tcrypt_test("ecb(serpent)"); break; case 10: - ret += tcrypt_test("ecb(aes)"); - ret += tcrypt_test("cbc(aes)"); - ret += tcrypt_test("lrw(aes)"); - ret += tcrypt_test("xts(aes)"); - ret += tcrypt_test("ctr(aes)"); - ret += tcrypt_test("rfc3686(ctr(aes))"); + tcrypt_test("ecb(aes)"); + tcrypt_test("cbc(aes)"); + tcrypt_test("lrw(aes)"); + tcrypt_test("xts(aes)"); + tcrypt_test("rfc3686(ctr(aes))"); break; case 11: - ret += tcrypt_test("sha384"); + tcrypt_test("sha384"); break; case 12: - ret += tcrypt_test("sha512"); + tcrypt_test("sha512"); break; case 13: - ret += tcrypt_test("deflate"); + tcrypt_test("deflate"); break; case 14: - ret += tcrypt_test("ecb(cast5)"); + tcrypt_test("ecb(cast5)"); break; case 15: - ret += tcrypt_test("ecb(cast6)"); + tcrypt_test("ecb(cast6)"); break; case 16: - ret += tcrypt_test("ecb(arc4)"); + tcrypt_test("ecb(arc4)"); break; case 17: - ret += tcrypt_test("michael_mic"); + tcrypt_test("michael_mic"); break; case 18: - ret += tcrypt_test("crc32c"); + tcrypt_test("crc32c"); break; case 19: - ret += tcrypt_test("ecb(tea)"); + tcrypt_test("ecb(tea)"); break; case 20: - ret += tcrypt_test("ecb(xtea)"); + tcrypt_test("ecb(xtea)"); break; case 21: - ret += tcrypt_test("ecb(khazad)"); + tcrypt_test("ecb(khazad)"); break; case 22: - ret += tcrypt_test("wp512"); + tcrypt_test("wp512"); break; case 23: - ret += tcrypt_test("wp384"); + tcrypt_test("wp384"); break; case 24: - ret += tcrypt_test("wp256"); + tcrypt_test("wp256"); break; case 25: - ret += tcrypt_test("ecb(tnepres)"); + tcrypt_test("ecb(tnepres)"); break; case 26: - ret += tcrypt_test("ecb(anubis)"); - ret += tcrypt_test("cbc(anubis)"); + tcrypt_test("ecb(anubis)"); + tcrypt_test("cbc(anubis)"); break; case 27: - ret += tcrypt_test("tgr192"); + tcrypt_test("tgr192"); break; case 28: - ret += tcrypt_test("tgr160"); + tcrypt_test("tgr160"); break; case 29: - ret += tcrypt_test("tgr128"); + tcrypt_test("tgr128"); break; case 30: - ret += tcrypt_test("ecb(xeta)"); + tcrypt_test("ecb(xeta)"); break; case 31: - ret += tcrypt_test("pcbc(fcrypt)"); + tcrypt_test("pcbc(fcrypt)"); break; case 32: - ret += tcrypt_test("ecb(camellia)"); - ret += tcrypt_test("cbc(camellia)"); + tcrypt_test("ecb(camellia)"); + tcrypt_test("cbc(camellia)"); break; case 33: - ret += tcrypt_test("sha224"); + tcrypt_test("sha224"); break; case 34: - ret += tcrypt_test("salsa20"); + tcrypt_test("salsa20"); break; case 35: - ret += tcrypt_test("gcm(aes)"); + tcrypt_test("gcm(aes)"); break; case 36: - ret += tcrypt_test("lzo"); + tcrypt_test("lzo"); break; case 37: - ret += tcrypt_test("ccm(aes)"); + tcrypt_test("ccm(aes)"); break; case 38: - ret += tcrypt_test("cts(cbc(aes))"); + tcrypt_test("cts(cbc(aes))"); break; case 39: - ret += tcrypt_test("rmd128"); + tcrypt_test("rmd128"); break; case 40: - ret += tcrypt_test("rmd160"); + tcrypt_test("rmd160"); break; case 41: - ret += tcrypt_test("rmd256"); + tcrypt_test("rmd256"); break; case 42: - ret += tcrypt_test("rmd320"); + tcrypt_test("rmd320"); break; case 43: - ret += tcrypt_test("ecb(seed)"); + tcrypt_test("ecb(seed)"); break; case 44: - ret += tcrypt_test("zlib"); - break; - - case 45: - ret += tcrypt_test("rfc4309(ccm(aes))"); + tcrypt_test("zlib"); break; case 100: - ret += tcrypt_test("hmac(md5)"); + tcrypt_test("hmac(md5)"); break; case 101: - ret += tcrypt_test("hmac(sha1)"); + tcrypt_test("hmac(sha1)"); break; case 102: - ret += tcrypt_test("hmac(sha256)"); + 
tcrypt_test("hmac(sha256)"); break; case 103: - ret += tcrypt_test("hmac(sha384)"); + tcrypt_test("hmac(sha384)"); break; case 104: - ret += tcrypt_test("hmac(sha512)"); + tcrypt_test("hmac(sha512)"); break; case 105: - ret += tcrypt_test("hmac(sha224)"); + tcrypt_test("hmac(sha224)"); break; case 106: - ret += tcrypt_test("xcbc(aes)"); + tcrypt_test("xcbc(aes)"); break; case 107: - ret += tcrypt_test("hmac(rmd128)"); + tcrypt_test("hmac(rmd128)"); break; case 108: - ret += tcrypt_test("hmac(rmd160)"); - break; - - case 150: - ret += tcrypt_test("ansi_cprng"); + tcrypt_test("hmac(rmd160)"); break; case 200: @@ -881,8 +862,6 @@ static int do_test(int m) test_available(); break; } - - return ret; } static int __init tcrypt_mod_init(void) @@ -896,21 +875,15 @@ static int __init tcrypt_mod_init(void) goto err_free_tv; } - err = do_test(mode); - if (err) { - printk(KERN_ERR "tcrypt: one or more tests failed!\n"); - goto err_free_tv; - } + do_test(mode); - /* We intentionaly return -EAGAIN to prevent keeping the module, - * unless we're running in fips mode. It does all its work from - * init() and doesn't offer any runtime functionality, but in - * the fips case, checking for a successful load is helpful. + /* We intentionaly return -EAGAIN to prevent keeping + * the module. It does all its work from init() + * and doesn't offer any runtime functionality * => we don't need it in the memory, do we? * -- mludvig */ - if (!fips_enabled) - err = -EAGAIN; + err = -EAGAIN; err_free_tv: for (i = 0; i < TVMEMSIZE && tvmem[i]; i++) diff --git a/trunk/crypto/testmgr.c b/trunk/crypto/testmgr.c index e9e9d84293b9..b50c3c6b17a2 100644 --- a/trunk/crypto/testmgr.c +++ b/trunk/crypto/testmgr.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "internal.h" #include "testmgr.h" @@ -85,16 +84,10 @@ struct hash_test_suite { unsigned int count; }; -struct cprng_test_suite { - struct cprng_testvec *vecs; - unsigned int count; -}; - struct alg_test_desc { const char *alg; int (*test)(const struct alg_test_desc *desc, const char *driver, u32 type, u32 mask); - int fips_allowed; /* set if alg is allowed in fips mode */ union { struct aead_test_suite aead; @@ -102,12 +95,14 @@ struct alg_test_desc { struct comp_test_suite comp; struct pcomp_test_suite pcomp; struct hash_test_suite hash; - struct cprng_test_suite cprng; } suite; }; static unsigned int IDX[8] = { IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 }; +static char *xbuf[XBUFSIZE]; +static char *axbuf[XBUFSIZE]; + static void hexdump(unsigned char *buf, unsigned int len) { print_hex_dump(KERN_CONT, "", DUMP_PREFIX_OFFSET, @@ -126,33 +121,6 @@ static void tcrypt_complete(struct crypto_async_request *req, int err) complete(&res->completion); } -static int testmgr_alloc_buf(char *buf[XBUFSIZE]) -{ - int i; - - for (i = 0; i < XBUFSIZE; i++) { - buf[i] = (void *)__get_free_page(GFP_KERNEL); - if (!buf[i]) - goto err_free_buf; - } - - return 0; - -err_free_buf: - while (i-- > 0) - free_page((unsigned long)buf[i]); - - return -ENOMEM; -} - -static void testmgr_free_buf(char *buf[XBUFSIZE]) -{ - int i; - - for (i = 0; i < XBUFSIZE; i++) - free_page((unsigned long)buf[i]); -} - static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, unsigned int tcount) { @@ -162,12 +130,8 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, char result[64]; struct ahash_request *req; struct tcrypt_result tresult; + int ret; void *hash_buff; - char *xbuf[XBUFSIZE]; - int ret = -ENOMEM; - - if (testmgr_alloc_buf(xbuf)) - 
goto out_nobuf; init_completion(&tresult.completion); @@ -175,25 +139,17 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, if (!req) { printk(KERN_ERR "alg: hash: Failed to allocate request for " "%s\n", algo); + ret = -ENOMEM; goto out_noreq; } ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, tcrypt_complete, &tresult); - j = 0; for (i = 0; i < tcount; i++) { - if (template[i].np) - continue; - - j++; memset(result, 0, 64); hash_buff = xbuf[0]; - ret = -EINVAL; - if (WARN_ON(template[i].psize > PAGE_SIZE)) - goto out; - memcpy(hash_buff, template[i].plaintext, template[i].psize); sg_init_one(&sg[0], hash_buff, template[i].psize); @@ -203,7 +159,7 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, template[i].ksize); if (ret) { printk(KERN_ERR "alg: hash: setkey failed on " - "test %d for %s: ret=%d\n", j, algo, + "test %d for %s: ret=%d\n", i + 1, algo, -ret); goto out; } @@ -225,14 +181,14 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, /* fall through */ default: printk(KERN_ERR "alg: hash: digest failed on test %d " - "for %s: ret=%d\n", j, algo, -ret); + "for %s: ret=%d\n", i + 1, algo, -ret); goto out; } if (memcmp(result, template[i].digest, crypto_ahash_digestsize(tfm))) { printk(KERN_ERR "alg: hash: Test %d failed for %s\n", - j, algo); + i + 1, algo); hexdump(result, crypto_ahash_digestsize(tfm)); ret = -EINVAL; goto out; @@ -247,11 +203,7 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, temp = 0; sg_init_table(sg, template[i].np); - ret = -EINVAL; for (k = 0; k < template[i].np; k++) { - if (WARN_ON(offset_in_page(IDX[k]) + - template[i].tap[k] > PAGE_SIZE)) - goto out; sg_set_buf(&sg[k], memcpy(xbuf[IDX[k] >> PAGE_SHIFT] + offset_in_page(IDX[k]), @@ -313,8 +265,6 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, out: ahash_request_free(req); out_noreq: - testmgr_free_buf(xbuf); -out_nobuf: return ret; } @@ -323,7 +273,7 @@ static int test_aead(struct crypto_aead *tfm, int enc, { const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)); unsigned int i, j, k, n, temp; - int ret = -ENOMEM; + int ret = 0; char *q; char *key; struct aead_request *req; @@ -335,13 +285,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, void *input; void *assoc; char iv[MAX_IVLEN]; - char *xbuf[XBUFSIZE]; - char *axbuf[XBUFSIZE]; - - if (testmgr_alloc_buf(xbuf)) - goto out_noxbuf; - if (testmgr_alloc_buf(axbuf)) - goto out_noaxbuf; if (enc == ENCRYPT) e = "encryption"; @@ -354,6 +297,7 @@ static int test_aead(struct crypto_aead *tfm, int enc, if (!req) { printk(KERN_ERR "alg: aead: Failed to allocate request for " "%s\n", algo); + ret = -ENOMEM; goto out; } @@ -370,11 +314,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, input = xbuf[0]; assoc = axbuf[0]; - ret = -EINVAL; - if (WARN_ON(template[i].ilen > PAGE_SIZE || - template[i].alen > PAGE_SIZE)) - goto out; - memcpy(input, template[i].input, template[i].ilen); memcpy(assoc, template[i].assoc, template[i].alen); if (template[i].iv) @@ -424,16 +363,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, switch (ret) { case 0: - if (template[i].novrfy) { - /* verification was supposed to fail */ - printk(KERN_ERR "alg: aead: %s failed " - "on test %d for %s: ret was 0, " - "expected -EBADMSG\n", - e, j, algo); - /* so really, we got a bad message */ - ret = -EBADMSG; - goto out; - } break; case -EINPROGRESS: case -EBUSY: @@ -443,10 +372,6 @@ static int 
test_aead(struct crypto_aead *tfm, int enc, INIT_COMPLETION(result.completion); break; } - case -EBADMSG: - if (template[i].novrfy) - /* verification failure was expected */ - continue; /* fall through */ default: printk(KERN_ERR "alg: aead: %s failed on test " @@ -534,11 +459,7 @@ static int test_aead(struct crypto_aead *tfm, int enc, } sg_init_table(asg, template[i].anp); - ret = -EINVAL; for (k = 0, temp = 0; k < template[i].anp; k++) { - if (WARN_ON(offset_in_page(IDX[k]) + - template[i].atap[k] > PAGE_SIZE)) - goto out; sg_set_buf(&asg[k], memcpy(axbuf[IDX[k] >> PAGE_SHIFT] + offset_in_page(IDX[k]), @@ -560,16 +481,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, switch (ret) { case 0: - if (template[i].novrfy) { - /* verification was supposed to fail */ - printk(KERN_ERR "alg: aead: %s failed " - "on chunk test %d for %s: ret " - "was 0, expected -EBADMSG\n", - e, j, algo); - /* so really, we got a bad message */ - ret = -EBADMSG; - goto out; - } break; case -EINPROGRESS: case -EBUSY: @@ -579,10 +490,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, INIT_COMPLETION(result.completion); break; } - case -EBADMSG: - if (template[i].novrfy) - /* verification failure was expected */ - continue; /* fall through */ default: printk(KERN_ERR "alg: aead: %s failed on " @@ -639,10 +546,6 @@ static int test_aead(struct crypto_aead *tfm, int enc, out: aead_request_free(req); - testmgr_free_buf(axbuf); -out_noaxbuf: - testmgr_free_buf(xbuf); -out_noxbuf: return ret; } @@ -651,14 +554,10 @@ static int test_cipher(struct crypto_cipher *tfm, int enc, { const char *algo = crypto_tfm_alg_driver_name(crypto_cipher_tfm(tfm)); unsigned int i, j, k; + int ret; char *q; const char *e; void *data; - char *xbuf[XBUFSIZE]; - int ret = -ENOMEM; - - if (testmgr_alloc_buf(xbuf)) - goto out_nobuf; if (enc == ENCRYPT) e = "encryption"; @@ -672,10 +571,6 @@ static int test_cipher(struct crypto_cipher *tfm, int enc, j++; - ret = -EINVAL; - if (WARN_ON(template[i].ilen > PAGE_SIZE)) - goto out; - data = xbuf[0]; memcpy(data, template[i].input, template[i].ilen); @@ -716,8 +611,6 @@ static int test_cipher(struct crypto_cipher *tfm, int enc, ret = 0; out: - testmgr_free_buf(xbuf); -out_nobuf: return ret; } @@ -727,6 +620,7 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, const char *algo = crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm)); unsigned int i, j, k, n, temp; + int ret; char *q; struct ablkcipher_request *req; struct scatterlist sg[8]; @@ -734,11 +628,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, struct tcrypt_result result; void *data; char iv[MAX_IVLEN]; - char *xbuf[XBUFSIZE]; - int ret = -ENOMEM; - - if (testmgr_alloc_buf(xbuf)) - goto out_nobuf; if (enc == ENCRYPT) e = "encryption"; @@ -751,6 +640,7 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, if (!req) { printk(KERN_ERR "alg: skcipher: Failed to allocate request " "for %s\n", algo); + ret = -ENOMEM; goto out; } @@ -767,10 +657,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, if (!(template[i].np)) { j++; - ret = -EINVAL; - if (WARN_ON(template[i].ilen > PAGE_SIZE)) - goto out; - data = xbuf[0]; memcpy(data, template[i].input, template[i].ilen); @@ -939,8 +825,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, out: ablkcipher_request_free(req); - testmgr_free_buf(xbuf); -out_nobuf: return ret; } @@ -953,8 +837,7 @@ static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate, int ret; for (i = 0; i < ctcount; 
i++) { - int ilen; - unsigned int dlen = COMP_BUF_SIZE; + int ilen, dlen = COMP_BUF_SIZE; memset(result, 0, sizeof (result)); @@ -986,8 +869,7 @@ static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate, } for (i = 0; i < dtcount; i++) { - int ilen; - unsigned int dlen = COMP_BUF_SIZE; + int ilen, dlen = COMP_BUF_SIZE; memset(result, 0, sizeof (result)); @@ -1032,25 +914,24 @@ static int test_pcomp(struct crypto_pcomp *tfm, const char *algo = crypto_tfm_alg_driver_name(crypto_pcomp_tfm(tfm)); unsigned int i; char result[COMP_BUF_SIZE]; - int res; + int error; for (i = 0; i < ctcount; i++) { struct comp_request req; - unsigned int produced = 0; - res = crypto_compress_setup(tfm, ctemplate[i].params, - ctemplate[i].paramsize); - if (res) { + error = crypto_compress_setup(tfm, ctemplate[i].params, + ctemplate[i].paramsize); + if (error) { pr_err("alg: pcomp: compression setup failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } - res = crypto_compress_init(tfm); - if (res) { + error = crypto_compress_init(tfm); + if (error) { pr_err("alg: pcomp: compression init failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } memset(result, 0, sizeof(result)); @@ -1060,37 +941,32 @@ static int test_pcomp(struct crypto_pcomp *tfm, req.next_out = result; req.avail_out = ctemplate[i].outlen / 2; - res = crypto_compress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_compress_update(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: compression update failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } - if (res > 0) - produced += res; /* Add remaining input data */ req.avail_in += (ctemplate[i].inlen + 1) / 2; - res = crypto_compress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_compress_update(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: compression update failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } - if (res > 0) - produced += res; /* Provide remaining output space */ req.avail_out += COMP_BUF_SIZE - ctemplate[i].outlen / 2; - res = crypto_compress_final(tfm, &req); - if (res < 0) { + error = crypto_compress_final(tfm, &req); + if (error) { pr_err("alg: pcomp: compression final failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } - produced += res; if (COMP_BUF_SIZE - req.avail_out != ctemplate[i].outlen) { pr_err("alg: comp: Compression test %d failed for %s: " @@ -1100,13 +976,6 @@ static int test_pcomp(struct crypto_pcomp *tfm, return -EINVAL; } - if (produced != ctemplate[i].outlen) { - pr_err("alg: comp: Compression test %d failed for %s: " - "returned len = %u (expected %d)\n", i + 1, - algo, produced, ctemplate[i].outlen); - return -EINVAL; - } - if (memcmp(result, ctemplate[i].output, ctemplate[i].outlen)) { pr_err("alg: pcomp: Compression test %d failed for " "%s\n", i + 1, algo); @@ -1117,21 +986,21 @@ static int test_pcomp(struct crypto_pcomp *tfm, for (i = 0; i < dtcount; i++) { struct comp_request req; - unsigned int produced = 0; - res = crypto_decompress_setup(tfm, 
dtemplate[i].params, - dtemplate[i].paramsize); - if (res) { + error = crypto_decompress_setup(tfm, dtemplate[i].params, + dtemplate[i].paramsize); + if (error) { pr_err("alg: pcomp: decompression setup failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; + "test %d for %s: error=%d\n", i + 1, algo, + error); + return error; } - res = crypto_decompress_init(tfm); - if (res) { + error = crypto_decompress_init(tfm); + if (error) { pr_err("alg: pcomp: decompression init failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; + "%d for %s: error=%d\n", i + 1, algo, error); + return error; } memset(result, 0, sizeof(result)); @@ -1141,38 +1010,35 @@ static int test_pcomp(struct crypto_pcomp *tfm, req.next_out = result; req.avail_out = dtemplate[i].outlen / 2; - res = crypto_decompress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_decompress_update(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: decompression update failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; + "test %d for %s: error=%d\n", i + 1, algo, + error); + return error; } - if (res > 0) - produced += res; /* Add remaining input data */ req.avail_in += (dtemplate[i].inlen + 1) / 2; - res = crypto_decompress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_decompress_update(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: decompression update failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; + "test %d for %s: error=%d\n", i + 1, algo, + error); + return error; } - if (res > 0) - produced += res; /* Provide remaining output space */ req.avail_out += COMP_BUF_SIZE - dtemplate[i].outlen / 2; - res = crypto_decompress_final(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { + error = crypto_decompress_final(tfm, &req); + if (error && (error != -EAGAIN || req.avail_in)) { pr_err("alg: pcomp: decompression final failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; + "test %d for %s: error=%d\n", i + 1, algo, + error); + return error; } - if (res > 0) - produced += res; if (COMP_BUF_SIZE - req.avail_out != dtemplate[i].outlen) { pr_err("alg: comp: Decompression test %d failed for " @@ -1182,13 +1048,6 @@ static int test_pcomp(struct crypto_pcomp *tfm, return -EINVAL; } - if (produced != dtemplate[i].outlen) { - pr_err("alg: comp: Decompression test %d failed for " - "%s: returned len = %u (expected %d)\n", i + 1, - algo, produced, dtemplate[i].outlen); - return -EINVAL; - } - if (memcmp(result, dtemplate[i].output, dtemplate[i].outlen)) { pr_err("alg: pcomp: Decompression test %d failed for " "%s\n", i + 1, algo); @@ -1200,68 +1059,6 @@ static int test_pcomp(struct crypto_pcomp *tfm, return 0; } - -static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template, - unsigned int tcount) -{ - const char *algo = crypto_tfm_alg_driver_name(crypto_rng_tfm(tfm)); - int err, i, j, seedsize; - u8 *seed; - char result[32]; - - seedsize = crypto_rng_seedsize(tfm); - - seed = kmalloc(seedsize, GFP_KERNEL); - if (!seed) { - printk(KERN_ERR "alg: cprng: Failed to allocate seed space " - "for %s\n", algo); - return -ENOMEM; - } - - for (i = 0; i < tcount; i++) { - memset(result, 0, 32); - - memcpy(seed, template[i].v, template[i].vlen); - memcpy(seed + template[i].vlen, template[i].key, - template[i].klen); - memcpy(seed + template[i].vlen + 
template[i].klen, - template[i].dt, template[i].dtlen); - - err = crypto_rng_reset(tfm, seed, seedsize); - if (err) { - printk(KERN_ERR "alg: cprng: Failed to reset rng " - "for %s\n", algo); - goto out; - } - - for (j = 0; j < template[i].loops; j++) { - err = crypto_rng_get_bytes(tfm, result, - template[i].rlen); - if (err != template[i].rlen) { - printk(KERN_ERR "alg: cprng: Failed to obtain " - "the correct amount of random data for " - "%s (requested %d, got %d)\n", algo, - template[i].rlen, err); - goto out; - } - } - - err = memcmp(result, template[i].result, - template[i].rlen); - if (err) { - printk(KERN_ERR "alg: cprng: Test %d failed for %s\n", - i, algo); - hexdump(result, template[i].rlen); - err = -EINVAL; - goto out; - } - } - -out: - kfree(seed); - return err; -} - static int alg_test_aead(const struct alg_test_desc *desc, const char *driver, u32 type, u32 mask) { @@ -1461,42 +1258,11 @@ static int alg_test_crc32c(const struct alg_test_desc *desc, return err; } -static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver, - u32 type, u32 mask) -{ - struct crypto_rng *rng; - int err; - - rng = crypto_alloc_rng(driver, type, mask); - if (IS_ERR(rng)) { - printk(KERN_ERR "alg: cprng: Failed to load transform for %s: " - "%ld\n", driver, PTR_ERR(rng)); - return PTR_ERR(rng); - } - - err = test_cprng(rng, desc->suite.cprng.vecs, desc->suite.cprng.count); - - crypto_free_rng(rng); - - return err; -} - /* Please keep this list sorted by algorithm name. */ static const struct alg_test_desc alg_test_descs[] = { { - .alg = "ansi_cprng", - .test = alg_test_cprng, - .fips_allowed = 1, - .suite = { - .cprng = { - .vecs = ansi_cprng_aes_tv_template, - .count = ANSI_CPRNG_AES_TEST_VECTORS - } - } - }, { .alg = "cbc(aes)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1572,7 +1338,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "cbc(des3_ede)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1603,7 +1368,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ccm(aes)", .test = alg_test_aead, - .fips_allowed = 1, .suite = { .aead = { .enc = { @@ -1619,29 +1383,12 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "crc32c", .test = alg_test_crc32c, - .fips_allowed = 1, .suite = { .hash = { .vecs = crc32c_tv_template, .count = CRC32C_TEST_VECTORS } } - }, { - .alg = "ctr(aes)", - .test = alg_test_skcipher, - .fips_allowed = 1, - .suite = { - .cipher = { - .enc = { - .vecs = aes_ctr_enc_tv_template, - .count = AES_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_dec_tv_template, - .count = AES_CTR_DEC_TEST_VECTORS - } - } - } }, { .alg = "cts(cbc(aes))", .test = alg_test_skcipher, @@ -1675,7 +1422,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ecb(aes)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1781,7 +1527,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ecb(des)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1797,7 +1542,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ecb(des3_ede)", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1933,7 +1677,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "gcm(aes)", .test = alg_test_aead, - .fips_allowed = 1, .suite = { .aead = { .enc = { @@ -1976,7 +1719,6 @@ static const struct 
alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha1)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha1_tv_template, @@ -1986,7 +1728,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha224)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha224_tv_template, @@ -1996,7 +1737,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha256)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha256_tv_template, @@ -2006,7 +1746,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha384)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha384_tv_template, @@ -2016,7 +1755,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha512)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = hmac_sha512_tv_template, @@ -2098,32 +1836,15 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "rfc3686(ctr(aes))", .test = alg_test_skcipher, - .fips_allowed = 1, .suite = { .cipher = { .enc = { - .vecs = aes_ctr_rfc3686_enc_tv_template, - .count = AES_CTR_3686_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_rfc3686_dec_tv_template, - .count = AES_CTR_3686_DEC_TEST_VECTORS - } - } - } - }, { - .alg = "rfc4309(ccm(aes))", - .test = alg_test_aead, - .fips_allowed = 1, - .suite = { - .aead = { - .enc = { - .vecs = aes_ccm_rfc4309_enc_tv_template, - .count = AES_CCM_4309_ENC_TEST_VECTORS + .vecs = aes_ctr_enc_tv_template, + .count = AES_CTR_ENC_TEST_VECTORS }, .dec = { - .vecs = aes_ccm_rfc4309_dec_tv_template, - .count = AES_CCM_4309_DEC_TEST_VECTORS + .vecs = aes_ctr_dec_tv_template, + .count = AES_CTR_DEC_TEST_VECTORS } } } @@ -2177,7 +1898,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha1", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha1_tv_template, @@ -2187,7 +1907,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha224", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha224_tv_template, @@ -2197,7 +1916,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha256", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha256_tv_template, @@ -2207,7 +1925,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha384", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha384_tv_template, @@ -2217,7 +1934,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha512", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = { .vecs = sha512_tv_template, @@ -2361,36 +2077,60 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask) if (i < 0) goto notest; - if (fips_enabled && !alg_test_descs[i].fips_allowed) - goto non_fips_alg; - - rc = alg_test_cipher(alg_test_descs + i, driver, type, mask); - goto test_done; + return alg_test_cipher(alg_test_descs + i, driver, type, mask); } i = alg_find_test(alg); if (i < 0) goto notest; - if (fips_enabled && !alg_test_descs[i].fips_allowed) - goto non_fips_alg; - rc = alg_test_descs[i].test(alg_test_descs + i, driver, type, mask); -test_done: if (fips_enabled && rc) panic("%s: %s alg self test failed in fips mode!\n", driver, alg); - if (fips_enabled && !rc) - printk(KERN_INFO "alg: self-tests for %s (%s) passed\n", - driver, alg); - return rc; notest: printk(KERN_INFO "alg: No 
test for %s (%s)\n", alg, driver); return 0; -non_fips_alg: - return -EINVAL; } EXPORT_SYMBOL_GPL(alg_test); + +int __init testmgr_init(void) +{ + int i; + + for (i = 0; i < XBUFSIZE; i++) { + xbuf[i] = (void *)__get_free_page(GFP_KERNEL); + if (!xbuf[i]) + goto err_free_xbuf; + } + + for (i = 0; i < XBUFSIZE; i++) { + axbuf[i] = (void *)__get_free_page(GFP_KERNEL); + if (!axbuf[i]) + goto err_free_axbuf; + } + + return 0; + +err_free_axbuf: + for (i = 0; i < XBUFSIZE && axbuf[i]; i++) + free_page((unsigned long)axbuf[i]); +err_free_xbuf: + for (i = 0; i < XBUFSIZE && xbuf[i]; i++) + free_page((unsigned long)xbuf[i]); + + return -ENOMEM; +} + +void testmgr_exit(void) +{ + int i; + + for (i = 0; i < XBUFSIZE; i++) + free_page((unsigned long)axbuf[i]); + for (i = 0; i < XBUFSIZE; i++) + free_page((unsigned long)xbuf[i]); +} diff --git a/trunk/crypto/testmgr.h b/trunk/crypto/testmgr.h index 69316228fc19..526f00a9c72f 100644 --- a/trunk/crypto/testmgr.h +++ b/trunk/crypto/testmgr.h @@ -62,7 +62,6 @@ struct aead_testvec { int np; int anp; unsigned char fail; - unsigned char novrfy; /* ccm dec verification failure expected */ unsigned char wk; /* weak key flag */ unsigned char klen; unsigned short ilen; @@ -70,18 +69,6 @@ struct aead_testvec { unsigned short rlen; }; -struct cprng_testvec { - char *key; - char *dt; - char *v; - char *result; - unsigned char klen; - unsigned short dtlen; - unsigned short vlen; - unsigned short rlen; - unsigned short loops; -}; - static char zeroed_string[48]; /* @@ -2854,16 +2841,12 @@ static struct cipher_testvec cast6_dec_tv_template[] = { #define AES_LRW_DEC_TEST_VECTORS 8 #define AES_XTS_ENC_TEST_VECTORS 4 #define AES_XTS_DEC_TEST_VECTORS 4 -#define AES_CTR_ENC_TEST_VECTORS 3 -#define AES_CTR_DEC_TEST_VECTORS 3 -#define AES_CTR_3686_ENC_TEST_VECTORS 7 -#define AES_CTR_3686_DEC_TEST_VECTORS 6 +#define AES_CTR_ENC_TEST_VECTORS 7 +#define AES_CTR_DEC_TEST_VECTORS 6 #define AES_GCM_ENC_TEST_VECTORS 9 #define AES_GCM_DEC_TEST_VECTORS 8 #define AES_CCM_ENC_TEST_VECTORS 7 #define AES_CCM_DEC_TEST_VECTORS 7 -#define AES_CCM_4309_ENC_TEST_VECTORS 7 -#define AES_CCM_4309_DEC_TEST_VECTORS 10 static struct cipher_testvec aes_enc_tv_template[] = { { /* From FIPS-197 */ @@ -4000,164 +3983,6 @@ static struct cipher_testvec aes_xts_dec_tv_template[] = { static struct cipher_testvec aes_ctr_enc_tv_template[] = { - { /* From NIST Special Publication 800-38A, Appendix F.5 */ - .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" - "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", - .klen = 16, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .ilen = 64, - .result = "\x87\x4d\x61\x91\xb6\x20\xe3\x26" - "\x1b\xef\x68\x64\x99\x0d\xb6\xce" - "\x98\x06\xf6\x6b\x79\x70\xfd\xff" - "\x86\x17\x18\x7b\xb9\xff\xfd\xff" - "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e" - "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab" - "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1" - "\x79\x21\x70\xa0\xf3\x00\x9c\xee", - .rlen = 64, - }, { - .key = "\x8e\x73\xb0\xf7\xda\x0e\x64\x52" - "\xc8\x10\xf3\x2b\x80\x90\x79\xe5" - "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", - .klen = 24, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - 
"\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .ilen = 64, - .result = "\x1a\xbc\x93\x24\x17\x52\x1c\xa2" - "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b" - "\x09\x03\x39\xec\x0a\xa6\xfa\xef" - "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94" - "\x1e\x36\xb2\x6b\xd1\xeb\xc6\x70" - "\xd1\xbd\x1d\x66\x56\x20\xab\xf7" - "\x4f\x78\xa7\xf6\xd2\x98\x09\x58" - "\x5a\x97\xda\xec\x58\xc6\xb0\x50", - .rlen = 64, - }, { - .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe" - "\x2b\x73\xae\xf0\x85\x7d\x77\x81" - "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" - "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", - .klen = 32, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .ilen = 64, - .result = "\x60\x1e\xc3\x13\x77\x57\x89\xa5" - "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28" - "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a" - "\xca\x84\xe9\x90\xca\xca\xf5\xc5" - "\x2b\x09\x30\xda\xa2\x3d\xe9\x4c" - "\xe8\x70\x17\xba\x2d\x84\x98\x8d" - "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6" - "\x13\xc2\xdd\x08\x45\x79\x41\xa6", - .rlen = 64, - } -}; - -static struct cipher_testvec aes_ctr_dec_tv_template[] = { - { /* From NIST Special Publication 800-38A, Appendix F.5 */ - .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" - "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", - .klen = 16, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x87\x4d\x61\x91\xb6\x20\xe3\x26" - "\x1b\xef\x68\x64\x99\x0d\xb6\xce" - "\x98\x06\xf6\x6b\x79\x70\xfd\xff" - "\x86\x17\x18\x7b\xb9\xff\xfd\xff" - "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e" - "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab" - "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1" - "\x79\x21\x70\xa0\xf3\x00\x9c\xee", - .ilen = 64, - .result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .rlen = 64, - }, { - .key = "\x8e\x73\xb0\xf7\xda\x0e\x64\x52" - "\xc8\x10\xf3\x2b\x80\x90\x79\xe5" - "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", - .klen = 24, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = "\x1a\xbc\x93\x24\x17\x52\x1c\xa2" - "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b" - "\x09\x03\x39\xec\x0a\xa6\xfa\xef" - "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94" - "\x1e\x36\xb2\x6b\xd1\xeb\xc6\x70" - "\xd1\xbd\x1d\x66\x56\x20\xab\xf7" - "\x4f\x78\xa7\xf6\xd2\x98\x09\x58" - "\x5a\x97\xda\xec\x58\xc6\xb0\x50", - .ilen = 64, - .result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .rlen = 64, - }, { - .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe" - "\x2b\x73\xae\xf0\x85\x7d\x77\x81" - "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" - "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", - .klen = 32, - .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .input = 
"\x60\x1e\xc3\x13\x77\x57\x89\xa5" - "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28" - "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a" - "\xca\x84\xe9\x90\xca\xca\xf5\xc5" - "\x2b\x09\x30\xda\xa2\x3d\xe9\x4c" - "\xe8\x70\x17\xba\x2d\x84\x98\x8d" - "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6" - "\x13\xc2\xdd\x08\x45\x79\x41\xa6", - .ilen = 64, - .result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" - "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" - "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" - "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" - "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" - "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" - "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" - "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", - .rlen = 64, - } -}; - -static struct cipher_testvec aes_ctr_rfc3686_enc_tv_template[] = { { /* From RFC 3686 */ .key = "\xae\x68\x52\xf8\x12\x10\x67\xcc" "\x4b\xf7\xa5\x76\x55\x77\xf3\x9e" @@ -5289,7 +5114,7 @@ static struct cipher_testvec aes_ctr_rfc3686_enc_tv_template[] = { }, }; -static struct cipher_testvec aes_ctr_rfc3686_dec_tv_template[] = { +static struct cipher_testvec aes_ctr_dec_tv_template[] = { { /* From RFC 3686 */ .key = "\xae\x68\x52\xf8\x12\x10\x67\xcc" "\x4b\xf7\xa5\x76\x55\x77\xf3\x9e" @@ -6000,470 +5825,6 @@ static struct aead_testvec aes_ccm_dec_tv_template[] = { }, }; -/* - * rfc4309 refers to section 8 of rfc3610 for test vectors, but they all - * use a 13-byte nonce, we only support an 11-byte nonce. Similarly, all of - * Special Publication 800-38C's test vectors also use nonce lengths our - * implementation doesn't support. The following are taken from fips cavs - * fax files on hand at Red Hat. - * - * nb: actual key lengths are (klen - 3), the last 3 bytes are actually - * part of the nonce which combine w/the iv, but need to be input this way. - */ -static struct aead_testvec aes_ccm_rfc4309_enc_tv_template[] = { - { - .key = "\x83\xac\x54\x66\xc2\xeb\xe5\x05" - "\x2e\x01\xd1\xfc\x5d\x82\x66\x2e" - "\x96\xac\x59", - .klen = 19, - .iv = "\x30\x07\xa1\xe2\xa2\xc7\x55\x24", - .alen = 0, - .input = "\x19\xc8\x81\xf6\xe9\x86\xff\x93" - "\x0b\x78\x67\xe5\xbb\xb7\xfc\x6e" - "\x83\x77\xb3\xa6\x0c\x8c\x9f\x9c" - "\x35\x2e\xad\xe0\x62\xf9\x91\xa1", - .ilen = 32, - .result = "\xab\x6f\xe1\x69\x1d\x19\x99\xa8" - "\x92\xa0\xc4\x6f\x7e\xe2\x8b\xb1" - "\x70\xbb\x8c\xa6\x4c\x6e\x97\x8a" - "\x57\x2b\xbe\x5d\x98\xa6\xb1\x32" - "\xda\x24\xea\xd9\xa1\x39\x98\xfd" - "\xa4\xbe\xd9\xf2\x1a\x6d\x22\xa8", - .rlen = 48, - }, { - .key = "\x1e\x2c\x7e\x01\x41\x9a\xef\xc0" - "\x0d\x58\x96\x6e\x5c\xa2\x4b\xd3" - "\x4f\xa3\x19", - .klen = 19, - .iv = "\xd3\x01\x5a\xd8\x30\x60\x15\x56", - .assoc = "\xda\xe6\x28\x9c\x45\x2d\xfd\x63" - "\x5e\xda\x4c\xb6\xe6\xfc\xf9\xb7" - "\x0c\x56\xcb\xe4\xe0\x05\x7a\xe1" - "\x0a\x63\x09\x78\xbc\x2c\x55\xde", - .alen = 32, - .input = "\x87\xa3\x36\xfd\x96\xb3\x93\x78" - "\xa9\x28\x63\xba\x12\xa3\x14\x85" - "\x57\x1e\x06\xc9\x7b\x21\xef\x76" - "\x7f\x38\x7e\x8e\x29\xa4\x3e\x7e", - .ilen = 32, - .result = "\x8a\x1e\x11\xf0\x02\x6b\xe2\x19" - "\xfc\x70\xc4\x6d\x8e\xb7\x99\xab" - "\xc5\x4b\xa2\xac\xd3\xf3\x48\xff" - "\x3b\xb5\xce\x53\xef\xde\xbb\x02" - "\xa9\x86\x15\x6c\x13\xfe\xda\x0a" - "\x22\xb8\x29\x3d\xd8\x39\x9a\x23", - .rlen = 48, - }, { - .key = "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1" - "\xa3\xf0\xff\xdd\x4e\x4b\x12\x75" - "\x53\x14\x73\x66\x8d\x88\xf6\x80" - "\xa0\x20\x35", - .klen = 27, - .iv = "\x26\xf2\x21\x8d\x50\x20\xda\xe2", - .assoc = "\x5b\x9e\x13\x67\x02\x5e\xef\xc1" - "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47" - "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d" - "\xd8\x9e\x2b\x56\x10\x23\x56\xe7", - .alen = 32, - .ilen = 0, - .result = 
"\x36\xea\x7a\x70\x08\xdc\x6a\xbc" - "\xad\x0c\x7a\x63\xf6\x61\xfd\x9b", - .rlen = 16, - }, { - .key = "\x56\xdf\x5c\x8f\x26\x3f\x0e\x42" - "\xef\x7a\xd3\xce\xfc\x84\x60\x62" - "\xca\xb4\x40\xaf\x5f\xc9\xc9\x01" - "\xd6\x3c\x8c", - .klen = 27, - .iv = "\x86\x84\xb6\xcd\xef\x09\x2e\x94", - .assoc = "\x02\x65\x78\x3c\xe9\x21\x30\x91" - "\xb1\xb9\xda\x76\x9a\x78\x6d\x95" - "\xf2\x88\x32\xa3\xf2\x50\xcb\x4c" - "\xe3\x00\x73\x69\x84\x69\x87\x79", - .alen = 32, - .input = "\x9f\xd2\x02\x4b\x52\x49\x31\x3c" - "\x43\x69\x3a\x2d\x8e\x70\xad\x7e" - "\xe0\xe5\x46\x09\x80\x89\x13\xb2" - "\x8c\x8b\xd9\x3f\x86\xfb\xb5\x6b", - .ilen = 32, - .result = "\x39\xdf\x7c\x3c\x5a\x29\xb9\x62" - "\x5d\x51\xc2\x16\xd8\xbd\x06\x9f" - "\x9b\x6a\x09\x70\xc1\x51\x83\xc2" - "\x66\x88\x1d\x4f\x9a\xda\xe0\x1e" - "\xc7\x79\x11\x58\xe5\x6b\x20\x40" - "\x7a\xea\x46\x42\x8b\xe4\x6f\xe1", - .rlen = 48, - }, { - .key = "\xe0\x8d\x99\x71\x60\xd7\x97\x1a" - "\xbd\x01\x99\xd5\x8a\xdf\x71\x3a" - "\xd3\xdf\x24\x4b\x5e\x3d\x4b\x4e" - "\x30\x7a\xb9\xd8\x53\x0a\x5e\x2b" - "\x1e\x29\x91", - .klen = 35, - .iv = "\xad\x8e\xc1\x53\x0a\xcf\x2d\xbe", - .assoc = "\x19\xb6\x1f\x57\xc4\xf3\xf0\x8b" - "\x78\x2b\x94\x02\x29\x0f\x42\x27" - "\x6b\x75\xcb\x98\x34\x08\x7e\x79" - "\xe4\x3e\x49\x0d\x84\x8b\x22\x87", - .alen = 32, - .input = "\xe1\xd9\xd8\x13\xeb\x3a\x75\x3f" - "\x9d\xbd\x5f\x66\xbe\xdc\xbb\x66" - "\xbf\x17\x99\x62\x4a\x39\x27\x1f" - "\x1d\xdc\x24\xae\x19\x2f\x98\x4c", - .ilen = 32, - .result = "\x19\xb8\x61\x33\x45\x2b\x43\x96" - "\x6f\x51\xd0\x20\x30\x7d\x9b\xc6" - "\x26\x3d\xf8\xc9\x65\x16\xa8\x9f" - "\xf0\x62\x17\x34\xf2\x1e\x8d\x75" - "\x4e\x13\xcc\xc0\xc3\x2a\x54\x2d", - .rlen = 40, - }, { - .key = "\x7c\xc8\x18\x3b\x8d\x99\xe0\x7c" - "\x45\x41\xb8\xbd\x5c\xa7\xc2\x32" - "\x8a\xb8\x02\x59\xa4\xfe\xa9\x2c" - "\x09\x75\x9a\x9b\x3c\x9b\x27\x39" - "\xf9\xd9\x4e", - .klen = 35, - .iv = "\x63\xb5\x3d\x9d\x43\xf6\x1e\x50", - .assoc = "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b" - "\x13\x02\x01\x0c\x83\x4c\x96\x35" - "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94" - "\xb0\x39\x36\xe6\x8f\x57\xe0\x13", - .alen = 32, - .input = "\x3b\x6c\x29\x36\xb6\xef\x07\xa6" - "\x83\x72\x07\x4f\xcf\xfa\x66\x89" - "\x5f\xca\xb1\xba\xd5\x8f\x2c\x27" - "\x30\xdb\x75\x09\x93\xd4\x65\xe4", - .ilen = 32, - .result = "\xb0\x88\x5a\x33\xaa\xe5\xc7\x1d" - "\x85\x23\xc7\xc6\x2f\xf4\x1e\x3d" - "\xcc\x63\x44\x25\x07\x78\x4f\x9e" - "\x96\xb8\x88\xeb\xbc\x48\x1f\x06" - "\x39\xaf\x39\xac\xd8\x4a\x80\x39" - "\x7b\x72\x8a\xf7", - .rlen = 44, - }, { - .key = "\xab\xd0\xe9\x33\x07\x26\xe5\x83" - "\x8c\x76\x95\xd4\xb6\xdc\xf3\x46" - "\xf9\x8f\xad\xe3\x02\x13\x83\x77" - "\x3f\xb0\xf1\xa1\xa1\x22\x0f\x2b" - "\x24\xa7\x8b", - .klen = 35, - .iv = "\x07\xcb\xcc\x0e\xe6\x33\xbf\xf5", - .assoc = "\xd4\xdb\x30\x1d\x03\xfe\xfd\x5f" - "\x87\xd4\x8c\xb6\xb6\xf1\x7a\x5d" - "\xab\x90\x65\x8d\x8e\xca\x4d\x4f" - "\x16\x0c\x40\x90\x4b\xc7\x36\x73", - .alen = 32, - .input = "\xf5\xc6\x7d\x48\xc1\xb7\xe6\x92" - "\x97\x5a\xca\xc4\xa9\x6d\xf9\x3d" - "\x6c\xde\xbc\xf1\x90\xea\x6a\xb2" - "\x35\x86\x36\xaf\x5c\xfe\x4b\x3a", - .ilen = 32, - .result = "\x83\x6f\x40\x87\x72\xcf\xc1\x13" - "\xef\xbb\x80\x21\x04\x6c\x58\x09" - "\x07\x1b\xfc\xdf\xc0\x3f\x5b\xc7" - "\xe0\x79\xa8\x6e\x71\x7c\x3f\xcf" - "\x5c\xda\xb2\x33\xe5\x13\xe2\x0d" - "\x74\xd1\xef\xb5\x0f\x3a\xb5\xf8", - .rlen = 48, - }, -}; - -static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = { - { - .key = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1" - "\xff\x80\x2e\x48\x7d\x82\xf8\xb9" - "\xc6\xfb\x7d", - .klen = 19, - .iv = "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8", - 
.alen = 0, - .input = "\xd5\xe8\x93\x9f\xc7\x89\x2e\x2b", - .ilen = 8, - .result = "\x00", - .rlen = 0, - .novrfy = 1, - }, { - .key = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1" - "\xff\x80\x2e\x48\x7d\x82\xf8\xb9" - "\xaf\x94\x87", - .klen = 19, - .iv = "\x78\x35\x82\x81\x7f\x88\x94\x68", - .alen = 0, - .input = "\x41\x3c\xb8\x87\x73\xcb\xf3\xf3", - .ilen = 8, - .result = "\x00", - .rlen = 0, - }, { - .key = "\x61\x0e\x8c\xae\xe3\x23\xb6\x38" - "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8" - "\xc6\xfb\x7d", - .klen = 19, - .iv = "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8", - .assoc = "\xf3\x94\x87\x78\x35\x82\x81\x7f" - "\x88\x94\x68\xb1\x78\x6b\x2b\xd6" - "\x04\x1f\x4e\xed\x78\xd5\x33\x66" - "\xd8\x94\x99\x91\x81\x54\x62\x57", - .alen = 32, - .input = "\xf0\x7c\x29\x02\xae\x1c\x2f\x55" - "\xd0\xd1\x3d\x1a\xa3\x6d\xe4\x0a" - "\x86\xb0\x87\x6b\x62\x33\x8c\x34" - "\xce\xab\x57\xcc\x79\x0b\xe0\x6f" - "\x5c\x3e\x48\x1f\x6c\x46\xf7\x51" - "\x8b\x84\x83\x2a\xc1\x05\xb8\xc5", - .ilen = 48, - .result = "\x50\x82\x3e\x07\xe2\x1e\xb6\xfb" - "\x33\xe4\x73\xce\xd2\xfb\x95\x79" - "\xe8\xb4\xb5\x77\x11\x10\x62\x6f" - "\x6a\x82\xd1\x13\xec\xf5\xd0\x48", - .rlen = 32, - .novrfy = 1, - }, { - .key = "\x61\x0e\x8c\xae\xe3\x23\xb6\x38" - "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8" - "\x05\xe0\xc9", - .klen = 19, - .iv = "\x0f\xed\x34\xea\x97\xd4\x3b\xdf", - .assoc = "\x49\x5c\x50\x1f\x1d\x94\xcc\x81" - "\xba\xb7\xb6\x03\xaf\xa5\xc1\xa1" - "\xd8\x5c\x42\x68\xe0\x6c\xda\x89" - "\x05\xac\x56\xac\x1b\x2a\xd3\x86", - .alen = 32, - .input = "\x39\xbe\x7d\x15\x62\x77\xf3\x3c" - "\xad\x83\x52\x6d\x71\x03\x25\x1c" - "\xed\x81\x3a\x9a\x16\x7d\x19\x80" - "\x72\x04\x72\xd0\xf6\xff\x05\x0f" - "\xb7\x14\x30\x00\x32\x9e\xa0\xa6" - "\x9e\x5a\x18\xa1\xb8\xfe\xdb\xd3", - .ilen = 48, - .result = "\x75\x05\xbe\xc2\xd9\x1e\xde\x60" - "\x47\x3d\x8c\x7d\xbd\xb5\xd9\xb7" - "\xf2\xae\x61\x05\x8f\x82\x24\x3f" - "\x9c\x67\x91\xe1\x38\x4f\xe4\x0c", - .rlen = 32, - }, { - .key = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73" - "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3" - "\xa4\x48\x93\x39\x26\x71\x4a\xc6" - "\xee\x49\x83", - .klen = 27, - .iv = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e", - .assoc = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1" - "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64" - "\xa4\xf0\x13\x05\xd1\x77\x99\x67" - "\x11\xc4\xc6\xdb\x00\x56\x36\x61", - .alen = 32, - .input = "\x71\x99\xfa\xf4\x44\x12\x68\x9b", - .ilen = 8, - .result = "\x00", - .rlen = 0, - }, { - .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7" - "\x96\xe5\xc5\x68\xaa\x95\x35\xe0" - "\x29\xa0\xba\x9e\x48\x78\xd1\xba" - "\xee\x49\x83", - .klen = 27, - .iv = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e", - .assoc = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1" - "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64" - "\xa4\xf0\x13\x05\xd1\x77\x99\x67" - "\x11\xc4\xc6\xdb\x00\x56\x36\x61", - .alen = 32, - .input = "\xfb\xe5\x5d\x34\xbe\xe5\xe8\xe7" - "\x5a\xef\x2f\xbf\x1f\x7f\xd4\xb2" - "\x66\xca\x61\x1e\x96\x7a\x61\xb3" - "\x1c\x16\x45\x52\xba\x04\x9c\x9f" - "\xb1\xd2\x40\xbc\x52\x7c\x6f\xb1", - .ilen = 40, - .result = "\x85\x34\x66\x42\xc8\x92\x0f\x36" - "\x58\xe0\x6b\x91\x3c\x98\x5c\xbb" - "\x0a\x85\xcc\x02\xad\x7a\x96\xe9" - "\x65\x43\xa4\xc3\x0f\xdc\x55\x81", - .rlen = 32, - }, { - .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7" - "\x96\xe5\xc5\x68\xaa\x95\x35\xe0" - "\x29\xa0\xba\x9e\x48\x78\xd1\xba" - "\xd1\xfc\x57", - .klen = 27, - .iv = "\x9c\xfe\xb8\x9c\xad\x71\xaa\x1f", - .assoc = "\x86\x67\xa5\xa9\x14\x5f\x0d\xc6" - "\xff\x14\xc7\x44\xbf\x6c\x3a\xc3" - "\xff\xb6\x81\xbd\xe2\xd5\x06\xc7" - "\x3c\xa1\x52\x13\x03\x8a\x23\x3a", - .alen = 32, - .input = "\x3f\x66\xb0\x9d\xe5\x4b\x38\x00" 
- "\xc6\x0e\x6e\xe5\xd6\x98\xa6\x37" - "\x8c\x26\x33\xc6\xb2\xa2\x17\xfa" - "\x64\x19\xc0\x30\xd7\xfc\x14\x6b" - "\xe3\x33\xc2\x04\xb0\x37\xbe\x3f" - "\xa9\xb4\x2d\x68\x03\xa3\x44\xef", - .ilen = 48, - .result = "\x02\x87\x4d\x28\x80\x6e\xb2\xed" - "\x99\x2a\xa8\xca\x04\x25\x45\x90" - "\x1d\xdd\x5a\xd9\xe4\xdb\x9c\x9c" - "\x49\xe9\x01\xfe\xa7\x80\x6d\x6b", - .rlen = 32, - .novrfy = 1, - }, { - .key = "\xa4\x4b\x54\x29\x0a\xb8\x6d\x01" - "\x5b\x80\x2a\xcf\x25\xc4\xb7\x5c" - "\x20\x2c\xad\x30\xc2\x2b\x41\xfb" - "\x0e\x85\xbc\x33\xad\x0f\x2b\xff" - "\xee\x49\x83", - .klen = 35, - .iv = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e", - .alen = 0, - .input = "\x1f\xb8\x8f\xa3\xdd\x54\x00\xf2", - .ilen = 8, - .result = "\x00", - .rlen = 0, - }, { - .key = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73" - "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3" - "\xa4\x48\x93\x39\x26\x71\x4a\xc6" - "\xae\x8f\x11\x4c\xc2\x9c\x4a\xbb" - "\x85\x34\x66", - .klen = 35, - .iv = "\x42\xc8\x92\x0f\x36\x58\xe0\x6b", - .alen = 0, - .input = "\x48\x01\x5e\x02\x24\x04\x66\x47" - "\xa1\xea\x6f\xaf\xe8\xfc\xfb\xdd" - "\xa5\xa9\x87\x8d\x84\xee\x2e\x77" - "\xbb\x86\xb9\xf5\x5c\x6c\xff\xf6" - "\x72\xc3\x8e\xf7\x70\xb1\xb2\x07" - "\xbc\xa8\xa3\xbd\x83\x7c\x1d\x2a", - .ilen = 48, - .result = "\xdc\x56\xf2\x71\xb0\xb1\xa0\x6c" - "\xf0\x97\x3a\xfb\x6d\xe7\x32\x99" - "\x3e\xaf\x70\x5e\xb2\x4d\xea\x39" - "\x89\xd4\x75\x7a\x63\xb1\xda\x93", - .rlen = 32, - .novrfy = 1, - }, { - .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7" - "\x96\xe5\xc5\x68\xaa\x95\x35\xe0" - "\x29\xa0\xba\x9e\x48\x78\xd1\xba" - "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b" - "\xcf\x76\x3f", - .klen = 35, - .iv = "\xd9\x95\x75\x8f\x44\x89\x40\x7b", - .assoc = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88" - "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b" - "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b" - "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe", - .alen = 32, - .input = "\x48\x58\xd6\xf3\xad\x63\x58\xbf" - "\xae\xc7\x5e\xae\x83\x8f\x7b\xe4" - "\x78\x5c\x4c\x67\x71\x89\x94\xbf" - "\x47\xf1\x63\x7e\x1c\x59\xbd\xc5" - "\x7f\x44\x0a\x0c\x01\x18\x07\x92" - "\xe1\xd3\x51\xce\x32\x6d\x0c\x5b", - .ilen = 48, - .result = "\xc2\x54\xc8\xde\x78\x87\x77\x40" - "\x49\x71\xe4\xb7\xe7\xcb\x76\x61" - "\x0a\x41\xb9\xe9\xc0\x76\x54\xab" - "\x04\x49\x3b\x19\x93\x57\x25\x5d", - .rlen = 32, - }, -}; - -/* - * ANSI X9.31 Continuous Pseudo-Random Number Generator (AES mode) - * test vectors, taken from Appendix B.2.9 and B.2.10: - * http://csrc.nist.gov/groups/STM/cavp/documents/rng/RNGVS.pdf - * Only AES-128 is supported at this time. 
- */ -#define ANSI_CPRNG_AES_TEST_VECTORS 6 - -static struct cprng_testvec ansi_cprng_aes_tv_template[] = { - { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xf9", - .dtlen = 16, - .v = "\x80\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x59\x53\x1e\xd1\x3b\xb0\xc0\x55" - "\x84\x79\x66\x85\xc1\x2f\x76\x41", - .rlen = 16, - .loops = 1, - }, { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfa", - .dtlen = 16, - .v = "\xc0\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x7c\x22\x2c\xf4\xca\x8f\xa2\x4c" - "\x1c\x9c\xb6\x41\xa9\xf3\x22\x0d", - .rlen = 16, - .loops = 1, - }, { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfb", - .dtlen = 16, - .v = "\xe0\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x8a\xaa\x00\x39\x66\x67\x5b\xe5" - "\x29\x14\x28\x81\xa9\x4d\x4e\xc7", - .rlen = 16, - .loops = 1, - }, { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfc", - .dtlen = 16, - .v = "\xf0\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x88\xdd\xa4\x56\x30\x24\x23\xe5" - "\xf6\x9d\xa5\x7e\x7b\x95\xc7\x3a", - .rlen = 16, - .loops = 1, - }, { - .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" - "\xed\x06\x1c\xab\xb8\xd4\x62\x02", - .klen = 16, - .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62" - "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfd", - .dtlen = 16, - .v = "\xf8\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .vlen = 16, - .result = "\x05\x25\x92\x46\x61\x79\xd2\xcb" - "\x78\xc4\x0b\x14\x0a\x5a\x9a\xc8", - .rlen = 16, - .loops = 1, - }, { /* Monte Carlo Test */ - .key = "\x9f\x5b\x51\x20\x0b\xf3\x34\xb5" - "\xd8\x2b\xe8\xc3\x72\x55\xc8\x48", - .klen = 16, - .dt = "\x63\x76\xbb\xe5\x29\x02\xba\x3b" - "\x67\xc9\x25\xfa\x70\x1f\x11\xac", - .dtlen = 16, - .v = "\x57\x2c\x8e\x76\x87\x26\x47\x97" - "\x7e\x74\xfb\xdd\xc4\x95\x01\xd1", - .vlen = 16, - .result = "\x48\xe9\xbd\x0d\x06\xee\x18\xfb" - "\xe4\x57\x90\xd5\xc3\xfc\x9b\x73", - .rlen = 16, - .loops = 10000, - }, -}; - /* Cast5 test vectors from RFC 2144 */ #define CAST5_ENC_TEST_VECTORS 3 #define CAST5_DEC_TEST_VECTORS 3 diff --git a/trunk/crypto/zlib.c b/trunk/crypto/zlib.c index c3015733c990..33609bab614e 100644 --- a/trunk/crypto/zlib.c +++ b/trunk/crypto/zlib.c @@ -165,15 +165,15 @@ static int zlib_compress_update(struct crypto_pcomp *tfm, return -EINVAL; } - ret = req->avail_out - stream->avail_out; pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); + req->avail_in - stream->avail_in, + req->avail_out - stream->avail_out); req->next_in = stream->next_in; req->avail_in = stream->avail_in; req->next_out = stream->next_out; req->avail_out = stream->avail_out; - return ret; + return 0; } static int zlib_compress_final(struct crypto_pcomp *tfm, @@ -195,15 +195,15 @@ static int zlib_compress_final(struct crypto_pcomp *tfm, return -EINVAL; } - ret = req->avail_out - 
stream->avail_out; pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); + req->avail_in - stream->avail_in, + req->avail_out - stream->avail_out); req->next_in = stream->next_in; req->avail_in = stream->avail_in; req->next_out = stream->next_out; req->avail_out = stream->avail_out; - return ret; + return 0; } @@ -280,15 +280,15 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm, return -EINVAL; } - ret = req->avail_out - stream->avail_out; pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); + req->avail_in - stream->avail_in, + req->avail_out - stream->avail_out); req->next_in = stream->next_in; req->avail_in = stream->avail_in; req->next_out = stream->next_out; req->avail_out = stream->avail_out; - return ret; + return 0; } static int zlib_decompress_final(struct crypto_pcomp *tfm, @@ -328,15 +328,15 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm, return -EINVAL; } - ret = req->avail_out - stream->avail_out; pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); + req->avail_in - stream->avail_in, + req->avail_out - stream->avail_out); req->next_in = stream->next_in; req->avail_in = stream->avail_in; req->next_out = stream->next_out; req->avail_out = stream->avail_out; - return ret; + return 0; } diff --git a/trunk/drivers/char/hw_random/Kconfig b/trunk/drivers/char/hw_random/Kconfig index 9c00440dcf86..5fab6470f4b2 100644 --- a/trunk/drivers/char/hw_random/Kconfig +++ b/trunk/drivers/char/hw_random/Kconfig @@ -88,7 +88,7 @@ config HW_RANDOM_N2RNG config HW_RANDOM_VIA tristate "VIA HW Random Number Generator support" - depends on HW_RANDOM && X86 + depends on HW_RANDOM && X86_32 default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number diff --git a/trunk/drivers/char/hw_random/omap-rng.c b/trunk/drivers/char/hw_random/omap-rng.c index 00dd3de1be51..538313f9e7ac 100644 --- a/trunk/drivers/char/hw_random/omap-rng.c +++ b/trunk/drivers/char/hw_random/omap-rng.c @@ -89,7 +89,7 @@ static struct hwrng omap_rng_ops = { .data_read = omap_rng_data_read, }; -static int __devinit omap_rng_probe(struct platform_device *pdev) +static int __init omap_rng_probe(struct platform_device *pdev) { struct resource *res, *mem; int ret; diff --git a/trunk/drivers/char/hw_random/timeriomem-rng.c b/trunk/drivers/char/hw_random/timeriomem-rng.c index a94e930575f2..dcd352ad0e7f 100644 --- a/trunk/drivers/char/hw_random/timeriomem-rng.c +++ b/trunk/drivers/char/hw_random/timeriomem-rng.c @@ -88,9 +88,9 @@ static struct hwrng timeriomem_rng_ops = { .priv = 0, }; -static int __devinit timeriomem_rng_probe(struct platform_device *pdev) +static int __init timeriomem_rng_probe(struct platform_device *pdev) { - struct resource *res; + struct resource *res, *mem; int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -98,12 +98,21 @@ static int __devinit timeriomem_rng_probe(struct platform_device *pdev) if (!res) return -ENOENT; + mem = request_mem_region(res->start, res->end - res->start + 1, + pdev->name); + if (mem == NULL) + return -EBUSY; + + dev_set_drvdata(&pdev->dev, mem); + timeriomem_rng_data = pdev->dev.platform_data; timeriomem_rng_data->address = ioremap(res->start, res->end - res->start + 1); - if (!timeriomem_rng_data->address) - return -EIO; + if 
(!timeriomem_rng_data->address) { + ret = -ENOMEM; + goto err_ioremap; + } if (timeriomem_rng_data->period != 0 && usecs_to_jiffies(timeriomem_rng_data->period) > 0) { @@ -116,7 +125,7 @@ static int __devinit timeriomem_rng_probe(struct platform_device *pdev) ret = hwrng_register(&timeriomem_rng_ops); if (ret) - goto failed; + goto err_register; dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n", timeriomem_rng_data->address, @@ -124,19 +133,24 @@ static int __devinit timeriomem_rng_probe(struct platform_device *pdev) return 0; -failed: +err_register: dev_err(&pdev->dev, "problem registering\n"); iounmap(timeriomem_rng_data->address); +err_ioremap: + release_resource(mem); return ret; } static int __devexit timeriomem_rng_remove(struct platform_device *pdev) { + struct resource *mem = dev_get_drvdata(&pdev->dev); + del_timer_sync(&timeriomem_rng_timer); hwrng_unregister(&timeriomem_rng_ops); iounmap(timeriomem_rng_data->address); + release_resource(mem); return 0; } diff --git a/trunk/drivers/char/hw_random/via-rng.c b/trunk/drivers/char/hw_random/via-rng.c index 794aacb715c1..4e9573c1d39e 100644 --- a/trunk/drivers/char/hw_random/via-rng.c +++ b/trunk/drivers/char/hw_random/via-rng.c @@ -132,19 +132,6 @@ static int via_rng_init(struct hwrng *rng) struct cpuinfo_x86 *c = &cpu_data(0); u32 lo, hi, old_lo; - /* VIA Nano CPUs don't have the MSR_VIA_RNG anymore. The RNG - * is always enabled if CPUID rng_en is set. There is no - * RNG configuration like it used to be the case in this - * register */ - if ((c->x86 == 6) && (c->x86_model >= 0x0f)) { - if (!cpu_has_xstore_enabled) { - printk(KERN_ERR PFX "can't enable hardware RNG " - "if XSTORE is not enabled\n"); - return -ENODEV; - } - return 0; - } - /* Control the RNG via MSR. Tread lightly and pay very close * close attention to values written, as the reserved fields * are documented to be "undefined and unpredictable"; but it @@ -218,5 +205,5 @@ static void __exit mod_exit(void) module_init(mod_init); module_exit(mod_exit); -MODULE_DESCRIPTION("H/W RNG driver for VIA CPU with PadLock"); +MODULE_DESCRIPTION("H/W RNG driver for VIA chipsets"); MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/crypto/Kconfig b/trunk/drivers/crypto/Kconfig index e748e55bd86b..01afd758072f 100644 --- a/trunk/drivers/crypto/Kconfig +++ b/trunk/drivers/crypto/Kconfig @@ -12,7 +12,7 @@ if CRYPTO_HW config CRYPTO_DEV_PADLOCK tristate "Support for VIA PadLock ACE" - depends on X86 && !UML + depends on X86_32 && !UML select CRYPTO_ALGAPI help Some VIA processors come with an integrated crypto engine diff --git a/trunk/drivers/crypto/hifn_795x.c b/trunk/drivers/crypto/hifn_795x.c index 5f753fc08730..2bef086fb342 100644 --- a/trunk/drivers/crypto/hifn_795x.c +++ b/trunk/drivers/crypto/hifn_795x.c @@ -2564,7 +2564,7 @@ static void hifn_tasklet_callback(unsigned long data) hifn_process_queue(dev); } -static int __devinit hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int err, i; struct hifn_device *dev; @@ -2696,7 +2696,7 @@ static int __devinit hifn_probe(struct pci_dev *pdev, const struct pci_device_id return err; } -static void __devexit hifn_remove(struct pci_dev *pdev) +static void hifn_remove(struct pci_dev *pdev) { int i; struct hifn_device *dev; @@ -2744,7 +2744,7 @@ static struct pci_driver hifn_pci_driver = { .remove = __devexit_p(hifn_remove), }; -static int __init hifn_init(void) +static int __devinit hifn_init(void) { unsigned int freq; int err; @@ -2789,7 +2789,7 
@@ static int __init hifn_init(void) return 0; } -static void __exit hifn_fini(void) +static void __devexit hifn_fini(void) { pci_unregister_driver(&hifn_pci_driver); diff --git a/trunk/drivers/crypto/padlock-aes.c b/trunk/drivers/crypto/padlock-aes.c index 87f92c39b5f0..856b3cc25583 100644 --- a/trunk/drivers/crypto/padlock-aes.c +++ b/trunk/drivers/crypto/padlock-aes.c @@ -154,11 +154,7 @@ static inline void padlock_reset_key(struct cword *cword) int cpu = raw_smp_processor_id(); if (cword != per_cpu(last_cword, cpu)) -#ifndef CONFIG_X86_64 asm volatile ("pushfl; popfl"); -#else - asm volatile ("pushfq; popfq"); -#endif } static inline void padlock_store_cword(struct cword *cword) @@ -212,19 +208,10 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, asm volatile ("test $1, %%cl;" "je 1f;" -#ifndef CONFIG_X86_64 "lea -1(%%ecx), %%eax;" "mov $1, %%ecx;" -#else - "lea -1(%%rcx), %%rax;" - "mov $1, %%rcx;" -#endif ".byte 0xf3,0x0f,0xa7,0xc8;" /* rep xcryptecb */ -#ifndef CONFIG_X86_64 "mov %%eax, %%ecx;" -#else - "mov %%rax, %%rcx;" -#endif "1:" ".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) diff --git a/trunk/drivers/crypto/talitos.c b/trunk/drivers/crypto/talitos.c index c70775fd3ce2..a3918c16b3db 100644 --- a/trunk/drivers/crypto/talitos.c +++ b/trunk/drivers/crypto/talitos.c @@ -44,8 +44,6 @@ #include #include #include -#include -#include #include "talitos.h" @@ -341,8 +339,7 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) status = error; dma_unmap_single(dev, request->dma_desc, - sizeof(struct talitos_desc), - DMA_BIDIRECTIONAL); + sizeof(struct talitos_desc), DMA_BIDIRECTIONAL); /* copy entries so we can call callback outside lock */ saved_req.desc = request->desc; @@ -416,8 +413,7 @@ static struct talitos_desc *current_desc(struct device *dev, int ch) /* * user diagnostics; report root cause of error based on execution unit status */ -static void report_eu_error(struct device *dev, int ch, - struct talitos_desc *desc) +static void report_eu_error(struct device *dev, int ch, struct talitos_desc *desc) { struct talitos_private *priv = dev_get_drvdata(dev); int i; @@ -688,8 +684,8 @@ struct talitos_ctx { unsigned int authsize; }; -static int aead_setauthsize(struct crypto_aead *authenc, - unsigned int authsize) +static int aead_authenc_setauthsize(struct crypto_aead *authenc, + unsigned int authsize) { struct talitos_ctx *ctx = crypto_aead_ctx(authenc); @@ -698,8 +694,8 @@ static int aead_setauthsize(struct crypto_aead *authenc, return 0; } -static int aead_setkey(struct crypto_aead *authenc, - const u8 *key, unsigned int keylen) +static int aead_authenc_setkey(struct crypto_aead *authenc, + const u8 *key, unsigned int keylen) { struct talitos_ctx *ctx = crypto_aead_ctx(authenc); struct rtattr *rta = (void *)key; @@ -744,7 +740,7 @@ static int aead_setkey(struct crypto_aead *authenc, } /* - * talitos_edesc - s/w-extended descriptor + * ipsec_esp_edesc - s/w-extended ipsec_esp descriptor * @src_nents: number of segments in input scatterlist * @dst_nents: number of segments in output scatterlist * @dma_len: length of dma mapped link_tbl space @@ -756,67 +752,17 @@ static int aead_setkey(struct crypto_aead *authenc, * is greater than 1, an integrity check value is concatenated to the end * of link_tbl data */ -struct talitos_edesc { +struct ipsec_esp_edesc { int src_nents; int dst_nents; - int src_is_chained; - int dst_is_chained; int dma_len; dma_addr_t dma_link_tbl; struct talitos_desc desc; 
struct talitos_ptr link_tbl[0]; }; -static int talitos_map_sg(struct device *dev, struct scatterlist *sg, - unsigned int nents, enum dma_data_direction dir, - int chained) -{ - if (unlikely(chained)) - while (sg) { - dma_map_sg(dev, sg, 1, dir); - sg = scatterwalk_sg_next(sg); - } - else - dma_map_sg(dev, sg, nents, dir); - return nents; -} - -static void talitos_unmap_sg_chain(struct device *dev, struct scatterlist *sg, - enum dma_data_direction dir) -{ - while (sg) { - dma_unmap_sg(dev, sg, 1, dir); - sg = scatterwalk_sg_next(sg); - } -} - -static void talitos_sg_unmap(struct device *dev, - struct talitos_edesc *edesc, - struct scatterlist *src, - struct scatterlist *dst) -{ - unsigned int src_nents = edesc->src_nents ? : 1; - unsigned int dst_nents = edesc->dst_nents ? : 1; - - if (src != dst) { - if (edesc->src_is_chained) - talitos_unmap_sg_chain(dev, src, DMA_TO_DEVICE); - else - dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - - if (edesc->dst_is_chained) - talitos_unmap_sg_chain(dev, dst, DMA_FROM_DEVICE); - else - dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); - } else - if (edesc->src_is_chained) - talitos_unmap_sg_chain(dev, src, DMA_BIDIRECTIONAL); - else - dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); -} - static void ipsec_esp_unmap(struct device *dev, - struct talitos_edesc *edesc, + struct ipsec_esp_edesc *edesc, struct aead_request *areq) { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], DMA_FROM_DEVICE); @@ -826,7 +772,15 @@ static void ipsec_esp_unmap(struct device *dev, dma_unmap_sg(dev, areq->assoc, 1, DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); + if (areq->src != areq->dst) { + dma_unmap_sg(dev, areq->src, edesc->src_nents ? : 1, + DMA_TO_DEVICE); + dma_unmap_sg(dev, areq->dst, edesc->dst_nents ? : 1, + DMA_FROM_DEVICE); + } else { + dma_unmap_sg(dev, areq->src, edesc->src_nents ? 
: 1, + DMA_BIDIRECTIONAL); + } if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, @@ -841,14 +795,13 @@ static void ipsec_esp_encrypt_done(struct device *dev, int err) { struct aead_request *areq = context; + struct ipsec_esp_edesc *edesc = + container_of(desc, struct ipsec_esp_edesc, desc); struct crypto_aead *authenc = crypto_aead_reqtfm(areq); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - struct talitos_edesc *edesc; struct scatterlist *sg; void *icvdata; - edesc = container_of(desc, struct talitos_edesc, desc); - ipsec_esp_unmap(dev, edesc, areq); /* copy the generated ICV to dst */ @@ -866,18 +819,17 @@ static void ipsec_esp_encrypt_done(struct device *dev, } static void ipsec_esp_decrypt_swauth_done(struct device *dev, - struct talitos_desc *desc, - void *context, int err) + struct talitos_desc *desc, void *context, + int err) { struct aead_request *req = context; + struct ipsec_esp_edesc *edesc = + container_of(desc, struct ipsec_esp_edesc, desc); struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - struct talitos_edesc *edesc; struct scatterlist *sg; void *icvdata; - edesc = container_of(desc, struct talitos_edesc, desc); - ipsec_esp_unmap(dev, edesc, req); if (!err) { @@ -899,20 +851,20 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, } static void ipsec_esp_decrypt_hwauth_done(struct device *dev, - struct talitos_desc *desc, - void *context, int err) + struct talitos_desc *desc, void *context, + int err) { struct aead_request *req = context; - struct talitos_edesc *edesc; - - edesc = container_of(desc, struct talitos_edesc, desc); + struct ipsec_esp_edesc *edesc = + container_of(desc, struct ipsec_esp_edesc, desc); ipsec_esp_unmap(dev, edesc, req); /* check ICV auth status */ - if (!err && ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) != - DESC_HDR_LO_ICCR1_PASS)) - err = -EBADMSG; + if (!err) + if ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) != + DESC_HDR_LO_ICCR1_PASS) + err = -EBADMSG; kfree(edesc); @@ -934,7 +886,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, link_tbl_ptr->j_extent = 0; link_tbl_ptr++; cryptlen -= sg_dma_len(sg); - sg = scatterwalk_sg_next(sg); + sg = sg_next(sg); } /* adjust (decrease) last one (or two) entry's len to cryptlen */ @@ -958,7 +910,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, /* * fill in and submit ipsec_esp descriptor */ -static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, +static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, u8 *giv, u64 seq, void (*callback) (struct device *dev, struct talitos_desc *desc, @@ -1000,31 +952,32 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, desc->ptr[4].len = cpu_to_be16(cryptlen); desc->ptr[4].j_extent = authsize; - sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE, - edesc->src_is_chained); + if (areq->src == areq->dst) + sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ? : 1, + DMA_BIDIRECTIONAL); + else + sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ? 
: 1, + DMA_TO_DEVICE); if (sg_count == 1) { desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src)); } else { sg_link_tbl_len = cryptlen; - if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) + if ((edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) && + (edesc->desc.hdr & DESC_HDR_MODE0_ENCRYPT) == 0) { sg_link_tbl_len = cryptlen + authsize; - + } sg_count = sg_to_link_tbl(areq->src, sg_count, sg_link_tbl_len, &edesc->link_tbl[0]); if (sg_count > 1) { desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; desc->ptr[4].ptr = cpu_to_be32(edesc->dma_link_tbl); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); + dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); } else { /* Only one segment now, so no link tbl needed */ - desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq-> - src)); + desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src)); } } @@ -1032,11 +985,10 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, desc->ptr[5].len = cpu_to_be16(cryptlen); desc->ptr[5].j_extent = authsize; - if (areq->src != areq->dst) - sg_count = talitos_map_sg(dev, areq->dst, - edesc->dst_nents ? : 1, - DMA_FROM_DEVICE, - edesc->dst_is_chained); + if (areq->src != areq->dst) { + sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1, + DMA_FROM_DEVICE); + } if (sg_count == 1) { desc->ptr[5].ptr = cpu_to_be32(sg_dma_address(areq->dst)); @@ -1081,55 +1033,49 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, return ret; } + /* * derive number of elements in scatterlist */ -static int sg_count(struct scatterlist *sg_list, int nbytes, int *chained) +static int sg_count(struct scatterlist *sg_list, int nbytes) { struct scatterlist *sg = sg_list; int sg_nents = 0; - *chained = 0; - while (nbytes > 0) { + while (nbytes) { sg_nents++; nbytes -= sg->length; - if (!sg_is_last(sg) && (sg + 1)->length == 0) - *chained = 1; - sg = scatterwalk_sg_next(sg); + sg = sg_next(sg); } return sg_nents; } /* - * allocate and map the extended descriptor + * allocate and map the ipsec_esp extended descriptor */ -static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, - struct scatterlist *src, - struct scatterlist *dst, - unsigned int cryptlen, - unsigned int authsize, - int icv_stashing, - u32 cryptoflags) +static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq, + int icv_stashing) { - struct talitos_edesc *edesc; + struct crypto_aead *authenc = crypto_aead_reqtfm(areq); + struct talitos_ctx *ctx = crypto_aead_ctx(authenc); + struct ipsec_esp_edesc *edesc; int src_nents, dst_nents, alloc_len, dma_len; - int src_chained, dst_chained = 0; - gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + gfp_t flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) { - dev_err(dev, "length exceeds h/w max limit\n"); + if (areq->cryptlen + ctx->authsize > TALITOS_MAX_DATA_LEN) { + dev_err(ctx->dev, "cryptlen exceeds h/w max limit\n"); return ERR_PTR(-EINVAL); } - src_nents = sg_count(src, cryptlen + authsize, &src_chained); + src_nents = sg_count(areq->src, areq->cryptlen + ctx->authsize); src_nents = (src_nents == 1) ? 0 : src_nents; - if (dst == src) { + if (areq->dst == areq->src) { dst_nents = src_nents; } else { - dst_nents = sg_count(dst, cryptlen + authsize, &dst_chained); + dst_nents = sg_count(areq->dst, areq->cryptlen + ctx->authsize); dst_nents = (dst_nents == 1) ? 
0 : dst_nents; } @@ -1138,52 +1084,39 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, * allowing for two separate entries for ICV and generated ICV (+ 2), * and the ICV data itself */ - alloc_len = sizeof(struct talitos_edesc); + alloc_len = sizeof(struct ipsec_esp_edesc); if (src_nents || dst_nents) { dma_len = (src_nents + dst_nents + 2) * - sizeof(struct talitos_ptr) + authsize; + sizeof(struct talitos_ptr) + ctx->authsize; alloc_len += dma_len; } else { dma_len = 0; - alloc_len += icv_stashing ? authsize : 0; + alloc_len += icv_stashing ? ctx->authsize : 0; } edesc = kmalloc(alloc_len, GFP_DMA | flags); if (!edesc) { - dev_err(dev, "could not allocate edescriptor\n"); + dev_err(ctx->dev, "could not allocate edescriptor\n"); return ERR_PTR(-ENOMEM); } edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; - edesc->src_is_chained = src_chained; - edesc->dst_is_chained = dst_chained; edesc->dma_len = dma_len; - edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0], + edesc->dma_link_tbl = dma_map_single(ctx->dev, &edesc->link_tbl[0], edesc->dma_len, DMA_BIDIRECTIONAL); return edesc; } -static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq, - int icv_stashing) -{ - struct crypto_aead *authenc = crypto_aead_reqtfm(areq); - struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - - return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, - areq->cryptlen, ctx->authsize, icv_stashing, - areq->base.flags); -} - -static int aead_encrypt(struct aead_request *req) +static int aead_authenc_encrypt(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - struct talitos_edesc *edesc; + struct ipsec_esp_edesc *edesc; /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, 0); + edesc = ipsec_esp_edesc_alloc(req, 0); if (IS_ERR(edesc)) return PTR_ERR(edesc); @@ -1193,67 +1126,70 @@ static int aead_encrypt(struct aead_request *req) return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_encrypt_done); } -static int aead_decrypt(struct aead_request *req) + + +static int aead_authenc_decrypt(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); unsigned int authsize = ctx->authsize; struct talitos_private *priv = dev_get_drvdata(ctx->dev); - struct talitos_edesc *edesc; + struct ipsec_esp_edesc *edesc; struct scatterlist *sg; void *icvdata; req->cryptlen -= authsize; /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, 1); + edesc = ipsec_esp_edesc_alloc(req, 1); if (IS_ERR(edesc)) return PTR_ERR(edesc); if ((priv->features & TALITOS_FTR_HW_AUTH_CHECK) && - ((!edesc->src_nents && !edesc->dst_nents) || - priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT)) { + (((!edesc->src_nents && !edesc->dst_nents) || + priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT))) { /* decrypt and check the ICV */ - edesc->desc.hdr = ctx->desc_hdr_template | - DESC_HDR_DIR_INBOUND | + edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND | DESC_HDR_MODE1_MDEU_CICV; /* reset integrity check result bits */ edesc->desc.hdr_lo = 0; - return ipsec_esp(edesc, req, NULL, 0, - ipsec_esp_decrypt_hwauth_done); + return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_hwauth_done); - } + } else { - /* Have to check the ICV with software */ - edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND; + /* Have to check the ICV with software */ - /* stash incoming ICV for 
later cmp with ICV generated by the h/w */ - if (edesc->dma_len) - icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 2]; - else - icvdata = &edesc->link_tbl[0]; + edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND; + + /* stash incoming ICV for later cmp with ICV generated by the h/w */ + if (edesc->dma_len) + icvdata = &edesc->link_tbl[edesc->src_nents + + edesc->dst_nents + 2]; + else + icvdata = &edesc->link_tbl[0]; - sg = sg_last(req->src, edesc->src_nents ? : 1); + sg = sg_last(req->src, edesc->src_nents ? : 1); - memcpy(icvdata, (char *)sg_virt(sg) + sg->length - ctx->authsize, - ctx->authsize); + memcpy(icvdata, (char *)sg_virt(sg) + sg->length - ctx->authsize, + ctx->authsize); - return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_swauth_done); + return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_swauth_done); + } } -static int aead_givencrypt(struct aead_givcrypt_request *req) +static int aead_authenc_givencrypt( + struct aead_givcrypt_request *req) { struct aead_request *areq = &req->areq; struct crypto_aead *authenc = crypto_aead_reqtfm(areq); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - struct talitos_edesc *edesc; + struct ipsec_esp_edesc *edesc; /* allocate extended descriptor */ - edesc = aead_edesc_alloc(areq, 0); + edesc = ipsec_esp_edesc_alloc(areq, 0); if (IS_ERR(edesc)) return PTR_ERR(edesc); @@ -1268,228 +1204,31 @@ static int aead_givencrypt(struct aead_givcrypt_request *req) ipsec_esp_encrypt_done); } -static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - struct ablkcipher_alg *alg = crypto_ablkcipher_alg(cipher); - - if (keylen > TALITOS_MAX_KEY_SIZE) - goto badkey; - - if (keylen < alg->min_keysize || keylen > alg->max_keysize) - goto badkey; - - memcpy(&ctx->key, key, keylen); - ctx->keylen = keylen; - - return 0; - -badkey: - crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; -} - -static void common_nonsnoop_unmap(struct device *dev, - struct talitos_edesc *edesc, - struct ablkcipher_request *areq) -{ - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); - - if (edesc->dma_len) - dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, - DMA_BIDIRECTIONAL); -} - -static void ablkcipher_done(struct device *dev, - struct talitos_desc *desc, void *context, - int err) -{ - struct ablkcipher_request *areq = context; - struct talitos_edesc *edesc; - - edesc = container_of(desc, struct talitos_edesc, desc); - - common_nonsnoop_unmap(dev, edesc, areq); - - kfree(edesc); - - areq->base.complete(&areq->base, err); -} - -static int common_nonsnoop(struct talitos_edesc *edesc, - struct ablkcipher_request *areq, - u8 *giv, - void (*callback) (struct device *dev, - struct talitos_desc *desc, - void *context, int error)) -{ - struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - struct device *dev = ctx->dev; - struct talitos_desc *desc = &edesc->desc; - unsigned int cryptlen = areq->nbytes; - unsigned int ivsize; - int sg_count, ret; - - /* first DWORD empty */ - desc->ptr[0].len = 0; - desc->ptr[0].ptr = 0; - desc->ptr[0].j_extent = 0; - - /* cipher iv */ - ivsize = crypto_ablkcipher_ivsize(cipher); - 
map_single_talitos_ptr(dev, &desc->ptr[1], ivsize, giv ?: areq->info, 0, - DMA_TO_DEVICE); - - /* cipher key */ - map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); - - /* - * cipher in - */ - desc->ptr[3].len = cpu_to_be16(cryptlen); - desc->ptr[3].j_extent = 0; - - sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE, - edesc->src_is_chained); - - if (sg_count == 1) { - desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq->src)); - } else { - sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen, - &edesc->link_tbl[0]); - if (sg_count > 1) { - desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP; - desc->ptr[3].ptr = cpu_to_be32(edesc->dma_link_tbl); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed */ - desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq-> - src)); - } - } - - /* cipher out */ - desc->ptr[4].len = cpu_to_be16(cryptlen); - desc->ptr[4].j_extent = 0; - - if (areq->src != areq->dst) - sg_count = talitos_map_sg(dev, areq->dst, - edesc->dst_nents ? : 1, - DMA_FROM_DEVICE, - edesc->dst_is_chained); - - if (sg_count == 1) { - desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->dst)); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; - desc->ptr[4].ptr = cpu_to_be32((struct talitos_ptr *) - edesc->dma_link_tbl + - edesc->src_nents + 1); - sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen, - link_tbl_ptr); - dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); - } - - /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0, - DMA_FROM_DEVICE); - - /* last DWORD empty */ - desc->ptr[6].len = 0; - desc->ptr[6].ptr = 0; - desc->ptr[6].j_extent = 0; - - ret = talitos_submit(dev, desc, callback, areq); - if (ret != -EINPROGRESS) { - common_nonsnoop_unmap(dev, edesc, areq); - kfree(edesc); - } - return ret; -} - -static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request * - areq) -{ - struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - - return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, areq->nbytes, - 0, 0, areq->base.flags); -} - -static int ablkcipher_encrypt(struct ablkcipher_request *areq) -{ - struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - struct talitos_edesc *edesc; - - /* allocate extended descriptor */ - edesc = ablkcipher_edesc_alloc(areq); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* set encrypt */ - edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_MODE0_ENCRYPT; - - return common_nonsnoop(edesc, areq, NULL, ablkcipher_done); -} - -static int ablkcipher_decrypt(struct ablkcipher_request *areq) -{ - struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); - struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - struct talitos_edesc *edesc; - - /* allocate extended descriptor */ - edesc = ablkcipher_edesc_alloc(areq); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND; - - return common_nonsnoop(edesc, areq, NULL, ablkcipher_done); -} - struct talitos_alg_template { - struct crypto_alg alg; + char name[CRYPTO_MAX_ALG_NAME]; + char 
driver_name[CRYPTO_MAX_ALG_NAME]; + unsigned int blocksize; + struct aead_alg aead; + struct device *dev; __be32 desc_hdr_template; }; static struct talitos_alg_template driver_algs[] = { - /* AEAD algorithms. These use a single-pass ipsec_esp descriptor */ + /* single-pass ipsec_esp descriptor */ { - .alg = { - .cra_name = "authenc(hmac(sha1),cbc(aes))", - .cra_driver_name = "authenc-hmac-sha1-cbc-aes-talitos", - .cra_blocksize = AES_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = SHA1_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(sha1),cbc(aes))", + .driver_name = "authenc-hmac-sha1-cbc-aes-talitos", + .blocksize = AES_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CBC | @@ -1499,23 +1238,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(sha1),cbc(des3_ede))", - .cra_driver_name = "authenc-hmac-sha1-cbc-3des-talitos", - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = DES3_EDE_BLOCK_SIZE, - .maxauthsize = SHA1_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(sha1),cbc(des3_ede))", + .driver_name = "authenc-hmac-sha1-cbc-3des-talitos", + .blocksize = DES3_EDE_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_DEU | DESC_HDR_MODE0_DEU_CBC | @@ -1526,23 +1261,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(sha256),cbc(aes))", - .cra_driver_name = "authenc-hmac-sha256-cbc-aes-talitos", - .cra_blocksize = AES_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = SHA256_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(sha256),cbc(aes))", + .driver_name = "authenc-hmac-sha256-cbc-aes-talitos", + .blocksize = AES_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CBC | @@ 
-1552,23 +1283,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(sha256),cbc(des3_ede))", - .cra_driver_name = "authenc-hmac-sha256-cbc-3des-talitos", - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = DES3_EDE_BLOCK_SIZE, - .maxauthsize = SHA256_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(sha256),cbc(des3_ede))", + .driver_name = "authenc-hmac-sha256-cbc-3des-talitos", + .blocksize = DES3_EDE_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_DEU | DESC_HDR_MODE0_DEU_CBC | @@ -1579,23 +1306,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(md5),cbc(aes))", - .cra_driver_name = "authenc-hmac-md5-cbc-aes-talitos", - .cra_blocksize = AES_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = MD5_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(md5),cbc(aes))", + .driver_name = "authenc-hmac-md5-cbc-aes-talitos", + .blocksize = AES_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CBC | @@ -1605,23 +1328,19 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_MD5_HMAC, }, { - .alg = { - .cra_name = "authenc(hmac(md5),cbc(des3_ede))", - .cra_driver_name = "authenc-hmac-md5-cbc-3des-talitos", - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_type = &crypto_aead_type, - .cra_aead = { - .setkey = aead_setkey, - .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "", - .ivsize = DES3_EDE_BLOCK_SIZE, - .maxauthsize = MD5_DIGEST_SIZE, - } - }, + .name = "authenc(hmac(md5),cbc(des3_ede))", + .driver_name = "authenc-hmac-md5-cbc-3des-talitos", + .blocksize = DES3_EDE_BLOCK_SIZE, + .aead = { + .setkey = aead_authenc_setkey, + .setauthsize = aead_authenc_setauthsize, + .encrypt = aead_authenc_encrypt, + .decrypt = aead_authenc_decrypt, + .givencrypt = aead_authenc_givencrypt, + .geniv = "", + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | DESC_HDR_SEL0_DEU | DESC_HDR_MODE0_DEU_CBC | @@ -1630,52 +1349,6 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_INIT | DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, - }, - /* ABLKCIPHER 
algorithms. */ - { - .alg = { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-talitos", - .cra_blocksize = AES_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | - CRYPTO_ALG_ASYNC, - .cra_type = &crypto_ablkcipher_type, - .cra_ablkcipher = { - .setkey = ablkcipher_setkey, - .encrypt = ablkcipher_encrypt, - .decrypt = ablkcipher_decrypt, - .geniv = "eseqiv", - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - } - }, - .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | - DESC_HDR_SEL0_AESU | - DESC_HDR_MODE0_AESU_CBC, - }, - { - .alg = { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "cbc-3des-talitos", - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | - CRYPTO_ALG_ASYNC, - .cra_type = &crypto_ablkcipher_type, - .cra_ablkcipher = { - .setkey = ablkcipher_setkey, - .encrypt = ablkcipher_encrypt, - .decrypt = ablkcipher_decrypt, - .geniv = "eseqiv", - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - } - }, - .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | - DESC_HDR_SEL0_DEU | - DESC_HDR_MODE0_DEU_CBC | - DESC_HDR_MODE0_DEU_3DES, } }; @@ -1689,14 +1362,12 @@ struct talitos_crypto_alg { static int talitos_cra_init(struct crypto_tfm *tfm) { struct crypto_alg *alg = tfm->__crt_alg; - struct talitos_crypto_alg *talitos_alg; + struct talitos_crypto_alg *talitos_alg = + container_of(alg, struct talitos_crypto_alg, crypto_alg); struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); - talitos_alg = container_of(alg, struct talitos_crypto_alg, crypto_alg); - /* update context with ptr to dev */ ctx->dev = talitos_alg->dev; - /* copy descriptor header template value */ ctx->desc_hdr_template = talitos_alg->desc_hdr_template; @@ -1782,13 +1453,19 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, return ERR_PTR(-ENOMEM); alg = &t_alg->crypto_alg; - *alg = template->alg; + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + template->driver_name); alg->cra_module = THIS_MODULE; alg->cra_init = talitos_cra_init; alg->cra_priority = TALITOS_CRA_PRIORITY; + alg->cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; + alg->cra_blocksize = template->blocksize; alg->cra_alignmask = 0; + alg->cra_type = &crypto_aead_type; alg->cra_ctxsize = sizeof(struct talitos_ctx); + alg->cra_u.aead = template->aead; t_alg->desc_hdr_template = template->desc_hdr_template; t_alg->dev = dev; diff --git a/trunk/drivers/i2c/busses/Kconfig b/trunk/drivers/i2c/busses/Kconfig index f1c6ca7e2852..c8460fa9cfac 100644 --- a/trunk/drivers/i2c/busses/Kconfig +++ b/trunk/drivers/i2c/busses/Kconfig @@ -298,7 +298,7 @@ config I2C_BLACKFIN_TWI config I2C_BLACKFIN_TWI_CLK_KHZ int "Blackfin TWI I2C clock (kHz)" depends on I2C_BLACKFIN_TWI - range 10 400 + range 21 400 default 50 help The unit of the TWI clock is kHz. 
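The "range 21 400" bump above exists to keep the driver's CLKDIV arithmetic in
range: i2c-bfin-twi.c (next diff) derives an 8-bit clock-high/clock-low count
from a roughly 10 MHz internal TWI clock, so 5 * 1024 / freq_khz must stay
below 256 (at 20 kHz it is exactly 0x100 and the 8-bit field would wrap to
zero). A minimal standalone sketch of that arithmetic, not part of the patch
itself -- the helper name is illustrative, the constants are taken from the
driver code below:

	#include <assert.h>
	#include <stdio.h>

	/* Roughly the number of ~10 MHz internal-clock ticks per SCL
	 * half-period; the driver stores this value twice in the 16-bit
	 * CLKDIV register (clock-high byte and clock-low byte). */
	static unsigned int twi_clkhilow(unsigned int scl_khz)
	{
		return 5 * 1024 / scl_khz;
	}

	int main(void)
	{
		assert(twi_clkhilow(20) == 0x100);	/* overflows 8 bits */
		assert(twi_clkhilow(21) == 0xf3);	/* fits: hence "range 21 400" */
		printf("CLKDIV @ 21 kHz = 0x%04x\n",
		       (twi_clkhilow(21) << 8) | twi_clkhilow(21));
		return 0;
	}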
diff --git a/trunk/drivers/i2c/busses/i2c-bfin-twi.c b/trunk/drivers/i2c/busses/i2c-bfin-twi.c index fc548b3d002e..77cafb6ba923 100644 --- a/trunk/drivers/i2c/busses/i2c-bfin-twi.c +++ b/trunk/drivers/i2c/busses/i2c-bfin-twi.c @@ -614,6 +614,7 @@ static int i2c_bfin_twi_probe(struct platform_device *pdev) struct i2c_adapter *p_adap; struct resource *res; int rc; + unsigned int clkhilow; iface = kzalloc(sizeof(struct bfin_twi_iface), GFP_KERNEL); if (!iface) { @@ -675,10 +676,14 @@ static int i2c_bfin_twi_probe(struct platform_device *pdev) /* Set TWI internal clock as 10MHz */ write_CONTROL(iface, ((get_sclk() / 1024 / 1024 + 5) / 10) & 0x7F); + /* + * We will not end up with a CLKDIV=0 because no one will specify + * 20kHz SCL or less in Kconfig now. (5 * 1024 / 20 = 0x100) + */ + clkhilow = 5 * 1024 / CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ; + /* Set Twi interface clock as specified */ - write_CLKDIV(iface, ((5*1024 / CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ) - << 8) | ((5*1024 / CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ) - & 0xFF)); + write_CLKDIV(iface, (clkhilow << 8) | clkhilow); /* Enable TWI */ write_CONTROL(iface, read_CONTROL(iface) | TWI_ENA); diff --git a/trunk/fs/xfs/Kconfig b/trunk/fs/xfs/Kconfig index 480f28127f09..29228f5899cd 100644 --- a/trunk/fs/xfs/Kconfig +++ b/trunk/fs/xfs/Kconfig @@ -39,7 +39,6 @@ config XFS_QUOTA config XFS_POSIX_ACL bool "XFS POSIX ACL support" depends on XFS_FS - select FS_POSIX_ACL help POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. diff --git a/trunk/fs/xfs/Makefile b/trunk/fs/xfs/Makefile index 7a59daed1782..60f107e47fe9 100644 --- a/trunk/fs/xfs/Makefile +++ b/trunk/fs/xfs/Makefile @@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o endif xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o -xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o +xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o @@ -88,7 +88,8 @@ xfs-y += xfs_alloc.o \ xfs_utils.o \ xfs_vnodeops.o \ xfs_rw.o \ - xfs_dmops.o + xfs_dmops.o \ + xfs_qmops.o xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ xfs_dir2_trace.o diff --git a/trunk/fs/xfs/linux-2.6/xfs_acl.c b/trunk/fs/xfs/linux-2.6/xfs_acl.c deleted file mode 100644 index 1e9d1246eebc..000000000000 --- a/trunk/fs/xfs/linux-2.6/xfs_acl.c +++ /dev/null @@ -1,523 +0,0 @@ -/* - * Copyright (c) 2008, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_vnodeops.h" -#include -#include - - -#define XFS_ACL_NOT_CACHED ((void *)-1) - -/* - * Locking scheme: - * - all ACL updates are protected by inode->i_mutex, which is taken before - * calling into this file. 
- * - access and updates to the ip->i_acl and ip->i_default_acl pointers are - * protected by inode->i_lock. - */ - -STATIC struct posix_acl * -xfs_acl_from_disk(struct xfs_acl *aclp) -{ - struct posix_acl_entry *acl_e; - struct posix_acl *acl; - struct xfs_acl_entry *ace; - int count, i; - - count = be32_to_cpu(aclp->acl_cnt); - - acl = posix_acl_alloc(count, GFP_KERNEL); - if (!acl) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < count; i++) { - acl_e = &acl->a_entries[i]; - ace = &aclp->acl_entry[i]; - - /* - * The tag is 32 bits on disk and 16 bits in core. - * - * Because every access to it goes through the core - * format first this is not a problem. - */ - acl_e->e_tag = be32_to_cpu(ace->ae_tag); - acl_e->e_perm = be16_to_cpu(ace->ae_perm); - - switch (acl_e->e_tag) { - case ACL_USER: - case ACL_GROUP: - acl_e->e_id = be32_to_cpu(ace->ae_id); - break; - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - acl_e->e_id = ACL_UNDEFINED_ID; - break; - default: - goto fail; - } - } - return acl; - -fail: - posix_acl_release(acl); - return ERR_PTR(-EINVAL); -} - -STATIC void -xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) -{ - const struct posix_acl_entry *acl_e; - struct xfs_acl_entry *ace; - int i; - - aclp->acl_cnt = cpu_to_be32(acl->a_count); - for (i = 0; i < acl->a_count; i++) { - ace = &aclp->acl_entry[i]; - acl_e = &acl->a_entries[i]; - - ace->ae_tag = cpu_to_be32(acl_e->e_tag); - ace->ae_id = cpu_to_be32(acl_e->e_id); - ace->ae_perm = cpu_to_be16(acl_e->e_perm); - } -} - -/* - * Update the cached ACL pointer in the inode. - * - * Because we don't hold any locks while reading/writing the attribute - * from/to disk another thread could have raced and updated the cached - * ACL value before us. In that case we release the previous cached value - * and update it with our new value. - */ -STATIC void -xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - -struct posix_acl * -xfs_get_acl(struct inode *inode, int type) -{ - struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl = NULL, **p_acl; - struct xfs_acl *xfs_acl; - int len = sizeof(struct xfs_acl); - char *ea_name; - int error; - - switch (type) { - case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; - p_acl = &ip->i_acl; - break; - case ACL_TYPE_DEFAULT: - ea_name = SGI_ACL_DEFAULT; - p_acl = &ip->i_default_acl; - break; - default: - return ERR_PTR(-EINVAL); - } - - spin_lock(&inode->i_lock); - if (*p_acl != XFS_ACL_NOT_CACHED) - acl = posix_acl_dup(*p_acl); - spin_unlock(&inode->i_lock); - - /* - * If we have a cached ACLs value just return it, not need to - * go out to the disk. - */ - if (acl) - return acl; - - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); - if (!xfs_acl) - return ERR_PTR(-ENOMEM); - - error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT); - if (error) { - /* - * If the attribute doesn't exist make sure we have a negative - * cache entry, for any other error assume it is transient and - * leave the cache entry as XFS_ACL_NOT_CACHED. 
- */ - if (error == -ENOATTR) { - acl = NULL; - goto out_update_cache; - } - goto out; - } - - acl = xfs_acl_from_disk(xfs_acl); - if (IS_ERR(acl)) - goto out; - - out_update_cache: - xfs_update_cached_acl(inode, p_acl, acl); - out: - kfree(xfs_acl); - return acl; -} - -STATIC int -xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) -{ - struct xfs_inode *ip = XFS_I(inode); - struct posix_acl **p_acl; - char *ea_name; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - switch (type) { - case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; - p_acl = &ip->i_acl; - break; - case ACL_TYPE_DEFAULT: - if (!S_ISDIR(inode->i_mode)) - return acl ? -EACCES : 0; - ea_name = SGI_ACL_DEFAULT; - p_acl = &ip->i_default_acl; - break; - default: - return -EINVAL; - } - - if (acl) { - struct xfs_acl *xfs_acl; - int len; - - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); - if (!xfs_acl) - return -ENOMEM; - - xfs_acl_to_disk(xfs_acl, acl); - len = sizeof(struct xfs_acl) - - (sizeof(struct xfs_acl_entry) * - (XFS_ACL_MAX_ENTRIES - acl->a_count)); - - error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl, - len, ATTR_ROOT); - - kfree(xfs_acl); - } else { - /* - * A NULL ACL argument means we want to remove the ACL. - */ - error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); - - /* - * If the attribute didn't exist to start with that's fine. - */ - if (error == -ENOATTR) - error = 0; - } - - if (!error) - xfs_update_cached_acl(inode, p_acl, acl); - return error; -} - -int -xfs_check_acl(struct inode *inode, int mask) -{ - struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl; - int error = -EAGAIN; - - xfs_itrace_entry(ip); - - /* - * If there is no attribute fork no ACL exists on this inode and - * we can skip the whole exercise. - */ - if (!XFS_IFORK_Q(ip)) - return -EAGAIN; - - acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - } - - return error; -} - -static int -xfs_set_mode(struct inode *inode, mode_t mode) -{ - int error = 0; - - if (mode != inode->i_mode) { - struct iattr iattr; - - iattr.ia_valid = ATTR_MODE; - iattr.ia_mode = mode; - - error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); - } - - return error; -} - -static int -xfs_acl_exists(struct inode *inode, char *name) -{ - int len = sizeof(struct xfs_acl); - - return (xfs_attr_get(XFS_I(inode), name, NULL, &len, - ATTR_ROOT|ATTR_KERNOVAL) == 0); -} - -int -posix_acl_access_exists(struct inode *inode) -{ - return xfs_acl_exists(inode, SGI_ACL_FILE); -} - -int -posix_acl_default_exists(struct inode *inode) -{ - if (!S_ISDIR(inode->i_mode)) - return 0; - return xfs_acl_exists(inode, SGI_ACL_DEFAULT); -} - -/* - * No need for i_mutex because the inode is not yet exposed to the VFS. - */ -int -xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl) -{ - struct posix_acl *clone; - mode_t mode; - int error = 0, inherit = 0; - - if (S_ISDIR(inode->i_mode)) { - error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl); - if (error) - return error; - } - - clone = posix_acl_clone(default_acl, GFP_KERNEL); - if (!clone) - return -ENOMEM; - - mode = inode->i_mode; - error = posix_acl_create_masq(clone, &mode); - if (error < 0) - goto out_release_clone; - - /* - * If posix_acl_create_masq returns a positive value we need to - * inherit a permission that can't be represented using the Unix - * mode bits and we actually need to set an ACL. 
- */ - if (error > 0) - inherit = 1; - - error = xfs_set_mode(inode, mode); - if (error) - goto out_release_clone; - - if (inherit) - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); - - out_release_clone: - posix_acl_release(clone); - return error; -} - -int -xfs_acl_chmod(struct inode *inode) -{ - struct posix_acl *acl, *clone; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); - - posix_acl_release(clone); - return error; -} - -void -xfs_inode_init_acls(struct xfs_inode *ip) -{ - /* - * No need for locking, inode is not live yet. - */ - ip->i_acl = XFS_ACL_NOT_CACHED; - ip->i_default_acl = XFS_ACL_NOT_CACHED; -} - -void -xfs_inode_clear_acls(struct xfs_inode *ip) -{ - /* - * No need for locking here, the inode is not live anymore - * and just about to be freed. - */ - if (ip->i_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(ip->i_acl); - if (ip->i_default_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(ip->i_default_acl); -} - - -/* - * System xattr handlers. - * - * Currently Posix ACLs are the only system namespace extended attribute - * handlers supported by XFS, so we just implement the handlers here. - * If we ever support other system extended attributes this will need - * some refactoring. - */ - -static int -xfs_decode_acl(const char *name) -{ - if (strcmp(name, "posix_acl_access") == 0) - return ACL_TYPE_ACCESS; - else if (strcmp(name, "posix_acl_default") == 0) - return ACL_TYPE_DEFAULT; - return -EINVAL; -} - -static int -xfs_xattr_system_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - struct posix_acl *acl; - int type, error; - - type = xfs_decode_acl(name); - if (type < 0) - return type; - - acl = xfs_get_acl(inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - - error = posix_acl_to_xattr(acl, value, size); - posix_acl_release(acl); - - return error; -} - -static int -xfs_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - struct posix_acl *acl = NULL; - int error = 0, type; - - type = xfs_decode_acl(name); - if (type < 0) - return type; - if (flags & XATTR_CREATE) - return -EINVAL; - if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; - if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - - if (!value) - goto set_acl; - - acl = posix_acl_from_xattr(value, size); - if (!acl) { - /* - * acl_set_file(3) may request that we set default ACLs with - * zero length -- defend (gracefully) against that here. 
- */ - goto out; - } - if (IS_ERR(acl)) { - error = PTR_ERR(acl); - goto out; - } - - error = posix_acl_valid(acl); - if (error) - goto out_release; - - error = -EINVAL; - if (acl->a_count > XFS_ACL_MAX_ENTRIES) - goto out_release; - - if (type == ACL_TYPE_ACCESS) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); - - if (error <= 0) { - posix_acl_release(acl); - acl = NULL; - - if (error < 0) - return error; - } - - error = xfs_set_mode(inode, mode); - if (error) - goto out_release; - } - - set_acl: - error = xfs_set_acl(inode, type, acl); - out_release: - posix_acl_release(acl); - out: - return error; -} - -struct xattr_handler xfs_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = xfs_xattr_system_get, - .set = xfs_xattr_system_set, -}; diff --git a/trunk/fs/xfs/linux-2.6/xfs_ioctl.c b/trunk/fs/xfs/linux-2.6/xfs_ioctl.c index 5bb523d7f37e..34eaab608e6e 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/trunk/fs/xfs/linux-2.6/xfs_ioctl.c @@ -41,6 +41,7 @@ #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" @@ -898,8 +899,7 @@ xfs_ioctl_setattr( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; unsigned int lock_flags = 0; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *udqp = NULL, *gdqp = NULL; struct xfs_dquot *olddquot = NULL; int code; @@ -919,7 +919,7 @@ xfs_ioctl_setattr( * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { - code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, + code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid, ip->i_d.di_gid, fa->fsx_projid, XFS_QMOPT_PQUOTA, &udqp, &gdqp); if (code) @@ -954,11 +954,10 @@ xfs_ioctl_setattr( * Do a quota reservation only if projid is actually going to change. */ if (mask & FSX_PROJID) { - if (XFS_IS_QUOTA_RUNNING(mp) && - XFS_IS_PQUOTA_ON(mp) && + if (XFS_IS_PQUOTA_ON(mp) && ip->i_d.di_projid != fa->fsx_projid) { ASSERT(tp); - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, + code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ @@ -1060,8 +1059,8 @@ xfs_ioctl_setattr( * in the transaction. */ if (ip->i_d.di_projid != fa->fsx_projid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { - olddquot = xfs_qm_vop_chown(tp, ip, + if (XFS_IS_PQUOTA_ON(mp)) { + olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_projid = fa->fsx_projid; @@ -1107,9 +1106,9 @@ xfs_ioctl_setattr( /* * Release any dquot(s) the inode had kept before chown. 
*/ - xfs_qm_dqrele(olddquot); - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); + XFS_QM_DQRELE(mp, olddquot); + XFS_QM_DQRELE(mp, udqp); + XFS_QM_DQRELE(mp, gdqp); if (code) return code; @@ -1123,8 +1122,8 @@ xfs_ioctl_setattr( return 0; error_return: - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); + XFS_QM_DQRELE(mp, udqp); + XFS_QM_DQRELE(mp, gdqp); xfs_trans_cancel(tp, 0); if (lock_flags) xfs_iunlock(ip, lock_flags); diff --git a/trunk/fs/xfs/linux-2.6/xfs_iops.c b/trunk/fs/xfs/linux-2.6/xfs_iops.c index 58973bb46038..6075382336d7 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_iops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_iops.c @@ -17,7 +17,6 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_acl.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" @@ -52,7 +51,6 @@ #include <linux/capability.h> #include <linux/xattr.h> #include <linux/namei.h> -#include <linux/posix_acl.h> #include <linux/security.h> #include <linux/falloc.h> #include <linux/fiemap.h> @@ -204,8 +202,9 @@ xfs_vn_mknod( { struct inode *inode; struct xfs_inode *ip = NULL; - struct posix_acl *default_acl = NULL; + xfs_acl_t *default_acl = NULL; struct xfs_name name; + int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS; int error; /* @@ -220,15 +219,19 @@ xfs_vn_mknod( rdev = 0; } - if (IS_POSIXACL(dir)) { - default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(default_acl)) - return -PTR_ERR(default_acl); - - if (!default_acl) - mode &= ~current_umask(); + if (test_default_acl && test_default_acl(dir)) { + if (!_ACL_ALLOC(default_acl)) { + return -ENOMEM; + } + if (!_ACL_GET_DEFAULT(dir, default_acl)) { + _ACL_FREE(default_acl); + default_acl = NULL; + } } + if (IS_POSIXACL(dir) && !default_acl) + mode &= ~current_umask(); + xfs_dentry_to_name(&name, dentry); error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); if (unlikely(error)) @@ -241,10 +244,10 @@ xfs_vn_mknod( goto out_cleanup_inode; if (default_acl) { - error = -xfs_inherit_acl(inode, default_acl); + error = _ACL_INHERIT(inode, mode, default_acl); if (unlikely(error)) goto out_cleanup_inode; - posix_acl_release(default_acl); + _ACL_FREE(default_acl); } @@ -254,7 +257,8 @@ xfs_vn_mknod( out_cleanup_inode: xfs_cleanup_inode(dir, inode, dentry); out_free_acl: - posix_acl_release(default_acl); + if (default_acl) + _ACL_FREE(default_acl); return -error; } @@ -484,6 +488,26 @@ xfs_vn_put_link( kfree(s); } +#ifdef CONFIG_XFS_POSIX_ACL +STATIC int +xfs_check_acl( + struct inode *inode, + int mask) +{ + struct xfs_inode *ip = XFS_I(inode); + int error; + + xfs_itrace_entry(ip); + + if (XFS_IFORK_Q(ip)) { + error = xfs_acl_iaccess(ip, mask, NULL); + if (error != -1) + return -error; + } + + return -EAGAIN; +} + STATIC int xfs_vn_permission( struct inode *inode, @@ -491,6 +515,9 @@ xfs_vn_permission( { return generic_permission(inode, mask, xfs_check_acl); } +#else +#define xfs_vn_permission NULL +#endif STATIC int xfs_vn_getattr( diff --git a/trunk/fs/xfs/linux-2.6/xfs_lrw.c b/trunk/fs/xfs/linux-2.6/xfs_lrw.c index 7078974a6eee..9142192ccbe6 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_lrw.c +++ b/trunk/fs/xfs/linux-2.6/xfs_lrw.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_inode_item.h" #include "xfs_buf_item.h" diff --git a/trunk/fs/xfs/linux-2.6/xfs_quotaops.c b/trunk/fs/xfs/linux-2.6/xfs_quotaops.c index cb6e2cca214f..94d9a633d3d9 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_quotaops.c @@ -50,11 +50,9 @@ xfs_fs_quota_sync( { struct xfs_mount *mp = XFS_M(sb); - if (sb->s_flags & MS_RDONLY) - return -EROFS; if
(!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - return -xfs_sync_data(mp, 0); + return -xfs_sync_inodes(mp, SYNC_DELWRI); } STATIC int diff --git a/trunk/fs/xfs/linux-2.6/xfs_super.c b/trunk/fs/xfs/linux-2.6/xfs_super.c index 2e09efbca8db..08d6bd9a3947 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_super.c +++ b/trunk/fs/xfs/linux-2.6/xfs_super.c @@ -43,6 +43,7 @@ #include "xfs_itable.h" #include "xfs_fsops.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -404,14 +405,6 @@ xfs_parseargs( return EINVAL; } -#ifndef CONFIG_XFS_QUOTA - if (XFS_IS_QUOTA_RUNNING(mp)) { - cmn_err(CE_WARN, - "XFS: quota support not available in this kernel."); - return EINVAL; - } -#endif - if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { cmn_err(CE_WARN, @@ -1070,18 +1063,7 @@ xfs_fs_put_super( int unmount_event_flags = 0; xfs_syncd_stop(mp); - - if (!(sb->s_flags & MS_RDONLY)) { - /* - * XXX(hch): this should be SYNC_WAIT. - * - * Or more likely not needed at all because the VFS is already - * calling ->sync_fs after shutting down all filestem - * operations and just before calling ->put_super. - */ - xfs_sync_data(mp, 0); - xfs_sync_attr(mp, 0); - } + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); #ifdef HAVE_DMAPI if (mp->m_flags & XFS_MOUNT_DMAPI) { @@ -1116,6 +1098,7 @@ xfs_fs_put_super( xfs_freesb(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); + xfs_qmops_put(mp); xfs_dmops_put(mp); xfs_free_fsname(mp); kfree(mp); @@ -1175,7 +1158,6 @@ xfs_fs_statfs( { struct xfs_mount *mp = XFS_M(dentry->d_sb); xfs_sb_t *sbp = &mp->m_sb; - struct xfs_inode *ip = XFS_I(dentry->d_inode); __uint64_t fakeinos, id; xfs_extlen_t lsize; @@ -1204,10 +1186,7 @@ xfs_fs_statfs( statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); spin_unlock(&mp->m_sb_lock); - if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || - ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == - (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) - xfs_qm_statvfs(ip, statp); + XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp); return 0; } @@ -1415,13 +1394,16 @@ xfs_fs_fill_super( error = xfs_dmops_get(mp); if (error) goto out_free_fsname; + error = xfs_qmops_get(mp); + if (error) + goto out_put_dmops; if (silent) flags |= XFS_MFSI_QUIET; error = xfs_open_devices(mp); if (error) - goto out_put_dmops; + goto out_put_qmops; if (xfs_icsb_init_counters(mp)) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; @@ -1489,6 +1471,8 @@ xfs_fs_fill_super( out_destroy_counters: xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); + out_put_qmops: + xfs_qmops_put(mp); out_put_dmops: xfs_dmops_put(mp); out_free_fsname: @@ -1722,8 +1706,18 @@ xfs_init_zones(void) if (!xfs_ili_zone) goto out_destroy_inode_zone; +#ifdef CONFIG_XFS_POSIX_ACL + xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl"); + if (!xfs_acl_zone) + goto out_destroy_ili_zone; +#endif + return 0; +#ifdef CONFIG_XFS_POSIX_ACL + out_destroy_ili_zone: +#endif + kmem_zone_destroy(xfs_ili_zone); out_destroy_inode_zone: kmem_zone_destroy(xfs_inode_zone); out_destroy_efi_zone: @@ -1757,6 +1751,9 @@ xfs_init_zones(void) STATIC void xfs_destroy_zones(void) { +#ifdef CONFIG_XFS_POSIX_ACL + kmem_zone_destroy(xfs_acl_zone); +#endif kmem_zone_destroy(xfs_ili_zone); kmem_zone_destroy(xfs_inode_zone); kmem_zone_destroy(xfs_efi_zone); diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index b619d6b8ca43..f7ba76633c29 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c 
+++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -43,267 +43,166 @@ #include "xfs_buf_item.h" #include "xfs_inode_item.h" #include "xfs_rw.h" -#include "xfs_quota.h" #include <linux/kthread.h> #include <linux/freezer.h> - -STATIC xfs_inode_t * -xfs_inode_ag_lookup( - struct xfs_mount *mp, - struct xfs_perag *pag, - uint32_t *first_index, - int tag) +/* + * Sync all the inodes in the given AG according to the + * direction given by the flags. + */ +STATIC int +xfs_sync_inodes_ag( + xfs_mount_t *mp, + int ag, + int flags) { - int nr_found; - struct xfs_inode *ip; - - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - read_lock(&pag->pag_ici_lock); - if (tag == XFS_ICI_NO_TAG) { - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void **)&ip, *first_index, 1); - } else { - nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, - (void **)&ip, *first_index, 1, tag); - } - if (!nr_found) - goto unlock; + xfs_perag_t *pag = &mp->m_perag[ag]; + int nr_found; + uint32_t first_index = 0; + int error = 0; + int last_error = 0; - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - goto unlock; + do { + struct inode *inode; + xfs_inode_t *ip = NULL; + int lock_flags = XFS_ILOCK_SHARED; - return ip; + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void**)&ip, first_index, 1); -unlock: - read_unlock(&pag->pag_ici_lock); - return NULL; -} + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } -STATIC int -xfs_inode_ag_walk( - struct xfs_mount *mp, - xfs_agnumber_t ag, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags), - int flags, - int tag) -{ - struct xfs_perag *pag = &mp->m_perag[ag]; - uint32_t first_index; - int last_error = 0; - int skipped; + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } -restart: - skipped = 0; - first_index = 0; - do { - int error = 0; - xfs_inode_t *ip; + /* nothing to sync during shutdown */ + if (XFS_FORCED_SHUTDOWN(mp)) { + read_unlock(&pag->pag_ici_lock); + return 0; + } - ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); - if (!ip) - break; + /* + * If we can't get a reference on the inode, it must be + * in reclaim. Leave it for the reclaim code to flush. + */ + inode = VFS_I(ip); + if (!igrab(inode)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + read_unlock(&pag->pag_ici_lock); - error = execute(ip, pag, flags); - if (error == EAGAIN) { - skipped++; + /* avoid new or bad inodes */ + if (is_bad_inode(inode) || + xfs_iflags_test(ip, XFS_INEW)) { + IRELE(ip); continue; } + + /* + * If we have to flush data or wait for I/O completion + * we need to hold the iolock.
+ */ + if (flags & SYNC_DELWRI) { + if (VN_DIRTY(inode)) { + if (flags & SYNC_TRYLOCK) { + if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) + lock_flags |= XFS_IOLOCK_SHARED; + } else { + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock_flags |= XFS_IOLOCK_SHARED; + } + if (lock_flags & XFS_IOLOCK_SHARED) { + error = xfs_flush_pages(ip, 0, -1, + (flags & SYNC_WAIT) ? 0 + : XFS_B_ASYNC, + FI_NONE); + } + } + if (VN_CACHED(inode) && (flags & SYNC_IOWAIT)) + xfs_ioend_wait(ip); + } + xfs_ilock(ip, XFS_ILOCK_SHARED); + + if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { + if (flags & SYNC_WAIT) { + xfs_iflock(ip); + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + else + xfs_ifunlock(ip); + } else if (xfs_iflock_nowait(ip)) { + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + else + xfs_ifunlock(ip); + } + } + xfs_iput(ip, lock_flags); + if (error) last_error = error; /* * bail out if the filesystem is corrupted. */ if (error == EFSCORRUPTED) - break; + return XFS_ERROR(error); - } while (1); - - if (skipped) { - delay(1); - goto restart; - } + } while (nr_found); - xfs_put_perag(mp, pag); return last_error; } int -xfs_inode_ag_iterator( - struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags), - int flags, - int tag) -{ - int error = 0; - int last_error = 0; - xfs_agnumber_t ag; - - for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - if (!mp->m_perag[ag].pag_ici_init) - continue; - error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); - if (error) { - last_error = error; - if (error == EFSCORRUPTED) - break; - } - } - return XFS_ERROR(last_error); -} - -/* must be called with pag_ici_lock held and releases it */ -int -xfs_sync_inode_valid( - struct xfs_inode *ip, - struct xfs_perag *pag) -{ - struct inode *inode = VFS_I(ip); - - /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - read_unlock(&pag->pag_ici_lock); - return EFSCORRUPTED; - } - - /* - * If we can't get a reference on the inode, it must be in reclaim. - * Leave it for the reclaim code to flush. Also avoid inodes that - * haven't been fully initialised. - */ - if (!igrab(inode)) { - read_unlock(&pag->pag_ici_lock); - return ENOENT; - } - read_unlock(&pag->pag_ici_lock); - - if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { - IRELE(ip); - return ENOENT; - } - - return 0; -} - -STATIC int -xfs_sync_inode_data( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) +xfs_sync_inodes( + xfs_mount_t *mp, + int flags) { - struct inode *inode = VFS_I(ip); - struct address_space *mapping = inode->i_mapping; - int error = 0; - - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; - - if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - goto out_wait; - - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (flags & SYNC_TRYLOCK) - goto out_wait; - xfs_ilock(ip, XFS_IOLOCK_SHARED); - } + int error; + int last_error; + int i; + int lflags = XFS_LOG_FORCE; - error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? 
- 0 : XFS_B_ASYNC, FI_NONE); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; + error = 0; + last_error = 0; - out_wait: if (flags & SYNC_WAIT) - xfs_ioend_wait(ip); - IRELE(ip); - return error; -} - -STATIC int -xfs_sync_inode_attr( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) -{ - int error = 0; - - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; + lflags |= XFS_LOG_SYNC; - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_inode_clean(ip)) - goto out_unlock; - if (!xfs_iflock_nowait(ip)) { - if (!(flags & SYNC_WAIT)) - goto out_unlock; - xfs_iflock(ip); - } - - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - goto out_unlock; + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + error = xfs_sync_inodes_ag(mp, i, flags); + if (error) + last_error = error; + if (error == EFSCORRUPTED) + break; } + if (flags & SYNC_DELWRI) + xfs_log_force(mp, 0, lflags); - error = xfs_iflush(ip, (flags & SYNC_WAIT) ? - XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI); - - out_unlock: - xfs_iunlock(ip, XFS_ILOCK_SHARED); - IRELE(ip); - return error; -} - -/* - * Write out pagecache data for the whole filesystem. - */ -int -xfs_sync_data( - struct xfs_mount *mp, - int flags) -{ - int error; - - ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); - - error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG); - if (error) - return XFS_ERROR(error); - - xfs_log_force(mp, 0, - (flags & SYNC_WAIT) ? - XFS_LOG_FORCE | XFS_LOG_SYNC : - XFS_LOG_FORCE); - return 0; -} - -/* - * Write out inode metadata (attributes) for the whole filesystem. - */ -int -xfs_sync_attr( - struct xfs_mount *mp, - int flags) -{ - ASSERT((flags & ~SYNC_WAIT) == 0); - - return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG); + return XFS_ERROR(last_error); } STATIC int @@ -353,7 +252,7 @@ xfs_sync_fsdata( * If this is xfssyncd() then only sync the superblock if we can * lock it without sleeping and it is not pinned. */ - if (flags & SYNC_TRYLOCK) { + if (flags & SYNC_BDFLUSH) { ASSERT(!(flags & SYNC_WAIT)); bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); @@ -417,13 +316,13 @@ xfs_quiesce_data( int error; /* push non-blocking */ - xfs_sync_data(mp, 0); - xfs_qm_sync(mp, SYNC_TRYLOCK); + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH); + XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); xfs_filestream_flush(mp); /* push and block */ - xfs_sync_data(mp, SYNC_WAIT); - xfs_qm_sync(mp, SYNC_WAIT); + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); + XFS_QM_DQSYNC(mp, SYNC_WAIT); /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, 0); @@ -442,7 +341,7 @@ xfs_quiesce_fs( int count = 0, pincount; xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop @@ -451,7 +350,7 @@ xfs_quiesce_fs( * logged before we can write the unmount record. 
*/ do { - xfs_sync_attr(mp, SYNC_WAIT); + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { delay(50); @@ -534,8 +433,8 @@ xfs_flush_inodes_work( void *arg) { struct inode *inode = arg; - xfs_sync_data(mp, SYNC_TRYLOCK); - xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); + xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); + xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); iput(inode); } @@ -566,10 +465,10 @@ xfs_sync_worker( if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* dgc: errors ignored here */ - error = xfs_qm_sync(mp, SYNC_TRYLOCK); - error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); + error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); if (xfs_log_need_covered(mp)) error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); } @@ -670,7 +569,7 @@ xfs_reclaim_inode( xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); } - return -EAGAIN; + return 1; } __xfs_iflags_set(ip, XFS_IRECLAIM); spin_unlock(&ip->i_flags_lock); @@ -755,27 +654,101 @@ xfs_inode_clear_reclaim_tag( xfs_put_perag(mp, pag); } -STATIC int -xfs_reclaim_inode_now( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) + +STATIC void +xfs_reclaim_inodes_ag( + xfs_mount_t *mp, + int ag, + int noblock, + int mode) { - /* ignore if already under reclaim */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { + xfs_inode_t *ip = NULL; + xfs_perag_t *pag = &mp->m_perag[ag]; + int nr_found; + uint32_t first_index; + int skipped; + +restart: + first_index = 0; + skipped = 0; + do { + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, + (void**)&ip, first_index, 1, + XFS_ICI_RECLAIM_TAG); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* ignore if already under reclaim */ + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + + if (noblock) { + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + if (xfs_ipincount(ip) || + !xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + read_unlock(&pag->pag_ici_lock); + continue; + } + } read_unlock(&pag->pag_ici_lock); - return 0; + + /* + * hmmm - this is an inode already in reclaim. Do + * we even bother catching it here? 
+ */ + if (xfs_reclaim_inode(ip, noblock, mode)) + skipped++; + } while (nr_found); + + if (skipped) { + delay(1); + goto restart; } - read_unlock(&pag->pag_ici_lock); + return; - return xfs_reclaim_inode(ip, 0, flags); } int xfs_reclaim_inodes( xfs_mount_t *mp, + int noblock, int mode) { - return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, - XFS_ICI_RECLAIM_TAG); + int i; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + xfs_reclaim_inodes_ag(mp, i, noblock, mode); + } + return 0; } + + diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.h b/trunk/fs/xfs/linux-2.6/xfs_sync.h index 2a10301c99c7..308d5bf6dfbd 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.h +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.h @@ -29,14 +29,17 @@ typedef struct xfs_sync_work { struct completion *w_completion; } xfs_sync_work_t; -#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ -#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ +#define SYNC_ATTR 0x0001 /* sync attributes */ +#define SYNC_DELWRI 0x0002 /* look at delayed writes */ +#define SYNC_WAIT 0x0004 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ +#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ +#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); -int xfs_sync_attr(struct xfs_mount *mp, int flags); -int xfs_sync_data(struct xfs_mount *mp, int flags); +int xfs_sync_inodes(struct xfs_mount *mp, int flags); int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); @@ -45,16 +48,10 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); -int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); +int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inode *ip); - -int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); -int xfs_inode_ag_iterator(struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag); - #endif diff --git a/trunk/fs/xfs/linux-2.6/xfs_xattr.c b/trunk/fs/xfs/linux-2.6/xfs_xattr.c index 497c7fb75cc1..964621fde6ed 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_xattr.c +++ b/trunk/fs/xfs/linux-2.6/xfs_xattr.c @@ -29,6 +29,67 @@ #include +/* + * ACL handling. Should eventually be moved into xfs_acl.c + */ + +static int +xfs_decode_acl(const char *name) +{ + if (strcmp(name, "posix_acl_access") == 0) + return _ACL_TYPE_ACCESS; + else if (strcmp(name, "posix_acl_default") == 0) + return _ACL_TYPE_DEFAULT; + return -EINVAL; +} + +/* + * Get system extended attributes which at the moment only + * includes Posix ACLs. 
+ */ +static int +xfs_xattr_system_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + int acl; + + acl = xfs_decode_acl(name); + if (acl < 0) + return acl; + + return xfs_acl_vget(inode, buffer, size, acl); +} + +static int +xfs_xattr_system_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + int acl; + + acl = xfs_decode_acl(name); + if (acl < 0) + return acl; + if (flags & XATTR_CREATE) + return -EINVAL; + + if (!value) + return xfs_acl_vremove(inode, acl); + + return xfs_acl_vset(inode, (void *)value, size, acl); +} + +static struct xattr_handler xfs_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .get = xfs_xattr_system_get, + .set = xfs_xattr_system_set, +}; + + +/* + * Real xattr handling. The only difference between the namespaces is + * a flag passed to the low-level attr code. + */ + static int __xfs_xattr_get(struct inode *inode, const char *name, void *value, size_t size, int xflags) @@ -138,9 +199,7 @@ struct xattr_handler *xfs_xattr_handlers[] = { &xfs_xattr_user_handler, &xfs_xattr_trusted_handler, &xfs_xattr_security_handler, -#ifdef CONFIG_XFS_POSIX_ACL &xfs_xattr_system_handler, -#endif NULL }; @@ -251,7 +310,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) /* * Then add the two synthetic ACL attributes. */ - if (posix_acl_access_exists(inode)) { + if (xfs_acl_vhasacl_access(inode)) { error = list_one_attr(POSIX_ACL_XATTR_ACCESS, strlen(POSIX_ACL_XATTR_ACCESS) + 1, data, size, &context.count); @@ -259,7 +318,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) return error; } - if (posix_acl_default_exists(inode)) { + if (xfs_acl_vhasacl_default(inode)) { error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, strlen(POSIX_ACL_XATTR_DEFAULT) + 1, data, size, &context.count); diff --git a/trunk/fs/xfs/quota/xfs_dquot.c b/trunk/fs/xfs/quota/xfs_dquot.c index 2f3f2229eaaf..e4babcc63423 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.c +++ b/trunk/fs/xfs/quota/xfs_dquot.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -1193,9 +1194,7 @@ void xfs_qm_dqrele( xfs_dquot_t *dqp) { - if (!dqp) - return; - + ASSERT(dqp); xfs_dqtrace_entry(dqp, "DQRELE"); xfs_dqlock(dqp); diff --git a/trunk/fs/xfs/quota/xfs_dquot.h b/trunk/fs/xfs/quota/xfs_dquot.h index 6533ead9b889..de0f402ddb4c 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.h +++ b/trunk/fs/xfs/quota/xfs_dquot.h @@ -181,6 +181,7 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, xfs_dqid_t, uint, uint, xfs_dquot_t **); extern void xfs_qm_dqput(xfs_dquot_t *); +extern void xfs_qm_dqrele(xfs_dquot_t *); extern void xfs_dqlock(xfs_dquot_t *); extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); extern void xfs_dqunlock(xfs_dquot_t *); diff --git a/trunk/fs/xfs/quota/xfs_dquot_item.c b/trunk/fs/xfs/quota/xfs_dquot_item.c index d0d4a9a0bbd7..1728f6a7c4f5 100644 --- a/trunk/fs/xfs/quota/xfs_dquot_item.c +++ b/trunk/fs/xfs/quota/xfs_dquot_item.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" diff --git a/trunk/fs/xfs/quota/xfs_qm.c b/trunk/fs/xfs/quota/xfs_qm.c index 45b1bfef7388..5b6695049e00 100644 --- a/trunk/fs/xfs/quota/xfs_qm.c +++ b/trunk/fs/xfs/quota/xfs_qm.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include 
"xfs_bmap.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -286,13 +287,11 @@ xfs_qm_rele_quotafs_ref( * Just destroy the quotainfo structure. */ void -xfs_qm_unmount( - struct xfs_mount *mp) +xfs_qm_unmount_quotadestroy( + xfs_mount_t *mp) { - if (mp->m_quotainfo) { - xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); + if (mp->m_quotainfo) xfs_qm_destroy_quotainfo(mp); - } } @@ -386,13 +385,8 @@ xfs_qm_mount_quotas( if (error) { xfs_fs_cmn_err(CE_WARN, mp, "Failed to initialize disk quotas."); - return; } - -#ifdef QUOTADEBUG - if (XFS_IS_QUOTA_ON(mp)) - xfs_qm_internalqcheck(mp); -#endif + return; } /* @@ -780,11 +774,12 @@ xfs_qm_dqattach_grouphint( * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON * into account. * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. + * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL. * Inode may get unlocked and relocked in here, and the caller must deal with * the consequences. */ int -xfs_qm_dqattach_locked( +xfs_qm_dqattach( xfs_inode_t *ip, uint flags) { @@ -792,14 +787,17 @@ xfs_qm_dqattach_locked( uint nquotas = 0; int error = 0; - if (!XFS_IS_QUOTA_RUNNING(mp) || - !XFS_IS_QUOTA_ON(mp) || - !XFS_NOT_DQATTACHED(mp, ip) || - ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino) + if ((! XFS_IS_QUOTA_ON(mp)) || + (! XFS_NOT_DQATTACHED(mp, ip)) || + (ip->i_ino == mp->m_sb.sb_uquotino) || + (ip->i_ino == mp->m_sb.sb_gquotino)) return 0; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || + xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (! (flags & XFS_QMOPT_ILOCKED)) + xfs_ilock(ip, XFS_ILOCK_EXCL); if (XFS_IS_UQUOTA_ON(mp)) { error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, @@ -851,7 +849,8 @@ xfs_qm_dqattach_locked( xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); } - done: + done: + #ifdef QUOTADEBUG if (! error) { if (XFS_IS_UQUOTA_ON(mp)) @@ -859,22 +858,15 @@ xfs_qm_dqattach_locked( if (XFS_IS_OQUOTA_ON(mp)) ASSERT(ip->i_gdquot); } - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); #endif - return error; -} -int -xfs_qm_dqattach( - struct xfs_inode *ip, - uint flags) -{ - int error; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_qm_dqattach_locked(ip, flags); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (! (flags & XFS_QMOPT_ILOCKED)) + xfs_iunlock(ip, XFS_ILOCK_EXCL); +#ifdef QUOTADEBUG + else + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +#endif return error; } @@ -904,6 +896,11 @@ xfs_qm_dqdetach( } } +/* + * This is called to sync quotas. We can be told to use non-blocking + * semantics by either the SYNC_BDFLUSH flag or the absence of the + * SYNC_WAIT flag. + */ int xfs_qm_sync( xfs_mount_t *mp, @@ -912,13 +909,17 @@ xfs_qm_sync( int recl, restarts; xfs_dquot_t *dqp; uint flush_flags; + boolean_t nowait; int error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (! XFS_IS_QUOTA_ON(mp)) return 0; - flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI; restarts = 0; + /* + * We won't block unless we are asked to. + */ + nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0); again: xfs_qm_mplist_lock(mp); @@ -938,10 +939,18 @@ xfs_qm_sync( * don't 'seem' to be dirty. ie. don't acquire dqlock. * This is very similar to what xfs_sync does with inodes. */ - if (flags & SYNC_TRYLOCK) { - if (!XFS_DQ_IS_DIRTY(dqp)) + if (flags & SYNC_BDFLUSH) { + if (! 
XFS_DQ_IS_DIRTY(dqp)) continue; - if (!xfs_qm_dqlock_nowait(dqp)) + } + + if (nowait) { + /* + * Try to acquire the dquot lock. We are NOT out of + * lock order, but we just don't want to wait for this + * lock, unless somebody wanted us to. + */ + if (! xfs_qm_dqlock_nowait(dqp)) continue; } else { xfs_dqlock(dqp); @@ -958,7 +967,7 @@ xfs_qm_sync( /* XXX a sentinel would be better */ recl = XFS_QI_MPLRECLAIMS(mp); if (!xfs_dqflock_nowait(dqp)) { - if (flags & SYNC_TRYLOCK) { + if (nowait) { xfs_dqunlock(dqp); continue; } @@ -976,6 +985,7 @@ xfs_qm_sync( * Let go of the mplist lock. We don't want to hold it * across a disk write */ + flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC; xfs_qm_mplist_unlock(mp); xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH"); error = xfs_qm_dqflush(dqp, flush_flags); @@ -2309,20 +2319,20 @@ xfs_qm_write_sb_changes( */ int xfs_qm_vop_dqalloc( - struct xfs_inode *ip, - uid_t uid, - gid_t gid, - prid_t prid, - uint flags, - struct xfs_dquot **O_udqpp, - struct xfs_dquot **O_gdqpp) + xfs_mount_t *mp, + xfs_inode_t *ip, + uid_t uid, + gid_t gid, + prid_t prid, + uint flags, + xfs_dquot_t **O_udqpp, + xfs_dquot_t **O_gdqpp) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_dquot *uq, *gq; - int error; - uint lockflags; + int error; + xfs_dquot_t *uq, *gq; + uint lockflags; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; lockflags = XFS_ILOCK_EXCL; @@ -2336,8 +2346,8 @@ xfs_qm_vop_dqalloc( * if necessary. The dquot(s) will not be locked. */ if (XFS_NOT_DQATTACHED(mp, ip)) { - error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); - if (error) { + if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC | + XFS_QMOPT_ILOCKED))) { xfs_iunlock(ip, lockflags); return error; } @@ -2459,7 +2469,6 @@ xfs_qm_vop_chown( uint bfield = XFS_IS_REALTIME_INODE(ip) ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); @@ -2499,13 +2508,13 @@ xfs_qm_vop_chown_reserve( xfs_dquot_t *gdqp, uint flags) { - xfs_mount_t *mp = ip->i_mount; + int error; + xfs_mount_t *mp; uint delblks, blkflags, prjflags = 0; xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; - int error; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + mp = ip->i_mount; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); delblks = ip->i_delayed_blks; @@ -2573,23 +2582,28 @@ xfs_qm_vop_chown_reserve( int xfs_qm_vop_rename_dqattach( - struct xfs_inode **i_tab) + xfs_inode_t **i_tab) { - struct xfs_mount *mp = i_tab[0]->i_mount; - int i; + xfs_inode_t *ip; + int i; + int error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; + ip = i_tab[0]; - for (i = 0; (i < 4 && i_tab[i]); i++) { - struct xfs_inode *ip = i_tab[i]; - int error; + if (! XFS_IS_QUOTA_ON(ip->i_mount)) + return 0; + if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + } + for (i = 1; (i < 4 && i_tab[i]); i++) { /* * Watch out for duplicate entries in the table. 
*/ - if (i == 0 || ip != i_tab[i-1]) { - if (XFS_NOT_DQATTACHED(mp, ip)) { + if ((ip = i_tab[i]) != i_tab[i-1]) { + if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { error = xfs_qm_dqattach(ip, 0); if (error) return error; @@ -2600,19 +2614,17 @@ xfs_qm_vop_rename_dqattach( } void -xfs_qm_vop_create_dqattach( - struct xfs_trans *tp, - struct xfs_inode *ip, - struct xfs_dquot *udqp, - struct xfs_dquot *gdqp) +xfs_qm_vop_dqattach_and_dqmod_newinode( + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dquot_t *udqp, + xfs_dquot_t *gdqp) { - struct xfs_mount *mp = tp->t_mountp; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(tp->t_mountp)) return; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); if (udqp) { xfs_dqlock(udqp); @@ -2620,7 +2632,7 @@ xfs_qm_vop_create_dqattach( xfs_dqunlock(udqp); ASSERT(ip->i_udquot == NULL); ip->i_udquot = udqp; - ASSERT(XFS_IS_UQUOTA_ON(mp)); + ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp)); ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); } @@ -2630,8 +2642,8 @@ xfs_qm_vop_create_dqattach( xfs_dqunlock(gdqp); ASSERT(ip->i_gdquot == NULL); ip->i_gdquot = gdqp; - ASSERT(XFS_IS_OQUOTA_ON(mp)); - ASSERT((XFS_IS_GQUOTA_ON(mp) ? + ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp)); + ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ? ip->i_d.di_gid : ip->i_d.di_projid) == be32_to_cpu(gdqp->q_core.d_id)); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); diff --git a/trunk/fs/xfs/quota/xfs_qm.h b/trunk/fs/xfs/quota/xfs_qm.h index 495564b8af38..a371954cae1b 100644 --- a/trunk/fs/xfs/quota/xfs_qm.h +++ b/trunk/fs/xfs/quota/xfs_qm.h @@ -127,6 +127,8 @@ typedef struct xfs_quotainfo { } xfs_quotainfo_t; +extern xfs_dqtrxops_t xfs_trans_dquot_ops; + extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, xfs_dquot_t *, xfs_dquot_t *, long, long, uint); @@ -157,11 +159,17 @@ typedef struct xfs_dquot_acct { #define XFS_QM_RTBWARNLIMIT 5 extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); +extern void xfs_qm_mount_quotas(xfs_mount_t *); extern int xfs_qm_quotacheck(xfs_mount_t *); +extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); +extern void xfs_qm_unmount_quotas(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); +extern int xfs_qm_sync(xfs_mount_t *, int); /* dquot stuff */ extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); +extern int xfs_qm_dqattach(xfs_inode_t *, uint); +extern void xfs_qm_dqdetach(xfs_inode_t *); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); @@ -175,6 +183,19 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); +/* vop stuff */ +extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, + uid_t, gid_t, prid_t, uint, + xfs_dquot_t **, xfs_dquot_t **); +extern void xfs_qm_vop_dqattach_and_dqmod_newinode( + xfs_trans_t *, xfs_inode_t *, + xfs_dquot_t *, xfs_dquot_t *); +extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **); +extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *, + xfs_dquot_t **, xfs_dquot_t *); +extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *, + xfs_dquot_t *, xfs_dquot_t *, uint); + /* list stuff */ extern void xfs_qm_freelist_append(xfs_frlist_t *, 
xfs_dquot_t *); extern void xfs_qm_freelist_unlink(xfs_dquot_t *); diff --git a/trunk/fs/xfs/quota/xfs_qm_bhv.c b/trunk/fs/xfs/quota/xfs_qm_bhv.c index a5346630dfae..63037c689a4b 100644 --- a/trunk/fs/xfs/quota/xfs_qm_bhv.c +++ b/trunk/fs/xfs/quota/xfs_qm_bhv.c @@ -42,6 +42,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" @@ -83,7 +84,7 @@ xfs_fill_statvfs_from_dquot( * return a statvfs of the project, not the entire filesystem. * This makes such trees appear as if they are filesystems in themselves. */ -void +STATIC void xfs_qm_statvfs( xfs_inode_t *ip, struct kstatfs *statp) @@ -91,13 +92,20 @@ xfs_qm_statvfs( xfs_mount_t *mp = ip->i_mount; xfs_dquot_t *dqp; + if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || + !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == + (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + return; + if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { - xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); + xfs_disk_dquot_t *dp = &dqp->q_core; + + xfs_fill_statvfs_from_dquot(statp, dp); xfs_qm_dqput(dqp); } } -int +STATIC int xfs_qm_newmount( xfs_mount_t *mp, uint *needquotamount, @@ -106,6 +114,9 @@ xfs_qm_newmount( uint quotaondisk; uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; + *quotaflags = 0; + *needquotamount = B_FALSE; + quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); @@ -168,6 +179,66 @@ xfs_qm_newmount( return 0; } +STATIC int +xfs_qm_endmount( + xfs_mount_t *mp, + uint needquotamount, + uint quotaflags) +{ + if (needquotamount) { + ASSERT(mp->m_qflags == 0); + mp->m_qflags = quotaflags; + xfs_qm_mount_quotas(mp); + } + +#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) + if (! (XFS_IS_QUOTA_ON(mp))) + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); + else + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); +#endif + +#ifdef QUOTADEBUG + if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp)) + cmn_err(CE_WARN, "XFS: mount internalqcheck failed"); +#endif + + return 0; +} + +STATIC void +xfs_qm_dqrele_null( + xfs_dquot_t *dq) +{ + /* + * Called from XFS, where we always check first for a NULL dquot. 
+ */ + if (!dq) + return; + xfs_qm_dqrele(dq); +} + + +struct xfs_qmops xfs_qmcore_xfs = { + .xfs_qminit = xfs_qm_newmount, + .xfs_qmdone = xfs_qm_unmount_quotadestroy, + .xfs_qmmount = xfs_qm_endmount, + .xfs_qmunmount = xfs_qm_unmount_quotas, + .xfs_dqrele = xfs_qm_dqrele_null, + .xfs_dqattach = xfs_qm_dqattach, + .xfs_dqdetach = xfs_qm_dqdetach, + .xfs_dqpurgeall = xfs_qm_dqpurge_all, + .xfs_dqvopalloc = xfs_qm_vop_dqalloc, + .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode, + .xfs_dqvoprename = xfs_qm_vop_rename_dqattach, + .xfs_dqvopchown = xfs_qm_vop_chown, + .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve, + .xfs_dqstatvfs = xfs_qm_statvfs, + .xfs_dqsync = xfs_qm_sync, + .xfs_dqtrxops = &xfs_trans_dquot_ops, +}; +EXPORT_SYMBOL(xfs_qmcore_xfs); + void __init xfs_qm_init(void) { diff --git a/trunk/fs/xfs/quota/xfs_qm_stats.c b/trunk/fs/xfs/quota/xfs_qm_stats.c index 21b08c0396a1..709f5f545cf5 100644 --- a/trunk/fs/xfs/quota/xfs_qm_stats.c +++ b/trunk/fs/xfs/quota/xfs_qm_stats.c @@ -42,6 +42,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" diff --git a/trunk/fs/xfs/quota/xfs_qm_syscalls.c b/trunk/fs/xfs/quota/xfs_qm_syscalls.c index 4e4276b956e8..c7b66f6506ce 100644 --- a/trunk/fs/xfs/quota/xfs_qm_syscalls.c +++ b/trunk/fs/xfs/quota/xfs_qm_syscalls.c @@ -45,6 +45,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -846,55 +847,105 @@ xfs_qm_export_flags( } -STATIC int -xfs_dqrele_inode( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) +/* + * Release all the dquots on the inodes in an AG. + */ +STATIC void +xfs_qm_dqrele_inodes_ag( + xfs_mount_t *mp, + int ag, + uint flags) { - int error; + xfs_inode_t *ip = NULL; + xfs_perag_t *pag = &mp->m_perag[ag]; + int first_index = 0; + int nr_found; + + do { + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void**)&ip, first_index, 1); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* skip quota inodes */ + if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { + ASSERT(ip->i_udquot == NULL); + ASSERT(ip->i_gdquot == NULL); + read_unlock(&pag->pag_ici_lock); + continue; + } - /* skip quota inodes */ - if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) { - ASSERT(ip->i_udquot == NULL); - ASSERT(ip->i_gdquot == NULL); + /* + * If we can't get a reference on the inode, it must be + * in reclaim. Leave it for the reclaim code to flush. 
+ */ + if (!igrab(VFS_I(ip))) { + read_unlock(&pag->pag_ici_lock); + continue; + } read_unlock(&pag->pag_ici_lock); - return 0; - } - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; + /* avoid new inodes though we shouldn't find any here */ + if (xfs_iflags_test(ip, XFS_INEW)) { + IRELE(ip); + continue; + } - xfs_ilock(ip, XFS_ILOCK_EXCL); - if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { - xfs_qm_dqrele(ip->i_udquot); - ip->i_udquot = NULL; - } - if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { - xfs_qm_dqrele(ip->i_gdquot); - ip->i_gdquot = NULL; - } - xfs_iput(ip, XFS_ILOCK_EXCL); - IRELE(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && + ip->i_gdquot) { + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } + xfs_iput(ip, XFS_ILOCK_EXCL); - return 0; + } while (nr_found); } - /* * Go thru all the inodes in the file system, releasing their dquots. - * * Note that the mount structure gets modified to indicate that quotas are off - * AFTER this, in the case of quotaoff. + * AFTER this, in the case of quotaoff. This also gets called from + * xfs_rootumount. */ void xfs_qm_dqrele_all_inodes( struct xfs_mount *mp, uint flags) { + int i; + ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + xfs_qm_dqrele_inodes_ag(mp, i, flags); + } } /*------------------------------------------------------------------------*/ diff --git a/trunk/fs/xfs/quota/xfs_trans_dquot.c b/trunk/fs/xfs/quota/xfs_trans_dquot.c index 97ac9640be98..447173bcf96d 100644 --- a/trunk/fs/xfs/quota/xfs_trans_dquot.c +++ b/trunk/fs/xfs/quota/xfs_trans_dquot.c @@ -42,6 +42,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" @@ -110,7 +111,7 @@ xfs_trans_log_dquot( * Carry forward whatever is left of the quota blk reservation to * the spanky new transaction */ -void +STATIC void xfs_trans_dup_dqinfo( xfs_trans_t *otp, xfs_trans_t *ntp) @@ -166,17 +167,19 @@ xfs_trans_dup_dqinfo( /* * Wrap around mod_dquot to account for both user and group quotas. */ -void +STATIC void xfs_trans_mod_dquot_byino( xfs_trans_t *tp, xfs_inode_t *ip, uint field, long delta) { - xfs_mount_t *mp = tp->t_mountp; + xfs_mount_t *mp; + + ASSERT(tp); + mp = tp->t_mountp; - if (!XFS_IS_QUOTA_RUNNING(mp) || - !XFS_IS_QUOTA_ON(mp) || + if (!XFS_IS_QUOTA_ON(mp) || ip->i_ino == mp->m_sb.sb_uquotino || ip->i_ino == mp->m_sb.sb_gquotino) return; @@ -226,7 +229,6 @@ xfs_trans_mod_dquot( xfs_dqtrx_t *qtrx; ASSERT(tp); - ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); qtrx = NULL; if (tp->t_dqinfo == NULL) @@ -344,7 +346,7 @@ xfs_trans_dqlockedjoin( * Unreserve just the reservations done by this transaction. * dquot is still left locked at exit. */ -void +STATIC void xfs_trans_apply_dquot_deltas( xfs_trans_t *tp) { @@ -355,7 +357,7 @@ xfs_trans_apply_dquot_deltas( long totalbdelta; long totalrtbdelta; - if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY)) return; ASSERT(tp->t_dqinfo); @@ -529,7 +531,7 @@ xfs_trans_apply_dquot_deltas( * we simply throw those away, since that's the expected behavior * when a transaction is curtailed without a commit. 
*/ -void +STATIC void xfs_trans_unreserve_and_mod_dquots( xfs_trans_t *tp) { @@ -766,7 +768,7 @@ xfs_trans_reserve_quota_bydquots( { int resvd = 0, error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; if (tp && tp->t_dqinfo == NULL) @@ -809,17 +811,18 @@ xfs_trans_reserve_quota_bydquots( * This doesn't change the actual usage, just the reservation. * The inode sent in is locked. */ -int +STATIC int xfs_trans_reserve_quota_nblks( - struct xfs_trans *tp, - struct xfs_inode *ip, - long nblks, - long ninos, - uint flags) + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_inode_t *ip, + long nblks, + long ninos, + uint flags) { - struct xfs_mount *mp = ip->i_mount; + int error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; if (XFS_IS_PQUOTA_ON(mp)) flags |= XFS_QMOPT_ENOSPC; @@ -828,6 +831,7 @@ xfs_trans_reserve_quota_nblks( ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == XFS_TRANS_DQ_RES_RTBLKS || (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == @@ -836,9 +840,11 @@ xfs_trans_reserve_quota_nblks( /* * Reserve nblks against these dquots, with trans as the mediator. */ - return xfs_trans_reserve_quota_bydquots(tp, mp, - ip->i_udquot, ip->i_gdquot, - nblks, ninos, flags); + error = xfs_trans_reserve_quota_bydquots(tp, mp, + ip->i_udquot, ip->i_gdquot, + nblks, ninos, + flags); + return error; } /* @@ -889,15 +895,25 @@ STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { - tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); + (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); } -void +STATIC void xfs_trans_free_dqinfo( xfs_trans_t *tp) { if (!tp->t_dqinfo) return; - kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); - tp->t_dqinfo = NULL; + kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo); + (tp)->t_dqinfo = NULL; } + +xfs_dqtrxops_t xfs_trans_dquot_ops = { + .qo_dup_dqinfo = xfs_trans_dup_dqinfo, + .qo_free_dqinfo = xfs_trans_free_dqinfo, + .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino, + .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas, + .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks, + .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots, + .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots, +}; diff --git a/trunk/fs/xfs/xfs_acl.c b/trunk/fs/xfs/xfs_acl.c new file mode 100644 index 000000000000..a8cdd73999a4 --- /dev/null +++ b/trunk/fs/xfs/xfs_acl.c @@ -0,0 +1,874 @@ +/* + * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_inum.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_vnodeops.h" + +#include <linux/capability.h> +#include <linux/posix_acl_xattr.h> + +STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *); +STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); +STATIC void xfs_acl_get_endian(xfs_acl_t *); +STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); +STATIC int xfs_acl_invalid(xfs_acl_t *); +STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); +STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *); +STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *); +STATIC int xfs_acl_allow_set(struct inode *, int); + +kmem_zone_t *xfs_acl_zone; + + +/* + * Test for existence of access ACL attribute as efficiently as possible. + */ +int +xfs_acl_vhasacl_access( + struct inode *vp) +{ + int error; + + xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error); + return (error == 0); +} + +/* + * Test for existence of default ACL attribute as efficiently as possible. + */ +int +xfs_acl_vhasacl_default( + struct inode *vp) +{ + int error; + + if (!S_ISDIR(vp->i_mode)) + return 0; + xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); + return (error == 0); +} + +/* + * Convert from extended attribute representation to in-memory for XFS. + */ +STATIC int +posix_acl_xattr_to_xfs( + posix_acl_xattr_header *src, + size_t size, + xfs_acl_t *dest) +{ + posix_acl_xattr_entry *src_entry; + xfs_acl_entry_t *dest_entry; + int n; + + if (!src || !dest) + return EINVAL; + + if (size < sizeof(posix_acl_xattr_header)) + return EINVAL; + + if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) + return EOPNOTSUPP; + + memset(dest, 0, sizeof(xfs_acl_t)); + dest->acl_cnt = posix_acl_xattr_count(size); + if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES) + return EINVAL; + + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + if (!dest->acl_cnt) + return 0; + + src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src)); + dest_entry = &dest->acl_entry[0]; + + for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) { + dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm); + if (_ACL_PERM_INVALID(dest_entry->ae_perm)) + return EINVAL; + dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag); + switch(dest_entry->ae_tag) { + case ACL_USER: + case ACL_GROUP: + dest_entry->ae_id = le32_to_cpu(src_entry->e_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + dest_entry->ae_id = ACL_UNDEFINED_ID; + break; + default: + return EINVAL; + } + } + if (xfs_acl_invalid(dest)) + return EINVAL; + + return 0; +} + +/* + * Comparison function called from xfs_sort(). + * Primary key is ae_tag, secondary key is ae_id.
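+ * e.g. with the Linux tag values (ACL_USER_OBJ < ACL_USER < ACL_GROUP_OBJ + * < ACL_GROUP < ACL_MASK < ACL_OTHER) this yields the canonical + * u::, u:uid:, g::, g:gid:, m::, o:: ordering; ae_id only orders multiple + * u:uid:/g:gid: entries within one tag.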
+ */ +STATIC int +xfs_acl_entry_compare( + const void *va, + const void *vb) +{ + xfs_acl_entry_t *a = (xfs_acl_entry_t *)va, + *b = (xfs_acl_entry_t *)vb; + + if (a->ae_tag == b->ae_tag) + return (a->ae_id - b->ae_id); + return (a->ae_tag - b->ae_tag); +} + +/* + * Convert from in-memory XFS to extended attribute representation. + */ +STATIC int +posix_acl_xfs_to_xattr( + xfs_acl_t *src, + posix_acl_xattr_header *dest, + size_t size) +{ + int n; + size_t new_size = posix_acl_xattr_size(src->acl_cnt); + posix_acl_xattr_entry *dest_entry; + xfs_acl_entry_t *src_entry; + + if (size < new_size) + return -ERANGE; + + /* Need to sort src XFS ACL by */ + xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]), + xfs_acl_entry_compare); + + dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); + dest_entry = &dest->a_entries[0]; + src_entry = &src->acl_entry[0]; + for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) { + dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm); + if (_ACL_PERM_INVALID(src_entry->ae_perm)) + return -EINVAL; + dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag); + switch (src_entry->ae_tag) { + case ACL_USER: + case ACL_GROUP: + dest_entry->e_id = cpu_to_le32(src_entry->ae_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); + break; + default: + return -EINVAL; + } + } + return new_size; +} + +int +xfs_acl_vget( + struct inode *vp, + void *acl, + size_t size, + int kind) +{ + int error; + xfs_acl_t *xfs_acl = NULL; + posix_acl_xattr_header *ext_acl = acl; + int flags = 0; + + if(size) { + if (!(_ACL_ALLOC(xfs_acl))) { + error = ENOMEM; + goto out; + } + memset(xfs_acl, 0, sizeof(xfs_acl_t)); + } else + flags = ATTR_KERNOVAL; + + xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error); + if (error) + goto out; + + if (!size) { + error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES); + } else { + if (xfs_acl_invalid(xfs_acl)) { + error = EINVAL; + goto out; + } + if (kind == _ACL_TYPE_ACCESS) + xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl); + error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); + } +out: + if(xfs_acl) + _ACL_FREE(xfs_acl); + return -error; +} + +int +xfs_acl_vremove( + struct inode *vp, + int kind) +{ + int error; + + error = xfs_acl_allow_set(vp, kind); + if (!error) { + error = xfs_attr_remove(XFS_I(vp), + kind == _ACL_TYPE_DEFAULT? + SGI_ACL_DEFAULT: SGI_ACL_FILE, + ATTR_ROOT); + if (error == ENOATTR) + error = 0; /* 'scool */ + } + return -error; +} + +int +xfs_acl_vset( + struct inode *vp, + void *acl, + size_t size, + int kind) +{ + posix_acl_xattr_header *ext_acl = acl; + xfs_acl_t *xfs_acl; + int error; + int basicperms = 0; /* more than std unix perms? */ + + if (!acl) + return -EINVAL; + + if (!(_ACL_ALLOC(xfs_acl))) + return -ENOMEM; + + error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl); + if (error) { + _ACL_FREE(xfs_acl); + return -error; + } + if (!xfs_acl->acl_cnt) { + _ACL_FREE(xfs_acl); + return 0; + } + + error = xfs_acl_allow_set(vp, kind); + + /* Incoming ACL exists, set file mode based on its value */ + if (!error && kind == _ACL_TYPE_ACCESS) + error = xfs_acl_setmode(vp, xfs_acl, &basicperms); + + if (error) + goto out; + + /* + * If we have more than std unix permissions, set up the actual attr. + * Otherwise, delete any existing attr. This prevents us from + * having actual attrs for permissions that can be stored in the + * standard permission bits. 
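+ * e.g. an access ACL of exactly u::rw-, g::r--, o::r-- collapses into + * mode 0644 and leaves no attr behind, while a named entry such as + * u:100:rw- (and the m:: mask it requires) forces a real attr.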
+ */ + if (!basicperms) { + xfs_acl_set_attr(vp, xfs_acl, kind, &error); + } else { + error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); + } + +out: + _ACL_FREE(xfs_acl); + return -error; +} + +int +xfs_acl_iaccess( + xfs_inode_t *ip, + mode_t mode, + cred_t *cr) +{ + xfs_acl_t *acl; + int rval; + struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE}; + + if (!(_ACL_ALLOC(acl))) + return -1; + + /* If the file has no ACL return -1. */ + rval = sizeof(xfs_acl_t); + if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) { + _ACL_FREE(acl); + return -1; + } + xfs_acl_get_endian(acl); + + /* If the file has an empty ACL return -1. */ + if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) { + _ACL_FREE(acl); + return -1; + } + + /* Synchronize ACL with mode bits */ + xfs_acl_sync_mode(ip->i_d.di_mode, acl); + + rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr); + _ACL_FREE(acl); + return rval; +} + +STATIC int +xfs_acl_allow_set( + struct inode *vp, + int kind) +{ + if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) + return EPERM; + if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode)) + return ENOTDIR; + if (vp->i_sb->s_flags & MS_RDONLY) + return EROFS; + if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER)) + return EPERM; + return 0; +} + +/* + * Note: cr is only used here for the capability check if the ACL test fails. + * It is not used to find out the credentials uid or groups etc, as was + * done in IRIX. It is assumed that the uid and groups for the current + * thread are taken from "current" instead of the cr parameter. + */ +STATIC int +xfs_acl_access( + uid_t fuid, + gid_t fgid, + xfs_acl_t *fap, + mode_t md, + cred_t *cr) +{ + xfs_acl_entry_t matched; + int i, allows; + int maskallows = -1; /* true, but not 1, either */ + int seen_userobj = 0; + + matched.ae_tag = 0; /* Invalid type */ + matched.ae_perm = 0; + + for (i = 0; i < fap->acl_cnt; i++) { + /* + * Break out if we've got a user_obj entry or + * a user entry and the mask (and have processed USER_OBJ) + */ + if (matched.ae_tag == ACL_USER_OBJ) + break; + if (matched.ae_tag == ACL_USER) { + if (maskallows != -1 && seen_userobj) + break; + if (fap->acl_entry[i].ae_tag != ACL_MASK && + fap->acl_entry[i].ae_tag != ACL_USER_OBJ) + continue; + } + /* True if this entry allows the requested access */ + allows = ((fap->acl_entry[i].ae_perm & md) == md); + + switch (fap->acl_entry[i].ae_tag) { + case ACL_USER_OBJ: + seen_userobj = 1; + if (fuid != current_fsuid()) + continue; + matched.ae_tag = ACL_USER_OBJ; + matched.ae_perm = allows; + break; + case ACL_USER: + if (fap->acl_entry[i].ae_id != current_fsuid()) + continue; + matched.ae_tag = ACL_USER; + matched.ae_perm = allows; + break; + case ACL_GROUP_OBJ: + if ((matched.ae_tag == ACL_GROUP_OBJ || + matched.ae_tag == ACL_GROUP) && !allows) + continue; + if (!in_group_p(fgid)) + continue; + matched.ae_tag = ACL_GROUP_OBJ; + matched.ae_perm = allows; + break; + case ACL_GROUP: + if ((matched.ae_tag == ACL_GROUP_OBJ || + matched.ae_tag == ACL_GROUP) && !allows) + continue; + if (!in_group_p(fap->acl_entry[i].ae_id)) + continue; + matched.ae_tag = ACL_GROUP; + matched.ae_perm = allows; + break; + case ACL_MASK: + maskallows = allows; + break; + case ACL_OTHER: + if (matched.ae_tag != 0) + continue; + matched.ae_tag = ACL_OTHER; + matched.ae_perm = allows; + break; + } + } + /* + * First possibility is that no matched entry allows access. + * The capability to override DAC may exist, so check for it. 
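+ * e.g. a write request that matched a u:uid: entry whose ae_perm allows + * writing is still denied here when the m:: entry filtered writing out + * (maskallows == 0).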
+ */ + switch (matched.ae_tag) { + case ACL_OTHER: + case ACL_USER_OBJ: + if (matched.ae_perm) + return 0; + break; + case ACL_USER: + case ACL_GROUP_OBJ: + case ACL_GROUP: + if (maskallows && matched.ae_perm) + return 0; + break; + case 0: + break; + } + + /* EACCES tells generic_permission to check for capability overrides */ + return EACCES; +} + +/* + * ACL validity checker. + * This acl validation routine checks each ACL entry read in makes sense. + */ +STATIC int +xfs_acl_invalid( + xfs_acl_t *aclp) +{ + xfs_acl_entry_t *entry, *e; + int user = 0, group = 0, other = 0, mask = 0; + int mask_required = 0; + int i, j; + + if (!aclp) + goto acl_invalid; + + if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES) + goto acl_invalid; + + for (i = 0; i < aclp->acl_cnt; i++) { + entry = &aclp->acl_entry[i]; + switch (entry->ae_tag) { + case ACL_USER_OBJ: + if (user++) + goto acl_invalid; + break; + case ACL_GROUP_OBJ: + if (group++) + goto acl_invalid; + break; + case ACL_OTHER: + if (other++) + goto acl_invalid; + break; + case ACL_USER: + case ACL_GROUP: + for (j = i + 1; j < aclp->acl_cnt; j++) { + e = &aclp->acl_entry[j]; + if (e->ae_id == entry->ae_id && + e->ae_tag == entry->ae_tag) + goto acl_invalid; + } + mask_required++; + break; + case ACL_MASK: + if (mask++) + goto acl_invalid; + break; + default: + goto acl_invalid; + } + } + if (!user || !group || !other || (mask_required && !mask)) + goto acl_invalid; + else + return 0; +acl_invalid: + return EINVAL; +} + +/* + * Do ACL endian conversion. + */ +STATIC void +xfs_acl_get_endian( + xfs_acl_t *aclp) +{ + xfs_acl_entry_t *ace, *end; + + INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); + end = &aclp->acl_entry[0]+aclp->acl_cnt; + for (ace = &aclp->acl_entry[0]; ace < end; ace++) { + INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag); + INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id); + INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm); + } +} + +/* + * Get the ACL from the EA and do endian conversion. + */ +STATIC void +xfs_acl_get_attr( + struct inode *vp, + xfs_acl_t *aclp, + int kind, + int flags, + int *error) +{ + int len = sizeof(xfs_acl_t); + + ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); + flags |= ATTR_ROOT; + *error = xfs_attr_get(XFS_I(vp), + kind == _ACL_TYPE_ACCESS ? + SGI_ACL_FILE : SGI_ACL_DEFAULT, + (char *)aclp, &len, flags); + if (*error || (flags & ATTR_KERNOVAL)) + return; + xfs_acl_get_endian(aclp); +} + +/* + * Set the EA with the ACL and do endian conversion. + */ +STATIC void +xfs_acl_set_attr( + struct inode *vp, + xfs_acl_t *aclp, + int kind, + int *error) +{ + xfs_acl_entry_t *ace, *newace, *end; + xfs_acl_t *newacl; + int len; + + if (!(_ACL_ALLOC(newacl))) { + *error = ENOMEM; + return; + } + + len = sizeof(xfs_acl_t) - + (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt)); + end = &aclp->acl_entry[0]+aclp->acl_cnt; + for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0]; + ace < end; + ace++, newace++) { + INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag); + INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id); + INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); + } + INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); + *error = xfs_attr_set(XFS_I(vp), + kind == _ACL_TYPE_ACCESS ? + SGI_ACL_FILE: SGI_ACL_DEFAULT, + (char *)newacl, len, ATTR_ROOT); + _ACL_FREE(newacl); +} + +int +xfs_acl_vtoacl( + struct inode *vp, + xfs_acl_t *access_acl, + xfs_acl_t *default_acl) +{ + int error = 0; + + if (access_acl) { + /* + * Get the Access ACL and the mode. 
If either cannot + * be obtained for some reason, invalidate the access ACL. + */ + xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); + if (error) + access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; + else /* We have a good ACL and the file mode, synchronize. */ + xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl); + } + + if (default_acl) { + xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error); + if (error) + default_acl->acl_cnt = XFS_ACL_NOT_PRESENT; + } + return error; +} + +/* + * This function retrieves the parent directory's acl, processes it + * and lets the child inherit the acl(s) that it should. + */ +int +xfs_acl_inherit( + struct inode *vp, + mode_t mode, + xfs_acl_t *pdaclp) +{ + xfs_acl_t *cacl; + int error = 0; + int basicperms = 0; + + /* + * If the parent does not have a default ACL, or it's an + * invalid ACL, we're done. + */ + if (!vp) + return 0; + if (!pdaclp || xfs_acl_invalid(pdaclp)) + return 0; + + /* + * Copy the default ACL of the containing directory to + * the access ACL of the new file and use the mode that + * was passed in to set up the correct initial values for + * the u::,g::[m::], and o:: entries. This is what makes + * umask() "work" with ACL's. + */ + + if (!(_ACL_ALLOC(cacl))) + return ENOMEM; + + memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); + xfs_acl_filter_mode(mode, cacl); + error = xfs_acl_setmode(vp, cacl, &basicperms); + if (error) + goto out_error; + + /* + * Set the Default and Access ACL on the file. The mode is already + * set on the file, so we don't need to worry about that. + * + * If the new file is a directory, its default ACL is a copy of + * the containing directory's default ACL. + */ + if (S_ISDIR(vp->i_mode)) + xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); + if (!error && !basicperms) + xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); +out_error: + _ACL_FREE(cacl); + return error; +} + +/* + * Set up the correct mode on the file based on the supplied ACL. This + * makes sure that the mode on the file reflects the state of the + * u::,g::[m::], and o:: entries in the ACL. Since the mode is where + * the ACL is going to get the permissions for these entries, we must + * synchronize the mode whenever we set the ACL on a file. + */ +STATIC int +xfs_acl_setmode( + struct inode *vp, + xfs_acl_t *acl, + int *basicperms) +{ + struct iattr iattr; + xfs_acl_entry_t *ap; + xfs_acl_entry_t *gap = NULL; + int i, nomask = 1; + + *basicperms = 1; + + if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) + return 0; + + /* + * Copy the u::, g::, o::, and m:: bits from the ACL into the + * mode. The m:: bits take precedence over the g:: bits. 
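+ * e.g. u::rwx, g::rwx, m::r--, o::--- produces mode 0740: the owner + * bits come from u:: (7 << 6) while the group bits come from the mask + * (4 << 3), not from the rwx g:: entry.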
+ */ + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = XFS_I(vp)->i_d.di_mode; + iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); + ap = acl->acl_entry; + for (i = 0; i < acl->acl_cnt; ++i) { + switch (ap->ae_tag) { + case ACL_USER_OBJ: + iattr.ia_mode |= ap->ae_perm << 6; + break; + case ACL_GROUP_OBJ: + gap = ap; + break; + case ACL_MASK: /* more than just standard modes */ + nomask = 0; + iattr.ia_mode |= ap->ae_perm << 3; + *basicperms = 0; + break; + case ACL_OTHER: + iattr.ia_mode |= ap->ae_perm; + break; + default: /* more than just standard modes */ + *basicperms = 0; + break; + } + ap++; + } + + /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */ + if (gap && nomask) + iattr.ia_mode |= gap->ae_perm << 3; + + return xfs_setattr(XFS_I(vp), &iattr, 0); +} + +/* + * The permissions for the special ACL entries (u::, g::[m::], o::) are + * actually stored in the file mode (if there is both a group and a mask, + * the group is stored in the ACL entry and the mask is stored on the file). + * This allows the mode to remain automatically in sync with the ACL without + * the need for a call-back to the ACL system at every point where the mode + * could change. This function takes the permissions from the specified mode + * and places it in the supplied ACL. + * + * This implementation draws its validity from the fact that, when the ACL + * was assigned, the mode was copied from the ACL. + * If the mode did not change, therefore, the mode remains exactly what was + * taken from the special ACL entries at assignment. + * If a subsequent chmod() was done, the POSIX spec says that the change in + * mode must cause an update to the ACL seen at user level and used for + * access checks. Before and after a mode change, therefore, the file mode + * most accurately reflects what the special ACL entries should permit/deny. + * + * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly, + * the existing mode bits will override whatever is in the + * ACL. Similarly, if there is a pre-existing ACL that was + * never in sync with its mode (owing to a bug in 6.5 and + * before), it will now magically (or mystically) be + * synchronized. This could cause slight astonishment, but + * it is better than inconsistent permissions. + * + * The supplied ACL is a template that may contain any combination + * of special entries. These are treated as place holders when we fill + * out the ACL. This routine does not add or remove special entries, it + * simply unites each special entry with its associated set of permissions. + */ +STATIC void +xfs_acl_sync_mode( + mode_t mode, + xfs_acl_t *acl) +{ + int i, nomask = 1; + xfs_acl_entry_t *ap; + xfs_acl_entry_t *gap = NULL; + + /* + * Set ACL entries. POSIX1003.1eD16 requires that the MASK + * be set instead of the GROUP entry, if there is a MASK. + */ + for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { + switch (ap->ae_tag) { + case ACL_USER_OBJ: + ap->ae_perm = (mode >> 6) & 0x7; + break; + case ACL_GROUP_OBJ: + gap = ap; + break; + case ACL_MASK: + nomask = 0; + ap->ae_perm = (mode >> 3) & 0x7; + break; + case ACL_OTHER: + ap->ae_perm = mode & 0x7; + break; + default: + break; + } + } + /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ + if (gap && nomask) + gap->ae_perm = (mode >> 3) & 0x7; +} + +/* + * When inheriting an Access ACL from a directory Default ACL, + * the ACL bits are set to the intersection of the ACL default + * permission bits and the file permission bits in mode. 
If there + * are no permission bits on the file then we must not give them + * the ACL. This is what what makes umask() work with ACLs. + */ +STATIC void +xfs_acl_filter_mode( + mode_t mode, + xfs_acl_t *acl) +{ + int i, nomask = 1; + xfs_acl_entry_t *ap; + xfs_acl_entry_t *gap = NULL; + + /* + * Set ACL entries. POSIX1003.1eD16 requires that the MASK + * be merged with GROUP entry, if there is a MASK. + */ + for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { + switch (ap->ae_tag) { + case ACL_USER_OBJ: + ap->ae_perm &= (mode >> 6) & 0x7; + break; + case ACL_GROUP_OBJ: + gap = ap; + break; + case ACL_MASK: + nomask = 0; + ap->ae_perm &= (mode >> 3) & 0x7; + break; + case ACL_OTHER: + ap->ae_perm &= mode & 0x7; + break; + default: + break; + } + } + /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ + if (gap && nomask) + gap->ae_perm &= (mode >> 3) & 0x7; +} diff --git a/trunk/fs/xfs/xfs_acl.h b/trunk/fs/xfs/xfs_acl.h index 63dc1f2efad5..642f1db4def4 100644 --- a/trunk/fs/xfs/xfs_acl.h +++ b/trunk/fs/xfs/xfs_acl.h @@ -18,48 +18,81 @@ #ifndef __XFS_ACL_H__ #define __XFS_ACL_H__ -struct inode; -struct posix_acl; -struct xfs_inode; +/* + * Access Control Lists + */ +typedef __uint16_t xfs_acl_perm_t; +typedef __int32_t xfs_acl_tag_t; +typedef __int32_t xfs_acl_id_t; #define XFS_ACL_MAX_ENTRIES 25 #define XFS_ACL_NOT_PRESENT (-1) -/* On-disk XFS access control list structure */ -struct xfs_acl { - __be32 acl_cnt; - struct xfs_acl_entry { - __be32 ae_tag; - __be32 ae_id; - __be16 ae_perm; - } acl_entry[XFS_ACL_MAX_ENTRIES]; -}; +typedef struct xfs_acl_entry { + xfs_acl_tag_t ae_tag; + xfs_acl_id_t ae_id; + xfs_acl_perm_t ae_perm; +} xfs_acl_entry_t; + +typedef struct xfs_acl { + __int32_t acl_cnt; + xfs_acl_entry_t acl_entry[XFS_ACL_MAX_ENTRIES]; +} xfs_acl_t; /* On-disk XFS extended attribute names */ -#define SGI_ACL_FILE "SGI_ACL_FILE" -#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" +#define SGI_ACL_FILE "SGI_ACL_FILE" +#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) +#define _ACL_TYPE_ACCESS 1 +#define _ACL_TYPE_DEFAULT 2 + #ifdef CONFIG_XFS_POSIX_ACL -extern int xfs_check_acl(struct inode *inode, int mask); -extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); -extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); -extern int xfs_acl_chmod(struct inode *inode); -extern void xfs_inode_init_acls(struct xfs_inode *ip); -extern void xfs_inode_clear_acls(struct xfs_inode *ip); -extern int posix_acl_access_exists(struct inode *inode); -extern int posix_acl_default_exists(struct inode *inode); -extern struct xattr_handler xfs_xattr_system_handler; +struct vattr; +struct xfs_inode; + +extern struct kmem_zone *xfs_acl_zone; +#define xfs_acl_zone_init(zone, name) \ + (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) +#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) + +extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *); +extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); +extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *); +extern int xfs_acl_vhasacl_access(struct inode *); +extern int xfs_acl_vhasacl_default(struct inode *); +extern int xfs_acl_vset(struct inode *, void *, size_t, int); +extern int xfs_acl_vget(struct inode *, void *, size_t, int); +extern int xfs_acl_vremove(struct inode *, int); + +#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) + +#define 
_ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d)) +#define _ACL_GET_ACCESS(pv,pa) (xfs_acl_vtoacl(pv,pa,NULL) == 0) +#define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0) +#define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access +#define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default + +#define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP)) +#define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0) + #else -# define xfs_check_acl NULL -# define xfs_get_acl(inode, type) NULL -# define xfs_inherit_acl(inode, default_acl) 0 -# define xfs_acl_chmod(inode) 0 -# define xfs_inode_init_acls(ip) -# define xfs_inode_clear_acls(ip) -# define posix_acl_access_exists(inode) 0 -# define posix_acl_default_exists(inode) 0 -#endif /* CONFIG_XFS_POSIX_ACL */ +#define xfs_acl_zone_init(zone,name) +#define xfs_acl_zone_destroy(zone) +#define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP) +#define xfs_acl_vget(v,p,sz,t) (-EOPNOTSUPP) +#define xfs_acl_vremove(v,t) (-EOPNOTSUPP) +#define xfs_acl_vhasacl_access(v) (0) +#define xfs_acl_vhasacl_default(v) (0) +#define _ACL_ALLOC(a) (1) /* successfully allocate nothing */ +#define _ACL_FREE(a) ((void)0) +#define _ACL_INHERIT(c,m,d) (0) +#define _ACL_GET_ACCESS(pv,pa) (0) +#define _ACL_GET_DEFAULT(pv,pd) (0) +#define _ACL_ACCESS_EXISTS (NULL) +#define _ACL_DEFAULT_EXISTS (NULL) +#endif + #endif /* __XFS_ACL_H__ */ diff --git a/trunk/fs/xfs/xfs_ag.h b/trunk/fs/xfs/xfs_ag.h index f24b50b68d03..c8641f713caa 100644 --- a/trunk/fs/xfs/xfs_ag.h +++ b/trunk/fs/xfs/xfs_ag.h @@ -212,8 +212,6 @@ typedef struct xfs_perag /* * tags for inode radix tree */ -#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup - in xfs_inode_ag_iterator */ #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) diff --git a/trunk/fs/xfs/xfs_arch.h b/trunk/fs/xfs/xfs_arch.h index 0902249354a0..53d5e70d1360 100644 --- a/trunk/fs/xfs/xfs_arch.h +++ b/trunk/fs/xfs/xfs_arch.h @@ -73,6 +73,28 @@ static inline void be64_add_cpu(__be64 *a, __s64 b) #endif /* __KERNEL__ */ +/* do we need conversion? */ +#define ARCH_NOCONVERT 1 +#ifdef XFS_NATIVE_HOST +# define ARCH_CONVERT ARCH_NOCONVERT +#else +# define ARCH_CONVERT 0 +#endif + +/* generic swapping macros */ + +#ifndef HAVE_SWABMACROS +#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var)))) +#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var)))) +#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var)))) +#endif + +#define INT_SWAP(type, var) \ + ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \ + ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \ + ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \ + (var)))) + /* * get and set integers from potentially unaligned locations */ @@ -85,6 +107,16 @@ static inline void be64_add_cpu(__be64 *a, __s64 b) ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ } +/* does not return a value */ +#define INT_SET(reference,arch,valueref) \ + (__builtin_constant_p(valueref) ? \ + (void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \ + (void)( \ + ((reference) = (valueref)), \ + ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \ + ) \ + ) + /* * In directories inode numbers are stored as unaligned arrays of unsigned * 8bit integers on disk. 
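The INT_SWAP/INT_SET macros added back above byte-swap an on-disk field in place whenever the host byte order differs from the on-disk format, and collapse to a plain assignment on a big-endian (XFS_NATIVE_HOST) build where ARCH_CONVERT equals ARCH_NOCONVERT. A rough standalone sketch of that round trip, using a hand-rolled swab32() as a stand-in for the kernel's __swab32() and an arbitrary acl_cnt value:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's __swab32(); assumes a little-endian host. */
static uint32_t swab32(uint32_t x)
{
	return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
	       ((x & 0x00ff0000u) >> 8) | ((x & 0xff000000u) >> 24);
}

int main(void)
{
	uint32_t acl_cnt = 3;	/* host-order value, e.g. xfs_acl.acl_cnt */

	/* What INT_SET() stores when ARCH_CONVERT != ARCH_NOCONVERT. */
	uint32_t ondisk = swab32(acl_cnt);

	printf("host %#010x -> disk %#010x -> host %#010x\n",
	       acl_cnt, ondisk, swab32(ondisk));
	return 0;
}

Applying the swap twice recovers the original value, which is why xfs_acl_get_endian() and xfs_acl_set_attr() can use the same INT_SET() pattern for both conversion directions.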
diff --git a/trunk/fs/xfs/xfs_attr.c b/trunk/fs/xfs/xfs_attr.c index db15feb906ff..5fde1654b430 100644 --- a/trunk/fs/xfs/xfs_attr.c +++ b/trunk/fs/xfs/xfs_attr.c @@ -45,6 +45,7 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" +#include "xfs_acl.h" #include "xfs_rw.h" #include "xfs_vnodeops.h" @@ -248,9 +249,8 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, /* * Attach the dquots to the inode. */ - error = xfs_qm_dqattach(dp, 0); - if (error) - return error; + if ((error = XFS_QM_DQATTACH(mp, dp, 0))) + return (error); /* * If the inode doesn't have an attribute fork, add one. @@ -311,7 +311,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, } xfs_ilock(dp, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { @@ -501,9 +501,8 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) /* * Attach the dquots to the inode. */ - error = xfs_qm_dqattach(dp, 0); - if (error) - return error; + if ((error = XFS_QM_DQATTACH(mp, dp, 0))) + return (error); /* * Start our first transaction of the day. diff --git a/trunk/fs/xfs/xfs_bmap.c b/trunk/fs/xfs/xfs_bmap.c index 7928b9983c1d..ca7c6005a487 100644 --- a/trunk/fs/xfs/xfs_bmap.c +++ b/trunk/fs/xfs/xfs_bmap.c @@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc( * Adjust the disk quota also. This was reserved * earlier. */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT, (long) ralen); } else { @@ -2995,7 +2995,7 @@ xfs_bmap_btalloc( * Adjust the disk quota also. This was reserved * earlier. */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, (long) args.len); @@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents( return error; xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); ip->i_d.di_nblocks--; - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); if (cur->bc_bufs[0] == cbp) cur->bc_bufs[0] = NULL; @@ -3386,7 +3386,7 @@ xfs_bmap_del_extent( * Adjust quota data. */ if (qfield) - xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks); /* * Account for change in delayed indirect blocks. @@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree( *firstblock = cur->bc_private.b.firstblock = args.fsbno; cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); /* * Fill in the child block. @@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents( XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); XFS_IFORK_NEXT_SET(ip, whichfork, 1); ip->i_d.di_nblocks = 1; - xfs_trans_mod_dquot_byino(tp, ip, + XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); flags |= xfs_ilog_fext(whichfork); } else { @@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork( XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT))) goto error0; xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ? 
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { @@ -4983,11 +4983,10 @@ xfs_bmapi( * adjusted later. We return if we haven't * allocated blocks already inside this loop. */ - error = xfs_trans_reserve_quota_nblks( - NULL, ip, (long)alen, 0, + if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS( + mp, NULL, ip, (long)alen, 0, rt ? XFS_QMOPT_RES_RTBLKS : - XFS_QMOPT_RES_REGBLKS); - if (error) { + XFS_QMOPT_RES_REGBLKS))) { if (n == 0) { *nmap = 0; ASSERT(cur == NULL); @@ -5036,8 +5035,8 @@ xfs_bmapi( if (XFS_IS_QUOTA_ON(mp)) /* unreserve the blocks now */ (void) - xfs_trans_unreserve_quota_nblks( - NULL, ip, + XFS_TRANS_UNRESERVE_QUOTA_NBLKS( + mp, NULL, ip, (long)alen, 0, rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); @@ -5692,14 +5691,14 @@ xfs_bunmapi( do_div(rtexts, mp->m_sb.sb_rextsize); xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, (int64_t)rtexts, rsvd); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, + (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, + NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)del.br_blockcount, rsvd); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, + (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, + NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); } ip->i_delayed_blks -= del.br_blockcount; @@ -6086,7 +6085,6 @@ xfs_getbmap( break; } - kmem_free(out); return error; } diff --git a/trunk/fs/xfs/xfs_bmap_btree.c b/trunk/fs/xfs/xfs_bmap_btree.c index 5c1ade06578e..0760d352586f 100644 --- a/trunk/fs/xfs/xfs_bmap_btree.c +++ b/trunk/fs/xfs/xfs_bmap_btree.c @@ -590,7 +590,7 @@ xfs_bmbt_alloc_block( cur->bc_private.b.allocated++; cur->bc_private.b.ip->i_d.di_nblocks++; xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip, + XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, XFS_TRANS_DQ_BCOUNT, 1L); new->l = cpu_to_be64(args.fsbno); @@ -618,7 +618,7 @@ xfs_bmbt_free_block( ip->i_d.di_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, bp); return 0; } diff --git a/trunk/fs/xfs/xfs_filestream.c b/trunk/fs/xfs/xfs_filestream.c index edf8bdf4141f..6c87c8f304ef 100644 --- a/trunk/fs/xfs/xfs_filestream.c +++ b/trunk/fs/xfs/xfs_filestream.c @@ -542,8 +542,10 @@ xfs_filestream_associate( * waiting for the lock because someone else is waiting on the lock we * hold and we cannot drop that as we are in a transaction here. * - * Lucky for us, this inversion is not a problem because it's a - * directory inode that we are trying to lock here. + * Lucky for us, this inversion is rarely a problem because it's a + * directory inode that we are trying to lock here and that means the + * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is + * used. i.e. freeze, remount-ro, quotasync or unmount. 
* * So, if we can't get the iolock without sleeping then just give up */ diff --git a/trunk/fs/xfs/xfs_fs.h b/trunk/fs/xfs/xfs_fs.h index c4ea51b55dce..f7c06fac8229 100644 --- a/trunk/fs/xfs/xfs_fs.h +++ b/trunk/fs/xfs/xfs_fs.h @@ -239,13 +239,10 @@ typedef struct xfs_fsop_resblks { * Minimum and maximum sizes need for growth checks */ #define XFS_MIN_AG_BLOCKS 64 -#define XFS_MIN_LOG_BLOCKS 512ULL -#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) -#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) - -/* keep the maximum size under 2^31 by a small amount */ -#define XFS_MAX_LOG_BYTES \ - ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) +#define XFS_MIN_LOG_BLOCKS 512 +#define XFS_MAX_LOG_BLOCKS (64 * 1024) +#define XFS_MIN_LOG_BYTES (256 * 1024) +#define XFS_MAX_LOG_BYTES (128 * 1024 * 1024) /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index 76c540f719e4..89b81eedce6a 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -18,7 +18,6 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" -#include "xfs_acl.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" @@ -83,7 +82,6 @@ xfs_inode_alloc( memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); ip->i_size = 0; ip->i_new_size = 0; - xfs_inode_init_acls(ip); /* * Initialize inode's trace buffers. @@ -502,7 +500,10 @@ xfs_ireclaim( * ilock one but will still hold the iolock. */ xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_qm_dqdetach(ip); + /* + * Release dquots (and their references) if any. + */ + XFS_QM_DQDETACH(ip->i_mount, ip); xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); switch (ip->i_d.di_mode & S_IFMT) { @@ -560,7 +561,6 @@ xfs_ireclaim( ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - xfs_inode_clear_acls(ip); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c index 1f22d65fed0a..123b20c8cbf2 100644 --- a/trunk/fs/xfs/xfs_inode.c +++ b/trunk/fs/xfs/xfs_inode.c @@ -49,6 +49,7 @@ #include "xfs_utils.h" #include "xfs_dir2_trace.h" #include "xfs_quota.h" +#include "xfs_acl.h" #include "xfs_filestream.h" #include "xfs_vnodeops.h" diff --git a/trunk/fs/xfs/xfs_inode.h b/trunk/fs/xfs/xfs_inode.h index 77016702938b..f879c1bc4b96 100644 --- a/trunk/fs/xfs/xfs_inode.h +++ b/trunk/fs/xfs/xfs_inode.h @@ -18,7 +18,6 @@ #ifndef __XFS_INODE_H__ #define __XFS_INODE_H__ -struct posix_acl; struct xfs_dinode; struct xfs_inode; @@ -273,11 +272,6 @@ typedef struct xfs_inode { /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ -#ifdef CONFIG_XFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif - /* Trace buffers per inode. */ #ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ diff --git a/trunk/fs/xfs/xfs_iomap.c b/trunk/fs/xfs/xfs_iomap.c index 67ae5555a30a..5aaa2d7ec155 100644 --- a/trunk/fs/xfs/xfs_iomap.c +++ b/trunk/fs/xfs/xfs_iomap.c @@ -42,6 +42,7 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -384,7 +385,7 @@ xfs_iomap_write_direct( * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. 
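 * (With the qmops table this is expressed by passing XFS_QMOPT_ILOCKED * to XFS_QM_DQATTACH rather than calling a separate * xfs_qm_dqattach_locked() entry point.)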
*/ - error = xfs_qm_dqattach_locked(ip, 0); + error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED); if (error) return XFS_ERROR(error); @@ -443,7 +444,8 @@ xfs_iomap_write_direct( if (error) goto error_out; - error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, + qblocks, 0, quota_flag); if (error) goto error1; @@ -493,7 +495,7 @@ xfs_iomap_write_direct( error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); - xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); + XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -580,7 +582,7 @@ xfs_iomap_write_delay( * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. */ - error = xfs_qm_dqattach_locked(ip, 0); + error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); if (error) return XFS_ERROR(error); @@ -682,8 +684,7 @@ xfs_iomap_write_allocate( /* * Make sure that the dquots are there. */ - error = xfs_qm_dqattach(ip, 0); - if (error) + if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/trunk/fs/xfs/xfs_log_recover.c b/trunk/fs/xfs/xfs_log_recover.c index 47da2fb45377..7ba450116d4f 100644 --- a/trunk/fs/xfs/xfs_log_recover.c +++ b/trunk/fs/xfs/xfs_log_recover.c @@ -1975,30 +1975,16 @@ xlog_recover_do_reg_buffer( error = 0; if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { - if (item->ri_buf[i].i_addr == NULL) { - cmn_err(CE_ALERT, - "XFS: NULL dquot in %s.", __func__); - goto next; - } - if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) { - cmn_err(CE_ALERT, - "XFS: dquot too small (%d) in %s.", - item->ri_buf[i].i_len, __func__); - goto next; - } error = xfs_qm_dqcheck((xfs_disk_dquot_t *) item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); - if (error) - goto next; } - - memcpy(xfs_buf_offset(bp, - (uint)bit << XFS_BLI_SHIFT), /* dest */ - item->ri_buf[i].i_addr, /* source */ - nbits<<XFS_BLI_SHIFT); /* length */ + memcpy(xfs_buf_offset(bp, + (uint)bit << XFS_BLI_SHIFT), /* dest */ + item->ri_buf[i].i_addr, /* source */ + nbits<<XFS_BLI_SHIFT); /* length */ @@ xlog_recover_do_dquot_trans( recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; - - if (item->ri_buf[1].i_addr == NULL) { - cmn_err(CE_ALERT, - "XFS: NULL dquot in %s.", __func__); - return XFS_ERROR(EIO); - } - if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) { - cmn_err(CE_ALERT, - "XFS: dquot too small (%d) in %s.", - item->ri_buf[1].i_len, __func__); - return XFS_ERROR(EIO); - } - + ASSERT(recddq); /* * This type of quotas was turned off, so ignore this record. */ diff --git a/trunk/fs/xfs/xfs_mount.c b/trunk/fs/xfs/xfs_mount.c index 5c6f092659c1..65a99725d0cc 100644 --- a/trunk/fs/xfs/xfs_mount.c +++ b/trunk/fs/xfs/xfs_mount.c @@ -959,53 +959,6 @@ xfs_check_sizes(xfs_mount_t *mp) return 0; } -/* - * Clear the quotaflags in memory and in the superblock. - */ -int -xfs_mount_reset_sbqflags( - struct xfs_mount *mp) -{ - int error; - struct xfs_trans *tp; - - mp->m_qflags = 0; - - /* - * It is OK to look at sb_qflags here in mount path, - * without m_sb_lock.
- */ - if (mp->m_sb.sb_qflags == 0) - return 0; - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = 0; - spin_unlock(&mp->m_sb_lock); - - /* - * If the fs is readonly, let the incore superblock run - * with quotas off but don't flush the update out to disk - */ - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; - -#ifdef QUOTADEBUG - xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); -#endif - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); - if (error) { - xfs_trans_cancel(tp, 0); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_mount_reset_sbqflags: Superblock update failed!"); - return error; - } - - xfs_mod_sb(tp, XFS_SB_QFLAGS); - return xfs_trans_commit(tp, 0); -} - /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -1023,8 +976,7 @@ xfs_mountfs( xfs_sb_t *sbp = &(mp->m_sb); xfs_inode_t *rip; __uint64_t resblks; - uint quotamount = 0; - uint quotaflags = 0; + uint quotamount, quotaflags; int error = 0; xfs_mount_common(mp, sbp); @@ -1258,28 +1210,9 @@ xfs_mountfs( /* * Initialise the XFS quota management subsystem for this mount */ - if (XFS_IS_QUOTA_RUNNING(mp)) { - error = xfs_qm_newmount(mp, "amount, "aflags); - if (error) - goto out_rtunmount; - } else { - ASSERT(!XFS_IS_QUOTA_ON(mp)); - - /* - * If a file system had quotas running earlier, but decided to - * mount without -o uquota/pquota/gquota options, revoke the - * quotachecked license. - */ - if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { - cmn_err(CE_NOTE, - "XFS: resetting qflags for filesystem %s", - mp->m_fsname); - - error = xfs_mount_reset_sbqflags(mp); - if (error) - return error; - } - } + error = XFS_QM_INIT(mp, "amount, "aflags); + if (error) + goto out_rtunmount; /* * Finish recovering the file system. This part needed to be @@ -1295,19 +1228,9 @@ xfs_mountfs( /* * Complete the quota initialisation, post-log-replay component. */ - if (quotamount) { - ASSERT(mp->m_qflags == 0); - mp->m_qflags = quotaflags; - - xfs_qm_mount_quotas(mp); - } - -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) - if (XFS_IS_QUOTA_ON(mp)) - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); - else - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); -#endif + error = XFS_QM_MOUNT(mp, quotamount, quotaflags); + if (error) + goto out_rtunmount; /* * Now we are mounted, reserve a small amount of unused space for @@ -1356,7 +1279,12 @@ xfs_unmountfs( __uint64_t resblks; int error; - xfs_qm_unmount_quotas(mp); + /* + * Release dquot that rootinode, rbmino and rsumino might be holding, + * and release the quota inodes. + */ + XFS_QM_UNMOUNT(mp); + xfs_rtunmount_inodes(mp); IRELE(mp->m_rootip); @@ -1371,9 +1299,12 @@ xfs_unmountfs( * need to force the log first. 
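 * (Reclaiming the inodes below also drops any dquot references they * still hold, after which XFS_QM_DQPURGEALL can tear down the * remaining dquots.)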
*/ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC); + + XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); - xfs_qm_unmount(mp); + if (mp->m_quotainfo) + XFS_QM_DONE(mp); /* * Flush out the log synchronously so that we know for sure diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index a5122382afde..d6a64392f983 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -64,8 +64,6 @@ struct xfs_swapext; struct xfs_mru_cache; struct xfs_nameops; struct xfs_ail; -struct xfs_quotainfo; - /* * Prototypes and functions for the Data Migration subsystem. @@ -109,6 +107,86 @@ typedef struct xfs_dmops { (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) +/* + * Prototypes and functions for the Quota Management subsystem. + */ + +struct xfs_dquot; +struct xfs_dqtrxops; +struct xfs_quotainfo; + +typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); +typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); +typedef void (*xfs_qmunmount_t)(struct xfs_mount *); +typedef void (*xfs_qmdone_t)(struct xfs_mount *); +typedef void (*xfs_dqrele_t)(struct xfs_dquot *); +typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); +typedef void (*xfs_dqdetach_t)(struct xfs_inode *); +typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint); +typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *, + struct xfs_inode *, uid_t, gid_t, prid_t, uint, + struct xfs_dquot **, struct xfs_dquot **); +typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot *, struct xfs_dquot *); +typedef int (*xfs_dqvoprename_t)(struct xfs_inode **); +typedef struct xfs_dquot * (*xfs_dqvopchown_t)( + struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot **, struct xfs_dquot *); +typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot *, struct xfs_dquot *, uint); +typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *); +typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); + +typedef struct xfs_qmops { + xfs_qminit_t xfs_qminit; + xfs_qmdone_t xfs_qmdone; + xfs_qmmount_t xfs_qmmount; + xfs_qmunmount_t xfs_qmunmount; + xfs_dqrele_t xfs_dqrele; + xfs_dqattach_t xfs_dqattach; + xfs_dqdetach_t xfs_dqdetach; + xfs_dqpurgeall_t xfs_dqpurgeall; + xfs_dqvopalloc_t xfs_dqvopalloc; + xfs_dqvopcreate_t xfs_dqvopcreate; + xfs_dqvoprename_t xfs_dqvoprename; + xfs_dqvopchown_t xfs_dqvopchown; + xfs_dqvopchownresv_t xfs_dqvopchownresv; + xfs_dqstatvfs_t xfs_dqstatvfs; + xfs_dqsync_t xfs_dqsync; + struct xfs_dqtrxops *xfs_dqtrxops; +} xfs_qmops_t; + +#define XFS_QM_INIT(mp, mnt, fl) \ + (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) +#define XFS_QM_MOUNT(mp, mnt, fl) \ + (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl) +#define XFS_QM_UNMOUNT(mp) \ + (*(mp)->m_qm_ops->xfs_qmunmount)(mp) +#define XFS_QM_DONE(mp) \ + (*(mp)->m_qm_ops->xfs_qmdone)(mp) +#define XFS_QM_DQRELE(mp, dq) \ + (*(mp)->m_qm_ops->xfs_dqrele)(dq) +#define XFS_QM_DQATTACH(mp, ip, fl) \ + (*(mp)->m_qm_ops->xfs_dqattach)(ip, fl) +#define XFS_QM_DQDETACH(mp, ip) \ + (*(mp)->m_qm_ops->xfs_dqdetach)(ip) +#define XFS_QM_DQPURGEALL(mp, fl) \ + (*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl) +#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \ + (*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2) +#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \ + (*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2) 
+#define XFS_QM_DQVOPRENAME(mp, ip) \ + (*(mp)->m_qm_ops->xfs_dqvoprename)(ip) +#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \ + (*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq) +#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \ + (*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl) +#define XFS_QM_DQSTATVFS(ip, statp) \ + (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp) +#define XFS_QM_DQSYNC(mp, flags) \ + (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags) + #ifdef HAVE_PERCPU_SB /* @@ -432,6 +510,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dmops_get(struct xfs_mount *); extern void xfs_dmops_put(struct xfs_mount *); +extern int xfs_qmops_get(struct xfs_mount *); +extern void xfs_qmops_put(struct xfs_mount *); extern struct xfs_dmops xfs_dmcore_xfs; diff --git a/trunk/fs/xfs/xfs_qmops.c b/trunk/fs/xfs/xfs_qmops.c new file mode 100644 index 000000000000..e101790ea8e7 --- /dev/null +++ b/trunk/fs/xfs/xfs_qmops.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_quota.h" +#include "xfs_error.h" + + +STATIC struct xfs_dquot * +xfs_dqvopchown_default( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_dquot **dqp, + struct xfs_dquot *dq) +{ + return NULL; +} + +/* + * Clear the quotaflags in memory and in the superblock. + */ +int +xfs_mount_reset_sbqflags(xfs_mount_t *mp) +{ + int error; + xfs_trans_t *tp; + + mp->m_qflags = 0; + /* + * It is OK to look at sb_qflags here in mount path, + * without m_sb_lock. 
+ */ + if (mp->m_sb.sb_qflags == 0) + return 0; + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = 0; + spin_unlock(&mp->m_sb_lock); + + /* + * if the fs is readonly, let the incore superblock run + * with quotas off but don't flush the update out to disk + */ + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; +#ifdef QUOTADEBUG + xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); +#endif + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + xfs_fs_cmn_err(CE_ALERT, mp, + "xfs_mount_reset_sbqflags: Superblock update failed!"); + return error; + } + xfs_mod_sb(tp, XFS_SB_QFLAGS); + error = xfs_trans_commit(tp, 0); + return error; +} + +STATIC int +xfs_noquota_init( + xfs_mount_t *mp, + uint *needquotamount, + uint *quotaflags) +{ + int error = 0; + + *quotaflags = 0; + *needquotamount = B_FALSE; + + ASSERT(!XFS_IS_QUOTA_ON(mp)); + + /* + * If a file system had quotas running earlier, but decided to + * mount without -o uquota/pquota/gquota options, revoke the + * quotachecked license. + */ + if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { + cmn_err(CE_NOTE, + "XFS resetting qflags for filesystem %s", + mp->m_fsname); + + error = xfs_mount_reset_sbqflags(mp); + } + return error; +} + +static struct xfs_qmops xfs_qmcore_stub = { + .xfs_qminit = (xfs_qminit_t) xfs_noquota_init, + .xfs_qmdone = (xfs_qmdone_t) fs_noerr, + .xfs_qmmount = (xfs_qmmount_t) fs_noerr, + .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, + .xfs_dqrele = (xfs_dqrele_t) fs_noerr, + .xfs_dqattach = (xfs_dqattach_t) fs_noerr, + .xfs_dqdetach = (xfs_dqdetach_t) fs_noerr, + .xfs_dqpurgeall = (xfs_dqpurgeall_t) fs_noerr, + .xfs_dqvopalloc = (xfs_dqvopalloc_t) fs_noerr, + .xfs_dqvopcreate = (xfs_dqvopcreate_t) fs_noerr, + .xfs_dqvoprename = (xfs_dqvoprename_t) fs_noerr, + .xfs_dqvopchown = xfs_dqvopchown_default, + .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr, + .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval, + .xfs_dqsync = (xfs_dqsync_t) fs_noerr, +}; + +int +xfs_qmops_get(struct xfs_mount *mp) +{ + if (XFS_IS_QUOTA_RUNNING(mp)) { +#ifdef CONFIG_XFS_QUOTA + mp->m_qm_ops = &xfs_qmcore_xfs; +#else + cmn_err(CE_WARN, + "XFS: qouta support not available in this kernel."); + return EINVAL; +#endif + } else { + mp->m_qm_ops = &xfs_qmcore_stub; + } + + return 0; +} + +void +xfs_qmops_put(struct xfs_mount *mp) +{ +} diff --git a/trunk/fs/xfs/xfs_quota.h b/trunk/fs/xfs/xfs_quota.h index 3ec91ac74c2a..f5d1202dde25 100644 --- a/trunk/fs/xfs/xfs_quota.h +++ b/trunk/fs/xfs/xfs_quota.h @@ -197,6 +197,7 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */ #define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */ #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ +#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */ #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ @@ -301,79 +302,69 @@ typedef struct xfs_dqtrx { long qt_delrtb_delta; /* delayed RT blk count changes */ } xfs_dqtrx_t; -#ifdef CONFIG_XFS_QUOTA -extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *); -extern void xfs_trans_free_dqinfo(struct xfs_trans *); -extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, - uint, long); -extern void 
xfs_trans_apply_dquot_deltas(struct xfs_trans *); -extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); -extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, - struct xfs_inode *, long, long, uint); -extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, - struct xfs_mount *, struct xfs_dquot *, - struct xfs_dquot *, long, long, uint); - -extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, - struct xfs_dquot **, struct xfs_dquot **); -extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *); -extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); -extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, - struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); -extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *, uint); -extern int xfs_qm_dqattach(struct xfs_inode *, uint); -extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); -extern void xfs_qm_dqdetach(struct xfs_inode *); -extern void xfs_qm_dqrele(struct xfs_dquot *); -extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); -extern int xfs_qm_sync(struct xfs_mount *, int); -extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); -extern void xfs_qm_mount_quotas(struct xfs_mount *); -extern void xfs_qm_unmount(struct xfs_mount *); -extern void xfs_qm_unmount_quotas(struct xfs_mount *); - -#else -static inline int -xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, - uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) -{ - *udqp = NULL; - *gdqp = NULL; - return 0; -} -#define xfs_trans_dup_dqinfo(tp, tp2) -#define xfs_trans_free_dqinfo(tp) -#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) -#define xfs_trans_apply_dquot_deltas(tp) -#define xfs_trans_unreserve_and_mod_dquots(tp) -#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0) -#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0) -#define xfs_qm_vop_create_dqattach(tp, ip, u, g) -#define xfs_qm_vop_rename_dqattach(it) (0) -#define xfs_qm_vop_chown(tp, ip, old, new) (NULL) -#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0) -#define xfs_qm_dqattach(ip, fl) (0) -#define xfs_qm_dqattach_locked(ip, fl) (0) -#define xfs_qm_dqdetach(ip) -#define xfs_qm_dqrele(d) -#define xfs_qm_statvfs(ip, s) -#define xfs_qm_sync(mp, fl) (0) -#define xfs_qm_newmount(mp, a, b) (0) -#define xfs_qm_mount_quotas(mp) -#define xfs_qm_unmount(mp) -#define xfs_qm_unmount_quotas(mp) (0) -#endif /* CONFIG_XFS_QUOTA */ - -#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ - xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) -#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ - xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ +/* + * Dquot transaction functions, used if quota is enabled. 
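+ * Each op is reached through the XFS_DQTRXOP()/XFS_DQTRXOP_VOID() + * dispatch macros below, which degrade to 0 or a no-op when the + * mount's m_qm_ops->xfs_dqtrxops pointer is NULL.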
+ */ +typedef void (*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *); +typedef void (*qo_mod_dquot_byino_t)(struct xfs_trans *, + struct xfs_inode *, uint, long); +typedef void (*qo_free_dqinfo_t)(struct xfs_trans *); +typedef void (*qo_apply_dquot_deltas_t)(struct xfs_trans *); +typedef void (*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *); +typedef int (*qo_reserve_quota_nblks_t)( + struct xfs_trans *, struct xfs_mount *, + struct xfs_inode *, long, long, uint); +typedef int (*qo_reserve_quota_bydquots_t)( + struct xfs_trans *, struct xfs_mount *, + struct xfs_dquot *, struct xfs_dquot *, + long, long, uint); +typedef struct xfs_dqtrxops { + qo_dup_dqinfo_t qo_dup_dqinfo; + qo_free_dqinfo_t qo_free_dqinfo; + qo_mod_dquot_byino_t qo_mod_dquot_byino; + qo_apply_dquot_deltas_t qo_apply_dquot_deltas; + qo_reserve_quota_nblks_t qo_reserve_quota_nblks; + qo_reserve_quota_bydquots_t qo_reserve_quota_bydquots; + qo_unreserve_and_mod_dquots_t qo_unreserve_and_mod_dquots; +} xfs_dqtrxops_t; + +#define XFS_DQTRXOP(mp, tp, op, args...) \ + ((mp)->m_qm_ops->xfs_dqtrxops ? \ + ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0) + +#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \ + ((mp)->m_qm_ops->xfs_dqtrxops ? \ + ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0) + +#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \ + XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp) +#define XFS_TRANS_FREE_DQINFO(mp, tp) \ + XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo) +#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \ + XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta) +#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \ + XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas) +#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \ + XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl) +#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \ + XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl) +#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \ + XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots) + +#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \ + XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags) +#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ + XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \ + f | XFS_QMOPT_RES_REGBLKS) +#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ + XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \ f | XFS_QMOPT_RES_REGBLKS) extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); extern int xfs_mount_reset_sbqflags(struct xfs_mount *); +extern struct xfs_qmops xfs_qmcore_xfs; + #endif /* __KERNEL__ */ + #endif /* __XFS_QUOTA_H__ */ diff --git a/trunk/fs/xfs/xfs_rename.c b/trunk/fs/xfs/xfs_rename.c index b81deea0ce19..58f85e9cd11d 100644 --- a/trunk/fs/xfs/xfs_rename.c +++ b/trunk/fs/xfs/xfs_rename.c @@ -166,8 +166,7 @@ xfs_rename( /* * Attach the dquots to the inodes */ - error = xfs_qm_vop_rename_dqattach(inodes); - if (error) { + if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); goto std_return; } diff --git a/trunk/fs/xfs/xfs_rw.c b/trunk/fs/xfs/xfs_rw.c index fea68615ed23..36f3a21c54d2 100644 --- a/trunk/fs/xfs/xfs_rw.c +++ b/trunk/fs/xfs/xfs_rw.c @@ -41,6 +41,7 @@ #include "xfs_ialloc.h" #include "xfs_attr.h" #include "xfs_bmap.h" +#include "xfs_acl.h" #include "xfs_error.h" #include "xfs_buf_item.h" #include "xfs_rw.h" diff --git 
diff --git a/trunk/fs/xfs/xfs_trans.c b/trunk/fs/xfs/xfs_trans.c
index 66b849358e62..bcc39d358ad3 100644
--- a/trunk/fs/xfs/xfs_trans.c
+++ b/trunk/fs/xfs/xfs_trans.c
@@ -297,7 +297,7 @@ xfs_trans_dup(
 	tp->t_rtx_res = tp->t_rtx_res_used;
 	ntp->t_pflags = tp->t_pflags;
 
-	xfs_trans_dup_dqinfo(tp, ntp);
+	XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
 
 	atomic_inc(&tp->t_mountp->m_active_trans);
 	return ntp;
@@ -829,7 +829,7 @@ _xfs_trans_commit(
 	 * means is that we have some (non-persistent) quota
 	 * reservations that need to be unreserved.
 	 */
-	xfs_trans_unreserve_and_mod_dquots(tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
 	if (tp->t_ticket) {
 		commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
@@ -848,9 +848,10 @@ _xfs_trans_commit(
 	/*
 	 * If we need to update the superblock, then do it now.
 	 */
-	if (tp->t_flags & XFS_TRANS_SB_DIRTY)
+	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
 		xfs_trans_apply_sb_deltas(tp);
-	xfs_trans_apply_dquot_deltas(tp);
+	}
+	XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp);
 
 	/*
 	 * Ask each log item how many log_vector entries it will
@@ -1055,7 +1056,7 @@ xfs_trans_uncommit(
 	}
 
 	xfs_trans_unreserve_and_mod_sb(tp);
-	xfs_trans_unreserve_and_mod_dquots(tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
 
 	xfs_trans_free_items(tp, flags);
 	xfs_trans_free_busy(tp);
@@ -1180,7 +1181,7 @@ xfs_trans_cancel(
 	}
 #endif
 	xfs_trans_unreserve_and_mod_sb(tp);
-	xfs_trans_unreserve_and_mod_dquots(tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
 
 	if (tp->t_ticket) {
 		if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1210,7 +1211,7 @@ xfs_trans_free(
 	xfs_trans_t	*tp)
 {
 	atomic_dec(&tp->t_mountp->m_active_trans);
-	xfs_trans_free_dqinfo(tp);
+	XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
 	kmem_zone_free(xfs_trans_zone, tp);
 }
diff --git a/trunk/fs/xfs/xfs_utils.c b/trunk/fs/xfs/xfs_utils.c
index 4d88616bde91..79b9e5ea5359 100644
--- a/trunk/fs/xfs/xfs_utils.c
+++ b/trunk/fs/xfs/xfs_utils.c
@@ -166,7 +166,7 @@ xfs_dir_ialloc(
 		xfs_buf_relse(ialloc_context);
 		if (dqinfo) {
 			tp->t_dqinfo = dqinfo;
-			xfs_trans_free_dqinfo(tp);
+			XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
 		}
 		*tpp = ntp;
 		*ipp = NULL;
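[Editor's note: besides swapping in the XFS_QM_*/XFS_TRANS_* macros, the xfs_vnodeops.c hunks that follow repeatedly fold the two-statement error check back into the assignment-in-condition form this code used before. A small sketch contrasting the two idioms; demo_dqattach() is an illustrative stand-in for the quota call, not an XFS function.]

#include <stdio.h>

/* Illustrative stand-in for a quota attach call; returns 0 or an errno. */
static int demo_dqattach(int fail)
{
	return fail ? 22 : 0;	/* 22 == EINVAL, purely for the demo */
}

int main(void)
{
	int error;

	/* Two-statement form, as the tree had before this patch: */
	error = demo_dqattach(0);
	if (error)
		return error;

	/*
	 * Assignment-in-condition form restored by the hunks below; the
	 * doubled parentheses tell the compiler the assignment is intended,
	 * silencing the "assignment used as truth value" warning.
	 */
	if ((error = demo_dqattach(0)))
		return error;

	printf("both forms succeeded\n");
	return 0;
}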
diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c
index c4eca5ed5dab..19cf90a9c762 100644
--- a/trunk/fs/xfs/xfs_vnodeops.c
+++ b/trunk/fs/xfs/xfs_vnodeops.c
@@ -42,7 +42,6 @@
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_rw.h"
 #include "xfs_error.h"
@@ -119,7 +118,7 @@ xfs_setattr(
 	 */
 	ASSERT(udqp == NULL);
 	ASSERT(gdqp == NULL);
-	code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
+	code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
 				  qflags, &udqp, &gdqp);
 	if (code)
 		return code;
@@ -181,11 +180,10 @@ xfs_setattr(
 		 * Do a quota reservation only if uid/gid is actually
 		 * going to change.
 		 */
-		if (XFS_IS_QUOTA_RUNNING(mp) &&
-		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
 			ASSERT(tp);
-			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
 						capable(CAP_FOWNER) ?
 						XFS_QMOPT_FORCE_RES : 0);
 			if (code)	/* out of quota */
@@ -219,7 +217,7 @@ xfs_setattr(
 	/*
 	 * Make sure that the dquots are attached to the inode.
 	 */
-	code = xfs_qm_dqattach_locked(ip, 0);
+	code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
 	if (code)
 		goto error_return;
@@ -353,21 +351,21 @@ xfs_setattr(
 	 * in the transaction.
 	 */
 	if (iuid != uid) {
-		if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+		if (XFS_IS_UQUOTA_ON(mp)) {
 			ASSERT(mask & ATTR_UID);
 			ASSERT(udqp);
-			olddquot1 = xfs_qm_vop_chown(tp, ip,
+			olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
 						&ip->i_udquot, udqp);
 		}
 		ip->i_d.di_uid = uid;
 		inode->i_uid = uid;
 	}
 	if (igid != gid) {
-		if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+		if (XFS_IS_GQUOTA_ON(mp)) {
 			ASSERT(!XFS_IS_PQUOTA_ON(mp));
 			ASSERT(mask & ATTR_GID);
 			ASSERT(gdqp);
-			olddquot2 = xfs_qm_vop_chown(tp, ip,
+			olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
 						&ip->i_gdquot, gdqp);
 		}
 		ip->i_d.di_gid = gid;
@@ -463,25 +461,13 @@ xfs_setattr(
 	/*
 	 * Release any dquot(s) the inode had kept before chown.
 	 */
-	xfs_qm_dqrele(olddquot1);
-	xfs_qm_dqrele(olddquot2);
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, olddquot1);
+	XFS_QM_DQRELE(mp, olddquot2);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
-	if (code)
+	if (code) {
 		return code;
-
-	/*
-	 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
-	 *	     update.  We could avoid this with linked transactions
-	 *	     and passing down the transaction pointer all the way
-	 *	     to attr_set.  No previous user of the generic
-	 *	     Posix ACL code seems to care about this issue either.
-	 */
-	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
-		code = -xfs_acl_chmod(inode);
-		if (code)
-			return XFS_ERROR(code);
 	}
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
@@ -496,8 +482,8 @@ xfs_setattr(
 	commit_flags |= XFS_TRANS_ABORT;
 	/* FALLTHROUGH */
  error_return:
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 	if (tp) {
 		xfs_trans_cancel(tp, commit_flags);
 	}
@@ -753,8 +739,7 @@ xfs_free_eofblocks(
 	/*
 	 * Attach the dquots to the inode up front.
 	 */
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
 	/*
@@ -1196,8 +1181,7 @@ xfs_inactive(
 
 	ASSERT(ip->i_d.di_nlink == 0);
 
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return VN_INACTIVE_CACHE;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1323,7 +1307,7 @@ xfs_inactive(
 	/*
 	 * Credit the quota account(s). The inode is gone.
 	 */
-	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
+	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
 
 	/*
 	 * Just ignore errors at this point.  There is nothing we can
@@ -1339,11 +1323,11 @@ xfs_inactive(
 		xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
 			"xfs_trans_commit() returned error %d", error);
 	}
-
 	/*
 	 * Release the dquots held by inode, if any.
 	 */
-	xfs_qm_dqdetach(ip);
+	XFS_QM_DQDETACH(mp, ip);
+
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 
  out:
@@ -1443,7 +1427,8 @@ xfs_create(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
+	error = XFS_QM_DQVOPALLOC(mp, dp,
+			current_fsuid(), current_fsgid(), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -1504,7 +1489,7 @@ xfs_create(
 	/*
 	 * Reserve disk quota and the inode.
 	 */
-	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
+	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
 	if (error)
 		goto out_trans_cancel;
@@ -1576,7 +1561,7 @@ xfs_create(
 	 * These ids of the inode couldn't have changed since the new
 	 * inode has been locked ever since it was created.
 	 */
-	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
+	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
 
 	/*
 	 * xfs_trans_commit normally decrements the vnode ref count
@@ -1595,8 +1580,8 @@ xfs_create(
 		goto out_dqrele;
 	}
 
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	*ipp = ip;
 
@@ -1617,8 +1602,8 @@ xfs_create(
  out_trans_cancel:
 	xfs_trans_cancel(tp, cancel_flags);
  out_dqrele:
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	if (unlock_dp_on_error)
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -1852,11 +1837,11 @@ xfs_remove(
 		return error;
 	}
 
-	error = xfs_qm_dqattach(dp, 0);
+	error = XFS_QM_DQATTACH(mp, dp, 0);
 	if (error)
 		goto std_return;
 
-	error = xfs_qm_dqattach(ip, 0);
+	error = XFS_QM_DQATTACH(mp, ip, 0);
 	if (error)
 		goto std_return;
 
@@ -2043,11 +2028,11 @@ xfs_link(
 
 	/* Return through std_return after this point. */
 
-	error = xfs_qm_dqattach(sip, 0);
+	error = XFS_QM_DQATTACH(mp, sip, 0);
 	if (error)
 		goto std_return;
 
-	error = xfs_qm_dqattach(tdp, 0);
+	error = XFS_QM_DQATTACH(mp, tdp, 0);
 	if (error)
 		goto std_return;
 
@@ -2220,7 +2205,8 @@ xfs_symlink(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
+	error = XFS_QM_DQVOPALLOC(mp, dp,
+			current_fsuid(), current_fsgid(), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -2262,7 +2248,7 @@ xfs_symlink(
 	/*
 	 * Reserve disk quota : blocks and inode.
 	 */
-	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
+	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
 	if (error)
 		goto error_return;
@@ -2302,7 +2288,7 @@ xfs_symlink(
 	/*
 	 * Also attach the dquot(s) to it, if applicable.
 	 */
-	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
+	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
 
 	if (resblks)
 		resblks -= XFS_IALLOC_SPACE_RES(mp);
@@ -2390,8 +2376,8 @@ xfs_symlink(
 		goto error2;
 	}
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	/* Fall through to std_return with error = 0 or errno from
 	 * xfs_trans_commit	*/
@@ -2415,8 +2401,8 @@ xfs_symlink(
 	cancel_flags |= XFS_TRANS_ABORT;
  error_return:
 	xfs_trans_cancel(tp, cancel_flags);
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	if (unlock_dp_on_error)
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -2555,8 +2541,7 @@ xfs_alloc_file_space(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
 	if (len <= 0)
@@ -2643,8 +2628,8 @@ xfs_alloc_file_space(
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
-						      0, quota_flag);
+		error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+						      qblocks, 0, quota_flag);
 		if (error)
 			goto error1;
 
@@ -2703,7 +2688,7 @@ xfs_alloc_file_space(
 error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
-	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
+	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
 
 error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2842,8 +2827,7 @@ xfs_free_file_space(
 
 	xfs_itrace_entry(ip);
 
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
 	error = 0;
@@ -2969,9 +2953,9 @@ xfs_free_file_space(
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_trans_reserve_quota(tp, mp,
-				ip->i_udquot, ip->i_gdquot,
-				resblks, 0, XFS_QMOPT_RES_REGBLKS);
+		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
+				ip->i_udquot, ip->i_gdquot, resblks, 0,
+				XFS_QMOPT_RES_REGBLKS);
 		if (error)
 			goto error1;
 
diff --git a/trunk/fs/xfs/xfs_vnodeops.h b/trunk/fs/xfs/xfs_vnodeops.h
index a9e102de71a1..04373c6c61ff 100644
--- a/trunk/fs/xfs/xfs_vnodeops.h
+++ b/trunk/fs/xfs/xfs_vnodeops.h
@@ -18,7 +18,6 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
 #define XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
 #define XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if operation would block */
 #define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
-#define XFS_ATTR_NOACL		0x08	/* Don't call xfs_acl_chmod */
 
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_fsync(struct xfs_inode *ip);
diff --git a/trunk/kernel/timer.c b/trunk/kernel/timer.c
index faf2db897de4..c01e568935ea 100644
--- a/trunk/kernel/timer.c
+++ b/trunk/kernel/timer.c
@@ -757,7 +757,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
 		wake_up_idle_cpu(cpu);
 	spin_unlock_irqrestore(&base->lock, flags);
 }
-EXPORT_SYMBOL_GPL(add_timer_on);
 
 /**
  * del_timer - deactive a timer.