From 6dda9266913ad57e09afc1a10d6473f10c806a63 Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Thu, 11 Aug 2011 15:14:39 -0700
Subject: [PATCH 1/3] pstore: defer inserting OOPS entries into pstore

Life is simple for all the kernel terminating types of kmsg_dump
call backs - pstore just saves the tail end of the console log. But
for "oops" the situation is more complex - the kernel may carry on
running (possibly for ever).  So we'd like to make the logged copy
of the oops appear in the pstore filesystem - so that the user has
a handle to clear the entry from the persistent backing store (if
we don't, the store may fill with "oops" entries (that are also
safely stashed in /var/log/messages) leaving no space for real
errors.

Current code calls pstore_mkfile() immediately. But this may
not be safe. The oops could have happened with arbitrary locks
held, or in interrupt or NMI context. So allocating memory and
calling into generic filesystem code seems unwise.

This patch defers making the entry appear. At the time
of the oops, we merely set a flag "pstore_new_entry" noting that
a new entry has been added. A periodic timer checks once a minute
to see if the flag is set - if so, it schedules a work queue to
rescan the backing store and make all new entries appear in the
pstore filesystem.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 fs/pstore/inode.c    | 40 ++++++++++++++++++++++++++++----
 fs/pstore/internal.h |  2 +-
 fs/pstore/platform.c | 54 ++++++++++++++++++++++++++++++++++++--------
 3 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 893b961dcfd8b..379a02dc1217a 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -24,6 +24,7 @@
 #include <linux/highmem.h>
 #include <linux/time.h>
 #include <linux/init.h>
+#include <linux/list.h>
 #include <linux/string.h>
 #include <linux/mount.h>
 #include <linux/ramfs.h>
@@ -32,13 +33,18 @@
 #include <linux/magic.h>
 #include <linux/pstore.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/uaccess.h>
 
 #include "internal.h"
 
 #define	PSTORE_NAMELEN	64
 
+static DEFINE_SPINLOCK(allpstore_lock);
+static LIST_HEAD(allpstore);
+
 struct pstore_private {
+	struct list_head list;
 	struct pstore_info *psi;
 	enum pstore_type_id type;
 	u64	id;
@@ -81,8 +87,16 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
 
 static void pstore_evict_inode(struct inode *inode)
 {
+	struct pstore_private	*p = inode->i_private;
+	unsigned long		flags;
+
 	end_writeback(inode);
-	kfree(inode->i_private);
+	if (p) {
+		spin_lock_irqsave(&allpstore_lock, flags);
+		list_del(&p->list);
+		spin_unlock_irqrestore(&allpstore_lock, flags);
+		kfree(p);
+	}
 }
 
 static const struct inode_operations pstore_dir_inode_operations = {
@@ -182,9 +196,23 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
 	struct dentry		*root = pstore_sb->s_root;
 	struct dentry		*dentry;
 	struct inode		*inode;
-	int			rc;
+	int			rc = 0;
 	char			name[PSTORE_NAMELEN];
-	struct pstore_private	*private;
+	struct pstore_private	*private, *pos;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&allpstore_lock, flags);
+	list_for_each_entry(pos, &allpstore, list) {
+		if (pos->type == type &&
+		    pos->id == id &&
+		    pos->psi == psi) {
+			rc = -EEXIST;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&allpstore_lock, flags);
+	if (rc)
+		return rc;
 
 	rc = -ENOMEM;
 	inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0);
@@ -229,6 +257,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
 
 	d_add(dentry, inode);
 
+	spin_lock_irqsave(&allpstore_lock, flags);
+	list_add(&private->list, &allpstore);
+	spin_unlock_irqrestore(&allpstore_lock, flags);
+
 	mutex_unlock(&root->d_inode->i_mutex);
 
 	return 0;
@@ -277,7 +309,7 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent)
 		goto fail;
 	}
 
-	pstore_get_records();
+	pstore_get_records(0);
 
 	return 0;
 fail:
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 611c1b3c46fae..3bde461c3f349 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -1,5 +1,5 @@
 extern void	pstore_set_kmsg_bytes(int);
-extern void	pstore_get_records(void);
+extern void	pstore_get_records(int);
 extern int	pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
 			      char *data, size_t size,
 			      struct timespec time, struct pstore_info *psi);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index c5300ec316965..ca60ebcfb15f8 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -25,11 +25,28 @@
 #include <linux/module.h>
 #include <linux/pstore.h>
 #include <linux/string.h>
+#include <linux/timer.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/workqueue.h>
 
 #include "internal.h"
 
+/*
+ * We defer making "oops" entries appear in pstore - see
+ * whether the system is actually still running well enough
+ * to let someone see the entry
+ */
+#define	PSTORE_INTERVAL	(60 * HZ)
+
+static int pstore_new_entry;
+
+static void pstore_timefunc(unsigned long);
+static DEFINE_TIMER(pstore_timer, pstore_timefunc, 0, 0);
+
+static void pstore_dowork(struct work_struct *);
+static DECLARE_WORK(pstore_work, pstore_dowork);
+
 /*
  * pstore_lock just protects "psinfo" during
  * calls to pstore_register()
@@ -100,9 +117,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		id = psinfo->write(PSTORE_TYPE_DMESG, part,
 				   hsize + l1_cpy + l2_cpy, psinfo);
 		if (reason == KMSG_DUMP_OOPS && pstore_is_mounted())
-			pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id,
-				      psinfo->buf, hsize + l1_cpy + l2_cpy,
-				      CURRENT_TIME, psinfo);
+			pstore_new_entry = 1;
 		l1 -= l1_cpy;
 		l2 -= l2_cpy;
 		total += l1_cpy + l2_cpy;
@@ -148,19 +163,24 @@ int pstore_register(struct pstore_info *psi)
 	}
 
 	if (pstore_is_mounted())
-		pstore_get_records();
+		pstore_get_records(0);
 
 	kmsg_dump_register(&pstore_dumper);
 
+	pstore_timer.expires = jiffies + PSTORE_INTERVAL;
+	add_timer(&pstore_timer);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pstore_register);
 
 /*
- * Read all the records from the persistent store. Create and
- * file files in our filesystem.
+ * Read all the records from the persistent store. Create
+ * files in our filesystem.  Don't warn about -EEXIST errors
+ * when we are re-scanning the backing store looking to add new
+ * error records.
  */
-void pstore_get_records(void)
+void pstore_get_records(int quiet)
 {
 	struct pstore_info *psi = psinfo;
 	ssize_t			size;
@@ -178,8 +198,9 @@ void pstore_get_records(void)
 		goto out;
 
 	while ((size = psi->read(&id, &type, &time, psi)) > 0) {
-		if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
-				  time, psi))
+		rc = pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
+				  time, psi);
+		if (rc && (rc != -EEXIST || !quiet))
 			failed++;
 	}
 	psi->close(psi);
@@ -191,6 +212,21 @@ void pstore_get_records(void)
 		       failed, psi->name);
 }
 
+static void pstore_dowork(struct work_struct *work)
+{
+	pstore_get_records(1);
+}
+
+static void pstore_timefunc(unsigned long dummy)
+{
+	if (pstore_new_entry) {
+		pstore_new_entry = 0;
+		schedule_work(&pstore_work);
+	}
+
+	mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL);
+}
+
 /*
  * Call platform driver to write a record to the
  * persistent store.

From abd4d5587be911f63592537284dad78766d97d62 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 12 Aug 2011 10:54:51 -0700
Subject: [PATCH 2/3] pstore: change mutex locking to spin_locks

pstore was using mutex locking to protect read/write access to the
backend plug-ins.  This causes problems when pstore is executed in
an NMI context through panic() -> kmsg_dump().

This patch changes the mutex to a spin_lock_irqsave then also checks to
see if we are in an NMI context.  If we are in an NMI and can't get the
lock, just print a message stating that and blow by the locking.

All this is probably a hack around the bigger locking problem but it
solves my current situation of trying to sleep in an NMI context.

Tested by loading the lkdtm module and executing a HARDLOCKUP which
will cause the machine to panic inside the nmi handler.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/erst.c   |  2 +-
 drivers/firmware/efivars.c |  2 +-
 fs/pstore/platform.c       | 28 +++++++++++++++++++++-------
 include/linux/pstore.h     |  2 +-
 4 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 2ca59dc69f7f6..5e820ea355703 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1165,7 +1165,7 @@ static int __init erst_init(void)
 		goto err_release_erange;
 
 	buf = kmalloc(erst_erange.size, GFP_KERNEL);
-	mutex_init(&erst_info.buf_mutex);
+	spin_lock_init(&erst_info.buf_lock);
 	if (buf) {
 		erst_info.buf = buf + sizeof(struct cper_pstore_record);
 		erst_info.bufsize = erst_erange.size -
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index eb80b549ed8d1..be8bcb035e2a3 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -978,7 +978,7 @@ int register_efivars(struct efivars *efivars,
 	if (efivars->efi_pstore_info.buf) {
 		efivars->efi_pstore_info.bufsize = 1024;
 		efivars->efi_pstore_info.data = efivars;
-		mutex_init(&efivars->efi_pstore_info.buf_mutex);
+		spin_lock_init(&efivars->efi_pstore_info.buf_lock);
 		pstore_register(&efivars->efi_pstore_info);
 	}
 
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index ca60ebcfb15f8..0472924024cc2 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -28,6 +28,7 @@
 #include <linux/timer.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/hardirq.h>
 #include <linux/workqueue.h>
 
 #include "internal.h"
@@ -88,13 +89,20 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 	u64		id;
 	int		hsize;
 	unsigned int	part = 1;
+	unsigned long	flags = 0;
+	int		is_locked = 0;
 
 	if (reason < ARRAY_SIZE(reason_str))
 		why = reason_str[reason];
 	else
 		why = "Unknown";
 
-	mutex_lock(&psinfo->buf_mutex);
+	if (in_nmi()) {
+		is_locked = spin_trylock(&psinfo->buf_lock);
+		if (!is_locked)
+			pr_err("pstore dump routine blocked in NMI, may corrupt error record\n");
+	} else
+		spin_lock_irqsave(&psinfo->buf_lock, flags);
 	oopscount++;
 	while (total < kmsg_bytes) {
 		dst = psinfo->buf;
@@ -123,7 +131,11 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		total += l1_cpy + l2_cpy;
 		part++;
 	}
-	mutex_unlock(&psinfo->buf_mutex);
+	if (in_nmi()) {
+		if (is_locked)
+			spin_unlock(&psinfo->buf_lock);
+	} else
+		spin_unlock_irqrestore(&psinfo->buf_lock, flags);
 }
 
 static struct kmsg_dumper pstore_dumper = {
@@ -188,11 +200,12 @@ void pstore_get_records(int quiet)
 	enum pstore_type_id	type;
 	struct timespec		time;
 	int			failed = 0, rc;
+	unsigned long		flags;
 
 	if (!psi)
 		return;
 
-	mutex_lock(&psinfo->buf_mutex);
+	spin_lock_irqsave(&psinfo->buf_lock, flags);
 	rc = psi->open(psi);
 	if (rc)
 		goto out;
@@ -205,7 +218,7 @@ void pstore_get_records(int quiet)
 	}
 	psi->close(psi);
 out:
-	mutex_unlock(&psinfo->buf_mutex);
+	spin_unlock_irqrestore(&psinfo->buf_lock, flags);
 
 	if (failed)
 		printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
@@ -233,7 +246,8 @@ static void pstore_timefunc(unsigned long dummy)
  */
 int pstore_write(enum pstore_type_id type, char *buf, size_t size)
 {
-	u64	id;
+	u64		id;
+	unsigned long	flags;
 
 	if (!psinfo)
 		return -ENODEV;
@@ -241,13 +255,13 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size)
 	if (size > psinfo->bufsize)
 		return -EFBIG;
 
-	mutex_lock(&psinfo->buf_mutex);
+	spin_lock_irqsave(&psinfo->buf_lock, flags);
 	memcpy(psinfo->buf, buf, size);
 	id = psinfo->write(type, 0, size, psinfo);
 	if (pstore_is_mounted())
 		pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
 			      size, CURRENT_TIME, psinfo);
-	mutex_unlock(&psinfo->buf_mutex);
+	spin_unlock_irqrestore(&psinfo->buf_lock, flags);
 
 	return 0;
 }
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index cc03bbf5c4b8e..b91440e64d6e6 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -32,7 +32,7 @@ enum pstore_type_id {
 struct pstore_info {
 	struct module	*owner;
 	char		*name;
-	struct mutex	buf_mutex;	/* serialize access to 'buf' */
+	spinlock_t	buf_lock;	/* serialize access to 'buf' */
 	char		*buf;
 	size_t		bufsize;
 	int		(*open)(struct pstore_info *psi);

From b238b8fa93353ab50c9a2b1e2fa47a0ab01c37cd Mon Sep 17 00:00:00 2001
From: Chen Gong <gong.chen@linux.intel.com>
Date: Wed, 12 Oct 2011 09:17:24 -0700
Subject: [PATCH 3/3] pstore: make pstore write function return normal
 success/fail value

Currently pstore write interface employs record id as return
value, but it is not enough because it can't tell caller if
the write operation is successful. Pass the record id back via
an argument pointer and return zero for success, non-zero for
failure.

Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/erst.c   | 10 ++++++----
 drivers/firmware/efivars.c | 17 +++++++++--------
 fs/pstore/platform.c       | 11 ++++++-----
 include/linux/pstore.h     |  4 ++--
 4 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 5e820ea355703..127408069ca7f 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -933,7 +933,7 @@ static int erst_open_pstore(struct pstore_info *psi);
 static int erst_close_pstore(struct pstore_info *psi);
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
 			   struct timespec *time, struct pstore_info *psi);
-static u64 erst_writer(enum pstore_type_id type, unsigned int part,
+static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part,
 		       size_t size, struct pstore_info *psi);
 static int erst_clearer(enum pstore_type_id type, u64 id,
 			struct pstore_info *psi);
@@ -1040,11 +1040,12 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
 	return (rc < 0) ? rc : (len - sizeof(*rcd));
 }
 
-static u64 erst_writer(enum pstore_type_id type, unsigned int part,
+static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part,
 		       size_t size, struct pstore_info *psi)
 {
 	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
 					(erst_info.buf - sizeof(*rcd));
+	int ret;
 
 	memset(rcd, 0, sizeof(*rcd));
 	memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
@@ -1079,9 +1080,10 @@ static u64 erst_writer(enum pstore_type_id type, unsigned int part,
 	}
 	rcd->sec_hdr.section_severity = CPER_SEV_FATAL;
 
-	erst_write(&rcd->hdr);
+	ret = erst_write(&rcd->hdr);
+	*id = rcd->hdr.record_id;
 
-	return rcd->hdr.record_id;
+	return ret;
 }
 
 static int erst_clearer(enum pstore_type_id type, u64 id,
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index be8bcb035e2a3..8370f72d87ff5 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -490,8 +490,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 	return 0;
 }
 
-static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
-			    size_t size, struct pstore_info *psi)
+static int efi_pstore_write(enum pstore_type_id type, u64 *id,
+		unsigned int part, size_t size, struct pstore_info *psi)
 {
 	char name[DUMP_NAME_LEN];
 	char stub_name[DUMP_NAME_LEN];
@@ -499,7 +499,7 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	struct efivars *efivars = psi->data;
 	struct efivar_entry *entry, *found = NULL;
-	int i;
+	int i, ret = 0;
 
 	sprintf(stub_name, "dump-type%u-%u-", type, part);
 	sprintf(name, "%s%lu", stub_name, get_seconds());
@@ -548,18 +548,19 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
 		efivar_unregister(found);
 
 	if (size)
-		efivar_create_sysfs_entry(efivars,
+		ret = efivar_create_sysfs_entry(efivars,
 					  utf16_strsize(efi_name,
 							DUMP_NAME_LEN * 2),
 					  efi_name, &vendor);
 
-	return part;
+	*id = part;
+	return ret;
 };
 
 static int efi_pstore_erase(enum pstore_type_id type, u64 id,
 			    struct pstore_info *psi)
 {
-	efi_pstore_write(type, id, 0, psi);
+	efi_pstore_write(type, &id, (unsigned int)id, 0, psi);
 
 	return 0;
 }
@@ -580,8 +581,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 	return -1;
 }
 
-static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
-			    size_t size, struct pstore_info *psi)
+static int efi_pstore_write(enum pstore_type_id type, u64 *id,
+		unsigned int part, size_t size, struct pstore_info *psi)
 {
 	return 0;
 }
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 0472924024cc2..2bd620f0d796c 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -87,7 +87,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 	unsigned long	size, total = 0;
 	char		*dst, *why;
 	u64		id;
-	int		hsize;
+	int		hsize, ret;
 	unsigned int	part = 1;
 	unsigned long	flags = 0;
 	int		is_locked = 0;
@@ -122,9 +122,9 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		memcpy(dst, s1 + s1_start, l1_cpy);
 		memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
 
-		id = psinfo->write(PSTORE_TYPE_DMESG, part,
+		ret = psinfo->write(PSTORE_TYPE_DMESG, &id, part,
 				   hsize + l1_cpy + l2_cpy, psinfo);
-		if (reason == KMSG_DUMP_OOPS && pstore_is_mounted())
+		if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
 			pstore_new_entry = 1;
 		l1 -= l1_cpy;
 		l2 -= l2_cpy;
@@ -247,6 +247,7 @@ static void pstore_timefunc(unsigned long dummy)
 int pstore_write(enum pstore_type_id type, char *buf, size_t size)
 {
 	u64		id;
+	int		ret;
 	unsigned long	flags;
 
 	if (!psinfo)
@@ -257,8 +258,8 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size)
 
 	spin_lock_irqsave(&psinfo->buf_lock, flags);
 	memcpy(psinfo->buf, buf, size);
-	id = psinfo->write(type, 0, size, psinfo);
-	if (pstore_is_mounted())
+	ret = psinfo->write(type, &id, 0, size, psinfo);
+	if (ret == 0 && pstore_is_mounted())
 		pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
 			      size, CURRENT_TIME, psinfo);
 	spin_unlock_irqrestore(&psinfo->buf_lock, flags);
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index b91440e64d6e6..ea567321ae3c3 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -39,8 +39,8 @@ struct pstore_info {
 	int		(*close)(struct pstore_info *psi);
 	ssize_t		(*read)(u64 *id, enum pstore_type_id *type,
 			struct timespec *time, struct pstore_info *psi);
-	u64		(*write)(enum pstore_type_id type, unsigned int part,
-			size_t size, struct pstore_info *psi);
+	int		(*write)(enum pstore_type_id type, u64 *id,
+			unsigned int part, size_t size, struct pstore_info *psi);
 	int		(*erase)(enum pstore_type_id type, u64 id,
 			struct pstore_info *psi);
 	void		*data;