From c9a2bfd1f08973115c2fc3da9372e0e81ac42a19 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 14 Aug 2009 13:09:32 -0700
Subject: [PATCH 01/60] pcmcia: add CNF-CDROM-ID for ide

Fixes this report:
http://article.gmane.org/gmane.linux.kernel.pcmcia.devel/2228/

Reported-by: John McGrath <john@john-mcgrath.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/ide-cs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 527908ff298c..063b933d864a 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -408,6 +408,7 @@ static struct pcmcia_device_id ide_ids[] = {
 	PCMCIA_DEVICE_PROD_ID123("PCMCIA", "IDE CARD", "F1", 0x281f1c5d, 0x1907960c, 0xf7fde8b9),
 	PCMCIA_DEVICE_PROD_ID12("ARGOSY", "CD-ROM", 0x78f308dc, 0x66536591),
 	PCMCIA_DEVICE_PROD_ID12("ARGOSY", "PnPIDE", 0x78f308dc, 0x0c694728),
+	PCMCIA_DEVICE_PROD_ID12("CNF   ", "CD-ROM", 0x46d7db81, 0x66536591),
 	PCMCIA_DEVICE_PROD_ID12("CNF CD-M", "CD-ROM", 0x7d93b852, 0x66536591),
 	PCMCIA_DEVICE_PROD_ID12("Creative Technology Ltd.", "PCMCIA CD-ROM Interface Card", 0xff8c8a45, 0xfe8020c4),
 	PCMCIA_DEVICE_PROD_ID12("Digital Equipment Corporation.", "Digital Mobile Media CD-ROM", 0x17692a66, 0xef1dcbde),

From ced909ff048c9950e211783417f3c01361f3be28 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 25 Aug 2009 19:24:10 -0700
Subject: [PATCH 02/60] Input: i8042 - add Acer Aspire 5536 to the nomux list

When KBC is in active multiplexing mode, disabling and re-enabling the
touchpad with the special key leaves the touchpad dead. Since the laptop
does not have any external PS/2 ports disabling MUX mode should be safe.

Reported-by: Eugeniy Meshcheryakov <eugen@debian.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/serio/i8042-x86ia64io.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index ae04d8a494e5..ccbf23ece8e3 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -382,6 +382,14 @@ static struct dmi_system_id __initdata i8042_dmi_nomux_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Vostro1510"),
 		},
 	},
+	{
+		.ident = "Acer Aspire 5536",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5536"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "0100"),
+		},
+	},
 	{ }
 };
 

From a1b08e75dff3dc18a88444803753e667bb1d126e Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 27 Aug 2009 14:46:56 +0800
Subject: [PATCH 03/60] ocfs2: invalidate dentry if its dentry_lock isn't
 initialized.

In commit a5a0a630922a2f6a774b6dac19f70cb5abd86bb0, when
ocfs2_dentry_attach_lock fails, we call an extra iput and reset
dentry->d_fsdata to NULL. This resolves a bug, but the fix is
incomplete: the dentry is still there. When we want to use
it again, ocfs2_dentry_revalidate doesn't catch it and returns
true. That makes a later ocfs2_dentry_lock panic.
One such bug is http://oss.oracle.com/bugzilla/show_bug.cgi?id=1162.

The resolution is to add a check for dentry->d_fsdata in the
revalidate process and return false if dentry->d_fsdata is NULL,
so that ocfs2_lookup will be called again.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dcache.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 2f28b7de2c8d..b4957c7d9fe2 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -85,6 +85,17 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
 		goto bail;
 	}
 
+	/*
+	 * If the last lookup failed to create dentry lock, let us
+	 * redo it.
+	 */
+	if (!dentry->d_fsdata) {
+		mlog(0, "Inode %llu doesn't have dentry lock, "
+		     "returning false\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
+		goto bail;
+	}
+
 	ret = 1;
 
 bail:

From 6bb56347f5162d1a7cb1dc461023360781ecd4c0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 28 Aug 2009 13:44:53 +0200
Subject: [PATCH 04/60] perf_counters: Increase paranoia level

Per-cpu counters are an ASLR information leak as they show
the execution other tasks do. Increase the paranoia level
to 1, which disallows per-cpu counters. (they still allow
counting/profiling of own tasks - and admin can profile
everything.)

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/perf_counter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f274e1959885..7d4bb83b78cf 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -50,7 +50,7 @@ static atomic_t nr_task_counters __read_mostly;
  *  1 - disallow cpu counters to unpriv
  *  2 - disallow kernel profiling to unpriv
  */
-int sysctl_perf_counter_paranoid __read_mostly;
+int sysctl_perf_counter_paranoid __read_mostly = 1;
 
 static inline bool perf_paranoid_cpu(void)
 {

From 11ebd1bf07fafde8d16562966c96b05b0d4ced9e Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Fri, 28 Aug 2009 11:42:31 +0800
Subject: [PATCH 05/60] ipw2200: firmware DMA loading rework

Bartlomiej Zolnierkiewicz reported an atomic order-6 allocation failure
for ipw2200 firmware loading in kernel 2.6.30. High order allocation is
likely to fail and should always be avoided.

The patch fixes this problem by replacing the original order-6
pci_alloc_consistent() with an array of order-1 pages from a pci pool.
This utilizes the ipw2200 DMA command blocks (up to 64 slots). The
maximum supported firmware size remains the same (64*8K).

This patch fixes bug http://bugzilla.kernel.org/show_bug.cgi?id=14016

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ipw2x00/ipw2200.c | 120 ++++++++++++++-----------
 1 file changed, 67 insertions(+), 53 deletions(-)

diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index 6dcac73b4d29..f593fbbb4e52 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -2874,45 +2874,27 @@ static int ipw_fw_dma_add_command_block(struct ipw_priv *priv,
 	return 0;
 }
 
-static int ipw_fw_dma_add_buffer(struct ipw_priv *priv,
-				 u32 src_phys, u32 dest_address, u32 length)
+static int ipw_fw_dma_add_buffer(struct ipw_priv *priv, dma_addr_t *src_address,
+				 int nr, u32 dest_address, u32 len)
 {
-	u32 bytes_left = length;
-	u32 src_offset = 0;
-	u32 dest_offset = 0;
-	int status = 0;
+	int ret, i;
+	u32 size;
+
 	IPW_DEBUG_FW(">> \n");
-	IPW_DEBUG_FW_INFO("src_phys=0x%x dest_address=0x%x length=0x%x\n",
-			  src_phys, dest_address, length);
-	while (bytes_left > CB_MAX_LENGTH) {
-		status = ipw_fw_dma_add_command_block(priv,
-						      src_phys + src_offset,
-						      dest_address +
-						      dest_offset,
-						      CB_MAX_LENGTH, 0, 0);
-		if (status) {
+	IPW_DEBUG_FW_INFO("nr=%d dest_address=0x%x len=0x%x\n",
+			  nr, dest_address, len);
+
+	for (i = 0; i < nr; i++) {
+		size = min_t(u32, len - i * CB_MAX_LENGTH, CB_MAX_LENGTH);
+		ret = ipw_fw_dma_add_command_block(priv, src_address[i],
+						   dest_address +
+						   i * CB_MAX_LENGTH, size,
+						   0, 0);
+		if (ret) {
 			IPW_DEBUG_FW_INFO(": Failed\n");
 			return -1;
 		} else
 			IPW_DEBUG_FW_INFO(": Added new cb\n");
-
-		src_offset += CB_MAX_LENGTH;
-		dest_offset += CB_MAX_LENGTH;
-		bytes_left -= CB_MAX_LENGTH;
-	}
-
-	/* add the buffer tail */
-	if (bytes_left > 0) {
-		status =
-		    ipw_fw_dma_add_command_block(priv, src_phys + src_offset,
-						 dest_address + dest_offset,
-						 bytes_left, 0, 0);
-		if (status) {
-			IPW_DEBUG_FW_INFO(": Failed on the buffer tail\n");
-			return -1;
-		} else
-			IPW_DEBUG_FW_INFO
-			    (": Adding new cb - the buffer tail\n");
 	}
 
 	IPW_DEBUG_FW("<< \n");
@@ -3160,59 +3142,91 @@ static int ipw_load_ucode(struct ipw_priv *priv, u8 * data, size_t len)
 
 static int ipw_load_firmware(struct ipw_priv *priv, u8 * data, size_t len)
 {
-	int rc = -1;
+	int ret = -1;
 	int offset = 0;
 	struct fw_chunk *chunk;
-	dma_addr_t shared_phys;
-	u8 *shared_virt;
+	int total_nr = 0;
+	int i;
+	struct pci_pool *pool;
+	u32 *virts[CB_NUMBER_OF_ELEMENTS_SMALL];
+	dma_addr_t phys[CB_NUMBER_OF_ELEMENTS_SMALL];
 
 	IPW_DEBUG_TRACE("<< : \n");
-	shared_virt = pci_alloc_consistent(priv->pci_dev, len, &shared_phys);
 
-	if (!shared_virt)
+	pool = pci_pool_create("ipw2200", priv->pci_dev, CB_MAX_LENGTH, 0, 0);
+	if (!pool) {
+		IPW_ERROR("pci_pool_create failed\n");
 		return -ENOMEM;
-
-	memmove(shared_virt, data, len);
+	}
 
 	/* Start the Dma */
-	rc = ipw_fw_dma_enable(priv);
+	ret = ipw_fw_dma_enable(priv);
 
 	/* the DMA is already ready this would be a bug. */
 	BUG_ON(priv->sram_desc.last_cb_index > 0);
 
 	do {
+		u32 chunk_len;
+		u8 *start;
+		int size;
+		int nr = 0;
+
 		chunk = (struct fw_chunk *)(data + offset);
 		offset += sizeof(struct fw_chunk);
+		chunk_len = le32_to_cpu(chunk->length);
+		start = data + offset;
+
+		nr = (chunk_len + CB_MAX_LENGTH - 1) / CB_MAX_LENGTH;
+		for (i = 0; i < nr; i++) {
+			virts[total_nr] = pci_pool_alloc(pool, GFP_KERNEL,
+							 &phys[total_nr]);
+			if (!virts[total_nr]) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			size = min_t(u32, chunk_len - i * CB_MAX_LENGTH,
+				     CB_MAX_LENGTH);
+			memcpy(virts[total_nr], start, size);
+			start += size;
+			total_nr++;
+			/* We don't support fw chunk larger than 64*8K */
+			BUG_ON(total_nr > CB_NUMBER_OF_ELEMENTS_SMALL);
+		}
+
 		/* build DMA packet and queue up for sending */
 		/* dma to chunk->address, the chunk->length bytes from data +
 		 * offeset*/
 		/* Dma loading */
-		rc = ipw_fw_dma_add_buffer(priv, shared_phys + offset,
-					   le32_to_cpu(chunk->address),
-					   le32_to_cpu(chunk->length));
-		if (rc) {
+		ret = ipw_fw_dma_add_buffer(priv, &phys[total_nr - nr],
+					    nr, le32_to_cpu(chunk->address),
+					    chunk_len);
+		if (ret) {
 			IPW_DEBUG_INFO("dmaAddBuffer Failed\n");
 			goto out;
 		}
 
-		offset += le32_to_cpu(chunk->length);
+		offset += chunk_len;
 	} while (offset < len);
 
 	/* Run the DMA and wait for the answer */
-	rc = ipw_fw_dma_kick(priv);
-	if (rc) {
+	ret = ipw_fw_dma_kick(priv);
+	if (ret) {
 		IPW_ERROR("dmaKick Failed\n");
 		goto out;
 	}
 
-	rc = ipw_fw_dma_wait(priv);
-	if (rc) {
+	ret = ipw_fw_dma_wait(priv);
+	if (ret) {
 		IPW_ERROR("dmaWaitSync Failed\n");
 		goto out;
 	}
-      out:
-	pci_free_consistent(priv->pci_dev, len, shared_virt, shared_phys);
-	return rc;
+ out:
+	for (i = 0; i < total_nr; i++)
+		pci_pool_free(pool, virts[i], phys[i]);
+
+	pci_pool_destroy(pool);
+
+	return ret;
 }
 
 /* stop nic */

From 0c7d400fafaeab6014504a6a6249f01bac7f7db4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 29 Aug 2009 20:44:04 +1000
Subject: [PATCH 06/60] crypto: skcipher - Fix skcipher_dequeue_givcrypt NULL
 test

As struct skcipher_givcrypt_request includes struct crypto_request
at a non-zero offset, testing for NULL after converting the pointer
returned by crypto_dequeue_request does not work.  This can result
in IPsec crashes when the queue is depleted.

This patch fixes it by doing the pointer conversion only when the
return value is non-NULL.  In particular, we create a new function
__crypto_dequeue_request that does the pointer conversion.

Reported-by: Brad Bosch <bradbosch@comcast.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c                    | 11 +++++++++--
 include/crypto/algapi.h            |  1 +
 include/crypto/internal/skcipher.h |  4 ++--
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 56c62e2858d5..df0863d56995 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -692,7 +692,7 @@ int crypto_enqueue_request(struct crypto_queue *queue,
 }
 EXPORT_SYMBOL_GPL(crypto_enqueue_request);
 
-struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue)
+void *__crypto_dequeue_request(struct crypto_queue *queue, unsigned int offset)
 {
 	struct list_head *request;
 
@@ -707,7 +707,14 @@ struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue)
 	request = queue->list.next;
 	list_del(request);
 
-	return list_entry(request, struct crypto_async_request, list);
+	return (char *)list_entry(request, struct crypto_async_request, list) -
+	       offset;
+}
+EXPORT_SYMBOL_GPL(__crypto_dequeue_request);
+
+struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue)
+{
+	return __crypto_dequeue_request(queue, 0);
 }
 EXPORT_SYMBOL_GPL(crypto_dequeue_request);
 
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 010545436efa..5a2bd1cc9656 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -137,6 +137,7 @@ struct crypto_instance *crypto_alloc_instance(const char *name,
 void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen);
 int crypto_enqueue_request(struct crypto_queue *queue,
 			   struct crypto_async_request *request);
+void *__crypto_dequeue_request(struct crypto_queue *queue, unsigned int offset);
 struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue);
 int crypto_tfm_in_queue(struct crypto_queue *queue, struct crypto_tfm *tfm);
 
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index 2ba42cd7d6aa..3a748a6bf772 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -79,8 +79,8 @@ static inline int skcipher_enqueue_givcrypt(
 static inline struct skcipher_givcrypt_request *skcipher_dequeue_givcrypt(
 	struct crypto_queue *queue)
 {
-	return container_of(ablkcipher_dequeue_request(queue),
-			    struct skcipher_givcrypt_request, creq);
+	return __crypto_dequeue_request(
+		queue, offsetof(struct skcipher_givcrypt_request, creq.base));
 }
 
 static inline void *skcipher_givcrypt_reqctx(

From eced1dfcfcf6b0a35e925d73916a9d8e36ab5457 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 28 Aug 2009 17:10:47 +0200
Subject: [PATCH 07/60] perf_counter: Fix /0 bug in swcounters

We have a race in the swcounter stuff where we can start
counting a counter that has never been enabled; this leads to a
/0 situation.

The change below avoids the /0 but doesn't close the race; closing
it would need a new counter state.

The race is due to perf_swcounter_is_counting() which cannot
discern between disabled due to scheduled out, and disabled for
any other reason.

Such a crash has been seen by Ingo:

[  967.092372] divide error: 0000 [#1] SMP
[  967.096499] last sysfs file: /sys/devices/system/cpu/cpu15/cache/index2/shared_cpu_map
[  967.104846] CPU 5
[  967.106965] Modules linked in:
[  967.110169] Pid: 3351, comm: hackbench Not tainted 2.6.31-rc8-tip-01158-gd940a54-dirty #1568 X8DTN
[  967.119456] RIP: 0010:[<ffffffff810c0aba>]  [<ffffffff810c0aba>] perf_swcounter_ctx_event+0x127/0x1af
[  967.129137] RSP: 0018:ffff8801a95abd70  EFLAGS: 00010046
[  967.134699] RAX: 0000000000000002 RBX: ffff8801bd645c00 RCX: 0000000000000002
[  967.142162] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8801bd645d40
[  967.149584] RBP: ffff8801a95abdb0 R08: 0000000000000001 R09: ffff8801a95abe00
[  967.157042] R10: 0000000000000037 R11: ffff8801aa1245f8 R12: ffff8801a95abe00
[  967.164481] R13: ffff8801a95abe00 R14: ffff8801aa1c0e78 R15: 0000000000000001
[  967.171953] FS:  0000000000000000(0000) GS:ffffc90000a00000(0063) knlGS:00000000f7f486c0
[  967.180406] CS:  0010 DS: 002b ES: 002b CR0: 000000008005003b
[  967.186374] CR2: 000000004822c0ac CR3: 00000001b19a2000 CR4: 00000000000006e0
[  967.193770] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  967.201224] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  967.208692] Process hackbench (pid: 3351, threadinfo ffff8801a95aa000, task ffff8801a96b0000)
[  967.217607] Stack:
[  967.219711]  0000000000000000 0000000000000037 0000000200000001 ffffc90000a1107c
[  967.227296] <0> ffff8801a95abe00 0000000000000001 0000000000000001 0000000000000037
[  967.235333] <0> ffff8801a95abdf0 ffffffff810c0c20 0000000200a14f30 ffff8801a95abe40
[  967.243532] Call Trace:
[  967.246103]  [<ffffffff810c0c20>] do_perf_swcounter_event+0xde/0xec
[  967.252635]  [<ffffffff810c0ca7>] perf_tpcounter_event+0x79/0x7b
[  967.258957]  [<ffffffff81037f73>] ftrace_profile_sched_switch+0xc0/0xcb
[  967.265791]  [<ffffffff8155f22d>] schedule+0x429/0x4c4
[  967.271156]  [<ffffffff8100c01e>] int_careful+0xd/0x14

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1251472247.17617.74.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/perf_counter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7d4bb83b78cf..d7cbc579fc80 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -4066,6 +4066,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	hwc->sample_period = attr->sample_period;
 	if (attr->freq && attr->sample_freq)
 		hwc->sample_period = 1;
+	hwc->last_period = hwc->sample_period;
 
 	atomic64_set(&hwc->period_left, hwc->sample_period);
 

From 6faf17f6f1ffc586d16efc2f9fa2083a7785ee74 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Fri, 28 Aug 2009 13:00:06 -0700
Subject: [PATCH 08/60] PCI SR-IOV: correct broken resource alignment
 calculations

An SR-IOV capable device includes an SR-IOV PCIe capability which
describes the Virtual Function (VF) BAR requirements.  A typical SR-IOV
device can support multiple VFs whose BARs must be in a contiguous region,
effectively an array of VF BARs.  The BAR reports the size requirement
for a single VF.  We calculate the full range needed by simply multiplying
the VF BAR size with the number of possible VFs and create a resource
spanning the full range.

This all seems sane enough except it artificially inflates the alignment
requirement for the VF BAR.  The VF BAR need only be aligned to the size
of a single BAR not the contiguous range of VF BARs.  This can cause us
to fail to allocate resources for the BAR despite the fact that we
actually have enough space.

This patch adds a thin PCI specific layer over the generic
resource_alignment() function which is aware of the special nature of
VF BARs and does sorting and allocation based on the smaller alignment
requirement.

I recognize that while resource_alignment is generic, it's basically a
PCI helper.  An alternative to this patch is to add PCI VF BAR specific
information to struct resource.  I opted for the extra layer rather than
adding such PCI specific information to struct resource.  This does
have the slight downside that we don't cache the BAR size and re-read
for each alignment query (happens a small handful of times during boot
for each VF BAR).

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Yu Zhao <yu.zhao@intel.com>
Cc: stable@kernel.org
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/iov.c       | 23 +++++++++++++++++++++++
 drivers/pci/pci.h       | 13 +++++++++++++
 drivers/pci/setup-bus.c |  4 ++--
 drivers/pci/setup-res.c |  8 ++++----
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index e3a87210e947..e03fe98f0619 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -597,6 +597,29 @@ int pci_iov_resource_bar(struct pci_dev *dev, int resno,
 		4 * (resno - PCI_IOV_RESOURCES);
 }
 
+/**
+ * pci_sriov_resource_alignment - get resource alignment for VF BAR
+ * @dev: the PCI device
+ * @resno: the resource number
+ *
+ * Returns the alignment of the VF BAR found in the SR-IOV capability.
+ * This is not the same as the resource size which is defined as
+ * the VF BAR size multiplied by the number of VFs.  The alignment
+ * is just the VF BAR size.
+ */
+int pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
+{
+	struct resource tmp;
+	enum pci_bar_type type;
+	int reg = pci_iov_resource_bar(dev, resno, &type);
+	
+	if (!reg)
+		return 0;
+
+	 __pci_read_base(dev, type, &tmp, reg);
+	return resource_alignment(&tmp);
+}
+
 /**
  * pci_restore_iov_state - restore the state of the IOV capability
  * @dev: the PCI device
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f73bcbedf37c..5ff4d25bf0e9 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -243,6 +243,7 @@ extern int pci_iov_init(struct pci_dev *dev);
 extern void pci_iov_release(struct pci_dev *dev);
 extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
 				enum pci_bar_type *type);
+extern int pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
 extern void pci_restore_iov_state(struct pci_dev *dev);
 extern int pci_iov_bus_range(struct pci_bus *bus);
 
@@ -298,4 +299,16 @@ static inline int pci_ats_enabled(struct pci_dev *dev)
 }
 #endif /* CONFIG_PCI_IOV */
 
+static inline int pci_resource_alignment(struct pci_dev *dev,
+					 struct resource *res)
+{
+#ifdef CONFIG_PCI_IOV
+	int resno = res - dev->resource;
+
+	if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END)
+		return pci_sriov_resource_alignment(dev, resno);
+#endif
+	return resource_alignment(res);
+}
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index b636e245445d..7c443b4583ab 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -25,7 +25,7 @@
 #include <linux/ioport.h>
 #include <linux/cache.h>
 #include <linux/slab.h>
-
+#include "pci.h"
 
 static void pbus_assign_resources_sorted(const struct pci_bus *bus)
 {
@@ -384,7 +384,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 				continue;
 			r_size = resource_size(r);
 			/* For bridges size != alignment */
-			align = resource_alignment(r);
+			align = pci_resource_alignment(dev, r);
 			order = __ffs(align) - 20;
 			if (order > 11) {
 				dev_warn(&dev->dev, "BAR %d bad alignment %llx: "
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 1898c7b47907..88cdd1a937d6 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -144,7 +144,7 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
 
 	size = resource_size(res);
 	min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
-	align = resource_alignment(res);
+	align = pci_resource_alignment(dev, res);
 
 	/* First, try exact prefetching match.. */
 	ret = pci_bus_alloc_resource(bus, res, size, align, min,
@@ -178,7 +178,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	struct pci_bus *bus;
 	int ret;
 
-	align = resource_alignment(res);
+	align = pci_resource_alignment(dev, res);
 	if (!align) {
 		dev_info(&dev->dev, "BAR %d: can't allocate resource (bogus "
 			"alignment) %pR flags %#lx\n",
@@ -259,7 +259,7 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
 		if (!(r->flags) || r->parent)
 			continue;
 
-		r_align = resource_alignment(r);
+		r_align = pci_resource_alignment(dev, r);
 		if (!r_align) {
 			dev_warn(&dev->dev, "BAR %d: bogus alignment "
 				"%pR flags %#lx\n",
@@ -271,7 +271,7 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
 			struct resource_list *ln = list->next;
 
 			if (ln)
-				align = resource_alignment(ln->res);
+				align = pci_resource_alignment(ln->dev, ln->res);
 
 			if (r_align > align) {
 				tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);

From b1f1b8ce0a1d71cbc72f7540134d52b79bd8f5ac Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Sun, 30 Aug 2009 04:21:41 +0900
Subject: [PATCH 09/60] nilfs2: fix preempt count underflow in
 nilfs_btnode_prepare_change_key

This will fix the following preempt count underflow reported from
users with the title "[NILFS users] segctord problem" (Message-ID:
<949415.6494.qm@web58808.mail.re1.yahoo.com> and Message-ID:
<debc30fc0908270825v747c1734xa59126623cfd5b05@mail.gmail.com>):

 WARNING: at kernel/sched.c:4890 sub_preempt_count+0x95/0xa0()
 Hardware name: HP Compaq 6530b (KR980UT#ABC)
 Modules linked in: bridge stp llc bnep rfcomm l2cap xfs exportfs nilfs2 cowloop loop vboxnetadp vboxnetflt vboxdrv btusb bluetooth uvcvideo videodev v4l1_compat v4l2_compat_ioctl32 arc4 snd_hda_codec_analog ecb iwlagn iwlcore rfkill lib80211 mac80211 snd_hda_intel snd_hda_codec ehci_hcd uhci_hcd usbcore snd_hwdep snd_pcm tg3 cfg80211 psmouse snd_timer joydev libphy ohci1394 snd_page_alloc hp_accel lis3lv02d ieee1394 led_class i915 drm i2c_algo_bit video backlight output i2c_core dm_crypt dm_mod
 Pid: 4197, comm: segctord Not tainted 2.6.30-gentoo-r4-64 #7
 Call Trace:
  [<ffffffff8023fa05>] ? sub_preempt_count+0x95/0xa0
  [<ffffffff802470f8>] warn_slowpath_common+0x78/0xd0
  [<ffffffff8024715f>] warn_slowpath_null+0xf/0x20
  [<ffffffff8023fa05>] sub_preempt_count+0x95/0xa0
  [<ffffffffa04ce4db>] nilfs_btnode_prepare_change_key+0x11b/0x190 [nilfs2]
  [<ffffffffa04d01ad>] nilfs_btree_assign_p+0x19d/0x1e0 [nilfs2]
  [<ffffffffa04d10ad>] nilfs_btree_assign+0xbd/0x130 [nilfs2]
  [<ffffffffa04cead7>] nilfs_bmap_assign+0x47/0x70 [nilfs2]
  [<ffffffffa04d9bc6>] nilfs_segctor_do_construct+0x956/0x20f0 [nilfs2]
  [<ffffffff805ac8e2>] ? _spin_unlock_irqrestore+0x12/0x40
  [<ffffffff803c06e0>] ? __up_write+0xe0/0x150
  [<ffffffff80262959>] ? up_write+0x9/0x10
  [<ffffffffa04ce9f3>] ? nilfs_bmap_test_and_clear_dirty+0x43/0x60 [nilfs2]
  [<ffffffffa04cd627>] ? nilfs_mdt_fetch_dirty+0x27/0x60 [nilfs2]
  [<ffffffffa04db5fc>] nilfs_segctor_construct+0x8c/0xd0 [nilfs2]
  [<ffffffffa04dc3dc>] nilfs_segctor_thread+0x15c/0x3a0 [nilfs2]
  [<ffffffffa04dbe20>] ? nilfs_construction_timeout+0x0/0x10 [nilfs2]
  [<ffffffff80252633>] ? add_timer+0x13/0x20
  [<ffffffff802370da>] ? __wake_up_common+0x5a/0x90
  [<ffffffff8025e960>] ? autoremove_wake_function+0x0/0x40
  [<ffffffffa04dc280>] ? nilfs_segctor_thread+0x0/0x3a0 [nilfs2]
  [<ffffffffa04dc280>] ? nilfs_segctor_thread+0x0/0x3a0 [nilfs2]
  [<ffffffff8025e556>] kthread+0x56/0x90
  [<ffffffff8020cdea>] child_rip+0xa/0x20
  [<ffffffff8025e500>] ? kthread+0x0/0x90
  [<ffffffff8020cde0>] ? child_rip+0x0/0x20

This problem was caused by a missing radix_tree_preload() call in
the retry path of the nilfs_btnode_prepare_change_key() function.

Reported-by: Eric A <eric225125@yahoo.com>
Reported-by: Jerome Poulin <jeromepoulin@gmail.com>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Tested-by: Jerome Poulin <jeromepoulin@gmail.com>
Cc: stable@kernel.org
---
 fs/nilfs2/btnode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 7e0b61be212e..c668bca579c1 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -209,6 +209,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
 		 * We cannot call radix_tree_preload for the kernels older
 		 * than 2.6.23, because it is not exported for modules.
 		 */
+retry:
 		err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
 		if (err)
 			goto failed_unlock;
@@ -219,7 +220,6 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
 				       (unsigned long long)oldkey,
 				       (unsigned long long)newkey);
 
-retry:
 		spin_lock_irq(&btnc->tree_lock);
 		err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
 		spin_unlock_irq(&btnc->tree_lock);

From 38bddf04bcfe661fbdab94888c3b72c32f6873b3 Mon Sep 17 00:00:00 2001
From: Toru UCHIYAMA <uchiyama.toru@jp.fujitsu.com>
Date: Sun, 30 Aug 2009 22:04:07 -0700
Subject: [PATCH 10/60] gianfar: gfar_remove needs to call unregister_netdev()

This patch fixes the Oops (BUG_ON) that occurs on rmmod.

	# rmmod gianfar_driver
	------------[ cut here ]------------
	Kernel BUG at c01fec48 [verbose debug info unavailable]
	Oops: Exception in kernel mode, sig: 5 [#1]
	MPC837x MDS
	Modules linked in: gianfar_driver(-) usb_storage scsi_wait_scan
	NIP: c01fec48 LR: c01febf4 CTR: c01feba8
	REGS: dec5bd60 TRAP: 0700   Tainted: G        W   (2.6.31-rc2)
	MSR: 00029032 <EE,ME,CE,IR,DR>  CR: 22000424  XER: 20000000
	TASK = dec4cac0[1135] 'rmmod' THREAD: dec5a000
	GPR00: 00000002 dec5be10 dec4cac0 dfba1820 c035d444 c035d478 ffffffff 00000000
	GPR08: 0000002b 00000001 dfba193c 00000001 22000424 10019b34 1ffcb000 00000000
	GPR16: 10012008 00000000 bf82ebe0 100017ec bf82ebec bf82ebe8 bf82ebd0 00000880
	GPR24: 00000000 bf82ebf0 c03532f0 c03532e4 c036b594 dfba183c dfba1800 dfba1820
	NIP [c01fec48] free_netdev+0xa0/0xb8
	LR [c01febf4] free_netdev+0x4c/0xb8
	Call Trace:
	[dec5be10] [c01febf4] free_netdev+0x4c/0xb8 (unreliable)
	[dec5be30] [e105f290] gfar_remove+0x50/0x68 [gianfar_driver]
	[dec5be40] [c01ec534] of_platform_device_remove+0x30/0x44
	[dec5be50] [c0181760] __device_release_driver+0x68/0xc8
	[dec5be60] [c0181868] driver_detach+0xa8/0xac
	[dec5be80] [c0180814] bus_remove_driver+0x9c/0xd8
	[dec5bea0] [c0181efc] driver_unregister+0x60/0x98
	[dec5beb0] [c01ec650] of_unregister_driver+0x14/0x24
	[dec5bec0] [e10631bc] gfar_exit+0x18/0x4bc [gianfar_driver]
	[dec5bed0] [c0047584] sys_delete_module+0x16c/0x228
	[dec5bf40] [c00116bc] ret_from_syscall+0x0/0x38
	--- Exception: c01 at 0xff3669c
	    LR = 0x10000f34
	Instruction dump:
	409e0024 a07e00c0 7c63f050 4be74429 80010024 bba10014 38210020 7c0803a6
	4e800020 68000003 3160ffff 7d2b0110 <0f090000> 38000004 387e01f0 901e01d4
	---[ end trace 8c595bcd37230a0f ]---
	 localhost kernel: ------------[ cut here ]------------

Signed-off-by: Toru UCHIYAMA <uchiyama.toru@jp.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index e212f2c5448b..24f7ca5e17de 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -491,6 +491,7 @@ static int gfar_remove(struct of_device *ofdev)
 
 	dev_set_drvdata(&ofdev->dev, NULL);
 
+	unregister_netdev(dev);
 	iounmap(priv->regs);
 	free_netdev(priv->ndev);
 

From b91ab72b830e1494c2c7f8de05ccb2ab2c9cfb26 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Tue, 1 Sep 2009 08:23:58 +0200
Subject: [PATCH 11/60] sound: oxygen: fix MCLK rate for 192 kHz playback

Do not forget to program the MCLK ratio for the I2S output.
Otherwise, the master clock frequency can be too high for
the DACs at sample frequencies above 96 kHz.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Cc: <stable@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/oxygen/oxygen_pcm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/oxygen/oxygen_pcm.c b/sound/pci/oxygen/oxygen_pcm.c
index 3b5ca70c9d4d..ef2345d82b86 100644
--- a/sound/pci/oxygen/oxygen_pcm.c
+++ b/sound/pci/oxygen/oxygen_pcm.c
@@ -469,9 +469,11 @@ static int oxygen_multich_hw_params(struct snd_pcm_substream *substream,
 	oxygen_write16_masked(chip, OXYGEN_I2S_MULTICH_FORMAT,
 			      oxygen_rate(hw_params) |
 			      chip->model.dac_i2s_format |
+			      oxygen_i2s_mclk(hw_params) |
 			      oxygen_i2s_bits(hw_params),
 			      OXYGEN_I2S_RATE_MASK |
 			      OXYGEN_I2S_FORMAT_MASK |
+			      OXYGEN_I2S_MCLK_MASK |
 			      OXYGEN_I2S_BITS_MASK);
 	oxygen_update_dac_routing(chip);
 	oxygen_update_spdif_source(chip);

From 04a13c7c632e1fe04a5f6e6c83565d2559e37598 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 1 Sep 2009 21:12:28 +0900
Subject: [PATCH 12/60] percpu: don't assume existence of cpu0

percpu incorrectly assumed that cpu0 was always there which led to the
following warning and eventual oops on sparc machines w/o cpu0.

  WARNING: at mm/percpu.c:651 pcpu_map+0xdc/0x100()
  Modules linked in:
  Call Trace:
    [000000000045eb70] warn_slowpath_common+0x50/0xa0
    [000000000045ebdc] warn_slowpath_null+0x1c/0x40
    [00000000004d493c] pcpu_map+0xdc/0x100
    [00000000004d59a4] pcpu_alloc+0x3e4/0x4e0
    [00000000004d5af8] __alloc_percpu+0x18/0x40
    [00000000005b112c] __percpu_counter_init+0x4c/0xc0
  ...
  Unable to handle kernel NULL pointer dereference
  ...
   I7: <sysfs_new_dirent+0x30/0x120>
   Disabling lock debugging due to kernel taint
   Caller[000000000053c1b0]: sysfs_new_dirent+0x30/0x120
   Caller[000000000053c7a4]: create_dir+0x24/0xc0
   Caller[000000000053c870]: sysfs_create_dir+0x30/0x80
   Caller[00000000005990e8]: kobject_add_internal+0xc8/0x200
  ...
   Kernel panic - not syncing: Attempted to kill the idle task!

This patch fixes the problem by backporting parts from devel branch to
make percpu core not depend on the existence of cpu0.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Meelis Roos <mroos@linux.ee>
Cc: David Miller <davem@davemloft.net>
---
 mm/percpu.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index 5fe37842e0ea..3311c8919f37 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -197,7 +197,12 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
 static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk,
 				     int page_idx)
 {
-	return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL;
+	/*
+	 * Any possible cpu id can be used here, so there's no need to
+	 * worry about preemption or cpu hotplug.
+	 */
+	return *pcpu_chunk_pagep(chunk, raw_smp_processor_id(),
+				 page_idx) != NULL;
 }
 
 /* set the pointer to a chunk in a page struct */
@@ -297,6 +302,14 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 		return pcpu_first_chunk;
 	}
 
+	/*
+	 * The address is relative to unit0 which might be unused and
+	 * thus unmapped.  Offset the address to the unit space of the
+	 * current processor before looking it up in the vmalloc
+	 * space.  Note that any possible cpu id can be used here, so
+	 * there's no need to worry about preemption or cpu hotplug.
+	 */
+	addr += raw_smp_processor_id() * pcpu_unit_size;
 	return pcpu_get_page_chunk(vmalloc_to_page(addr));
 }
 

From ce6c3997c2fce74d12e6d8887a1d8cdf024fa850 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Fri, 7 Aug 2009 22:58:51 +0200
Subject: [PATCH 13/60] [CPUFREQ] Re-enable cpufreq suspend and resume code

Commit 4bc5d3413503 is broken and causes regressions:

(1) cpufreq_driver->resume() and ->suspend() were only called on
__powerpc__, but you could set them on all architectures. In fact,
->resume() was defined and used before the PPC-related commit
42d4dc3f4e1e complained about in 4bc5d3413503.

(2) Therefore, the resume functions in acpi_cpufreq and speedstep-smi
would never be called.

(3) This means speedstep-smi would be unusable after suspend or resume.

The _real_ problem was calling cpufreq_driver->get() with interrupts
off, which re-enables interrupts on some platforms. Why is ->get()
necessary?

Some systems like to change the CPU frequency behind our
back, especially during BIOS-intensive operations like suspend or
resume. If such systems also use a CPU frequency-dependent timing loop,
delays might be off by large factors. Therefore, we need to ascertain
as soon as possible that the CPU frequency is indeed at the speed we
think it is. You can do this two ways: either setting it anew, or trying
to get it. The latter is what was done, the former also has the same IRQ
issue.

So, let's try something different: defer the checking to after interrupts
are re-enabled, by calling cpufreq_update_policy() (via schedule_work()).
Timings may be off until this later stage, so let's watch out for
resume regressions caused by the deferred handling of frequency changes
behind the kernel's back.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/cpufreq/cpufreq.c | 95 +++------------------------------------
 1 file changed, 7 insertions(+), 88 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index fd69086d08d5..2968ed6a9c49 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1250,20 +1250,11 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
 {
 	int ret = 0;
 
-#ifdef __powerpc__
 	int cpu = sysdev->id;
-	unsigned int cur_freq = 0;
 	struct cpufreq_policy *cpu_policy;
 
 	dprintk("suspending cpu %u\n", cpu);
 
-	/*
-	 * This whole bogosity is here because Powerbooks are made of fail.
-	 * No sane platform should need any of the code below to be run.
-	 * (it's entirely the wrong thing to do, as driver->get may
-	 *  reenable interrupts on some architectures).
-	 */
-
 	if (!cpu_online(cpu))
 		return 0;
 
@@ -1282,47 +1273,13 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
 
 	if (cpufreq_driver->suspend) {
 		ret = cpufreq_driver->suspend(cpu_policy, pmsg);
-		if (ret) {
+		if (ret)
 			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
 					"step on CPU %u\n", cpu_policy->cpu);
-			goto out;
-		}
-	}
-
-	if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
-		goto out;
-
-	if (cpufreq_driver->get)
-		cur_freq = cpufreq_driver->get(cpu_policy->cpu);
-
-	if (!cur_freq || !cpu_policy->cur) {
-		printk(KERN_ERR "cpufreq: suspend failed to assert current "
-		       "frequency is what timing core thinks it is.\n");
-		goto out;
-	}
-
-	if (unlikely(cur_freq != cpu_policy->cur)) {
-		struct cpufreq_freqs freqs;
-
-		if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
-			dprintk("Warning: CPU frequency is %u, "
-			       "cpufreq assumed %u kHz.\n",
-			       cur_freq, cpu_policy->cur);
-
-		freqs.cpu = cpu;
-		freqs.old = cpu_policy->cur;
-		freqs.new = cur_freq;
-
-		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
-				    CPUFREQ_SUSPENDCHANGE, &freqs);
-		adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
-
-		cpu_policy->cur = cur_freq;
 	}
 
 out:
 	cpufreq_cpu_put(cpu_policy);
-#endif	/* __powerpc__ */
 	return ret;
 }
 
@@ -1330,24 +1287,21 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
  *	cpufreq_resume -  restore proper CPU frequency handling after resume
  *
  *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
- *	2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
- *	3.) schedule call cpufreq_update_policy() ASAP as interrupts are
- *	    restored.
+ *	2.) schedule call cpufreq_update_policy() ASAP as interrupts are
+ *	    restored. It will verify that the current freq is in sync with
+ *	    what we believe it to be. This is a bit later than when it
+ *	    should be, but nonethteless it's better than calling
+ *	    cpufreq_driver->get() here which might re-enable interrupts...
  */
 static int cpufreq_resume(struct sys_device *sysdev)
 {
 	int ret = 0;
 
-#ifdef __powerpc__
 	int cpu = sysdev->id;
 	struct cpufreq_policy *cpu_policy;
 
 	dprintk("resuming cpu %u\n", cpu);
 
-	/* As with the ->suspend method, all the code below is
-	 * only necessary because Powerbooks suck.
-	 * See commit 42d4dc3f4e1e for jokes. */
-
 	if (!cpu_online(cpu))
 		return 0;
 
@@ -1373,45 +1327,10 @@ static int cpufreq_resume(struct sys_device *sysdev)
 		}
 	}
 
-	if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
-		unsigned int cur_freq = 0;
-
-		if (cpufreq_driver->get)
-			cur_freq = cpufreq_driver->get(cpu_policy->cpu);
-
-		if (!cur_freq || !cpu_policy->cur) {
-			printk(KERN_ERR "cpufreq: resume failed to assert "
-					"current frequency is what timing core "
-					"thinks it is.\n");
-			goto out;
-		}
-
-		if (unlikely(cur_freq != cpu_policy->cur)) {
-			struct cpufreq_freqs freqs;
-
-			if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
-				dprintk("Warning: CPU frequency "
-				       "is %u, cpufreq assumed %u kHz.\n",
-				       cur_freq, cpu_policy->cur);
-
-			freqs.cpu = cpu;
-			freqs.old = cpu_policy->cur;
-			freqs.new = cur_freq;
-
-			srcu_notifier_call_chain(
-					&cpufreq_transition_notifier_list,
-					CPUFREQ_RESUMECHANGE, &freqs);
-			adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
-
-			cpu_policy->cur = cur_freq;
-		}
-	}
-
-out:
 	schedule_work(&cpu_policy->update);
+
 fail:
 	cpufreq_cpu_put(cpu_policy);
-#endif	/* __powerpc__ */
 	return ret;
 }
 

From c295fc05789653ef24f296299df7c5f92fe74dce Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@suse.de>
Date: Tue, 1 Sep 2009 22:40:15 +0200
Subject: [PATCH 14/60] block: Allow changing max_sectors_kb above the default
 512

The patch "block: Use accessor functions for queue limits"
(ae03bf639a5027d27270123f5f6e3ee6a412781d) changed queue_max_sectors_store()
to use blk_queue_max_sectors() instead of directly assigning the value.

But blk_queue_max_sectors() differs a bit:
1. It sets both max_sectors_kb and max_hw_sectors_kb.
2. It never allows one to change max_sectors_kb above BLK_DEF_MAX_SECTORS. If
one specifies a greater value, then max_hw_sectors is set to that value but
max_sectors is set to BLK_DEF_MAX_SECTORS.

I am not sure whether blk_queue_max_sectors() should be changed, as it seems
to have been that way for a long time. And there may be callers dependent on
that behaviour.

This patch simply reverts to the older way of directly assigning the value to
max_sectors as it was before.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 418d63619680..d3aa2aadb3e0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -133,7 +133,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 		return -EINVAL;
 
 	spin_lock_irq(q->queue_lock);
-	blk_queue_max_sectors(q, max_sectors_kb << 1);
+	q->limits.max_sectors = max_sectors_kb << 1;
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;

From 3725867dccfb83e4b0cff64e916a04258f300591 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 1 Sep 2009 14:03:08 -0400
Subject: [PATCH 15/60] xfs: actually enable the swapext compat handler

Fix a small typo in the compat ioctl handler that causes the swapext
compat handler to never be called.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Torsten Kaiser <just.for.lkml@googlemail.com>
Tested-by: Torsten Kaiser <just.for.lkml@googlemail.com>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0882d166239a..eafcc7c18706 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -619,7 +619,7 @@ xfs_file_compat_ioctl(
 	case XFS_IOC_GETVERSION_32:
 		cmd = _NATIVE_IOC(cmd, long);
 		return xfs_file_ioctl(filp, cmd, p);
-	case XFS_IOC_SWAPEXT: {
+	case XFS_IOC_SWAPEXT_32: {
 		struct xfs_swapext	  sxp;
 		struct compat_xfs_swapext __user *sxu = arg;
 

From d66ee0587c3927aea5178a822976c7c853d815fe Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sun, 30 Aug 2009 23:15:36 +0000
Subject: [PATCH 16/60] net: sk_free() should be allowed right after sk_alloc()

After commit 2b85a34e911bf483c27cfdd124aeb1605145dc80
(net: No more expensive sock_hold()/sock_put() on each tx)
sk_free() frees socks conditionally and depends
on sk_wmem_alloc being set e.g. in sock_init_data(). But in some
cases sk_free() is called earlier, usually after other alloc errors.

Fix is to move sk_wmem_alloc initialization from sock_init_data()
to sk_alloc() itself.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index bbb25be7ddfe..76334228ed1c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1025,6 +1025,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sk->sk_prot = sk->sk_prot_creator = prot;
 		sock_lock_init(sk);
 		sock_net_set(sk, get_net(net));
+		atomic_set(&sk->sk_wmem_alloc, 1);
 	}
 
 	return sk;
@@ -1872,7 +1873,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	 */
 	smp_wmb();
 	atomic_set(&sk->sk_refcnt, 1);
-	atomic_set(&sk->sk_wmem_alloc, 1);
 	atomic_set(&sk->sk_drops, 0);
 }
 EXPORT_SYMBOL(sock_init_data);

From 2fbd3da3877ad8d923b055e5996f80b4d4a6daf4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 1 Sep 2009 17:59:25 -0700
Subject: [PATCH 17/60] pkt_sched: Revert tasklet_hrtimer changes.

These are full of unresolved problems, mainly that conversions don't
work 1-1 from hrtimers to tasklet_hrtimers because unlike hrtimers
tasklets can't be killed from softirq context.

And when a qdisc gets reset, that's exactly what we need to do here.

We'll work this out in the net-next-2.6 tree and if warranted we'll
backport that work to -stable.

This reverts the following 3 changesets:

a2cb6a4dd470d7a64255a10b843b0d188416b78f
("pkt_sched: Fix bogon in tasklet_hrtimer changes.")

38acce2d7983632100a9ff3fd20295f6e34074a8
("pkt_sched: Convert CBQ to tasklet_hrtimer.")

ee5f9757ea17759e1ce5503bdae2b07e48e32af9
("pkt_sched: Convert qdisc_watchdog to tasklet_hrtimer")

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  4 ++--
 net/sched/sch_api.c     | 10 +++++-----
 net/sched/sch_cbq.c     | 25 +++++++++++--------------
 3 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 7eafb8d54470..82a3191375f5 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -61,8 +61,8 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
 }
 
 struct qdisc_watchdog {
-	struct tasklet_hrtimer	timer;
-	struct Qdisc		*qdisc;
+	struct hrtimer	timer;
+	struct Qdisc	*qdisc;
 };
 
 extern void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 92e6f3a52c13..24d17ce9c294 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -458,7 +458,7 @@ EXPORT_SYMBOL(qdisc_warn_nonwc);
 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 {
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
-						 timer.timer);
+						 timer);
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
 	__netif_schedule(qdisc_root(wd->qdisc));
@@ -468,8 +468,8 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 
 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
 {
-	tasklet_hrtimer_init(&wd->timer, qdisc_watchdog,
-			     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	wd->timer.function = qdisc_watchdog;
 	wd->qdisc = qdisc;
 }
 EXPORT_SYMBOL(qdisc_watchdog_init);
@@ -485,13 +485,13 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
 	wd->qdisc->flags |= TCQ_F_THROTTLED;
 	time = ktime_set(0, 0);
 	time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
-	tasklet_hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
+	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
 }
 EXPORT_SYMBOL(qdisc_watchdog_schedule);
 
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
-	tasklet_hrtimer_cancel(&wd->timer);
+	hrtimer_cancel(&wd->timer);
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 149b0405c5ec..d5798e17a832 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -163,7 +163,7 @@ struct cbq_sched_data
 	psched_time_t		now_rt;		/* Cached real time */
 	unsigned		pmask;
 
-	struct tasklet_hrtimer	delay_timer;
+	struct hrtimer		delay_timer;
 	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
 						   started when CBQ has
 						   backlog, but cannot
@@ -503,8 +503,6 @@ static void cbq_ovl_delay(struct cbq_class *cl)
 		cl->undertime = q->now + delay;
 
 		if (delay > 0) {
-			struct hrtimer *ht;
-
 			sched += delay + cl->penalty;
 			cl->penalized = sched;
 			cl->cpriority = TC_CBQ_MAXPRIO;
@@ -512,12 +510,12 @@ static void cbq_ovl_delay(struct cbq_class *cl)
 
 			expires = ktime_set(0, 0);
 			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
-			ht = &q->delay_timer.timer;
-			if (hrtimer_try_to_cancel(ht) &&
-			    ktime_to_ns(ktime_sub(hrtimer_get_expires(ht),
-						  expires)) > 0)
-				hrtimer_set_expires(ht, expires);
-			hrtimer_restart(ht);
+			if (hrtimer_try_to_cancel(&q->delay_timer) &&
+			    ktime_to_ns(ktime_sub(
+					hrtimer_get_expires(&q->delay_timer),
+					expires)) > 0)
+				hrtimer_set_expires(&q->delay_timer, expires);
+			hrtimer_restart(&q->delay_timer);
 			cl->delayed = 1;
 			cl->xstats.overactions++;
 			return;
@@ -593,7 +591,7 @@ static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 {
 	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
-						delay_timer.timer);
+						delay_timer);
 	struct Qdisc *sch = q->watchdog.qdisc;
 	psched_time_t now;
 	psched_tdiff_t delay = 0;
@@ -623,7 +621,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 
 		time = ktime_set(0, 0);
 		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
-		tasklet_hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
+		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
@@ -1216,7 +1214,7 @@ cbq_reset(struct Qdisc* sch)
 	q->tx_class = NULL;
 	q->tx_borrowed = NULL;
 	qdisc_watchdog_cancel(&q->watchdog);
-	tasklet_hrtimer_cancel(&q->delay_timer);
+	hrtimer_cancel(&q->delay_timer);
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	q->now = psched_get_time();
 	q->now_rt = q->now;
@@ -1399,8 +1397,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->link.minidle = -0x7FFFFFFF;
 
 	qdisc_watchdog_init(&q->watchdog, sch);
-	tasklet_hrtimer_init(&q->delay_timer, cbq_undelay,
-			     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	q->delay_timer.function = cbq_undelay;
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	q->now = psched_get_time();

From f2486f26692433ba27cc10991a085b503b0422a3 Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Mon, 31 Aug 2009 16:54:03 -0700
Subject: [PATCH 18/60] [IA64] Fix warning in dma-mapping.c

arch/ia64/kernel/dma-mapping.c:14: warning: control reaches end of non-void function
arch/ia64/kernel/dma-mapping.c:14: warning: no return statement in function returning non-void

This warning was introduced by commit: 390bd132b2831a2ad0268e84bffbfc0680debfe5
	Add dma_debug_init() for ia64

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/dma-mapping.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
index 39a3cd0a4173..f2c1600da097 100644
--- a/arch/ia64/kernel/dma-mapping.c
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -10,7 +10,9 @@ EXPORT_SYMBOL(dma_ops);
 
 static int __init dma_init(void)
 {
-       dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+	return 0;
 }
 fs_initcall(dma_init);
 

From 5afe18d2f58812f3924edbd215464e5e3e8545e7 Mon Sep 17 00:00:00 2001
From: Jiri Bohac <jbohac@suse.cz>
Date: Wed, 2 Sep 2009 11:00:46 +0200
Subject: [PATCH 19/60] [IA64] fix csum_ipv6_magic()

The 32-bit parameters (len and csum) of csum_ipv6_magic() are passed in 64-bit
registers in2 and in4. The high order 32 bits of the registers were never
cleared, and garbage was sometimes calculated into the checksum.

Fix this by clearing the high order 32 bits of these registers.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/lib/ip_fast_csum.S | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S
index 1f86aeb2c948..620d9dc5220f 100644
--- a/arch/ia64/lib/ip_fast_csum.S
+++ b/arch/ia64/lib/ip_fast_csum.S
@@ -96,20 +96,22 @@ END(ip_fast_csum)
 GLOBAL_ENTRY(csum_ipv6_magic)
 	ld4	r20=[in0],4
 	ld4	r21=[in1],4
-	dep	r15=in3,in2,32,16
+	zxt4	in2=in2
 	;;
 	ld4	r22=[in0],4
 	ld4	r23=[in1],4
-	mux1	r15=r15,@rev
+	dep	r15=in3,in2,32,16
 	;;
 	ld4	r24=[in0],4
 	ld4	r25=[in1],4
-	shr.u	r15=r15,16
+	mux1	r15=r15,@rev
 	add	r16=r20,r21
 	add	r17=r22,r23
+	zxt4	in4=in4
 	;;
 	ld4	r26=[in0],4
 	ld4	r27=[in1],4
+	shr.u	r15=r15,16
 	add	r18=r24,r25
 	add	r8=r16,r17
 	;;

From 92653453c3015c083b9fe0ad48261c6b2267d482 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Wed, 2 Sep 2009 18:25:39 +0200
Subject: [PATCH 20/60] sound: oxygen: handle cards with missing EEPROM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The card model detection code introduced in 2.6.30 that tries to work
around partially broken EEPROM contents by reading the EEPROM directly
does not handle cards where the EEPROM has been omitted.  In this case,
we have to use the default ID to allow the driver to load.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Reported-and-tested-by: Ozan Çağlayan <ozan@pardus.org.tr>
Cc: <stable@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/oxygen/oxygen_lib.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c
index 312251d39696..9a8936e20744 100644
--- a/sound/pci/oxygen/oxygen_lib.c
+++ b/sound/pci/oxygen/oxygen_lib.c
@@ -260,6 +260,9 @@ oxygen_search_pci_id(struct oxygen *chip, const struct pci_device_id ids[])
 	 * chip didn't if the first EEPROM word was overwritten.
 	 */
 	subdevice = oxygen_read_eeprom(chip, 2);
+	/* use default ID if EEPROM is missing */
+	if (subdevice == 0xffff)
+		subdevice = 0x8788;
 	/*
 	 * We use only the subsystem device ID for searching because it is
 	 * unique even without the subsystem vendor ID, which may have been

From 16ebb5e0b36ceadc8186f71d68b0c4fa4b6e781b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Sep 2009 02:40:09 +0000
Subject: [PATCH 21/60] tc: Fix uninitialized kernel memory leak

Three bytes of uninitialized kernel memory are currently leaked to user space.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 24d17ce9c294..fdb694e9f759 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1456,6 +1456,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
 	tcm = NLMSG_DATA(nlh);
 	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm__pad1 = 0;
+	tcm->tcm__pad2 = 0;
 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 	tcm->tcm_parent = q->handle;
 	tcm->tcm_handle = q->handle;

From a3df6f7d3090e611bcc774cd2cba45ae016d37e1 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 3 Sep 2009 11:52:02 +1000
Subject: [PATCH 22/60] perf_counter/powerpc: Fix cache event codes for POWER7

I had the codes for L1 D-cache load accesses and misses swapped
around, and the wrong codes for LL-cache accesses and misses.
This corrects them.

Reported-by: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@kernel.org>
LKML-Reference: <19103.8514.709300.585484@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/power7-pmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 388cf57ad827..018d094d92f9 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -317,7 +317,7 @@ static int power7_generic_events[] = {
  */
 static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	0x400f0,	0xc880	},
+		[C(OP_READ)] = {	0xc880,		0x400f0	},
 		[C(OP_WRITE)] = {	0,		0x300f0	},
 		[C(OP_PREFETCH)] = {	0xd8b8,		0	},
 	},
@@ -327,8 +327,8 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_PREFETCH)] = {	0x408a,		0	},
 	},
 	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	0x6080,		0x6084	},
-		[C(OP_WRITE)] = {	0x6082,		0x6086	},
+		[C(OP_READ)] = {	0x16080,	0x26080	},
+		[C(OP_WRITE)] = {	0x16082,	0x26082	},
 		[C(OP_PREFETCH)] = {	0,		0	},
 	},
 	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */

From e6617c6ec28a17cf2f90262b835ec05b9b861400 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 3 Sep 2009 02:35:20 -0700
Subject: [PATCH 23/60] sparc64: Kill spurious NMI watchdog triggers by
 increasing limit to 30 seconds.

This is a compromise and a temporary workaround for bootup NMI
watchdog triggers some people see with qla2xxx devices present.

This happens when, for example:

CPU 0 is in the driver init and looping submitting mailbox commands to
load the firmware, then waiting for completion.

CPU 1 is receiving the device interrupts.  CPU 1 is where the NMI
watchdog triggers.

CPU 0 is submitting mailbox commands fast enough that by the time CPU
1 returns from the device interrupt handler, a new one is pending.
This sequence runs for more than 5 seconds.

The problematic case is CPU 1's timer interrupt running when the
barrage of device interrupts begin.  Then we have:

	timer interrupt
	return for softirq checking
	pending, thus enable interrupts

		 qla2xxx interrupt
		 return
		 qla2xxx interrupt
		 return
		 ... 5+ seconds pass
		 final qla2xxx interrupt for fw load
		 return

	run timer softirq
	return

At some point in the multi-second qla2xxx interrupt storm we trigger
the NMI watchdog on CPU 1 from the NMI interrupt handler.

The timer softirq, once we get back to running it, is smart enough to
run the timer work enough times to make up for the missed timer
interrupts.

However, the NMI watchdogs (both x86 and sparc) use the timer
interrupt count to notice the cpu is wedged.  But in the above
scenario we'll receive only one such timer interrupt even if we last
all the way back to running the timer softirq.

The default watchdog trigger point is only 5 seconds, which is pretty
low (the softwatchdog triggers at 60 seconds).  So increase it to 30
seconds for now.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/nmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index 2c0cc72d295b..b75bf502cd42 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -103,7 +103,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
 	}
 	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
 		local_inc(&__get_cpu_var(alert_counter));
-		if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
+		if (local_read(&__get_cpu_var(alert_counter)) == 30 * nmi_hz)
 			die_nmi("BUG: NMI Watchdog detected LOCKUP",
 				regs, panic_on_timeout);
 	} else {

From edcb3b14863e1a6aa1923eeaa81125a00cf51a80 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Thu, 6 Aug 2009 15:18:37 -0700
Subject: [PATCH 24/60] mtd: m25p80: fix null pointer dereference bug

This patch fixes the following oops, observed with MTD_PARTITIONS=n:

m25p80 spi32766.0: m25p80 (1024 Kbytes)
Unable to handle kernel paging request for data at address 0x00000008
Faulting instruction address: 0xc03a54b0
Oops: Kernel access of bad area, sig: 11 [#1]
Modules linked in:
NIP: c03a54b0 LR: c03a5494 CTR: c01e98b8
REGS: ef82bb60 TRAP: 0300   Not tainted  (2.6.31-rc4-00167-g4733fd3)
MSR: 00029000 <EE,ME,CE>  CR: 24022022  XER: 20000000
DEAR: 00000008, ESR: 00000000
TASK = ef82c000[1] 'swapper' THREAD: ef82a000
GPR00: 00000000 ef82bc10 ef82c000 0000002e 00001eb8 ffffffff c01e9824 00000036
GPR08: c054ed40 c0542a08 00001eb8 00004000 22022022 1001a1a0 3ff8fd00 00000000
GPR16: 00000000 00000001 00000000 00000000 ef82bddc c0530000 efbef500 ef8356d0
GPR24: 00000000 ef8356d0 00000000 efbf7a00 c0530ec4 ffffffed efbf5300 c0541f98
NIP [c03a54b0] m25p_probe+0x22c/0x354
LR [c03a5494] m25p_probe+0x210/0x354
Call Trace:
[ef82bc10] [c03a5494] m25p_probe+0x210/0x354 (unreliable)
[ef82bca0] [c024e37c] spi_drv_probe+0x2c/0x3c
[ef82bcb0] [c01f1afc] driver_probe_device+0xa4/0x178
[ef82bcd0] [c01f06e8] bus_for_each_drv+0x6c/0xa8
[ef82bd00] [c01f1a34] device_attach+0x84/0xa8
...

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/m25p80.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index ae5fe91867e1..10ed195c0c1c 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -736,7 +736,7 @@ static int __devinit m25p_probe(struct spi_device *spi)
 			flash->partitioned = 1;
 			return add_mtd_partitions(&flash->mtd, parts, nr_parts);
 		}
-	} else if (data->nr_parts)
+	} else if (data && data->nr_parts)
 		dev_warn(&spi->dev, "ignoring %d default partitions on %s\n",
 				data->nr_parts, data->name);
 

From 4149ed1aa944ab864024982a2e568d17eccff504 Mon Sep 17 00:00:00 2001
From: Dimitri Gorokhovik <dimitri.gorokhovik@free.fr>
Date: Thu, 3 Sep 2009 14:59:13 +0100
Subject: [PATCH 25/60] mtd: nftl: write support is broken

Write support is broken in NFTL. Fix it.

Signed-off-by: Dimitri Gorokhovik <dimitri.gorokhovik@free.fr>
Cc: Tim Gardner <tim.gardner@canonical.com>
Cc: Scott James Remnant <scott@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nftlcore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index fb86cacd5bdb..665d3eba2f47 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -181,7 +181,7 @@ static int nftl_write(struct mtd_info *mtd, loff_t offs, size_t len,
 	int res;
 
 	ops.mode = MTD_OOB_PLACE;
-	ops.ooboffs = offs;
+	ops.ooboffs = offs & (mtd->writesize - 1);
 	ops.ooblen = mtd->oobsize;
 	ops.oobbuf = oob;
 	ops.datbuf = buf;

From 16f05c2b68520f94e365f9d347a7076f4ff00ad5 Mon Sep 17 00:00:00 2001
From: Dimitri Gorokhovik <dimitri.gorokhovik@free.fr>
Date: Thu, 3 Sep 2009 14:04:22 +0100
Subject: [PATCH 26/60] mtd: nftl: fix offset alignments

Arithmetic conversion in the mask computation makes the upper word
of the second argument passed down to mtd->read_oob() always be 0
(assuming 'offs' is a 64-bit signed long long type, and
'mtd->writesize' is a 32-bit unsigned int type).

This patch applies over the other one adding masking in nftl_write,
"nftl: write support is broken".

Signed-off-by: Dimitri Gorokhovik <dimitri.gorokhovik@free.fr>
Cc: Tim Gardner <tim.gardner@canonical.com>
Cc: Scott James Remnant <scott@canonical.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nftlcore.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 665d3eba2f47..1002e1882996 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -135,16 +135,17 @@ static void nftl_remove_dev(struct mtd_blktrans_dev *dev)
 int nftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 		  size_t *retlen, uint8_t *buf)
 {
+	loff_t mask = mtd->writesize - 1;
 	struct mtd_oob_ops ops;
 	int res;
 
 	ops.mode = MTD_OOB_PLACE;
-	ops.ooboffs = offs & (mtd->writesize - 1);
+	ops.ooboffs = offs & mask;
 	ops.ooblen = len;
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->read_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
+	res = mtd->read_oob(mtd, offs & ~mask, &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
@@ -155,16 +156,17 @@ int nftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 int nftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 		   size_t *retlen, uint8_t *buf)
 {
+	loff_t mask = mtd->writesize - 1;
 	struct mtd_oob_ops ops;
 	int res;
 
 	ops.mode = MTD_OOB_PLACE;
-	ops.ooboffs = offs & (mtd->writesize - 1);
+	ops.ooboffs = offs & mask;
 	ops.ooblen = len;
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
+	res = mtd->write_oob(mtd, offs & ~mask, &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
@@ -177,17 +179,18 @@ int nftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 static int nftl_write(struct mtd_info *mtd, loff_t offs, size_t len,
 		      size_t *retlen, uint8_t *buf, uint8_t *oob)
 {
+	loff_t mask = mtd->writesize - 1;
 	struct mtd_oob_ops ops;
 	int res;
 
 	ops.mode = MTD_OOB_PLACE;
-	ops.ooboffs = offs & (mtd->writesize - 1);
+	ops.ooboffs = offs & mask;
 	ops.ooblen = mtd->oobsize;
 	ops.oobbuf = oob;
 	ops.datbuf = buf;
 	ops.len = len;
 
-	res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
+	res = mtd->write_oob(mtd, offs & ~mask, &ops);
 	*retlen = ops.retlen;
 	return res;
 }

From bc8cec0dff072f1a45ce7f6b2c5234bb3411ac51 Mon Sep 17 00:00:00 2001
From: Massimo Cirillo <maxcir@gmail.com>
Date: Thu, 27 Aug 2009 10:44:09 +0200
Subject: [PATCH 27/60] JFFS2: add missing verify buffer
 allocation/deallocation

The function jffs2_nor_wbuf_flash_setup() doesn't allocate the verify buffer
if CONFIG_JFFS2_FS_WBUF_VERIFY is defined, causing a kernel panic when
that macro is enabled and the verify function is called. Similarly,
jffs2_nor_wbuf_flash_cleanup() must free the buffer if
CONFIG_JFFS2_FS_WBUF_VERIFY is enabled.
The following patch fixes the problem.
The following patch applies to the 2.6.30 kernel.

Signed-off-by: Massimo Cirillo <maxcir@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Cc: stable@kernel.org
---
 fs/jffs2/wbuf.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index d9a721e6db70..5ef7bac265e5 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1268,10 +1268,20 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
 	if (!c->wbuf)
 		return -ENOMEM;
 
+#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
+	c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
+	if (!c->wbuf_verify) {
+		kfree(c->wbuf);
+		return -ENOMEM;
+	}
+#endif
 	return 0;
 }
 
 void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) {
+#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
+	kfree(c->wbuf_verify);
+#endif
 	kfree(c->wbuf);
 }
 

From d76b1590e06a63a3d8697168cd0aabf1c4b3cb3a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Sep 2009 22:38:59 +0300
Subject: [PATCH 28/60] slub: Fix kmem_cache_destroy() with SLAB_DESTROY_BY_RCU

kmem_cache_destroy() should call rcu_barrier() *after* kmem_cache_close() and
*before* sysfs_slab_remove() or risk rcu_free_slab() being called after
kmem_cache is deleted (kfreed).

rmmod nf_conntrack can crash the machine because it has to kmem_cache_destroy()
a SLAB_DESTROY_BY_RCU enabled cache.

Cc: <stable@kernel.org>
Reported-by: Zdenek Kabelac <zdenek.kabelac@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 mm/slub.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index b9f1491a58a1..b6276753626e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2594,8 +2594,6 @@ static inline int kmem_cache_close(struct kmem_cache *s)
  */
 void kmem_cache_destroy(struct kmem_cache *s)
 {
-	if (s->flags & SLAB_DESTROY_BY_RCU)
-		rcu_barrier();
 	down_write(&slub_lock);
 	s->refcount--;
 	if (!s->refcount) {
@@ -2606,6 +2604,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
 				"still has objects.\n", s->name, __func__);
 			dump_stack();
 		}
+		if (s->flags & SLAB_DESTROY_BY_RCU)
+			rcu_barrier();
 		sysfs_slab_remove(s);
 	} else
 		up_write(&slub_lock);

From 2bcaa6a4238094c5695d5b1943078388d82d3004 Mon Sep 17 00:00:00 2001
From: Dave Andrews <jetdog330@hotmail.com>
Date: Thu, 3 Sep 2009 17:21:27 -0700
Subject: [PATCH 29/60] Input: atkbd - add Compaq Presario R4000-series repeat
 quirk

Compaq Presario R4000-series laptops are not sending a "volume up button
release" and "volume down button release" signal in the PS/2 protocol for
atkbd. The URL below has some confirmed reports:

https://bugs.launchpad.net/ubuntu/+source/linux/+bug/385477

Signed-off-by: Dave Andrews <jetdog330@hotmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/atkbd.c | 35 ++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 95fe0452dae4..6c6a09b1c0fe 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -879,6 +879,14 @@ static unsigned int atkbd_hp_zv6100_forced_release_keys[] = {
 	0xae, 0xb0, -1U
 };
 
+/*
+ * Perform fixup for HP (Compaq) Presario R4000 R4100 R4200 that don't generate
+ * release for their volume buttons
+ */
+static unsigned int atkbd_hp_r4000_forced_release_keys[] = {
+	0xae, 0xb0, -1U
+};
+
 /*
  * Samsung NC10,NC20 with Fn+F? key release not working
  */
@@ -1536,6 +1544,33 @@ static struct dmi_system_id atkbd_dmi_quirk_table[] __initdata = {
 		.callback = atkbd_setup_forced_release,
 		.driver_data = atkbd_hp_zv6100_forced_release_keys,
 	},
+	{
+		.ident = "HP Presario R4000",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4000"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_hp_r4000_forced_release_keys,
+	},
+	{
+		.ident = "HP Presario R4100",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4100"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_hp_r4000_forced_release_keys,
+	},
+	{
+		.ident = "HP Presario R4200",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4200"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_hp_r4000_forced_release_keys,
+	},
 	{
 		.ident = "Inventec Symphony",
 		.matches = {

From bd4352cadfacb9084c97c853b025fac010266c26 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 4 Sep 2009 03:38:54 -0700
Subject: [PATCH 30/60] sparc64: Fix bootup with mcount in some configs.

Functions invoked early when booting up a cpu can't use
tracing because mcount requires a valid 'current_thread_info()'
and TLB mappings to be setup.

The code path of sun4v_register_mondo_queues --> register_one_mondo
is one such case.  sun4v_register_mondo_queues already has the
necessary 'notrace' annotation, but register_one_mondo does not.

Normally register_one_mondo is inlined so the bug doesn't trigger,
but with some config/compiler combinations, it won't be so we
must properly mark it notrace.

While we're here, add 'notrace' annotations to prom_printf and
prom_halt so that early error handling won't have the same problem.

Reported-by: Alexander Beregalov <a.beregalov@gmail.com>
Reported-by: Leif Sawyer <lsawyer@gci.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/irq_64.c | 2 +-
 arch/sparc/prom/misc_64.c  | 2 +-
 arch/sparc/prom/printf.c   | 7 +++----
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index f0ee79055409..8daab33fc17d 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -886,7 +886,7 @@ void notrace init_irqwork_curcpu(void)
  * Therefore you cannot make any OBP calls, not even prom_printf,
  * from these two routines.
  */
-static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask)
+static void __cpuinit notrace register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask)
 {
 	unsigned long num_entries = (qmask + 1) / 64;
 	unsigned long status;
diff --git a/arch/sparc/prom/misc_64.c b/arch/sparc/prom/misc_64.c
index eedffb4fec2d..39fc6af21b7c 100644
--- a/arch/sparc/prom/misc_64.c
+++ b/arch/sparc/prom/misc_64.c
@@ -88,7 +88,7 @@ void prom_cmdline(void)
 /* Drop into the prom, but completely terminate the program.
  * No chance of continuing.
  */
-void prom_halt(void)
+void notrace prom_halt(void)
 {
 #ifdef CONFIG_SUN_LDOMS
 	if (ldom_domaining_enabled)
diff --git a/arch/sparc/prom/printf.c b/arch/sparc/prom/printf.c
index 660943ee4c2a..ca869266b9f3 100644
--- a/arch/sparc/prom/printf.c
+++ b/arch/sparc/prom/printf.c
@@ -14,14 +14,14 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/compiler.h>
 
 #include <asm/openprom.h>
 #include <asm/oplib.h>
 
 static char ppbuf[1024];
 
-void
-prom_write(const char *buf, unsigned int n)
+void notrace prom_write(const char *buf, unsigned int n)
 {
 	char ch;
 
@@ -33,8 +33,7 @@ prom_write(const char *buf, unsigned int n)
 	}
 }
 
-void
-prom_printf(const char *fmt, ...)
+void notrace prom_printf(const char *fmt, ...)
 {
 	va_list args;
 	int i;

From a77e28c7e1dc1a6a035c7627d4a88ecf3ea09aea Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Fri, 4 Sep 2009 20:40:16 +0100
Subject: [PATCH 31/60] dm multipath: fix oops when request based io fails when
 no paths

The patch posted at http://marc.info/?l=dm-devel&m=124539787228784&w=2
which was merged into cec47e3d4a861e1d942b3a580d0bbef2700d2bb2 ("dm:
prepare for request based option") introduced a regression in
request-based dm.

If map_request() calls dm_kill_unmapped_request() to complete a cloned
bio without dispatching it, clone->bio is still set when
dm_end_request() is called and the BUG_ON(clone->bio) is incorrect.

The patch fixes this bug by freeing bio in dm_end_request() if the clone
has bio.  I've redone my tests to cover all I/O paths and confirmed
there's no other regression.

Here is the oops I hit in request-based dm when I do I/O to a multipath
device which doesn't have any active path nor queue_if_no_path setting:

------------[ cut here ]------------
kernel BUG at /root/2.6.31-rc4.rqdm/drivers/md/dm.c:828!
invalid opcode: 0000 [#1] SMP
last sysfs file: /sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map
CPU 1
Modules linked in: autofs4 sunrpc cpufreq_ondemand acpi_cpufreq dm_mirror dm_region_hash dm_log dm_service_time dm_multipath scsi_dh dm_mod video output sbs sbshc battery ac sg sr_mod e1000e button cdrom serio_raw rtc_cmos rtc_core rtc_lib piix lpfc scsi_transport_fc ata_piix libata megaraid_sas sd_mod scsi_mod crc_t10dif ext3 jbd uhci_hcd ohci_hcd ehci_hcd [last unloaded: microcode]
Pid: 7, comm: ksoftirqd/1 Not tainted 2.6.31-rc4.rqdm #1 Express5800/120Lj [N8100-1417]
RIP: 0010:[<ffffffffa023629d>]  [<ffffffffa023629d>] dm_softirq_done+0xbd/0x100 [dm_mod]
RSP: 0018:ffff8800280a1f08  EFLAGS: 00010282
RAX: ffffffffa02544e0 RBX: ffff8802aa1111d0 RCX: ffff8802aa1111e0
RDX: ffff8802ab913e70 RSI: 0000000000000000 RDI: ffff8802ab913e70
RBP: ffff8800280a1f28 R08: ffffc90005457040 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: 00000000fffffffb
R13: ffff8802ab913e88 R14: ffff8802ab9c1438 R15: 0000000000000100
FS:  0000000000000000(0000) GS:ffff88002809e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000003d54a98640 CR3: 000000029f0a1000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process ksoftirqd/1 (pid: 7, threadinfo ffff8802ae50e000, task ffff8802ae4f8040)
Stack:
 ffff8800280a1f38 0000000000000020 ffffffff814f30a0 0000000000000004
<0> ffff8800280a1f58 ffffffff8116b245 ffff8800280a1f38 ffff8800280a1f38
<0> ffff8800280a1f58 0000000000000001 ffff8800280a1fa8 ffffffff810477bc
Call Trace:
 <IRQ>
 [<ffffffff8116b245>] blk_done_softirq+0x75/0x90
 [<ffffffff810477bc>] __do_softirq+0xcc/0x210
 [<ffffffff81047170>] ? ksoftirqd+0x0/0x110
 [<ffffffff8100ce7c>] call_softirq+0x1c/0x50
 <EOI>
 [<ffffffff8100e785>] do_softirq+0x65/0xa0
 [<ffffffff81047170>] ? ksoftirqd+0x0/0x110
 [<ffffffff810471e0>] ksoftirqd+0x70/0x110
 [<ffffffff81059559>] kthread+0x99/0xb0
 [<ffffffff8100cd7a>] child_rip+0xa/0x20
 [<ffffffff8100c73c>] ? restore_args+0x0/0x30
 [<ffffffff810594c0>] ? kthread+0x0/0xb0
 [<ffffffff8100cd70>] ? child_rip+0x0/0x20
Code: 44 89 e6 48 89 df e8 23 fb f2 e0 be 01 00 00 00 4c 89 f7 e8 f6 fd ff ff 5b 41 5c 41 5d 41 5e c9 c3 4c 89 ef e8 85 fe ff ff eb ed <0f> 0b eb fe 41 8b 85 dc 00 00 00 48 83 bb 10 01 00 00 00 89 83
RIP  [<ffffffffa023629d>] dm_softirq_done+0xbd/0x100 [dm_mod]
 RSP <ffff8800280a1f08>
---[ end trace 16af0a1d8542da55 ]---

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8a311ea0d441..b4845b14740d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -738,16 +738,22 @@ static void rq_completed(struct mapped_device *md, int run_queue)
 	dm_put(md);
 }
 
+static void free_rq_clone(struct request *clone)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+
+	blk_rq_unprep_clone(clone);
+	free_rq_tio(tio);
+}
+
 static void dm_unprep_request(struct request *rq)
 {
 	struct request *clone = rq->special;
-	struct dm_rq_target_io *tio = clone->end_io_data;
 
 	rq->special = NULL;
 	rq->cmd_flags &= ~REQ_DONTPREP;
 
-	blk_rq_unprep_clone(clone);
-	free_rq_tio(tio);
+	free_rq_clone(clone);
 }
 
 /*
@@ -825,8 +831,7 @@ static void dm_end_request(struct request *clone, int error)
 			rq->sense_len = clone->sense_len;
 	}
 
-	BUG_ON(clone->bio);
-	free_rq_tio(tio);
+	free_rq_clone(clone);
 
 	blk_end_request_all(rq, error);
 

From 8811f46c1f9386fc7017150de9d52359e5b1826e Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 4 Sep 2009 20:40:19 +0100
Subject: [PATCH 32/60] dm snapshot: implement iterate devices

Implement the .iterate_devices for the origin and snapshot targets.
dm-snapshot's lack of .iterate_devices resulted in the inability to
properly establish queue_limits for both targets.

With 4K sector drives: an unfortunate side-effect of not establishing
proper limits in either targets' DM device was that IO to the devices
would fail even though both had been created without error.

Commit af4874e03ed82f050d5872d8c39ce64bf16b5c38 ("dm target:s introduce
iterate devices fn") in 2.6.31-rc1 should have implemented .iterate_devices
for dm-snap.c's origin and snapshot targets.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index d573165cd2b7..57f1bf7f3b7a 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1176,6 +1176,15 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 	return 0;
 }
 
+static int snapshot_iterate_devices(struct dm_target *ti,
+				    iterate_devices_callout_fn fn, void *data)
+{
+	struct dm_snapshot *snap = ti->private;
+
+	return fn(ti, snap->origin, 0, ti->len, data);
+}
+
+
 /*-----------------------------------------------------------------
  * Origin methods
  *---------------------------------------------------------------*/
@@ -1410,20 +1419,29 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result,
 	return 0;
 }
 
+static int origin_iterate_devices(struct dm_target *ti,
+				  iterate_devices_callout_fn fn, void *data)
+{
+	struct dm_dev *dev = ti->private;
+
+	return fn(ti, dev, 0, ti->len, data);
+}
+
 static struct target_type origin_target = {
 	.name    = "snapshot-origin",
-	.version = {1, 6, 0},
+	.version = {1, 7, 0},
 	.module  = THIS_MODULE,
 	.ctr     = origin_ctr,
 	.dtr     = origin_dtr,
 	.map     = origin_map,
 	.resume  = origin_resume,
 	.status  = origin_status,
+	.iterate_devices = origin_iterate_devices,
 };
 
 static struct target_type snapshot_target = {
 	.name    = "snapshot",
-	.version = {1, 6, 0},
+	.version = {1, 7, 0},
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
@@ -1431,6 +1449,7 @@ static struct target_type snapshot_target = {
 	.end_io  = snapshot_end_io,
 	.resume  = snapshot_resume,
 	.status  = snapshot_status,
+	.iterate_devices = snapshot_iterate_devices,
 };
 
 static int __init dm_snapshot_init(void)

From f6a1ed10864b7540fa758bbccf3433fe17070329 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 4 Sep 2009 20:40:22 +0100
Subject: [PATCH 33/60] dm table: fix queue_limit checking device iterator

The logic to check for valid device areas is inverted relative to proper
use with iterate_devices.

The iterate_devices method calls its callback for every underlying
device in the target.  If any callback returns non-zero, iterate_devices
exits immediately.  But the callback device_area_is_valid() returns 0 on
error and 1 on success.  The overall effect without this patch is that an
error is issued only if every device is invalid.

This patch renames device_area_is_valid to device_area_is_invalid and
inverts the logic so that one invalid device is sufficient to raise
an error.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-table.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index d952b3441913..aa60526075d7 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -343,10 +343,10 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
 }
 
 /*
- * If possible, this checks an area of a destination device is valid.
+ * If possible, this checks an area of a destination device is invalid.
  */
-static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
-				sector_t start, sector_t len, void *data)
+static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
+				  sector_t start, sector_t len, void *data)
 {
 	struct queue_limits *limits = data;
 	struct block_device *bdev = dev->bdev;
@@ -357,16 +357,16 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
 	char b[BDEVNAME_SIZE];
 
 	if (!dev_size)
-		return 1;
+		return 0;
 
 	if ((start >= dev_size) || (start + len > dev_size)) {
 		DMWARN("%s: %s too small for target",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
-		return 0;
+		return 1;
 	}
 
 	if (logical_block_size_sectors <= 1)
-		return 1;
+		return 0;
 
 	if (start & (logical_block_size_sectors - 1)) {
 		DMWARN("%s: start=%llu not aligned to h/w "
@@ -374,7 +374,7 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
 		       dm_device_name(ti->table->md),
 		       (unsigned long long)start,
 		       limits->logical_block_size, bdevname(bdev, b));
-		return 0;
+		return 1;
 	}
 
 	if (len & (logical_block_size_sectors - 1)) {
@@ -383,10 +383,10 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
 		       dm_device_name(ti->table->md),
 		       (unsigned long long)len,
 		       limits->logical_block_size, bdevname(bdev, b));
-		return 0;
+		return 1;
 	}
 
-	return 1;
+	return 0;
 }
 
 /*
@@ -1000,8 +1000,8 @@ int dm_calculate_queue_limits(struct dm_table *table,
 		 * Check each device area is consistent with the target's
 		 * overall queue limits.
 		 */
-		if (!ti->type->iterate_devices(ti, device_area_is_valid,
-					       &ti_limits))
+		if (ti->type->iterate_devices(ti, device_area_is_invalid,
+					      &ti_limits))
 			return -EINVAL;
 
 combine_limits:

From a963a956225eb0f8c4d3537f428153c30adf54b8 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 4 Sep 2009 20:40:24 +0100
Subject: [PATCH 34/60] dm table: add more context to terse warning messages

A couple of recent warning messages make it difficult for the reader to
determine exactly what is wrong.  This patch adds more information to
those messages.

The messages were added by these commits:
  5dea271b6d87bd1d79a59c1d5baac2596a841c37 ("dm table: pass correct dev area size
to device_area_is_valid")
  ea9df47cc92573b159ef3b4fda516c32cba9c4fd ("dm table: fix blk_stack_limits arg
to use bytes not sectors")

The patch also corrects references to logical_block_size in printk format
strings from %hu to %u.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-table.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index aa60526075d7..c90e662d2802 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -360,8 +360,12 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
 		return 0;
 
 	if ((start >= dev_size) || (start + len > dev_size)) {
-		DMWARN("%s: %s too small for target",
-		       dm_device_name(ti->table->md), bdevname(bdev, b));
+		DMWARN("%s: %s too small for target: "
+		       "start=%llu, len=%llu, dev_size=%llu",
+		       dm_device_name(ti->table->md), bdevname(bdev, b),
+		       (unsigned long long)start,
+		       (unsigned long long)len,
+		       (unsigned long long)dev_size);
 		return 1;
 	}
 
@@ -370,7 +374,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
 
 	if (start & (logical_block_size_sectors - 1)) {
 		DMWARN("%s: start=%llu not aligned to h/w "
-		       "logical block size %hu of %s",
+		       "logical block size %u of %s",
 		       dm_device_name(ti->table->md),
 		       (unsigned long long)start,
 		       limits->logical_block_size, bdevname(bdev, b));
@@ -379,7 +383,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
 
 	if (len & (logical_block_size_sectors - 1)) {
 		DMWARN("%s: len=%llu not aligned to h/w "
-		       "logical block size %hu of %s",
+		       "logical block size %u of %s",
 		       dm_device_name(ti->table->md),
 		       (unsigned long long)len,
 		       limits->logical_block_size, bdevname(bdev, b));
@@ -496,8 +500,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 	}
 
 	if (blk_stack_limits(limits, &q->limits, start << 9) < 0)
-		DMWARN("%s: target device %s is misaligned",
-		       dm_device_name(ti->table->md), bdevname(bdev, b));
+		DMWARN("%s: target device %s is misaligned: "
+		       "physical_block_size=%u, logical_block_size=%u, "
+		       "alignment_offset=%u, start=%llu",
+		       dm_device_name(ti->table->md), bdevname(bdev, b),
+		       q->limits.physical_block_size,
+		       q->limits.logical_block_size,
+		       q->limits.alignment_offset,
+		       (unsigned long long) start << 9);
+
 
 	/*
 	 * Check if merge fn is supported.
@@ -698,7 +709,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
 
 	if (remaining) {
 		DMWARN("%s: table line %u (start sect %llu len %llu) "
-		       "not aligned to h/w logical block size %hu",
+		       "not aligned to h/w logical block size %u",
 		       dm_device_name(table->md), i,
 		       (unsigned long long) ti->begin,
 		       (unsigned long long) ti->len,

From 40bea431274c247425e7f5970d796ff7b37a2b22 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 4 Sep 2009 20:40:25 +0100
Subject: [PATCH 35/60] dm stripe: expose correct io hints

Set sensible I/O hints for striped DM devices in the topology
infrastructure added for 2.6.31 for userspace tools to
obtain via sysfs.

Add .io_hints to 'struct target_type' to allow the I/O hints portion
(io_min and io_opt) of the 'struct queue_limits' to be set by each
target and implement this for dm-stripe.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-stripe.c        | 13 ++++++++++++-
 drivers/md/dm-table.c         |  4 ++++
 include/linux/device-mapper.h |  4 ++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4e0e5937e42a..3e563d251733 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -329,9 +329,19 @@ static int stripe_iterate_devices(struct dm_target *ti,
 	return ret;
 }
 
+static void stripe_io_hints(struct dm_target *ti,
+			    struct queue_limits *limits)
+{
+	struct stripe_c *sc = ti->private;
+	unsigned chunk_size = (sc->chunk_mask + 1) << 9;
+
+	blk_limits_io_min(limits, chunk_size);
+	limits->io_opt = chunk_size * sc->stripes;
+}
+
 static struct target_type stripe_target = {
 	.name   = "striped",
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.module = THIS_MODULE,
 	.ctr    = stripe_ctr,
 	.dtr    = stripe_dtr,
@@ -339,6 +349,7 @@ static struct target_type stripe_target = {
 	.end_io = stripe_end_io,
 	.status = stripe_status,
 	.iterate_devices = stripe_iterate_devices,
+	.io_hints = stripe_io_hints,
 };
 
 int __init dm_stripe_init(void)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index c90e662d2802..1a6cb3c7822e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1007,6 +1007,10 @@ int dm_calculate_queue_limits(struct dm_table *table,
 		ti->type->iterate_devices(ti, dm_set_device_limits,
 					  &ti_limits);
 
+		/* Set I/O hints portion of queue limits */
+		if (ti->type->io_hints)
+			ti->type->io_hints(ti, &ti_limits);
+
 		/*
 		 * Check each device area is consistent with the target's
 		 * overall queue limits.
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 655e7721580a..df7607e6dce8 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -91,6 +91,9 @@ typedef int (*dm_iterate_devices_fn) (struct dm_target *ti,
 				      iterate_devices_callout_fn fn,
 				      void *data);
 
+typedef void (*dm_io_hints_fn) (struct dm_target *ti,
+				struct queue_limits *limits);
+
 /*
  * Returns:
  *    0: The target can handle the next I/O immediately.
@@ -151,6 +154,7 @@ struct target_type {
 	dm_merge_fn merge;
 	dm_busy_fn busy;
 	dm_iterate_devices_fn iterate_devices;
+	dm_io_hints_fn io_hints;
 
 	/* For internal device-mapper use. */
 	struct list_head list;

From 4142a969175302bc843d1505133488bfdbfa4732 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Fri, 4 Sep 2009 20:40:28 +0100
Subject: [PATCH 36/60] dm log: fix userspace status output

Fix 'dmsetup table' output.

There is a missing ' ' at the end of the string causing two
words to run together.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-log-userspace-base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index e69b96560997..2f2a244e1109 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -577,7 +577,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
 		break;
 	case STATUSTYPE_TABLE:
 		sz = 0;
-		DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1,
+		DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc + 1,
 		       lc->uuid, lc->usr_argv_str);
 		break;
 	}

From b8313b6da7e2e7c7f47d93d8561969a3ff9ba0ea Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Fri, 4 Sep 2009 20:40:30 +0100
Subject: [PATCH 37/60] dm log: remove incorrect field from userspace table
 output

The output of 'dmsetup table' includes an internal field that should not
be there.  This patch removes it.  To make the fix simpler, we first
reorder a constructor argument.

The 'device size' argument is generated internally.  Currently it is
placed as the last space-separated word of the constructor string.
However, we need to use a version of the string without this word, so we
move it to the beginning instead so it is trivial to skip past it.

We keep a copy of the arguments passed to userspace for creating a log,
just in case we need to resend them.  These are the same arguments that
are desired in the STATUSTYPE_TABLE request, except for one.  When
creating the userspace log, the userspace daemon must know the size of
the mirror, so that is added to the arguments given in the constructor
table.  We were printing this extra argument out as well, which is a
mistake.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-log-userspace-base.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 2f2a244e1109..c49da0a41c8e 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -111,10 +111,9 @@ static int build_constructor_string(struct dm_target *ti,
 		return -ENOMEM;
 	}
 
-	for (i = 0, str_size = 0; i < argc; i++)
-		str_size += sprintf(str + str_size, "%s ", argv[i]);
-	str_size += sprintf(str + str_size, "%llu",
-			    (unsigned long long)ti->len);
+	str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
+	for (i = 0; i < argc; i++)
+		str_size += sprintf(str + str_size, " %s", argv[i]);
 
 	*ctr_str = str;
 	return str_size;
@@ -561,6 +560,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
 			    char *result, unsigned maxlen)
 {
 	int r = 0;
+	char *table_args;
 	size_t sz = (size_t)maxlen;
 	struct log_c *lc = log->context;
 
@@ -577,8 +577,12 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
 		break;
 	case STATUSTYPE_TABLE:
 		sz = 0;
-		DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc + 1,
-		       lc->uuid, lc->usr_argv_str);
+		table_args = strstr(lc->usr_argv_str, " ");
+		BUG_ON(!table_args); /* There will always be a ' ' */
+		table_args++;
+
+		DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
+		       lc->uuid, table_args);
 		break;
 	}
 	return (r) ? 0 : (int)sz;

From d2b698644c97cb033261536a4f2010924a00eac9 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Fri, 4 Sep 2009 20:40:32 +0100
Subject: [PATCH 38/60] dm raid1: do not allow log_failure variable to unset
 after being set

This patch fixes a bug which was triggering a case where the primary leg
could not be changed on failure even when the mirror was in-sync.

The case involves the failure of the primary device along with
the transient failure of the log device.  The problem is that
bios can be put on the 'failures' list (due to log failure)
before 'fail_mirror' is called due to the primary device failure.
Normally, this is fine, but if the log device failure is transient,
a subsequent iteration of the work thread, 'do_mirror', will
reset 'log_failure'.  The 'do_failures' function then resets
the 'in_sync' variable when processing bios on the failures list.
The 'in_sync' variable is what is used to determine if the
primary device can be switched in the event of a failure.  Since
this has been reset, the primary device is incorrectly assumed
to be not switchable.

The case has been seen in the cluster mirror context, where one
machine realizes the log device is dead before the other machines.
As the responsibilities of the server migrate from one node to
another (because the mirror is being reconfigured due to the failure),
the new server may think for a moment that the log device is fine -
thus resetting the 'log_failure' variable.

In any case, it is inappropriate for us to reset the 'log_failure'
variable.  The above bug simply illustrates that it can actually
hurt us.

Cc: stable@kernel.org
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-raid1.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9726577cde49..33f179e66bf5 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -648,7 +648,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	 */
 	dm_rh_inc_pending(ms->rh, &sync);
 	dm_rh_inc_pending(ms->rh, &nosync);
-	ms->log_failure = dm_rh_flush(ms->rh) ? 1 : 0;
+
+	/*
+	 * If the flush fails on a previous call and succeeds here,
+	 * we must not reset the log_failure variable.  We need
+	 * userspace interaction to do that.
+	 */
+	ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure;
 
 	/*
 	 * Dispatch io.

From 7ec23d50949d5062b5b749638dd9380ed75e58e5 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Fri, 4 Sep 2009 20:40:34 +0100
Subject: [PATCH 39/60] dm log: userspace add luid to distinguish between
 concurrent log instances

Device-mapper userspace logs (like the clustered log) are
identified by a universally unique identifier (UUID).  This
identifier is used to associate requests from the kernel to
a specific log in userspace.  The UUID must be unique everywhere,
since multiple machines may use this identifier when communicating
about a particular log, as is the case for cluster logs.

Sometimes, device-mapper/LVM may re-use a UUID.  This is the
case during pvmoves, when moving from one segment of an LV
to another, or when resizing a mirror, etc.  In these cases,
a new log is created with the same UUID and loaded in the
"inactive" slot.  When a device-mapper "resume" is issued,
the "live" table is deactivated and the new "inactive" table
becomes "live".  (The "inactive" table can also be removed
via a device-mapper 'clear' command.)

The above two issues were colliding.  More than one log was being
created with the same UUID, and there was no way to distinguish
between them.  So, sometimes the wrong log would be swapped
out during the exchange.

The solution is to create a locally unique identifier,
'luid', to go along with the UUID.  This new identifier is used
to determine exactly which log is being referenced by the kernel
when the log exchange is made.  The identifier is not
universally safe, but it does not need to be, since
create/destroy/suspend/resume operations are bound to a specific
machine; and these are the operations that make up the exchange.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-log-userspace-base.c     | 23 ++++++++++++++---------
 drivers/md/dm-log-userspace-transfer.c |  6 ++++--
 drivers/md/dm-log-userspace-transfer.h |  2 +-
 include/linux/dm-log-userspace.h       | 13 ++++++++++++-
 4 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index c49da0a41c8e..6e186b1a062d 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -21,6 +21,7 @@ struct log_c {
 	struct dm_target *ti;
 	uint32_t region_size;
 	region_t region_count;
+	uint64_t luid;
 	char uuid[DM_UUID_LEN];
 
 	char *usr_argv_str;
@@ -63,7 +64,7 @@ static int userspace_do_request(struct log_c *lc, const char *uuid,
 	 * restored.
 	 */
 retry:
-	r = dm_consult_userspace(uuid, request_type, data,
+	r = dm_consult_userspace(uuid, lc->luid, request_type, data,
 				 data_size, rdata, rdata_size);
 
 	if (r != -ESRCH)
@@ -74,14 +75,15 @@ static int userspace_do_request(struct log_c *lc, const char *uuid,
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(2*HZ);
 		DMWARN("Attempting to contact userspace log server...");
-		r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
+		r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
+					 lc->usr_argv_str,
 					 strlen(lc->usr_argv_str) + 1,
 					 NULL, NULL);
 		if (!r)
 			break;
 	}
 	DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
-	r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
+	r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
 				 0, NULL, NULL);
 	if (!r)
 		goto retry;
@@ -153,6 +155,9 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 		return -ENOMEM;
 	}
 
+	/* The ptr value is sufficient for local unique id */
+	lc->luid = (uint64_t)lc;
+
 	lc->ti = ti;
 
 	if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
@@ -172,7 +177,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 	}
 
 	/* Send table string */
-	r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
+	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
 				 ctr_str, str_size, NULL, NULL);
 
 	if (r == -ESRCH) {
@@ -182,7 +187,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 
 	/* Since the region size does not change, get it now */
 	rdata_size = sizeof(rdata);
-	r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
+	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
 				 NULL, 0, (char *)&rdata, &rdata_size);
 
 	if (r) {
@@ -211,7 +216,7 @@ static void userspace_dtr(struct dm_dirty_log *log)
 	int r;
 	struct log_c *lc = log->context;
 
-	r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
+	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
 				 NULL, 0,
 				 NULL, NULL);
 
@@ -226,7 +231,7 @@ static int userspace_presuspend(struct dm_dirty_log *log)
 	int r;
 	struct log_c *lc = log->context;
 
-	r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
+	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
 				 NULL, 0,
 				 NULL, NULL);
 
@@ -238,7 +243,7 @@ static int userspace_postsuspend(struct dm_dirty_log *log)
 	int r;
 	struct log_c *lc = log->context;
 
-	r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
+	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
 				 NULL, 0,
 				 NULL, NULL);
 
@@ -251,7 +256,7 @@ static int userspace_resume(struct dm_dirty_log *log)
 	struct log_c *lc = log->context;
 
 	lc->in_sync_hint = 0;
-	r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
+	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
 				 NULL, 0,
 				 NULL, NULL);
 
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 8ce74d95ae4d..ba0edad2d048 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -147,7 +147,8 @@ static void cn_ulog_callback(void *data)
 
 /**
  * dm_consult_userspace
- * @uuid: log's uuid (must be DM_UUID_LEN in size)
+ * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size)
+ * @luid: log's local unique identifier
  * @request_type:  found in include/linux/dm-log-userspace.h
  * @data: data to tx to the server
  * @data_size: size of data in bytes
@@ -163,7 +164,7 @@ static void cn_ulog_callback(void *data)
  *
  * Returns: 0 on success, -EXXX on failure
  **/
-int dm_consult_userspace(const char *uuid, int request_type,
+int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
 			 char *data, size_t data_size,
 			 char *rdata, size_t *rdata_size)
 {
@@ -190,6 +191,7 @@ int dm_consult_userspace(const char *uuid, int request_type,
 
 	memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size);
 	memcpy(tfr->uuid, uuid, DM_UUID_LEN);
+	tfr->luid = luid;
 	tfr->seq = dm_ulog_seq++;
 
 	/*
diff --git a/drivers/md/dm-log-userspace-transfer.h b/drivers/md/dm-log-userspace-transfer.h
index c26d8e4e2710..04ee874f9153 100644
--- a/drivers/md/dm-log-userspace-transfer.h
+++ b/drivers/md/dm-log-userspace-transfer.h
@@ -11,7 +11,7 @@
 
 int dm_ulog_tfr_init(void);
 void dm_ulog_tfr_exit(void);
-int dm_consult_userspace(const char *uuid, int request_type,
+int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
 			 char *data, size_t data_size,
 			 char *rdata, size_t *rdata_size);
 
diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h
index 642e3017b51f..8a1f972c0fe9 100644
--- a/include/linux/dm-log-userspace.h
+++ b/include/linux/dm-log-userspace.h
@@ -371,7 +371,18 @@
 	(DM_ULOG_REQUEST_MASK & (request_type))
 
 struct dm_ulog_request {
-	char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */
+	/*
+	 * The local unique identifier (luid) and the universally unique
+	 * identifier (uuid) are used to tie a request to a specific
+	 * mirror log.  A single machine log could probably make due with
+	 * just the 'luid', but a cluster-aware log must use the 'uuid' and
+	 * the 'luid'.  The uuid is what is required for node to node
+	 * communication concerning a particular log, but the 'luid' helps
+	 * differentiate between logs that are being swapped and have the
+	 * same 'uuid'.  (Think "live" and "inactive" device-mapper tables.)
+	 */
+	uint64_t luid;
+	char uuid[DM_UUID_LEN];
 	char padding[7];        /* Padding because DM_UUID_LEN = 129 */
 
 	int32_t error;          /* Used to report back processing errors */

From 02d2fd31defce6ff77146ad0fef4f19006055d86 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 4 Sep 2009 20:40:37 +0100
Subject: [PATCH 40/60] dm snapshot: refactor zero_disk_area to use chunk_io

Refactor chunk_io to prepare for the fix in the following patch.

Pass an area pointer to chunk_io and simplify zero_disk_area to use
chunk_io.  No functional change.

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap-persistent.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 6e3fe4f14934..2a3d626a98d9 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -188,7 +188,8 @@ static void do_metadata(struct work_struct *work)
 /*
  * Read or write a chunk aligned and sized block of data from a device.
  */
-static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
+static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
+		    int metadata)
 {
 	struct dm_io_region where = {
 		.bdev = ps->store->cow->bdev,
@@ -198,7 +199,7 @@ static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
 	struct dm_io_request io_req = {
 		.bi_rw = rw,
 		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->area,
+		.mem.ptr.vma = area,
 		.client = ps->io_client,
 		.notify.fn = NULL,
 	};
@@ -240,7 +241,7 @@ static int area_io(struct pstore *ps, int rw)
 
 	chunk = area_location(ps, ps->current_area);
 
-	r = chunk_io(ps, chunk, rw, 0);
+	r = chunk_io(ps, ps->area, chunk, rw, 0);
 	if (r)
 		return r;
 
@@ -254,20 +255,7 @@ static void zero_memory_area(struct pstore *ps)
 
 static int zero_disk_area(struct pstore *ps, chunk_t area)
 {
-	struct dm_io_region where = {
-		.bdev = ps->store->cow->bdev,
-		.sector = ps->store->chunk_size * area_location(ps, area),
-		.count = ps->store->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = WRITE,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->zero_area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-
-	return dm_io(&io_req, 1, &where, NULL);
+	return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
 }
 
 static int read_header(struct pstore *ps, int *new_snapshot)
@@ -297,7 +285,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 	if (r)
 		return r;
 
-	r = chunk_io(ps, 0, READ, 1);
+	r = chunk_io(ps, ps->area, 0, READ, 1);
 	if (r)
 		goto bad;
 
@@ -359,7 +347,7 @@ static int write_header(struct pstore *ps)
 	dh->version = cpu_to_le32(ps->version);
 	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
 
-	return chunk_io(ps, 0, WRITE, 1);
+	return chunk_io(ps, ps->area, 0, WRITE, 1);
 }
 
 /*

From 61578dcd3fafe6babd72e8db32110cc0b630a432 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 4 Sep 2009 20:40:39 +0100
Subject: [PATCH 41/60] dm snapshot: fix header corruption race on invalidation

If a persistent snapshot fills up, a race can corrupt the on-disk header
which causes a crash on any future attempt to activate the snapshot
(typically while booting).  This patch fixes the race.

When the snapshot overflows, __invalidate_snapshot is called, which calls
snapshot store method drop_snapshot. It goes to persistent_drop_snapshot that
calls write_header. write_header constructs the new header in the "area"
location.

Concurrently, an existing kcopyd job may finish, call copy_callback
and commit_exception method, that goes to persistent_commit_exception.
persistent_commit_exception doesn't do locking, relying on the fact that
callbacks are single-threaded, but it can race with snapshot invalidation and
overwrite the header that is just being written while the snapshot is being
invalidated.

The result of this race is a corrupted header being written that can
lead to a crash on further reactivation (if chunk_size is zero in the
corrupted header).

The fix is to use separate memory areas for each.

See the bug: https://bugzilla.redhat.com/show_bug.cgi?id=461506

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap-persistent.c | 44 +++++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 2a3d626a98d9..5d1a97580cb7 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -105,6 +105,13 @@ struct pstore {
 	 */
 	void *zero_area;
 
+	/*
+	 * An area used for header. The header can be written
+	 * concurrently with metadata (when invalidating the snapshot),
+	 * so it needs a separate buffer.
+	 */
+	void *header_area;
+
 	/*
 	 * Used to keep track of which metadata area the data in
 	 * 'chunk' refers to.
@@ -148,16 +155,27 @@ static int alloc_area(struct pstore *ps)
 	 */
 	ps->area = vmalloc(len);
 	if (!ps->area)
-		return r;
+		goto err_area;
 
 	ps->zero_area = vmalloc(len);
-	if (!ps->zero_area) {
-		vfree(ps->area);
-		return r;
-	}
+	if (!ps->zero_area)
+		goto err_zero_area;
 	memset(ps->zero_area, 0, len);
 
+	ps->header_area = vmalloc(len);
+	if (!ps->header_area)
+		goto err_header_area;
+
 	return 0;
+
+err_header_area:
+	vfree(ps->zero_area);
+
+err_zero_area:
+	vfree(ps->area);
+
+err_area:
+	return r;
 }
 
 static void free_area(struct pstore *ps)
@@ -169,6 +187,10 @@ static void free_area(struct pstore *ps)
 	if (ps->zero_area)
 		vfree(ps->zero_area);
 	ps->zero_area = NULL;
+
+	if (ps->header_area)
+		vfree(ps->header_area);
+	ps->header_area = NULL;
 }
 
 struct mdata_req {
@@ -285,11 +307,11 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 	if (r)
 		return r;
 
-	r = chunk_io(ps, ps->area, 0, READ, 1);
+	r = chunk_io(ps, ps->header_area, 0, READ, 1);
 	if (r)
 		goto bad;
 
-	dh = (struct disk_header *) ps->area;
+	dh = ps->header_area;
 
 	if (le32_to_cpu(dh->magic) == 0) {
 		*new_snapshot = 1;
@@ -339,15 +361,15 @@ static int write_header(struct pstore *ps)
 {
 	struct disk_header *dh;
 
-	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
+	memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);
 
-	dh = (struct disk_header *) ps->area;
+	dh = ps->header_area;
 	dh->magic = cpu_to_le32(SNAP_MAGIC);
 	dh->valid = cpu_to_le32(ps->valid);
 	dh->version = cpu_to_le32(ps->version);
 	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
 
-	return chunk_io(ps, ps->area, 0, WRITE, 1);
+	return chunk_io(ps, ps->header_area, 0, WRITE, 1);
 }
 
 /*
@@ -667,6 +689,8 @@ static int persistent_ctr(struct dm_exception_store *store,
 	ps->valid = 1;
 	ps->version = SNAPSHOT_DISK_VERSION;
 	ps->area = NULL;
+	ps->zero_area = NULL;
+	ps->header_area = NULL;
 	ps->next_free = 2;	/* skipping the header and first area */
 	ps->current_committed = 0;
 

From 2defcc3fb4661e7351cb2ac48d843efc4c64db13 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 4 Sep 2009 20:40:41 +0100
Subject: [PATCH 42/60] dm exception store: split set_chunk_size

Break the function set_chunk_size to two functions in preparation for
the fix in the following patch.

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-exception-store.c | 8 ++++++++
 drivers/md/dm-exception-store.h | 4 ++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 3710ff88fc10..4c01c7535fb5 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -171,6 +171,14 @@ static int set_chunk_size(struct dm_exception_store *store,
 	 */
 	chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
 
+	return dm_exception_store_set_chunk_size(store, chunk_size_ulong,
+						 error);
+}
+
+int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
+				      unsigned long chunk_size_ulong,
+				      char **error)
+{
 	/* Check chunk_size is a power of 2 */
 	if (!is_power_of_2(chunk_size_ulong)) {
 		*error = "Chunk size is not a power of 2";
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 2442c8c07898..812c71872ba0 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -168,6 +168,10 @@ static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
 int dm_exception_store_type_register(struct dm_exception_store_type *type);
 int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
 
+int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
+				      unsigned long chunk_size_ulong,
+				      char **error);
+
 int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
 			      unsigned *args_used,
 			      struct dm_exception_store **store);

From ae0b7448e91353ea5f821601a055aca6b58042cd Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 4 Sep 2009 20:40:43 +0100
Subject: [PATCH 43/60] dm snapshot: fix on disk chunk size validation

Fix some problems seen in the chunk size processing when activating a
pre-existing snapshot.

For a new snapshot, the chunk size can either be supplied by the creator
or a default value can be used.  For an existing snapshot, the
chunk size in the snapshot header on disk should always be used.

If someone attempts to load an existing snapshot and has the 'default
chunk size' option set, the kernel uses its default value even when it
is incorrect for the snapshot being loaded.  This patch ensures the
correct on-disk value is always used.

Secondly, when the code does use the chunk size stored on the disk it is
prudent to revalidate it, so the code can exit cleanly if it got
corrupted as happened in
https://bugzilla.redhat.com/show_bug.cgi?id=461506 .

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-exception-store.c |  5 +++++
 drivers/md/dm-snap-persistent.c | 22 ++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 4c01c7535fb5..556acff3952f 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -191,6 +191,11 @@ int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
 		return -EINVAL;
 	}
 
+	if (chunk_size_ulong > INT_MAX >> SECTOR_SHIFT) {
+		*error = "Chunk size is too high";
+		return -EINVAL;
+	}
+
 	store->chunk_size = chunk_size_ulong;
 	store->chunk_mask = chunk_size_ulong - 1;
 	store->chunk_shift = ffs(chunk_size_ulong) - 1;
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 5d1a97580cb7..d5b2e08750d5 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -286,6 +286,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 	struct disk_header *dh;
 	chunk_t chunk_size;
 	int chunk_size_supplied = 1;
+	char *chunk_err;
 
 	/*
 	 * Use default chunk size (or hardsect_size, if larger) if none supplied
@@ -329,20 +330,25 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 	ps->version = le32_to_cpu(dh->version);
 	chunk_size = le32_to_cpu(dh->chunk_size);
 
-	if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
+	if (ps->store->chunk_size == chunk_size)
 		return 0;
 
-	DMWARN("chunk size %llu in device metadata overrides "
-	       "table chunk size of %llu.",
-	       (unsigned long long)chunk_size,
-	       (unsigned long long)ps->store->chunk_size);
+	if (chunk_size_supplied)
+		DMWARN("chunk size %llu in device metadata overrides "
+		       "table chunk size of %llu.",
+		       (unsigned long long)chunk_size,
+		       (unsigned long long)ps->store->chunk_size);
 
 	/* We had a bogus chunk_size. Fix stuff up. */
 	free_area(ps);
 
-	ps->store->chunk_size = chunk_size;
-	ps->store->chunk_mask = chunk_size - 1;
-	ps->store->chunk_shift = ffs(chunk_size) - 1;
+	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
+					      &chunk_err);
+	if (r) {
+		DMERR("invalid on-disk chunk size %llu: %s.",
+		      (unsigned long long)chunk_size, chunk_err);
+		return r;
+	}
 
 	r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
 				ps->io_client);

From 8379e7c46cc48f51197dd663fc6676f47f2a1e71 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Fri, 4 Sep 2009 11:12:01 -0700
Subject: [PATCH 44/60] ocfs2: ocfs2_write_begin_nolock() should handle len=0

Bug introduced by mainline commit e7432675f8ca868a4af365759a8d4c3779a3d922
The bug causes ocfs2_write_begin_nolock() to oops when len=0.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Cc: stable@kernel.org
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/aops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b401654011a2..8a1e61545f41 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1747,8 +1747,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 	 * we know zeros will only be needed in the first and/or last cluster.
 	 */
 	if (clusters_to_alloc || extents_to_split ||
-	    wc->w_desc[0].c_needs_zero ||
-	    wc->w_desc[wc->w_clen - 1].c_needs_zero)
+	    (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
+			    wc->w_desc[wc->w_clen - 1].c_needs_zero)))
 		cluster_of_pages = 1;
 	else
 		cluster_of_pages = 0;

From 1821bc19d54009b6f5e6462dd79074d728080839 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sat, 5 Sep 2009 13:23:49 +0200
Subject: [PATCH 45/60] firewire: core: fix crash in iso resource management

This fixes a regression due to post 2.6.30 commit "firewire: core: do
not DMA-map stack addresses" 6fdc03709433ccc2005f0f593ae9d9dd04f7b485.

As David Moore noted, a previously correct sizeof() expression became
wrong since the commit changed its argument from an array to a pointer.
This resulted in an oops in ohci_cancel_packet in the shared workqueue
thread's context when an isochronous resource was to be freed.

Reported-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-iso.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c
index 110e731f5574..1c0b504a42f3 100644
--- a/drivers/firewire/core-iso.c
+++ b/drivers/firewire/core-iso.c
@@ -196,7 +196,7 @@ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation,
 		switch (fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP,
 				irm_id, generation, SCODE_100,
 				CSR_REGISTER_BASE + CSR_BANDWIDTH_AVAILABLE,
-				data, sizeof(data))) {
+				data, 8)) {
 		case RCODE_GENERATION:
 			/* A generation change frees all bandwidth. */
 			return allocate ? -EAGAIN : bandwidth;
@@ -233,7 +233,7 @@ static int manage_channel(struct fw_card *card, int irm_id, int generation,
 		data[1] = old ^ c;
 		switch (fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP,
 					   irm_id, generation, SCODE_100,
-					   offset, data, sizeof(data))) {
+					   offset, data, 8)) {
 		case RCODE_GENERATION:
 			/* A generation change frees all channels. */
 			return allocate ? -EAGAIN : i;

From fc383796a8cc5df0a0c8633a16dd2e9528a16a63 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Fri, 28 Aug 2009 13:25:15 +0200
Subject: [PATCH 46/60] firewire: ohci: fix Agere FW643 and multiple cameras

An Agere FW643 OHCI 1.1 card works fine for video reception from one
camera but fails early if receiving from two cameras.  After a short
while, no IR IRQ events occur and the context control register does not
react anymore.  This happens regardless whether both IR DMA contexts are
dual-buffer or one is dual-buffer and the other packet-per-buffer.

This can be worked around by disabling dual buffer DMA mode entirely.
http://sourceforge.net/mailarchive/message.php?msg_name=4A7C0594.2020208%40gmail.com
(Reported by Samuel Audet.)

In another report (by Jonathan Cameron), an FW643 works OK with two
cameras in dual buffer mode.  Whether this is due to different chip
revisions or different usage patterns (different video formats) is not
yet clear.  However, as far as the current capabilities of
firewire-core's isochronous I/O interface are concerned, simply
switching off dual-buffer on non-working and working FW643s alike is not
a problem in practice.  We only need to revisit this issue if we are
going to enhance the interface, e.g. so that applications can explicitly
choose modes.

Reported-by: Samuel Audet <samuel.audet@gmail.com>
Reported-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/ohci.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index ecddd11b797a..3486bc49c177 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
+#include <linux/pci_ids.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
 
@@ -2372,6 +2373,9 @@ static void ohci_pmac_off(struct pci_dev *dev)
 #define ohci_pmac_off(dev)
 #endif /* CONFIG_PPC_PMAC */
 
+#define PCI_VENDOR_ID_AGERE		PCI_VENDOR_ID_ATT
+#define PCI_DEVICE_ID_AGERE_FW643	0x5901
+
 static int __devinit pci_probe(struct pci_dev *dev,
 			       const struct pci_device_id *ent)
 {
@@ -2422,6 +2426,11 @@ static int __devinit pci_probe(struct pci_dev *dev,
 	version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff;
 	ohci->use_dualbuffer = version >= OHCI_VERSION_1_1;
 
+	/* dual-buffer mode is broken if more than one IR context is active */
+	if (dev->vendor == PCI_VENDOR_ID_AGERE &&
+	    dev->device == PCI_DEVICE_ID_AGERE_FW643)
+		ohci->use_dualbuffer = false;
+
 /* x86-32 currently doesn't use highmem for dma_alloc_coherent */
 #if !defined(CONFIG_X86_32)
 	/* dual-buffer mode is broken with descriptor addresses above 2G */

From 4fe0badd5882c64dc2dcd8893f9b85db63339736 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Fri, 28 Aug 2009 13:26:03 +0200
Subject: [PATCH 47/60] firewire: ohci: fix Ricoh R5C832, video reception

In dual-buffer DMA mode, no video frames are ever received from R5C832
by libdc1394.  Fallback to packet-per-buffer DMA works reliably.
http://thread.gmane.org/gmane.linux.kernel.firewire.devel/13393/focus=13476

Reported-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/ohci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 3486bc49c177..76b321bb73f9 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -2431,6 +2431,11 @@ static int __devinit pci_probe(struct pci_dev *dev,
 	    dev->device == PCI_DEVICE_ID_AGERE_FW643)
 		ohci->use_dualbuffer = false;
 
+	/* dual-buffer mode is broken */
+	if (dev->vendor == PCI_VENDOR_ID_RICOH &&
+	    dev->device == PCI_DEVICE_ID_RICOH_R5C832)
+		ohci->use_dualbuffer = false;
+
 /* x86-32 currently doesn't use highmem for dma_alloc_coherent */
 #if !defined(CONFIG_X86_32)
 	/* dual-buffer mode is broken with descriptor addresses above 2G */

From baed6b82d9f160184c1c14cdb4accb08f3eb6b87 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Thu, 3 Sep 2009 23:07:35 +0200
Subject: [PATCH 48/60] firewire: sbp2: fix freeing of unallocated memory

If a target writes invalid status (typically status of a command that
already timed out), firewire-sbp2 attempts to put away an ORB that
doesn't exist.  https://bugzilla.redhat.com/show_bug.cgi?id=519772

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/sbp2.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index d27cb058da82..05f0c0c55f4a 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -456,12 +456,12 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request,
 	}
 	spin_unlock_irqrestore(&card->lock, flags);
 
-	if (&orb->link != &lu->orb_list)
+	if (&orb->link != &lu->orb_list) {
 		orb->callback(orb, &status);
-	else
+		kref_put(&orb->kref, free_orb);
+	} else {
 		fw_error("status write for unknown orb\n");
-
-	kref_put(&orb->kref, free_orb);
+	}
 
 	fw_send_response(card, request, RCODE_COMPLETE);
 }

From 4e49627b9bc29a14b393c480e8c979e3bc922ef7 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 5 Sep 2009 11:17:06 -0700
Subject: [PATCH 49/60] workqueues: introduce __cancel_delayed_work()

cancel_delayed_work() has to use del_timer_sync() to guarantee the timer
function is not running after return.  But most users don't actually
need this, and del_timer_sync() has problems: it is not useable from
interrupt, and it depends on every lock which could be taken from irq.

Introduce __cancel_delayed_work() which calls del_timer() instead.

The immediate reason for this patch is
http://bugzilla.kernel.org/show_bug.cgi?id=13757
but hopefully this helper makes sense anyway.

As for 13757 bug, actually we need requeue_delayed_work(), but its
semantics are not yet clear.

Merge this patch early to resolve cross-tree interdependencies between
input and infiniband.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/workqueue.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 13e1adf55c4c..6273fa97b527 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -240,6 +240,21 @@ static inline int cancel_delayed_work(struct delayed_work *work)
 	return ret;
 }
 
+/*
+ * Like above, but uses del_timer() instead of del_timer_sync(). This means,
+ * if it returns 0 the timer function may be running and the queueing is in
+ * progress.
+ */
+static inline int __cancel_delayed_work(struct delayed_work *work)
+{
+	int ret;
+
+	ret = del_timer(&work->timer);
+	if (ret)
+		work_clear_pending(&work->work);
+	return ret;
+}
+
 extern int cancel_delayed_work_sync(struct delayed_work *work);
 
 /* Obsolete. use cancel_delayed_work_sync() */

From a190887b58c32d19c2eee007c5eb8faa970a69ba Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 5 Sep 2009 11:17:07 -0700
Subject: [PATCH 50/60] nommu: fix error handling in do_mmap_pgoff()

Fix the error handling in do_mmap_pgoff().  If do_mmap_shared_file() or
do_mmap_private() fail, we jump to the error_put_region label at which
point we call __put_nommu_region() on the region - but we haven't yet
added the region to the tree, and so __put_nommu_region() may BUG
because the region tree is empty or it may corrupt the region tree.

To get around this, we can afford to add the region to the region tree
before calling do_mmap_shared_file() or do_mmap_private() as we keep
nommu_region_sem write-locked, so no-one can race with us by seeing a
transient region.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/nommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/nommu.c b/mm/nommu.c
index 4bde489ec431..66e81e7e9fe9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1352,6 +1352,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 	}
 
 	vma->vm_region = region;
+	add_nommu_region(region);
 
 	/* set up the mapping */
 	if (file && vma->vm_flags & VM_SHARED)
@@ -1361,8 +1362,6 @@ unsigned long do_mmap_pgoff(struct file *file,
 	if (ret < 0)
 		goto error_put_region;
 
-	add_nommu_region(region);
-
 	/* okay... we have a mapping; now we have to register it */
 	result = vma->vm_start;
 

From dd5d241ea955006122d76af88af87de73fec25b4 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Sat, 5 Sep 2009 11:17:11 -0700
Subject: [PATCH 51/60] page-allocator: always change pageblock ownership when
 anti-fragmentation is disabled

On low-memory systems, anti-fragmentation gets disabled as fragmentation
cannot be avoided on a sufficiently large boundary to be worthwhile.  Once
disabled, there is a period of time when all the pageblocks are marked
MOVABLE and the expectation is that they get marked UNMOVABLE at each call
to __rmqueue_fallback().

However, when MAX_ORDER is large the pageblocks do not change ownership
because the normal criteria are not met.  This has the effect of
prematurely breaking up too many large contiguous blocks.  This is most
serious on NOMMU systems which depend on high-order allocations to boot.
This patch causes pageblocks to change ownership on every fallback when
anti-fragmentation is disabled.  This prevents the large blocks being
prematurely broken up.

This is a fix to commit 49255c619fbd482d704289b5eb2795f8e3b7ff2e [page
allocator: move check for disabled anti-fragmentation out of fastpath] and
the problem affects 2.6.31-rc8.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Tested-by: Paul Mundt <lethal@linux-sh.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5cc986eb9f6f..a0de15f46987 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -817,13 +817,15 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			 * agressive about taking ownership of free pages
 			 */
 			if (unlikely(current_order >= (pageblock_order >> 1)) ||
-					start_migratetype == MIGRATE_RECLAIMABLE) {
+					start_migratetype == MIGRATE_RECLAIMABLE ||
+					page_group_by_mobility_disabled) {
 				unsigned long pages;
 				pages = move_freepages_block(zone, page,
 								start_migratetype);
 
 				/* Claim the whole block if over half of it is free */
-				if (pages >= (1 << (pageblock_order-1)))
+				if (pages >= (1 << (pageblock_order-1)) ||
+						page_group_by_mobility_disabled)
 					set_pageblock_migratetype(page,
 								start_migratetype);
 

From a2a8474c3fff88d8dd52d05cb450563fb26fd26c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 5 Sep 2009 11:17:13 -0700
Subject: [PATCH 52/60] exec: do not sleep in TASK_TRACED under
 ->cred_guard_mutex

Tom Horsley reports that his debugger hangs when it tries to read
/proc/pid_of_tracee/maps, this happens since

	"mm_for_maps: take ->cred_guard_mutex to fix the race with exec"
	04b836cbf19e885f8366bccb2e4b0474346c02d

commit in 2.6.31.

But the root of the problem lies in the fact that do_execve() path calls
tracehook_report_exec() which can stop if the tracer sets PT_TRACE_EXEC.

The tracee must not sleep in TASK_TRACED holding this mutex.  Even if we
remove ->cred_guard_mutex from mm_for_maps() and proc_pid_attr_write(),
another task doing PTRACE_ATTACH should not hang until it is killed or the
tracee resumes.

With this patch do_execve() does not use ->cred_guard_mutex directly and
we do not hold it throughout, instead:

	- introduce prepare_bprm_creds() helper, it locks the mutex
	  and calls prepare_exec_creds() to initialize bprm->cred.

	- install_exec_creds() drops the mutex after commit_creds(),
	  and thus before tracehook_report_exec()->ptrace_stop().

	  or, if exec fails,

	  free_bprm() drops this mutex when bprm->cred != NULL which
	  indicates install_exec_creds() was not called.

Reported-by: Tom Horsley <tom.horsley@att.net>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat.c             | 17 +++--------
 fs/exec.c               | 63 +++++++++++++++++++++++++----------------
 include/linux/binfmts.h |  1 +
 3 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/fs/compat.c b/fs/compat.c
index 94502dab972a..6d6f98fe64a0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1485,20 +1485,15 @@ int compat_do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = -ERESTARTNOINTR;
-	if (mutex_lock_interruptible(&current->cred_guard_mutex))
+	retval = prepare_bprm_creds(bprm);
+	if (retval)
 		goto out_free;
-	current->in_execve = 1;
-
-	retval = -ENOMEM;
-	bprm->cred = prepare_exec_creds();
-	if (!bprm->cred)
-		goto out_unlock;
 
 	retval = check_unsafe_exec(bprm);
 	if (retval < 0)
-		goto out_unlock;
+		goto out_free;
 	clear_in_exec = retval;
+	current->in_execve = 1;
 
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
@@ -1547,7 +1542,6 @@ int compat_do_execve(char * filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_guard_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1567,10 +1561,7 @@ int compat_do_execve(char * filename,
 out_unmark:
 	if (clear_in_exec)
 		current->fs->in_exec = 0;
-
-out_unlock:
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_guard_mutex);
 
 out_free:
 	free_bprm(bprm);
diff --git a/fs/exec.c b/fs/exec.c
index fb4f3cdda78c..172ceb6edde4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1015,6 +1015,35 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 EXPORT_SYMBOL(flush_old_exec);
 
+/*
+ * Prepare credentials and lock ->cred_guard_mutex.
+ * install_exec_creds() commits the new creds and drops the lock.
+ * Or, if exec fails before, free_bprm() should release ->cred and
+ * and unlock.
+ */
+int prepare_bprm_creds(struct linux_binprm *bprm)
+{
+	if (mutex_lock_interruptible(&current->cred_guard_mutex))
+		return -ERESTARTNOINTR;
+
+	bprm->cred = prepare_exec_creds();
+	if (likely(bprm->cred))
+		return 0;
+
+	mutex_unlock(&current->cred_guard_mutex);
+	return -ENOMEM;
+}
+
+void free_bprm(struct linux_binprm *bprm)
+{
+	free_arg_pages(bprm);
+	if (bprm->cred) {
+		mutex_unlock(&current->cred_guard_mutex);
+		abort_creds(bprm->cred);
+	}
+	kfree(bprm);
+}
+
 /*
  * install the new credentials for this executable
  */
@@ -1024,12 +1053,13 @@ void install_exec_creds(struct linux_binprm *bprm)
 
 	commit_creds(bprm->cred);
 	bprm->cred = NULL;
-
-	/* cred_guard_mutex must be held at least to this point to prevent
+	/*
+	 * cred_guard_mutex must be held at least to this point to prevent
 	 * ptrace_attach() from altering our determination of the task's
-	 * credentials; any time after this it may be unlocked */
-
+	 * credentials; any time after this it may be unlocked.
+	 */
 	security_bprm_committed_creds(bprm);
+	mutex_unlock(&current->cred_guard_mutex);
 }
 EXPORT_SYMBOL(install_exec_creds);
 
@@ -1246,14 +1276,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 
 EXPORT_SYMBOL(search_binary_handler);
 
-void free_bprm(struct linux_binprm *bprm)
-{
-	free_arg_pages(bprm);
-	if (bprm->cred)
-		abort_creds(bprm->cred);
-	kfree(bprm);
-}
-
 /*
  * sys_execve() executes a new program.
  */
@@ -1277,20 +1299,15 @@ int do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = -ERESTARTNOINTR;
-	if (mutex_lock_interruptible(&current->cred_guard_mutex))
+	retval = prepare_bprm_creds(bprm);
+	if (retval)
 		goto out_free;
-	current->in_execve = 1;
-
-	retval = -ENOMEM;
-	bprm->cred = prepare_exec_creds();
-	if (!bprm->cred)
-		goto out_unlock;
 
 	retval = check_unsafe_exec(bprm);
 	if (retval < 0)
-		goto out_unlock;
+		goto out_free;
 	clear_in_exec = retval;
+	current->in_execve = 1;
 
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
@@ -1340,7 +1357,6 @@ int do_execve(char * filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_guard_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1360,10 +1376,7 @@ int do_execve(char * filename,
 out_unmark:
 	if (clear_in_exec)
 		current->fs->in_exec = 0;
-
-out_unlock:
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_guard_mutex);
 
 out_free:
 	free_bprm(bprm);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 61ee18c1bdb4..2046b5b8af48 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -117,6 +117,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 			   int executable_stack);
 extern int bprm_mm_init(struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
+extern int prepare_bprm_creds(struct linux_binprm *bprm);
 extern void install_exec_creds(struct linux_binprm *bprm);
 extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
 extern int set_binfmt(struct linux_binfmt *new);

From 37f81fa1f63ad38e16125526bb2769ae0ea8d332 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 5 Sep 2009 12:46:07 -0700
Subject: [PATCH 53/60] n_tty: do O_ONLCR translation as a single write

When translating CR to CRNL in the n_tty line discipline, we did it as
two tty_put_char() calls.  Which works, but is stupid, and has caused
problems before too with bad interactions with the write_room() logic.
The generic USB serial driver had that problem, for example.

Now the pty layer had similar issues after being moved to the generic
tty buffering code (in commit d945cb9cce20ac7143c2de8d88b187f62db99bdc:
"pty: Rework the pty layer to use the normal buffering logic").

So stop doing the silly separate two writes, and do it as a single write
instead.  That's what the n_tty layer already does for the space
expansion of tabs (XTABS), and it means that we'll now always have just
a single write for the CRNL to match the single 'tty_write_room()' test,
which hopefully means that the next time somebody screws up buffering,
it won't cause weeks of debugging.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/n_tty.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index 973be2f44195..4e28b35024ec 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -300,8 +300,7 @@ static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 			if (space < 2)
 				return -1;
 			tty->canon_column = tty->column = 0;
-			tty_put_char(tty, '\r');
-			tty_put_char(tty, c);
+			tty->ops->write(tty, "\r\n", 2);
 			return 2;
 		}
 		tty->canon_column = tty->column;

From ac89a9174decf343de049a06fad75681f71890eb Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 5 Sep 2009 13:27:10 -0700
Subject: [PATCH 54/60] pty: don't limit the writes to 'pty_space()' inside
 'pty_write()'

The whole write-room thing is something that is up to the _caller_ to
worry about, not the pty layer itself.  The total buffer space will
still be limited by the buffering routines themselves, so there is no
advantage or need in having pty_write() artificially limit the size
somehow.

And what happened was that the caller (the n_tty line discipline, in
this case) may have verified that there is room for 2 bytes to be
written (for NL -> CRNL expansion), and it used to then do those writes
as two single-byte writes.  And if the first byte written (CR) then
caused a new tty buffer to be allocated, pty_space() may have returned
zero when trying to write the second byte (LF), and then incorrectly
failed the write - leading to a lost newline character.

This should finally fix

	http://bugzilla.kernel.org/show_bug.cgi?id=14015

Reported-by: Mikael Pettersson <mikpe@it.uu.se>
Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/pty.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index d083c73d784a..b33d6688e910 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -109,21 +109,13 @@ static int pty_space(struct tty_struct *to)
  *	the other side of the pty/tty pair.
  */
 
-static int pty_write(struct tty_struct *tty, const unsigned char *buf,
-								int count)
+static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c)
 {
 	struct tty_struct *to = tty->link;
-	int c;
 
 	if (tty->stopped)
 		return 0;
 
-	/* This isn't locked but our 8K is quite sloppy so no
-	   big deal */
-
-	c = pty_space(to);
-	if (c > count)
-		c = count;
 	if (c > 0) {
 		/* Stuff the data into the input queue of the other end */
 		c = tty_insert_flip_string(to, buf, c);

From 9de6886ec6e37f45807266a702bb7621498395ad Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Sat, 5 Sep 2009 00:25:37 -0400
Subject: [PATCH 55/60] ext2: fix unbalanced kmap()/kunmap()

In ext2_rename(), dir_page is acquired through ext2_dotdot().  It is
then released through ext2_set_link() but only if old_dir != new_dir.
Failing that, the pkmap reference count is never decremented and the
page remains pinned forever.  Repeat that a couple times with highmem
pages and all pkmap slots get exhausted, and every further kmap() call
ends up stalling on the pkmap_map_wait queue at which point the whole
system comes to a halt.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext2/namei.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e1dedb0f7873..78d9b925fc94 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -362,6 +362,10 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	if (dir_de) {
 		if (old_dir != new_dir)
 			ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
+		else {
+			kunmap(dir_page);
+			page_cache_release(dir_page);
+		}
 		inode_dec_link_count(old_dir);
 	}
 	return 0;

From 74a01180db4bbfd61304ae0ba1f60af55ffc803d Mon Sep 17 00:00:00 2001
From: Roderick Colenbrander <thunderbird2k@gmail.com>
Date: Thu, 3 Sep 2009 09:57:23 -0600
Subject: [PATCH 56/60] powerpc: Fix i8259 interrupt driver kernel crash on
 ML510

This patch fixes a null pointer exception caused by removal of
'ack()' for level interrupts in the Xilinx interrupt driver.  A recent
change to the xilinx interrupt controller removed the ack hook for
level irqs.

Signed-off-by: Roderick Colenbrander <thunderbird2k@gmail.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/sysdev/xilinx_intc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c
index 3ee1fd37bbfc..40edad520770 100644
--- a/arch/powerpc/sysdev/xilinx_intc.c
+++ b/arch/powerpc/sysdev/xilinx_intc.c
@@ -234,7 +234,6 @@ static void xilinx_i8259_cascade(unsigned int irq, struct irq_desc *desc)
 		generic_handle_irq(cascade_irq);
 
 	/* Let xilinx_intc end the interrupt */
-	desc->chip->ack(irq);
 	desc->chip->unmask(irq);
 }
 

From e07cccf4046978df10f2e13fe2b99b2f9b3a65db Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 5 Sep 2009 16:38:12 -0700
Subject: [PATCH 57/60] Linux 2.6.31-rc9

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 25c615e57302..7d3415c0709c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 31
-EXTRAVERSION = -rc8
+EXTRAVERSION = -rc9
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*

From d9d8e0418ffd3d189345c435861e254c17ae06e5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 6 Sep 2009 01:41:02 -0700
Subject: [PATCH 58/60] gianfar: Fix build.

Reported by Michael Guntsche <mike@it-loops.com>

--------------------
Commit
38bddf04bcfe661fbdab94888c3b72c32f6873b3 gianfar: gfar_remove needs to call unregister_netdev()

breaks the build of the gianfar driver because "dev" is undefined in
this function. To quickly test rc9 I changed this to priv->ndev but I do
not know if this is the correct one.
--------------------

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 24f7ca5e17de..a00ec639c380 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -491,7 +491,7 @@ static int gfar_remove(struct of_device *ofdev)
 
 	dev_set_drvdata(&ofdev->dev, NULL);
 
-	unregister_netdev(dev);
+	unregister_netdev(priv->ndev);
 	iounmap(priv->regs);
 	free_netdev(priv->ndev);
 

From acd0c935178649f72c44ec49ca83bee35ce1f79e Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Fri, 4 Sep 2009 13:08:46 -0400
Subject: [PATCH 59/60] IMA: update ima_counts_put

- As ima_counts_put() may be called after the inode has been freed,
verify that the inode is not NULL, before dereferencing it.

- Maintain the IMA file counters in may_open() properly, decrementing
any counter increments on subsequent errors.

Reported-by: Ciprian Docan <docan@eden.rutgers.edu>
Reported-by: J.R. Okajima <hooanon05@yahoo.co.jp>
Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/namei.c                        | 22 +++++++++++++++-------
 security/integrity/ima/ima_main.c |  6 +++++-
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index f3c5b278895a..1f13751693a5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1542,28 +1542,31 @@ int may_open(struct path *path, int acc_mode, int flag)
 	 * An append-only file must be opened in append mode for writing.
 	 */
 	if (IS_APPEND(inode)) {
+		error = -EPERM;
 		if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
-			return -EPERM;
+			goto err_out;
 		if (flag & O_TRUNC)
-			return -EPERM;
+			goto err_out;
 	}
 
 	/* O_NOATIME can only be set by the owner or superuser */
 	if (flag & O_NOATIME)
-		if (!is_owner_or_cap(inode))
-			return -EPERM;
+		if (!is_owner_or_cap(inode)) {
+			error = -EPERM;
+			goto err_out;
+		}
 
 	/*
 	 * Ensure there are no outstanding leases on the file.
 	 */
 	error = break_lease(inode, flag);
 	if (error)
-		return error;
+		goto err_out;
 
 	if (flag & O_TRUNC) {
 		error = get_write_access(inode);
 		if (error)
-			return error;
+			goto err_out;
 
 		/*
 		 * Refuse to truncate files with mandatory locks held on them.
@@ -1581,12 +1584,17 @@ int may_open(struct path *path, int acc_mode, int flag)
 		}
 		put_write_access(inode);
 		if (error)
-			return error;
+			goto err_out;
 	} else
 		if (flag & FMODE_WRITE)
 			vfs_dq_init(inode);
 
 	return 0;
+err_out:
+	ima_counts_put(path, acc_mode ?
+		       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
+		       ACC_MODE(flag) & (MAY_READ | MAY_WRITE));
+	return error;
 }
 
 /*
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 4732f5e5d127..b85e61bcf246 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -249,7 +249,11 @@ void ima_counts_put(struct path *path, int mask)
 	struct inode *inode = path->dentry->d_inode;
 	struct ima_iint_cache *iint;
 
-	if (!ima_initialized || !S_ISREG(inode->i_mode))
+	/* The inode may already have been freed, freeing the iint
+	 * with it. Verify the inode is not NULL before dereferencing
+	 * it.
+	 */
+	if (!ima_initialized || !inode || !S_ISREG(inode->i_mode))
 		return;
 	iint = ima_iint_find_insert_get(inode);
 	if (!iint)

From a54775c8758a754186bc6adbfc518b1e9f8f1e4e Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 7 Sep 2009 15:26:19 +1000
Subject: [PATCH 60/60] drm/radeon/kms: add LTE/GTE discard + rv515 two sided
 stencil register.

This adds some rv350+ registers for LTE/GTE discard,
and enables the rv515 two sided stencil register.
It also disables the DEPTHXY_OFFSET register which
can be used to work around the CS checker.
Moves rs690 to proper place in rs600 and uses correct
table on rs600.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r300.c        |  2 +-
 drivers/gpu/drm/radeon/radeon_asic.h |  6 +--
 drivers/gpu/drm/radeon/rs600.c       | 65 ++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/rs690.c       | 64 ---------------------------
 drivers/gpu/drm/radeon/rv515.c       |  2 +-
 5 files changed, 70 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 053f4ec397f7..051bca6e3a4f 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -995,7 +995,7 @@ static const unsigned r300_reg_safe_bm[159] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
+	0x0003FC01, 0xFFFFFCF8, 0xFF800B19,
 };
 
 static int r300_packet0_check(struct radeon_cs_parser *p,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 7ca6c13569b5..93d8f8889302 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -266,6 +266,7 @@ static struct radeon_asic rs400_asic = {
 /*
  * rs600.
  */
+int rs600_init(struct radeon_device *dev);
 void rs600_errata(struct radeon_device *rdev);
 void rs600_vram_info(struct radeon_device *rdev);
 int rs600_mc_init(struct radeon_device *rdev);
@@ -281,7 +282,7 @@ uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rs600_bandwidth_update(struct radeon_device *rdev);
 static struct radeon_asic rs600_asic = {
-	.init = &r300_init,
+	.init = &rs600_init,
 	.errata = &rs600_errata,
 	.vram_info = &rs600_vram_info,
 	.gpu_reset = &r300_gpu_reset,
@@ -316,7 +317,6 @@ static struct radeon_asic rs600_asic = {
 /*
  * rs690,rs740
  */
-int rs690_init(struct radeon_device *rdev);
 void rs690_errata(struct radeon_device *rdev);
 void rs690_vram_info(struct radeon_device *rdev);
 int rs690_mc_init(struct radeon_device *rdev);
@@ -325,7 +325,7 @@ uint32_t rs690_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rs690_bandwidth_update(struct radeon_device *rdev);
 static struct radeon_asic rs690_asic = {
-	.init = &rs690_init,
+	.init = &rs600_init,
 	.errata = &rs690_errata,
 	.vram_info = &rs690_vram_info,
 	.gpu_reset = &r300_gpu_reset,
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 7e8ce983a908..02fd11aad6a2 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -409,3 +409,68 @@ void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
 		((reg) & RS600_MC_ADDR_MASK));
 	WREG32(RS600_MC_DATA, v);
 }
+
+static const unsigned rs600_reg_safe_bm[219] = {
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
+	0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
+	0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
+	0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
+	0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
+	0x00000000, 0x0000C100, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x0003FC01, 0xFFFFFCF8, 0xFF800B19, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+};
+
+int rs600_init(struct radeon_device *rdev)
+{
+	rdev->config.r300.reg_safe_bm = rs600_reg_safe_bm;
+	rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(rs600_reg_safe_bm);
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index bc6b7c5339bc..879882533e45 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -653,67 +653,3 @@ void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
 	WREG32(RS690_MC_INDEX, RS690_MC_INDEX_WR_ACK);
 }
 
-static const unsigned rs690_reg_safe_bm[219] = {
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0x17FF1FFF,0xFFFFFFFC,0xFFFFFFFF,0xFF30FFBF,
-	0xFFFFFFF8,0xC3E6FFFF,0xFFFFF6DF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFF03F,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFEFCE,0xF00EBFFF,0x007C0000,
-	0xF0000078,0xFF000009,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFF7FF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFC78,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,
-	0x38FF8F50,0xFFF88082,0xF000000C,0xFAE009FF,
-	0x0000FFFF,0xFFFFFFFF,0xFFFFFFFF,0x00000000,
-	0x00000000,0x0000C100,0x00000000,0x00000000,
-	0x00000000,0x00000000,0x00000000,0x00000000,
-	0x00000000,0xFFFF0000,0xFFFFFFFF,0xFF80FFFF,
-	0x00000000,0x00000000,0x00000000,0x00000000,
-	0x0003FC01,0xFFFFFFF8,0xFE800B19,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
-};
-
-int rs690_init(struct radeon_device *rdev)
-{
-	rdev->config.r300.reg_safe_bm = rs690_reg_safe_bm;
-	rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(rs690_reg_safe_bm);
-	return 0;
-}
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 31a7f668ae5a..0566fb67e460 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -508,7 +508,7 @@ static const unsigned r500_reg_safe_bm[219] = {
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF80FFFF,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x0003FC01, 0x3FFFFCF8, 0xFE800B19, 0xFFFFFFFF,
+	0x0003FC01, 0x3FFFFCF8, 0xFF800B19, 0xFFDFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,