From c9a2bfd1f08973115c2fc3da9372e0e81ac42a19 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 14 Aug 2009 13:09:32 -0700 Subject: [PATCH 01/60] pcmcia: add CNF-CDROM-ID for ide Fixes this report: http://article.gmane.org/gmane.linux.kernel.pcmcia.devel/2228/ Reported-by: John McGrath Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- drivers/ide/ide-cs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c index 527908ff298c..063b933d864a 100644 --- a/drivers/ide/ide-cs.c +++ b/drivers/ide/ide-cs.c @@ -408,6 +408,7 @@ static struct pcmcia_device_id ide_ids[] = { PCMCIA_DEVICE_PROD_ID123("PCMCIA", "IDE CARD", "F1", 0x281f1c5d, 0x1907960c, 0xf7fde8b9), PCMCIA_DEVICE_PROD_ID12("ARGOSY", "CD-ROM", 0x78f308dc, 0x66536591), PCMCIA_DEVICE_PROD_ID12("ARGOSY", "PnPIDE", 0x78f308dc, 0x0c694728), + PCMCIA_DEVICE_PROD_ID12("CNF ", "CD-ROM", 0x46d7db81, 0x66536591), PCMCIA_DEVICE_PROD_ID12("CNF CD-M", "CD-ROM", 0x7d93b852, 0x66536591), PCMCIA_DEVICE_PROD_ID12("Creative Technology Ltd.", "PCMCIA CD-ROM Interface Card", 0xff8c8a45, 0xfe8020c4), PCMCIA_DEVICE_PROD_ID12("Digital Equipment Corporation.", "Digital Mobile Media CD-ROM", 0x17692a66, 0xef1dcbde), From ced909ff048c9950e211783417f3c01361f3be28 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 25 Aug 2009 19:24:10 -0700 Subject: [PATCH 02/60] Input: i8042 - add Acer Aspire 5536 to the nomux list When KBC is in active multiplexing mode, disabling and re-enabling the touchpad with the special key leaves the touchpad dead. Since the laptop does not have any external PS/2 ports disabling MUX mode should be safe. Reported-by: Eugeniy Meshcheryakov Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index ae04d8a494e5..ccbf23ece8e3 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -382,6 +382,14 @@ static struct dmi_system_id __initdata i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Vostro1510"), }, }, + { + .ident = "Acer Aspire 5536", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5536"), + DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), + }, + }, { } }; From a1b08e75dff3dc18a88444803753e667bb1d126e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 27 Aug 2009 14:46:56 +0800 Subject: [PATCH 03/60] ocfs2: invalidate dentry if its dentry_lock isn't initialized. In commit a5a0a630922a2f6a774b6dac19f70cb5abd86bb0, when ocfs2_attch_dentry_lock fails, we call an extra iput and reset dentry->d_fsdata to NULL. This resolve a bug, but it isn't completed and the dentry is still there. When we want to use it again, ocfs2_dentry_revalidate doesn't catch it and return true. That make future ocfs2_dentry_lock panic out. One bug is http://oss.oracle.com/bugzilla/show_bug.cgi?id=1162. The resolution is to add a check for dentry->d_fsdata in revalidate process and return false if dentry->d_fsdata is NULL, so that a new ocfs2_lookup will be called again. Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/dcache.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 2f28b7de2c8d..b4957c7d9fe2 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -85,6 +85,17 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, goto bail; } + /* + * If the last lookup failed to create dentry lock, let us + * redo it. + */ + if (!dentry->d_fsdata) { + mlog(0, "Inode %llu doesn't have dentry lock, " + "returning false\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); + goto bail; + } + ret = 1; bail: From 6bb56347f5162d1a7cb1dc461023360781ecd4c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 28 Aug 2009 13:44:53 +0200 Subject: [PATCH 04/60] perf_counters: Increase paranoia level Per-cpu counters are an ASLR information leak as they show the execution other tasks do. Increase the paranoia level to 1, which disallows per-cpu counters. (they still allow counting/profiling of own tasks - and admin can profile everything.) Acked-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f274e1959885..7d4bb83b78cf 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -50,7 +50,7 @@ static atomic_t nr_task_counters __read_mostly; * 1 - disallow cpu counters to unpriv * 2 - disallow kernel profiling to unpriv */ -int sysctl_perf_counter_paranoid __read_mostly; +int sysctl_perf_counter_paranoid __read_mostly = 1; static inline bool perf_paranoid_cpu(void) { From 11ebd1bf07fafde8d16562966c96b05b0d4ced9e Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Fri, 28 Aug 2009 11:42:31 +0800 Subject: [PATCH 05/60] ipw2200: firmware DMA loading rework Bartlomiej Zolnierkiewicz reported an atomic order-6 allocation failure for ipw2200 firmware loading in kernel 2.6.30. High order allocation is likely to fail and should always be avoided. The patch fixes this problem by replacing the original order-6 pci_alloc_consistent() with an array of order-1 pages from a pci pool. This utilized the ipw2200 DMA command blocks (up to 64 slots). The maximum firmware size support remains the same (64*8K). This patch fixes bug http://bugzilla.kernel.org/show_bug.cgi?id=14016 Cc: Andrew Morton Cc: Mel Gorman Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- drivers/net/wireless/ipw2x00/ipw2200.c | 120 ++++++++++++++----------- 1 file changed, 67 insertions(+), 53 deletions(-) diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 6dcac73b4d29..f593fbbb4e52 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -2874,45 +2874,27 @@ static int ipw_fw_dma_add_command_block(struct ipw_priv *priv, return 0; } -static int ipw_fw_dma_add_buffer(struct ipw_priv *priv, - u32 src_phys, u32 dest_address, u32 length) +static int ipw_fw_dma_add_buffer(struct ipw_priv *priv, dma_addr_t *src_address, + int nr, u32 dest_address, u32 len) { - u32 bytes_left = length; - u32 src_offset = 0; - u32 dest_offset = 0; - int status = 0; + int ret, i; + u32 size; + IPW_DEBUG_FW(">> \n"); - IPW_DEBUG_FW_INFO("src_phys=0x%x dest_address=0x%x length=0x%x\n", - src_phys, dest_address, length); - while (bytes_left > CB_MAX_LENGTH) { - status = ipw_fw_dma_add_command_block(priv, - src_phys + src_offset, - dest_address + - dest_offset, - CB_MAX_LENGTH, 0, 0); - if (status) { + IPW_DEBUG_FW_INFO("nr=%d dest_address=0x%x len=0x%x\n", + nr, dest_address, len); + + for (i = 0; i < nr; i++) { + size = min_t(u32, len - i * CB_MAX_LENGTH, CB_MAX_LENGTH); + ret = ipw_fw_dma_add_command_block(priv, src_address[i], + dest_address + + i * CB_MAX_LENGTH, size, + 0, 0); + if (ret) { IPW_DEBUG_FW_INFO(": Failed\n"); return -1; } else IPW_DEBUG_FW_INFO(": Added new cb\n"); - - src_offset += CB_MAX_LENGTH; - dest_offset += CB_MAX_LENGTH; - bytes_left -= CB_MAX_LENGTH; - } - - /* add the buffer tail */ - if (bytes_left > 0) { - status = - ipw_fw_dma_add_command_block(priv, src_phys + src_offset, - dest_address + dest_offset, - bytes_left, 0, 0); - if (status) { - IPW_DEBUG_FW_INFO(": Failed on the buffer tail\n"); - return -1; - } else - IPW_DEBUG_FW_INFO - (": Adding new cb - the buffer tail\n"); } IPW_DEBUG_FW("<< \n"); @@ -3160,59 +3142,91 @@ static int ipw_load_ucode(struct ipw_priv *priv, u8 * data, size_t len) static int ipw_load_firmware(struct ipw_priv *priv, u8 * data, size_t len) { - int rc = -1; + int ret = -1; int offset = 0; struct fw_chunk *chunk; - dma_addr_t shared_phys; - u8 *shared_virt; + int total_nr = 0; + int i; + struct pci_pool *pool; + u32 *virts[CB_NUMBER_OF_ELEMENTS_SMALL]; + dma_addr_t phys[CB_NUMBER_OF_ELEMENTS_SMALL]; IPW_DEBUG_TRACE("<< : \n"); - shared_virt = pci_alloc_consistent(priv->pci_dev, len, &shared_phys); - if (!shared_virt) + pool = pci_pool_create("ipw2200", priv->pci_dev, CB_MAX_LENGTH, 0, 0); + if (!pool) { + IPW_ERROR("pci_pool_create failed\n"); return -ENOMEM; - - memmove(shared_virt, data, len); + } /* Start the Dma */ - rc = ipw_fw_dma_enable(priv); + ret = ipw_fw_dma_enable(priv); /* the DMA is already ready this would be a bug. */ BUG_ON(priv->sram_desc.last_cb_index > 0); do { + u32 chunk_len; + u8 *start; + int size; + int nr = 0; + chunk = (struct fw_chunk *)(data + offset); offset += sizeof(struct fw_chunk); + chunk_len = le32_to_cpu(chunk->length); + start = data + offset; + + nr = (chunk_len + CB_MAX_LENGTH - 1) / CB_MAX_LENGTH; + for (i = 0; i < nr; i++) { + virts[total_nr] = pci_pool_alloc(pool, GFP_KERNEL, + &phys[total_nr]); + if (!virts[total_nr]) { + ret = -ENOMEM; + goto out; + } + size = min_t(u32, chunk_len - i * CB_MAX_LENGTH, + CB_MAX_LENGTH); + memcpy(virts[total_nr], start, size); + start += size; + total_nr++; + /* We don't support fw chunk larger than 64*8K */ + BUG_ON(total_nr > CB_NUMBER_OF_ELEMENTS_SMALL); + } + /* build DMA packet and queue up for sending */ /* dma to chunk->address, the chunk->length bytes from data + * offeset*/ /* Dma loading */ - rc = ipw_fw_dma_add_buffer(priv, shared_phys + offset, - le32_to_cpu(chunk->address), - le32_to_cpu(chunk->length)); - if (rc) { + ret = ipw_fw_dma_add_buffer(priv, &phys[total_nr - nr], + nr, le32_to_cpu(chunk->address), + chunk_len); + if (ret) { IPW_DEBUG_INFO("dmaAddBuffer Failed\n"); goto out; } - offset += le32_to_cpu(chunk->length); + offset += chunk_len; } while (offset < len); /* Run the DMA and wait for the answer */ - rc = ipw_fw_dma_kick(priv); - if (rc) { + ret = ipw_fw_dma_kick(priv); + if (ret) { IPW_ERROR("dmaKick Failed\n"); goto out; } - rc = ipw_fw_dma_wait(priv); - if (rc) { + ret = ipw_fw_dma_wait(priv); + if (ret) { IPW_ERROR("dmaWaitSync Failed\n"); goto out; } - out: - pci_free_consistent(priv->pci_dev, len, shared_virt, shared_phys); - return rc; + out: + for (i = 0; i < total_nr; i++) + pci_pool_free(pool, virts[i], phys[i]); + + pci_pool_destroy(pool); + + return ret; } /* stop nic */ From 0c7d400fafaeab6014504a6a6249f01bac7f7db4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 29 Aug 2009 20:44:04 +1000 Subject: [PATCH 06/60] crypto: skcipher - Fix skcipher_dequeue_givcrypt NULL test As struct skcipher_givcrypt_request includes struct crypto_request at a non-zero offset, testing for NULL after converting the pointer returned by crypto_dequeue_request does not work. This can result in IPsec crashes when the queue is depleted. This patch fixes it by doing the pointer conversion only when the return value is non-NULL. In particular, we create a new function __crypto_dequeue_request that does the pointer conversion. Reported-by: Brad Bosch Signed-off-by: Herbert Xu --- crypto/algapi.c | 11 +++++++++-- include/crypto/algapi.h | 1 + include/crypto/internal/skcipher.h | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 56c62e2858d5..df0863d56995 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -692,7 +692,7 @@ int crypto_enqueue_request(struct crypto_queue *queue, } EXPORT_SYMBOL_GPL(crypto_enqueue_request); -struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue) +void *__crypto_dequeue_request(struct crypto_queue *queue, unsigned int offset) { struct list_head *request; @@ -707,7 +707,14 @@ struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue) request = queue->list.next; list_del(request); - return list_entry(request, struct crypto_async_request, list); + return (char *)list_entry(request, struct crypto_async_request, list) - + offset; +} +EXPORT_SYMBOL_GPL(__crypto_dequeue_request); + +struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue) +{ + return __crypto_dequeue_request(queue, 0); } EXPORT_SYMBOL_GPL(crypto_dequeue_request); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 010545436efa..5a2bd1cc9656 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -137,6 +137,7 @@ struct crypto_instance *crypto_alloc_instance(const char *name, void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen); int crypto_enqueue_request(struct crypto_queue *queue, struct crypto_async_request *request); +void *__crypto_dequeue_request(struct crypto_queue *queue, unsigned int offset); struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue); int crypto_tfm_in_queue(struct crypto_queue *queue, struct crypto_tfm *tfm); diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 2ba42cd7d6aa..3a748a6bf772 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -79,8 +79,8 @@ static inline int skcipher_enqueue_givcrypt( static inline struct skcipher_givcrypt_request *skcipher_dequeue_givcrypt( struct crypto_queue *queue) { - return container_of(ablkcipher_dequeue_request(queue), - struct skcipher_givcrypt_request, creq); + return __crypto_dequeue_request( + queue, offsetof(struct skcipher_givcrypt_request, creq.base)); } static inline void *skcipher_givcrypt_reqctx( From eced1dfcfcf6b0a35e925d73916a9d8e36ab5457 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Aug 2009 17:10:47 +0200 Subject: [PATCH 07/60] perf_counter: Fix /0 bug in swcounters We have a race in the swcounter stuff where we can start counting a counter that has never been enabled, this leads to a /0 situation. The below avoids the /0 but doesn't close the race, this would need a new counter state. The race is due to perf_swcounter_is_counting() which cannot discern between disabled due to scheduled out, and disabled for any other reason. Such a crash has been seen by Ingo: [ 967.092372] divide error: 0000 [#1] SMP [ 967.096499] last sysfs file: /sys/devices/system/cpu/cpu15/cache/index2/shared_cpu_map [ 967.104846] CPU 5 [ 967.106965] Modules linked in: [ 967.110169] Pid: 3351, comm: hackbench Not tainted 2.6.31-rc8-tip-01158-gd940a54-dirty #1568 X8DTN [ 967.119456] RIP: 0010:[] [] perf_swcounter_ctx_event+0x127/0x1af [ 967.129137] RSP: 0018:ffff8801a95abd70 EFLAGS: 00010046 [ 967.134699] RAX: 0000000000000002 RBX: ffff8801bd645c00 RCX: 0000000000000002 [ 967.142162] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8801bd645d40 [ 967.149584] RBP: ffff8801a95abdb0 R08: 0000000000000001 R09: ffff8801a95abe00 [ 967.157042] R10: 0000000000000037 R11: ffff8801aa1245f8 R12: ffff8801a95abe00 [ 967.164481] R13: ffff8801a95abe00 R14: ffff8801aa1c0e78 R15: 0000000000000001 [ 967.171953] FS: 0000000000000000(0000) GS:ffffc90000a00000(0063) knlGS:00000000f7f486c0 [ 967.180406] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 967.186374] CR2: 000000004822c0ac CR3: 00000001b19a2000 CR4: 00000000000006e0 [ 967.193770] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 967.201224] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 967.208692] Process hackbench (pid: 3351, threadinfo ffff8801a95aa000, task ffff8801a96b0000) [ 967.217607] Stack: [ 967.219711] 0000000000000000 0000000000000037 0000000200000001 ffffc90000a1107c [ 967.227296] <0> ffff8801a95abe00 0000000000000001 0000000000000001 0000000000000037 [ 967.235333] <0> ffff8801a95abdf0 ffffffff810c0c20 0000000200a14f30 ffff8801a95abe40 [ 967.243532] Call Trace: [ 967.246103] [] do_perf_swcounter_event+0xde/0xec [ 967.252635] [] perf_tpcounter_event+0x79/0x7b [ 967.258957] [] ftrace_profile_sched_switch+0xc0/0xcb [ 967.265791] [] schedule+0x429/0x4c4 [ 967.271156] [] int_careful+0xd/0x14 Reported-by: Ingo Molnar Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1251472247.17617.74.camel@laptop> Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 7d4bb83b78cf..d7cbc579fc80 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -4066,6 +4066,7 @@ perf_counter_alloc(struct perf_counter_attr *attr, hwc->sample_period = attr->sample_period; if (attr->freq && attr->sample_freq) hwc->sample_period = 1; + hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); From 6faf17f6f1ffc586d16efc2f9fa2083a7785ee74 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Fri, 28 Aug 2009 13:00:06 -0700 Subject: [PATCH 08/60] PCI SR-IOV: correct broken resource alignment calculations An SR-IOV capable device includes an SR-IOV PCIe capability which describes the Virtual Function (VF) BAR requirements. A typical SR-IOV device can support multiple VFs whose BARs must be in a contiguous region, effectively an array of VF BARs. The BAR reports the size requirement for a single VF. We calculate the full range needed by simply multiplying the VF BAR size with the number of possible VFs and create a resource spanning the full range. This all seems sane enough except it artificially inflates the alignment requirement for the VF BAR. The VF BAR need only be aligned to the size of a single BAR not the contiguous range of VF BARs. This can cause us to fail to allocate resources for the BAR despite the fact that we actually have enough space. This patch adds a thin PCI specific layer over the generic resource_alignment() function which is aware of the special nature of VF BARs and does sorting and allocation based on the smaller alignment requirement. I recognize that while resource_alignment is generic, it's basically a PCI helper. An alternative to this patch is to add PCI VF BAR specific information to struct resource. I opted for the extra layer rather than adding such PCI specific information to struct resource. This does have the slight downside that we don't cache the BAR size and re-read for each alignment query (happens a small handful of times during boot for each VF BAR). Signed-off-by: Chris Wright Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matthew Wilcox Cc: Yu Zhao Cc: stable@kernel.org Signed-off-by: Jesse Barnes --- drivers/pci/iov.c | 23 +++++++++++++++++++++++ drivers/pci/pci.h | 13 +++++++++++++ drivers/pci/setup-bus.c | 4 ++-- drivers/pci/setup-res.c | 8 ++++---- 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index e3a87210e947..e03fe98f0619 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -597,6 +597,29 @@ int pci_iov_resource_bar(struct pci_dev *dev, int resno, 4 * (resno - PCI_IOV_RESOURCES); } +/** + * pci_sriov_resource_alignment - get resource alignment for VF BAR + * @dev: the PCI device + * @resno: the resource number + * + * Returns the alignment of the VF BAR found in the SR-IOV capability. + * This is not the same as the resource size which is defined as + * the VF BAR size multiplied by the number of VFs. The alignment + * is just the VF BAR size. + */ +int pci_sriov_resource_alignment(struct pci_dev *dev, int resno) +{ + struct resource tmp; + enum pci_bar_type type; + int reg = pci_iov_resource_bar(dev, resno, &type); + + if (!reg) + return 0; + + __pci_read_base(dev, type, &tmp, reg); + return resource_alignment(&tmp); +} + /** * pci_restore_iov_state - restore the state of the IOV capability * @dev: the PCI device diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f73bcbedf37c..5ff4d25bf0e9 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -243,6 +243,7 @@ extern int pci_iov_init(struct pci_dev *dev); extern void pci_iov_release(struct pci_dev *dev); extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); +extern int pci_sriov_resource_alignment(struct pci_dev *dev, int resno); extern void pci_restore_iov_state(struct pci_dev *dev); extern int pci_iov_bus_range(struct pci_bus *bus); @@ -298,4 +299,16 @@ static inline int pci_ats_enabled(struct pci_dev *dev) } #endif /* CONFIG_PCI_IOV */ +static inline int pci_resource_alignment(struct pci_dev *dev, + struct resource *res) +{ +#ifdef CONFIG_PCI_IOV + int resno = res - dev->resource; + + if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) + return pci_sriov_resource_alignment(dev, resno); +#endif + return resource_alignment(res); +} + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index b636e245445d..7c443b4583ab 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -25,7 +25,7 @@ #include #include #include - +#include "pci.h" static void pbus_assign_resources_sorted(const struct pci_bus *bus) { @@ -384,7 +384,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long continue; r_size = resource_size(r); /* For bridges size != alignment */ - align = resource_alignment(r); + align = pci_resource_alignment(dev, r); order = __ffs(align) - 20; if (order > 11) { dev_warn(&dev->dev, "BAR %d bad alignment %llx: " diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 1898c7b47907..88cdd1a937d6 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -144,7 +144,7 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, size = resource_size(res); min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; - align = resource_alignment(res); + align = pci_resource_alignment(dev, res); /* First, try exact prefetching match.. */ ret = pci_bus_alloc_resource(bus, res, size, align, min, @@ -178,7 +178,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) struct pci_bus *bus; int ret; - align = resource_alignment(res); + align = pci_resource_alignment(dev, res); if (!align) { dev_info(&dev->dev, "BAR %d: can't allocate resource (bogus " "alignment) %pR flags %#lx\n", @@ -259,7 +259,7 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) if (!(r->flags) || r->parent) continue; - r_align = resource_alignment(r); + r_align = pci_resource_alignment(dev, r); if (!r_align) { dev_warn(&dev->dev, "BAR %d: bogus alignment " "%pR flags %#lx\n", @@ -271,7 +271,7 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) struct resource_list *ln = list->next; if (ln) - align = resource_alignment(ln->res); + align = pci_resource_alignment(ln->dev, ln->res); if (r_align > align) { tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); From b1f1b8ce0a1d71cbc72f7540134d52b79bd8f5ac Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 30 Aug 2009 04:21:41 +0900 Subject: [PATCH 09/60] nilfs2: fix preempt count underflow in nilfs_btnode_prepare_change_key This will fix the following preempt count underflow reported from users with the title "[NILFS users] segctord problem" (Message-ID: <949415.6494.qm@web58808.mail.re1.yahoo.com> and Message-ID: ): WARNING: at kernel/sched.c:4890 sub_preempt_count+0x95/0xa0() Hardware name: HP Compaq 6530b (KR980UT#ABC) Modules linked in: bridge stp llc bnep rfcomm l2cap xfs exportfs nilfs2 cowloop loop vboxnetadp vboxnetflt vboxdrv btusb bluetooth uvcvideo videodev v4l1_compat v4l2_compat_ioctl32 arc4 snd_hda_codec_analog ecb iwlagn iwlcore rfkill lib80211 mac80211 snd_hda_intel snd_hda_codec ehci_hcd uhci_hcd usbcore snd_hwdep snd_pcm tg3 cfg80211 psmouse snd_timer joydev libphy ohci1394 snd_page_alloc hp_accel lis3lv02d ieee1394 led_class i915 drm i2c_algo_bit video backlight output i2c_core dm_crypt dm_mod Pid: 4197, comm: segctord Not tainted 2.6.30-gentoo-r4-64 #7 Call Trace: [] ? sub_preempt_count+0x95/0xa0 [] warn_slowpath_common+0x78/0xd0 [] warn_slowpath_null+0xf/0x20 [] sub_preempt_count+0x95/0xa0 [] nilfs_btnode_prepare_change_key+0x11b/0x190 [nilfs2] [] nilfs_btree_assign_p+0x19d/0x1e0 [nilfs2] [] nilfs_btree_assign+0xbd/0x130 [nilfs2] [] nilfs_bmap_assign+0x47/0x70 [nilfs2] [] nilfs_segctor_do_construct+0x956/0x20f0 [nilfs2] [] ? _spin_unlock_irqrestore+0x12/0x40 [] ? __up_write+0xe0/0x150 [] ? up_write+0x9/0x10 [] ? nilfs_bmap_test_and_clear_dirty+0x43/0x60 [nilfs2] [] ? nilfs_mdt_fetch_dirty+0x27/0x60 [nilfs2] [] nilfs_segctor_construct+0x8c/0xd0 [nilfs2] [] nilfs_segctor_thread+0x15c/0x3a0 [nilfs2] [] ? nilfs_construction_timeout+0x0/0x10 [nilfs2] [] ? add_timer+0x13/0x20 [] ? __wake_up_common+0x5a/0x90 [] ? autoremove_wake_function+0x0/0x40 [] ? nilfs_segctor_thread+0x0/0x3a0 [nilfs2] [] ? nilfs_segctor_thread+0x0/0x3a0 [nilfs2] [] kthread+0x56/0x90 [] child_rip+0xa/0x20 [] ? kthread+0x0/0x90 [] ? child_rip+0x0/0x20 This problem was caused due to a missing radix_tree_preload() call in the retry path of nilfs_btnode_prepare_change_key() function. Reported-by: Eric A Reported-by: Jerome Poulin Signed-off-by: Ryusuke Konishi Tested-by: Jerome Poulin Cc: stable@kernel.org --- fs/nilfs2/btnode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 7e0b61be212e..c668bca579c1 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -209,6 +209,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, * We cannot call radix_tree_preload for the kernels older * than 2.6.23, because it is not exported for modules. */ +retry: err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); if (err) goto failed_unlock; @@ -219,7 +220,6 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, (unsigned long long)oldkey, (unsigned long long)newkey); -retry: spin_lock_irq(&btnc->tree_lock); err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); spin_unlock_irq(&btnc->tree_lock); From 38bddf04bcfe661fbdab94888c3b72c32f6873b3 Mon Sep 17 00:00:00 2001 From: Toru UCHIYAMA Date: Sun, 30 Aug 2009 22:04:07 -0700 Subject: [PATCH 10/60] gianfar: gfar_remove needs to call unregister_netdev() This patch solves the problem that the Oops(BUG_ON) occurs by rmmod. # rmmod gianfar_driver ------------[ cut here ]------------ Kernel BUG at c01fec48 [verbose debug info unavailable] Oops: Exception in kernel mode, sig: 5 [#1] MPC837x MDS Modules linked in: gianfar_driver(-) usb_storage scsi_wait_scan NIP: c01fec48 LR: c01febf4 CTR: c01feba8 REGS: dec5bd60 TRAP: 0700 Tainted: G W (2.6.31-rc2) MSR: 00029032 CR: 22000424 XER: 20000000 TASK = dec4cac0[1135] 'rmmod' THREAD: dec5a000 GPR00: 00000002 dec5be10 dec4cac0 dfba1820 c035d444 c035d478 ffffffff 00000000 GPR08: 0000002b 00000001 dfba193c 00000001 22000424 10019b34 1ffcb000 00000000 GPR16: 10012008 00000000 bf82ebe0 100017ec bf82ebec bf82ebe8 bf82ebd0 00000880 GPR24: 00000000 bf82ebf0 c03532f0 c03532e4 c036b594 dfba183c dfba1800 dfba1820 NIP [c01fec48] free_netdev+0xa0/0xb8 LR [c01febf4] free_netdev+0x4c/0xb8 Call Trace: [dec5be10] [c01febf4] free_netdev+0x4c/0xb8 (unreliable) [dec5be30] [e105f290] gfar_remove+0x50/0x68 [gianfar_driver] [dec5be40] [c01ec534] of_platform_device_remove+0x30/0x44 [dec5be50] [c0181760] __device_release_driver+0x68/0xc8 [dec5be60] [c0181868] driver_detach+0xa8/0xac [dec5be80] [c0180814] bus_remove_driver+0x9c/0xd8 [dec5bea0] [c0181efc] driver_unregister+0x60/0x98 [dec5beb0] [c01ec650] of_unregister_driver+0x14/0x24 [dec5bec0] [e10631bc] gfar_exit+0x18/0x4bc [gianfar_driver] [dec5bed0] [c0047584] sys_delete_module+0x16c/0x228 [dec5bf40] [c00116bc] ret_from_syscall+0x0/0x38 --- Exception: c01 at 0xff3669c LR = 0x10000f34 Instruction dump: 409e0024 a07e00c0 7c63f050 4be74429 80010024 bba10014 38210020 7c0803a6 4e800020 68000003 3160ffff 7d2b0110 <0f090000> 38000004 387e01f0 901e01d4 ---[ end trace 8c595bcd37230a0f ]--- localhost kernel: ------------[ cut here ]------------ Signed-off-by: Toru UCHIYAMA uchiyama.toru@jp.fujitsu.com Signed-off-by: David S. Miller --- drivers/net/gianfar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index e212f2c5448b..24f7ca5e17de 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -491,6 +491,7 @@ static int gfar_remove(struct of_device *ofdev) dev_set_drvdata(&ofdev->dev, NULL); + unregister_netdev(dev); iounmap(priv->regs); free_netdev(priv->ndev); From b91ab72b830e1494c2c7f8de05ccb2ab2c9cfb26 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 1 Sep 2009 08:23:58 +0200 Subject: [PATCH 11/60] sound: oxygen: fix MCLK rate for 192 kHz playback Do not forget to program the MCLK ratio for the I2S output. Otherwise, the master clock frequency can be too high for the DACs at sample frequencies above 96 kHz. Signed-off-by: Clemens Ladisch Cc: Signed-off-by: Takashi Iwai --- sound/pci/oxygen/oxygen_pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/oxygen/oxygen_pcm.c b/sound/pci/oxygen/oxygen_pcm.c index 3b5ca70c9d4d..ef2345d82b86 100644 --- a/sound/pci/oxygen/oxygen_pcm.c +++ b/sound/pci/oxygen/oxygen_pcm.c @@ -469,9 +469,11 @@ static int oxygen_multich_hw_params(struct snd_pcm_substream *substream, oxygen_write16_masked(chip, OXYGEN_I2S_MULTICH_FORMAT, oxygen_rate(hw_params) | chip->model.dac_i2s_format | + oxygen_i2s_mclk(hw_params) | oxygen_i2s_bits(hw_params), OXYGEN_I2S_RATE_MASK | OXYGEN_I2S_FORMAT_MASK | + OXYGEN_I2S_MCLK_MASK | OXYGEN_I2S_BITS_MASK); oxygen_update_dac_routing(chip); oxygen_update_spdif_source(chip); From 04a13c7c632e1fe04a5f6e6c83565d2559e37598 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Sep 2009 21:12:28 +0900 Subject: [PATCH 12/60] percpu: don't assume existence of cpu0 percpu incorrectly assumed that cpu0 was always there which led to the following warning and eventual oops on sparc machines w/o cpu0. WARNING: at mm/percpu.c:651 pcpu_map+0xdc/0x100() Modules linked in: Call Trace: [000000000045eb70] warn_slowpath_common+0x50/0xa0 [000000000045ebdc] warn_slowpath_null+0x1c/0x40 [00000000004d493c] pcpu_map+0xdc/0x100 [00000000004d59a4] pcpu_alloc+0x3e4/0x4e0 [00000000004d5af8] __alloc_percpu+0x18/0x40 [00000000005b112c] __percpu_counter_init+0x4c/0xc0 ... Unable to handle kernel NULL pointer dereference ... I7: Disabling lock debugging due to kernel taint Caller[000000000053c1b0]: sysfs_new_dirent+0x30/0x120 Caller[000000000053c7a4]: create_dir+0x24/0xc0 Caller[000000000053c870]: sysfs_create_dir+0x30/0x80 Caller[00000000005990e8]: kobject_add_internal+0xc8/0x200 ... Kernel panic - not syncing: Attempted to kill the idle task! This patch fixes the problem by backporting parts from devel branch to make percpu core not depend on the existence of cpu0. Signed-off-by: Tejun Heo Reported-by: Meelis Roos Cc: David Miller --- mm/percpu.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index 5fe37842e0ea..3311c8919f37 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -197,7 +197,12 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, int page_idx) { - return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; + /* + * Any possible cpu id can be used here, so there's no need to + * worry about preemption or cpu hotplug. + */ + return *pcpu_chunk_pagep(chunk, raw_smp_processor_id(), + page_idx) != NULL; } /* set the pointer to a chunk in a page struct */ @@ -297,6 +302,14 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) return pcpu_first_chunk; } + /* + * The address is relative to unit0 which might be unused and + * thus unmapped. Offset the address to the unit space of the + * current processor before looking it up in the vmalloc + * space. Note that any possible cpu id can be used here, so + * there's no need to worry about preemption or cpu hotplug. + */ + addr += raw_smp_processor_id() * pcpu_unit_size; return pcpu_get_page_chunk(vmalloc_to_page(addr)); } From ce6c3997c2fce74d12e6d8887a1d8cdf024fa850 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 7 Aug 2009 22:58:51 +0200 Subject: [PATCH 13/60] [CPUFREQ] Re-enable cpufreq suspend and resume code Commit 4bc5d3413503 is broken and causes regressions: (1) cpufreq_driver->resume() and ->suspend() were only called on __powerpc__, but you could set them on all architectures. In fact, ->resume() was defined and used before the PPC-related commit 42d4dc3f4e1e complained about in 4bc5d3413503. (2) Therfore, the resume functions in acpi_cpufreq and speedstep-smi would never be called. (3) This means speedstep-smi would be unusuable after suspend or resume. The _real_ problem was calling cpufreq_driver->get() with interrupts off, but it re-enabling interrupts on some platforms. Why is ->get() necessary? Some systems like to change the CPU frequency behind our back, especially during BIOS-intensive operations like suspend or resume. If such systems also use a CPU frequency-dependant timing loop, delays might be off by large factors. Therefore, we need to ascertain as soon as possible that the CPU frequency is indeed at the speed we think it is. You can do this two ways: either setting it anew, or trying to get it. The latter is what was done, the former also has the same IRQ issue. So, let's try something different: defer the checking to after interrupts are re-enabled, by calling cpufreq_update_policy() (via schedule_work()). Timings may be off until this later stage, so let's watch out for resume regressions caused by the deferred handling of frequency changes behind the kernel's back. Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 95 +++------------------------------------ 1 file changed, 7 insertions(+), 88 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index fd69086d08d5..2968ed6a9c49 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1250,20 +1250,11 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) { int ret = 0; -#ifdef __powerpc__ int cpu = sysdev->id; - unsigned int cur_freq = 0; struct cpufreq_policy *cpu_policy; dprintk("suspending cpu %u\n", cpu); - /* - * This whole bogosity is here because Powerbooks are made of fail. - * No sane platform should need any of the code below to be run. - * (it's entirely the wrong thing to do, as driver->get may - * reenable interrupts on some architectures). - */ - if (!cpu_online(cpu)) return 0; @@ -1282,47 +1273,13 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) if (cpufreq_driver->suspend) { ret = cpufreq_driver->suspend(cpu_policy, pmsg); - if (ret) { + if (ret) printk(KERN_ERR "cpufreq: suspend failed in ->suspend " "step on CPU %u\n", cpu_policy->cpu); - goto out; - } - } - - if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS) - goto out; - - if (cpufreq_driver->get) - cur_freq = cpufreq_driver->get(cpu_policy->cpu); - - if (!cur_freq || !cpu_policy->cur) { - printk(KERN_ERR "cpufreq: suspend failed to assert current " - "frequency is what timing core thinks it is.\n"); - goto out; - } - - if (unlikely(cur_freq != cpu_policy->cur)) { - struct cpufreq_freqs freqs; - - if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) - dprintk("Warning: CPU frequency is %u, " - "cpufreq assumed %u kHz.\n", - cur_freq, cpu_policy->cur); - - freqs.cpu = cpu; - freqs.old = cpu_policy->cur; - freqs.new = cur_freq; - - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_SUSPENDCHANGE, &freqs); - adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs); - - cpu_policy->cur = cur_freq; } out: cpufreq_cpu_put(cpu_policy); -#endif /* __powerpc__ */ return ret; } @@ -1330,24 +1287,21 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) * cpufreq_resume - restore proper CPU frequency handling after resume * * 1.) resume CPUfreq hardware support (cpufreq_driver->resume()) - * 2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync - * 3.) schedule call cpufreq_update_policy() ASAP as interrupts are - * restored. + * 2.) schedule call cpufreq_update_policy() ASAP as interrupts are + * restored. It will verify that the current freq is in sync with + * what we believe it to be. This is a bit later than when it + * should be, but nonethteless it's better than calling + * cpufreq_driver->get() here which might re-enable interrupts... */ static int cpufreq_resume(struct sys_device *sysdev) { int ret = 0; -#ifdef __powerpc__ int cpu = sysdev->id; struct cpufreq_policy *cpu_policy; dprintk("resuming cpu %u\n", cpu); - /* As with the ->suspend method, all the code below is - * only necessary because Powerbooks suck. - * See commit 42d4dc3f4e1e for jokes. */ - if (!cpu_online(cpu)) return 0; @@ -1373,45 +1327,10 @@ static int cpufreq_resume(struct sys_device *sysdev) } } - if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { - unsigned int cur_freq = 0; - - if (cpufreq_driver->get) - cur_freq = cpufreq_driver->get(cpu_policy->cpu); - - if (!cur_freq || !cpu_policy->cur) { - printk(KERN_ERR "cpufreq: resume failed to assert " - "current frequency is what timing core " - "thinks it is.\n"); - goto out; - } - - if (unlikely(cur_freq != cpu_policy->cur)) { - struct cpufreq_freqs freqs; - - if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) - dprintk("Warning: CPU frequency " - "is %u, cpufreq assumed %u kHz.\n", - cur_freq, cpu_policy->cur); - - freqs.cpu = cpu; - freqs.old = cpu_policy->cur; - freqs.new = cur_freq; - - srcu_notifier_call_chain( - &cpufreq_transition_notifier_list, - CPUFREQ_RESUMECHANGE, &freqs); - adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs); - - cpu_policy->cur = cur_freq; - } - } - -out: schedule_work(&cpu_policy->update); + fail: cpufreq_cpu_put(cpu_policy); -#endif /* __powerpc__ */ return ret; } From c295fc05789653ef24f296299df7c5f92fe74dce Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Tue, 1 Sep 2009 22:40:15 +0200 Subject: [PATCH 14/60] block: Allow changing max_sectors_kb above the default 512 The patch "block: Use accessor functions for queue limits" (ae03bf639a5027d27270123f5f6e3ee6a412781d) changed queue_max_sectors_store() to use blk_queue_max_sectors() instead of directly assigning the value. But blk_queue_max_sectors() differs a bit 1. It sets both max_sectors_kb, and max_hw_sectors_kb 2. Never allows one to change max_sectors_kb above BLK_DEF_MAX_SECTORS. If one specifies a value greater then max_hw_sectors is set to that value but max_sectors is set to BLK_DEF_MAX_SECTORS I am not sure whether blk_queue_max_sectors() should be changed, as it seems to be that way for a long time. And there may be callers dependent on that behaviour. This patch simply reverts to the older way of directly assigning the value to max_sectors as it was before. Signed-off-by: Nikanth Karthikesan Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 418d63619680..d3aa2aadb3e0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -133,7 +133,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) return -EINVAL; spin_lock_irq(q->queue_lock); - blk_queue_max_sectors(q, max_sectors_kb << 1); + q->limits.max_sectors = max_sectors_kb << 1; spin_unlock_irq(q->queue_lock); return ret; From 3725867dccfb83e4b0cff64e916a04258f300591 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2009 14:03:08 -0400 Subject: [PATCH 15/60] xfs: actually enable the swapext compat handler Fix a small typo in the compat ioctl handler that cause the swapext compat handler to never be called. Signed-off-by: Christoph Hellwig Reviewed-by: Torsten Kaiser Tested-by: Torsten Kaiser Reviewed-by: Eric Sandeen Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/xfs_ioctl32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0882d166239a..eafcc7c18706 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -619,7 +619,7 @@ xfs_file_compat_ioctl( case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); return xfs_file_ioctl(filp, cmd, p); - case XFS_IOC_SWAPEXT: { + case XFS_IOC_SWAPEXT_32: { struct xfs_swapext sxp; struct compat_xfs_swapext __user *sxu = arg; From d66ee0587c3927aea5178a822976c7c853d815fe Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 30 Aug 2009 23:15:36 +0000 Subject: [PATCH 16/60] net: sk_free() should be allowed right after sk_alloc() After commit 2b85a34e911bf483c27cfdd124aeb1605145dc80 (net: No more expensive sock_hold()/sock_put() on each tx) sk_free() frees socks conditionally and depends on sk_wmem_alloc being set e.g. in sock_init_data(). But in some cases sk_free() is called earlier, usually after other alloc errors. Fix is to move sk_wmem_alloc initialization from sock_init_data() to sk_alloc() itself. Signed-off-by: Jarek Poplawski Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index bbb25be7ddfe..76334228ed1c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1025,6 +1025,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); sock_net_set(sk, get_net(net)); + atomic_set(&sk->sk_wmem_alloc, 1); } return sk; @@ -1872,7 +1873,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) */ smp_wmb(); atomic_set(&sk->sk_refcnt, 1); - atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); From 2fbd3da3877ad8d923b055e5996f80b4d4a6daf4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Sep 2009 17:59:25 -0700 Subject: [PATCH 17/60] pkt_sched: Revert tasklet_hrtimer changes. These are full of unresolved problems, mainly that conversions don't work 1-1 from hrtimers to tasklet_hrtimers because unlike hrtimers tasklets can't be killed from softirq context. And when a qdisc gets reset, that's exactly what we need to do here. We'll work this out in the net-next-2.6 tree and if warranted we'll backport that work to -stable. This reverts the following 3 changesets: a2cb6a4dd470d7a64255a10b843b0d188416b78f ("pkt_sched: Fix bogon in tasklet_hrtimer changes.") 38acce2d7983632100a9ff3fd20295f6e34074a8 ("pkt_sched: Convert CBQ to tasklet_hrtimer.") ee5f9757ea17759e1ce5503bdae2b07e48e32af9 ("pkt_sched: Convert qdisc_watchdog to tasklet_hrtimer") Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 4 ++-- net/sched/sch_api.c | 10 +++++----- net/sched/sch_cbq.c | 25 +++++++++++-------------- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7eafb8d54470..82a3191375f5 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -61,8 +61,8 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) } struct qdisc_watchdog { - struct tasklet_hrtimer timer; - struct Qdisc *qdisc; + struct hrtimer timer; + struct Qdisc *qdisc; }; extern void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 92e6f3a52c13..24d17ce9c294 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -458,7 +458,7 @@ EXPORT_SYMBOL(qdisc_warn_nonwc); static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, - timer.timer); + timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; __netif_schedule(qdisc_root(wd->qdisc)); @@ -468,8 +468,8 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { - tasklet_hrtimer_init(&wd->timer, qdisc_watchdog, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + wd->timer.function = qdisc_watchdog; wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init); @@ -485,13 +485,13 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); - tasklet_hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); + hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); } EXPORT_SYMBOL(qdisc_watchdog_schedule); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { - tasklet_hrtimer_cancel(&wd->timer); + hrtimer_cancel(&wd->timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; } EXPORT_SYMBOL(qdisc_watchdog_cancel); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 149b0405c5ec..d5798e17a832 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -163,7 +163,7 @@ struct cbq_sched_data psched_time_t now_rt; /* Cached real time */ unsigned pmask; - struct tasklet_hrtimer delay_timer; + struct hrtimer delay_timer; struct qdisc_watchdog watchdog; /* Watchdog timer, started when CBQ has backlog, but cannot @@ -503,8 +503,6 @@ static void cbq_ovl_delay(struct cbq_class *cl) cl->undertime = q->now + delay; if (delay > 0) { - struct hrtimer *ht; - sched += delay + cl->penalty; cl->penalized = sched; cl->cpriority = TC_CBQ_MAXPRIO; @@ -512,12 +510,12 @@ static void cbq_ovl_delay(struct cbq_class *cl) expires = ktime_set(0, 0); expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched)); - ht = &q->delay_timer.timer; - if (hrtimer_try_to_cancel(ht) && - ktime_to_ns(ktime_sub(hrtimer_get_expires(ht), - expires)) > 0) - hrtimer_set_expires(ht, expires); - hrtimer_restart(ht); + if (hrtimer_try_to_cancel(&q->delay_timer) && + ktime_to_ns(ktime_sub( + hrtimer_get_expires(&q->delay_timer), + expires)) > 0) + hrtimer_set_expires(&q->delay_timer, expires); + hrtimer_restart(&q->delay_timer); cl->delayed = 1; cl->xstats.overactions++; return; @@ -593,7 +591,7 @@ static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio, static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) { struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data, - delay_timer.timer); + delay_timer); struct Qdisc *sch = q->watchdog.qdisc; psched_time_t now; psched_tdiff_t delay = 0; @@ -623,7 +621,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay)); - tasklet_hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); + hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); } sch->flags &= ~TCQ_F_THROTTLED; @@ -1216,7 +1214,7 @@ cbq_reset(struct Qdisc* sch) q->tx_class = NULL; q->tx_borrowed = NULL; qdisc_watchdog_cancel(&q->watchdog); - tasklet_hrtimer_cancel(&q->delay_timer); + hrtimer_cancel(&q->delay_timer); q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); q->now_rt = q->now; @@ -1399,8 +1397,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.minidle = -0x7FFFFFFF; qdisc_watchdog_init(&q->watchdog, sch); - tasklet_hrtimer_init(&q->delay_timer, cbq_undelay, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); q->delay_timer.function = cbq_undelay; q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); From f2486f26692433ba27cc10991a085b503b0422a3 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Mon, 31 Aug 2009 16:54:03 -0700 Subject: [PATCH 18/60] [IA64] Fix warning in dma-mapping.c arch/ia64/kernel/dma-mapping.c:14: warning: control reaches end of non-void function arch/ia64/kernel/dma-mapping.c:14: warning: no return statement in function returning non-void This warning was introduced by commit: 390bd132b2831a2ad0268e84bffbfc0680debfe5 Add dma_debug_init() for ia64 Signed-off-by: Tony Luck --- arch/ia64/kernel/dma-mapping.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 39a3cd0a4173..f2c1600da097 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -10,7 +10,9 @@ EXPORT_SYMBOL(dma_ops); static int __init dma_init(void) { - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + + return 0; } fs_initcall(dma_init); From 5afe18d2f58812f3924edbd215464e5e3e8545e7 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 2 Sep 2009 11:00:46 +0200 Subject: [PATCH 19/60] [IA64] fix csum_ipv6_magic() The 32-bit parameters (len and csum) of csum_ipv6_magic() are passed in 64-bit registers in2 and in4. The high order 32 bits of the registers were never cleared, and garbage was sometimes calculated into the checksum. Fix this by clearing the high order 32 bits of these registers. Signed-off-by: Jiri Bohac Signed-off-by: Tony Luck --- arch/ia64/lib/ip_fast_csum.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S index 1f86aeb2c948..620d9dc5220f 100644 --- a/arch/ia64/lib/ip_fast_csum.S +++ b/arch/ia64/lib/ip_fast_csum.S @@ -96,20 +96,22 @@ END(ip_fast_csum) GLOBAL_ENTRY(csum_ipv6_magic) ld4 r20=[in0],4 ld4 r21=[in1],4 - dep r15=in3,in2,32,16 + zxt4 in2=in2 ;; ld4 r22=[in0],4 ld4 r23=[in1],4 - mux1 r15=r15,@rev + dep r15=in3,in2,32,16 ;; ld4 r24=[in0],4 ld4 r25=[in1],4 - shr.u r15=r15,16 + mux1 r15=r15,@rev add r16=r20,r21 add r17=r22,r23 + zxt4 in4=in4 ;; ld4 r26=[in0],4 ld4 r27=[in1],4 + shr.u r15=r15,16 add r18=r24,r25 add r8=r16,r17 ;; From 92653453c3015c083b9fe0ad48261c6b2267d482 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Wed, 2 Sep 2009 18:25:39 +0200 Subject: [PATCH 20/60] sound: oxygen: handle cards with missing EEPROM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The card model detection code introduced in 2.6.30 that tries to work around partially broken EEPROM contents by reading the EEPROM directly does not handle cards where the EEPROM has been omitted. In this case, we have to use the default ID to allow the driver to load. Signed-off-by: Clemens Ladisch Reported-and-tested-by: Ozan Çağlayan Cc: Signed-off-by: Takashi Iwai --- sound/pci/oxygen/oxygen_lib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index 312251d39696..9a8936e20744 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -260,6 +260,9 @@ oxygen_search_pci_id(struct oxygen *chip, const struct pci_device_id ids[]) * chip didn't if the first EEPROM word was overwritten. */ subdevice = oxygen_read_eeprom(chip, 2); + /* use default ID if EEPROM is missing */ + if (subdevice == 0xffff) + subdevice = 0x8788; /* * We use only the subsystem device ID for searching because it is * unique even without the subsystem vendor ID, which may have been From 16ebb5e0b36ceadc8186f71d68b0c4fa4b6e781b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Sep 2009 02:40:09 +0000 Subject: [PATCH 21/60] tc: Fix unitialized kernel memory leak Three bytes of uninitialized kernel memory are currently leaked to user Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 24d17ce9c294..fdb694e9f759 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1456,6 +1456,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; From a3df6f7d3090e611bcc774cd2cba45ae016d37e1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 3 Sep 2009 11:52:02 +1000 Subject: [PATCH 22/60] perf_counter/powerpc: Fix cache event codes for POWER7 I had the codes for L1 D-cache load accesses and misses swapped around, and the wrong codes for LL-cache accesses and misses. This corrects them. Reported-by: Corey Ashford Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: LKML-Reference: <19103.8514.709300.585484@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/power7-pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 388cf57ad827..018d094d92f9 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -317,7 +317,7 @@ static int power7_generic_events[] = { */ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x400f0, 0xc880 }, + [C(OP_READ)] = { 0xc880, 0x400f0 }, [C(OP_WRITE)] = { 0, 0x300f0 }, [C(OP_PREFETCH)] = { 0xd8b8, 0 }, }, @@ -327,8 +327,8 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_PREFETCH)] = { 0x408a, 0 }, }, [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x6080, 0x6084 }, - [C(OP_WRITE)] = { 0x6082, 0x6086 }, + [C(OP_READ)] = { 0x16080, 0x26080 }, + [C(OP_WRITE)] = { 0x16082, 0x26082 }, [C(OP_PREFETCH)] = { 0, 0 }, }, [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ From e6617c6ec28a17cf2f90262b835ec05b9b861400 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 3 Sep 2009 02:35:20 -0700 Subject: [PATCH 23/60] sparc64: Kill spurious NMI watchdog triggers by increasing limit to 30 seconds. This is a compromise and a temporary workaround for bootup NMI watchdog triggers some people see with qla2xxx devices present. This happens when, for example: CPU 0 is in the driver init and looping submitting mailbox commands to load the firmware, then waiting for completion. CPU 1 is receiving the device interrupts. CPU 1 is where the NMI watchdog triggers. CPU 0 is submitting mailbox commands fast enough that by the time CPU 1 returns from the device interrupt handler, a new one is pending. This sequence runs for more than 5 seconds. The problematic case is CPU 1's timer interrupt running when the barrage of device interrupts begin. Then we have: timer interrupt return for softirq checking pending, thus enable interrupts qla2xxx interrupt return qla2xxx interrupt return ... 5+ seconds pass final qla2xxx interrupt for fw load return run timer softirq return At some point in the multi-second qla2xxx interrupt storm we trigger the NMI watchdog on CPU 1 from the NMI interrupt handler. The timer softirq, once we get back to running it, is smart enough to run the timer work enough times to make up for the missed timer interrupts. However, the NMI watchdogs (both x86 and sparc) use the timer interrupt count to notice the cpu is wedged. But in the above scenerio we'll receive only one such timer interrupt even if we last all the way back to running the timer softirq. The default watchdog trigger point is only 5 seconds, which is pretty low (the softwatchdog triggers at 60 seconds). So increase it to 30 seconds for now. Signed-off-by: David S. Miller --- arch/sparc/kernel/nmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 2c0cc72d295b..b75bf502cd42 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -103,7 +103,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) } if (!touched && __get_cpu_var(last_irq_sum) == sum) { local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) + if (local_read(&__get_cpu_var(alert_counter)) == 30 * nmi_hz) die_nmi("BUG: NMI Watchdog detected LOCKUP", regs, panic_on_timeout); } else { From edcb3b14863e1a6aa1923eeaa81125a00cf51a80 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Aug 2009 15:18:37 -0700 Subject: [PATCH 24/60] mtd: m25p80: fix null pointer dereference bug This patch fixes the following oops, observed with MTD_PARTITIONS=n: m25p80 spi32766.0: m25p80 (1024 Kbytes) Unable to handle kernel paging request for data at address 0x00000008 Faulting instruction address: 0xc03a54b0 Oops: Kernel access of bad area, sig: 11 [#1] Modules linked in: NIP: c03a54b0 LR: c03a5494 CTR: c01e98b8 REGS: ef82bb60 TRAP: 0300 Not tainted (2.6.31-rc4-00167-g4733fd3) MSR: 00029000 CR: 24022022 XER: 20000000 DEAR: 00000008, ESR: 00000000 TASK = ef82c000[1] 'swapper' THREAD: ef82a000 GPR00: 00000000 ef82bc10 ef82c000 0000002e 00001eb8 ffffffff c01e9824 00000036 GPR08: c054ed40 c0542a08 00001eb8 00004000 22022022 1001a1a0 3ff8fd00 00000000 GPR16: 00000000 00000001 00000000 00000000 ef82bddc c0530000 efbef500 ef8356d0 GPR24: 00000000 ef8356d0 00000000 efbf7a00 c0530ec4 ffffffed efbf5300 c0541f98 NIP [c03a54b0] m25p_probe+0x22c/0x354 LR [c03a5494] m25p_probe+0x210/0x354 Call Trace: [ef82bc10] [c03a5494] m25p_probe+0x210/0x354 (unreliable) [ef82bca0] [c024e37c] spi_drv_probe+0x2c/0x3c [ef82bcb0] [c01f1afc] driver_probe_device+0xa4/0x178 [ef82bcd0] [c01f06e8] bus_for_each_drv+0x6c/0xa8 [ef82bd00] [c01f1a34] device_attach+0x84/0xa8 ... Signed-off-by: Anton Vorontsov Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/devices/m25p80.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index ae5fe91867e1..10ed195c0c1c 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -736,7 +736,7 @@ static int __devinit m25p_probe(struct spi_device *spi) flash->partitioned = 1; return add_mtd_partitions(&flash->mtd, parts, nr_parts); } - } else if (data->nr_parts) + } else if (data && data->nr_parts) dev_warn(&spi->dev, "ignoring %d default partitions on %s\n", data->nr_parts, data->name); From 4149ed1aa944ab864024982a2e568d17eccff504 Mon Sep 17 00:00:00 2001 From: Dimitri Gorokhovik Date: Thu, 3 Sep 2009 14:59:13 +0100 Subject: [PATCH 25/60] mtd: nftl: write support is broken Write support is broken in NFTL. Fix it. Signed-off-by: Cc: Tim Gardner Cc: Scott James Remnant Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/mtd/nftlcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index fb86cacd5bdb..665d3eba2f47 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -181,7 +181,7 @@ static int nftl_write(struct mtd_info *mtd, loff_t offs, size_t len, int res; ops.mode = MTD_OOB_PLACE; - ops.ooboffs = offs; + ops.ooboffs = offs & (mtd->writesize - 1); ops.ooblen = mtd->oobsize; ops.oobbuf = oob; ops.datbuf = buf; From 16f05c2b68520f94e365f9d347a7076f4ff00ad5 Mon Sep 17 00:00:00 2001 From: Dimitri Gorokhovik Date: Thu, 3 Sep 2009 14:04:22 +0100 Subject: [PATCH 26/60] mtd: nftl: fix offset alignments Arithmetic conversion in the mask computation makes the upper word of the second argument passed down to mtd->read_oob(), be always 0 (assuming 'offs' being a 64-bit signed long long type, and 'mtd->writesize' being a 32-bit unsigned int type). This patch applies over the other one adding masking in nftl_write, "nftl: write support is broken". Signed-off-by: Dimitri Gorokhovik Cc: Tim Gardner Cc: Scott James Remnant Signed-off-by: David Woodhouse --- drivers/mtd/nftlcore.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index 665d3eba2f47..1002e1882996 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -135,16 +135,17 @@ static void nftl_remove_dev(struct mtd_blktrans_dev *dev) int nftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len, size_t *retlen, uint8_t *buf) { + loff_t mask = mtd->writesize - 1; struct mtd_oob_ops ops; int res; ops.mode = MTD_OOB_PLACE; - ops.ooboffs = offs & (mtd->writesize - 1); + ops.ooboffs = offs & mask; ops.ooblen = len; ops.oobbuf = buf; ops.datbuf = NULL; - res = mtd->read_oob(mtd, offs & ~(mtd->writesize - 1), &ops); + res = mtd->read_oob(mtd, offs & ~mask, &ops); *retlen = ops.oobretlen; return res; } @@ -155,16 +156,17 @@ int nftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len, int nftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len, size_t *retlen, uint8_t *buf) { + loff_t mask = mtd->writesize - 1; struct mtd_oob_ops ops; int res; ops.mode = MTD_OOB_PLACE; - ops.ooboffs = offs & (mtd->writesize - 1); + ops.ooboffs = offs & mask; ops.ooblen = len; ops.oobbuf = buf; ops.datbuf = NULL; - res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops); + res = mtd->write_oob(mtd, offs & ~mask, &ops); *retlen = ops.oobretlen; return res; } @@ -177,17 +179,18 @@ int nftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len, static int nftl_write(struct mtd_info *mtd, loff_t offs, size_t len, size_t *retlen, uint8_t *buf, uint8_t *oob) { + loff_t mask = mtd->writesize - 1; struct mtd_oob_ops ops; int res; ops.mode = MTD_OOB_PLACE; - ops.ooboffs = offs & (mtd->writesize - 1); + ops.ooboffs = offs & mask; ops.ooblen = mtd->oobsize; ops.oobbuf = oob; ops.datbuf = buf; ops.len = len; - res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops); + res = mtd->write_oob(mtd, offs & ~mask, &ops); *retlen = ops.retlen; return res; } From bc8cec0dff072f1a45ce7f6b2c5234bb3411ac51 Mon Sep 17 00:00:00 2001 From: Massimo Cirillo Date: Thu, 27 Aug 2009 10:44:09 +0200 Subject: [PATCH 27/60] JFFS2: add missing verify buffer allocation/deallocation The function jffs2_nor_wbuf_flash_setup() doesn't allocate the verify buffer if CONFIG_JFFS2_FS_WBUF_VERIFY is defined, so causing a kernel panic when that macro is enabled and the verify function is called. Similarly the jffs2_nor_wbuf_flash_cleanup() must free the buffer if CONFIG_JFFS2_FS_WBUF_VERIFY is enabled. The following patch fixes the problem. The following patch applies to 2.6.30 kernel. Signed-off-by: Massimo Cirillo Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse Cc: stable@kernel.org --- fs/jffs2/wbuf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index d9a721e6db70..5ef7bac265e5 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1268,10 +1268,20 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { if (!c->wbuf) return -ENOMEM; +#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY + c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL); + if (!c->wbuf_verify) { + kfree(c->wbuf); + return -ENOMEM; + } +#endif return 0; } void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) { +#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY + kfree(c->wbuf_verify); +#endif kfree(c->wbuf); } From d76b1590e06a63a3d8697168cd0aabf1c4b3cb3a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Sep 2009 22:38:59 +0300 Subject: [PATCH 28/60] slub: Fix kmem_cache_destroy() with SLAB_DESTROY_BY_RCU kmem_cache_destroy() should call rcu_barrier() *after* kmem_cache_close() and *before* sysfs_slab_remove() or risk rcu_free_slab() being called after kmem_cache is deleted (kfreed). rmmod nf_conntrack can crash the machine because it has to kmem_cache_destroy() a SLAB_DESTROY_BY_RCU enabled cache. Cc: Reported-by: Zdenek Kabelac Signed-off-by: Eric Dumazet Acked-by: Paul E. McKenney Signed-off-by: Pekka Enberg --- mm/slub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b9f1491a58a1..b6276753626e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2594,8 +2594,6 @@ static inline int kmem_cache_close(struct kmem_cache *s) */ void kmem_cache_destroy(struct kmem_cache *s) { - if (s->flags & SLAB_DESTROY_BY_RCU) - rcu_barrier(); down_write(&slub_lock); s->refcount--; if (!s->refcount) { @@ -2606,6 +2604,8 @@ void kmem_cache_destroy(struct kmem_cache *s) "still has objects.\n", s->name, __func__); dump_stack(); } + if (s->flags & SLAB_DESTROY_BY_RCU) + rcu_barrier(); sysfs_slab_remove(s); } else up_write(&slub_lock); From 2bcaa6a4238094c5695d5b1943078388d82d3004 Mon Sep 17 00:00:00 2001 From: Dave Andrews Date: Thu, 3 Sep 2009 17:21:27 -0700 Subject: [PATCH 29/60] Input: atkbd - add Compaq Presario R4000-series repeat quirk Compaq Presario R4000-series laptops are not sending a "volume up button release" and "volume down button release" signal in the PS/2 protocol for atkbd. The URL below has some of confirmed reports: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/385477 Signed-off-by: Dave Andrews Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atkbd.c | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 95fe0452dae4..6c6a09b1c0fe 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -879,6 +879,14 @@ static unsigned int atkbd_hp_zv6100_forced_release_keys[] = { 0xae, 0xb0, -1U }; +/* + * Perform fixup for HP (Compaq) Presario R4000 R4100 R4200 that don't generate + * release for their volume buttons + */ +static unsigned int atkbd_hp_r4000_forced_release_keys[] = { + 0xae, 0xb0, -1U +}; + /* * Samsung NC10,NC20 with Fn+F? key release not working */ @@ -1536,6 +1544,33 @@ static struct dmi_system_id atkbd_dmi_quirk_table[] __initdata = { .callback = atkbd_setup_forced_release, .driver_data = atkbd_hp_zv6100_forced_release_keys, }, + { + .ident = "HP Presario R4000", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4000"), + }, + .callback = atkbd_setup_forced_release, + .driver_data = atkbd_hp_r4000_forced_release_keys, + }, + { + .ident = "HP Presario R4100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4100"), + }, + .callback = atkbd_setup_forced_release, + .driver_data = atkbd_hp_r4000_forced_release_keys, + }, + { + .ident = "HP Presario R4200", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4200"), + }, + .callback = atkbd_setup_forced_release, + .driver_data = atkbd_hp_r4000_forced_release_keys, + }, { .ident = "Inventec Symphony", .matches = { From bd4352cadfacb9084c97c853b025fac010266c26 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Sep 2009 03:38:54 -0700 Subject: [PATCH 30/60] sparc64: Fix bootup with mcount in some configs. Functions invoked early when booting up a cpu can't use tracing because mcount requires a valid 'current_thread_info()' and TLB mappings to be setup. The code path of sun4v_register_mondo_queues --> register_one_mondo is one such case. sun4v_register_mondo_queues already has the necessary 'notrace' annotation, but register_one_mondo does not. Normally register_one_mondo is inlined so the bug doesn't trigger, but with some config/compiler combinations, it won't be so we must properly mark it notrace. While we're here, add 'notrace' annoations to prom_printf and prom_halt so that early error handling won't have the same problem. Reported-by: Alexander Beregalov Reported-by: Leif Sawyer Signed-off-by: David S. Miller --- arch/sparc/kernel/irq_64.c | 2 +- arch/sparc/prom/misc_64.c | 2 +- arch/sparc/prom/printf.c | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index f0ee79055409..8daab33fc17d 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -886,7 +886,7 @@ void notrace init_irqwork_curcpu(void) * Therefore you cannot make any OBP calls, not even prom_printf, * from these two routines. */ -static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask) +static void __cpuinit notrace register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask) { unsigned long num_entries = (qmask + 1) / 64; unsigned long status; diff --git a/arch/sparc/prom/misc_64.c b/arch/sparc/prom/misc_64.c index eedffb4fec2d..39fc6af21b7c 100644 --- a/arch/sparc/prom/misc_64.c +++ b/arch/sparc/prom/misc_64.c @@ -88,7 +88,7 @@ void prom_cmdline(void) /* Drop into the prom, but completely terminate the program. * No chance of continuing. */ -void prom_halt(void) +void notrace prom_halt(void) { #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) diff --git a/arch/sparc/prom/printf.c b/arch/sparc/prom/printf.c index 660943ee4c2a..ca869266b9f3 100644 --- a/arch/sparc/prom/printf.c +++ b/arch/sparc/prom/printf.c @@ -14,14 +14,14 @@ */ #include +#include #include #include static char ppbuf[1024]; -void -prom_write(const char *buf, unsigned int n) +void notrace prom_write(const char *buf, unsigned int n) { char ch; @@ -33,8 +33,7 @@ prom_write(const char *buf, unsigned int n) } } -void -prom_printf(const char *fmt, ...) +void notrace prom_printf(const char *fmt, ...) { va_list args; int i; From a77e28c7e1dc1a6a035c7627d4a88ecf3ea09aea Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Fri, 4 Sep 2009 20:40:16 +0100 Subject: [PATCH 31/60] dm multipath: fix oops when request based io fails when no paths The patch posted at http://marc.info/?l=dm-devel&m=124539787228784&w=2 which was merged into cec47e3d4a861e1d942b3a580d0bbef2700d2bb2 ("dm: prepare for request based option") introduced a regression in request-based dm. If map_request() calls dm_kill_unmapped_request() to complete a cloned bio without dispatching it, clone->bio is still set when dm_end_request() is called and the BUG_ON(clone->bio) is incorrect. The patch fixes this bug by freeing bio in dm_end_request() if the clone has bio. I've redone my tests to cover all I/O paths and confirmed there's no other regression. Here is the oops I hit in request-based dm when I do I/O to a multipath device which doesn't have any active path nor queue_if_no_path setting: ------------[ cut here ]------------ kernel BUG at /root/2.6.31-rc4.rqdm/drivers/md/dm.c:828! invalid opcode: 0000 [#1] SMP last sysfs file: /sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map CPU 1 Modules linked in: autofs4 sunrpc cpufreq_ondemand acpi_cpufreq dm_mirror dm_region_hash dm_log dm_service_time dm_multipath scsi_dh dm_mod video output sbs sbshc battery ac sg sr_mod e1000e button cdrom serio_raw rtc_cmos rtc_core rtc_lib piix lpfc scsi_transport_fc ata_piix libata megaraid_sas sd_mod scsi_mod crc_t10dif ext3 jbd uhci_hcd ohci_hcd ehci_hcd [last unloaded: microcode] Pid: 7, comm: ksoftirqd/1 Not tainted 2.6.31-rc4.rqdm #1 Express5800/120Lj [N8100-1417] RIP: 0010:[] [] dm_softirq_done+0xbd/0x100 [dm_mod] RSP: 0018:ffff8800280a1f08 EFLAGS: 00010282 RAX: ffffffffa02544e0 RBX: ffff8802aa1111d0 RCX: ffff8802aa1111e0 RDX: ffff8802ab913e70 RSI: 0000000000000000 RDI: ffff8802ab913e70 RBP: ffff8800280a1f28 R08: ffffc90005457040 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 00000000fffffffb R13: ffff8802ab913e88 R14: ffff8802ab9c1438 R15: 0000000000000100 FS: 0000000000000000(0000) GS:ffff88002809e000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000003d54a98640 CR3: 000000029f0a1000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process ksoftirqd/1 (pid: 7, threadinfo ffff8802ae50e000, task ffff8802ae4f8040) Stack: ffff8800280a1f38 0000000000000020 ffffffff814f30a0 0000000000000004 <0> ffff8800280a1f58 ffffffff8116b245 ffff8800280a1f38 ffff8800280a1f38 <0> ffff8800280a1f58 0000000000000001 ffff8800280a1fa8 ffffffff810477bc Call Trace: [] blk_done_softirq+0x75/0x90 [] __do_softirq+0xcc/0x210 [] ? ksoftirqd+0x0/0x110 [] call_softirq+0x1c/0x50 [] do_softirq+0x65/0xa0 [] ? ksoftirqd+0x0/0x110 [] ksoftirqd+0x70/0x110 [] kthread+0x99/0xb0 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0xb0 [] ? child_rip+0x0/0x20 Code: 44 89 e6 48 89 df e8 23 fb f2 e0 be 01 00 00 00 4c 89 f7 e8 f6 fd ff ff 5b 41 5c 41 5d 41 5e c9 c3 4c 89 ef e8 85 fe ff ff eb ed <0f> 0b eb fe 41 8b 85 dc 00 00 00 48 83 bb 10 01 00 00 00 89 83 RIP [] dm_softirq_done+0xbd/0x100 [dm_mod] RSP ---[ end trace 16af0a1d8542da55 ]--- Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a311ea0d441..b4845b14740d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -738,16 +738,22 @@ static void rq_completed(struct mapped_device *md, int run_queue) dm_put(md); } +static void free_rq_clone(struct request *clone) +{ + struct dm_rq_target_io *tio = clone->end_io_data; + + blk_rq_unprep_clone(clone); + free_rq_tio(tio); +} + static void dm_unprep_request(struct request *rq) { struct request *clone = rq->special; - struct dm_rq_target_io *tio = clone->end_io_data; rq->special = NULL; rq->cmd_flags &= ~REQ_DONTPREP; - blk_rq_unprep_clone(clone); - free_rq_tio(tio); + free_rq_clone(clone); } /* @@ -825,8 +831,7 @@ static void dm_end_request(struct request *clone, int error) rq->sense_len = clone->sense_len; } - BUG_ON(clone->bio); - free_rq_tio(tio); + free_rq_clone(clone); blk_end_request_all(rq, error); From 8811f46c1f9386fc7017150de9d52359e5b1826e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 4 Sep 2009 20:40:19 +0100 Subject: [PATCH 32/60] dm snapshot: implement iterate devices Implement the .iterate_devices for the origin and snapshot targets. dm-snapshot's lack of .iterate_devices resulted in the inability to properly establish queue_limits for both targets. With 4K sector drives: an unfortunate side-effect of not establishing proper limits in either targets' DM device was that IO to the devices would fail even though both had been created without error. Commit af4874e03ed82f050d5872d8c39ce64bf16b5c38 ("dm target:s introduce iterate devices fn") in 2.6.31-rc1 should have implemented .iterate_devices for dm-snap.c's origin and snapshot targets. Signed-off-by: Mike Snitzer Signed-off-by: Andrew Morton Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index d573165cd2b7..57f1bf7f3b7a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1176,6 +1176,15 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, return 0; } +static int snapshot_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct dm_snapshot *snap = ti->private; + + return fn(ti, snap->origin, 0, ti->len, data); +} + + /*----------------------------------------------------------------- * Origin methods *---------------------------------------------------------------*/ @@ -1410,20 +1419,29 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result, return 0; } +static int origin_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct dm_dev *dev = ti->private; + + return fn(ti, dev, 0, ti->len, data); +} + static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 6, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, .map = origin_map, .resume = origin_resume, .status = origin_status, + .iterate_devices = origin_iterate_devices, }; static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 6, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -1431,6 +1449,7 @@ static struct target_type snapshot_target = { .end_io = snapshot_end_io, .resume = snapshot_resume, .status = snapshot_status, + .iterate_devices = snapshot_iterate_devices, }; static int __init dm_snapshot_init(void) From f6a1ed10864b7540fa758bbccf3433fe17070329 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 4 Sep 2009 20:40:22 +0100 Subject: [PATCH 33/60] dm table: fix queue_limit checking device iterator The logic to check for valid device areas is inverted relative to proper use with iterate_devices. The iterate_devices method calls its callback for every underlying device in the target. If any callback returns non-zero, iterate_devices exits immediately. But the callback device_area_is_valid() returns 0 on error and 1 on success. The overall effect without is that an error is issued only if every device is invalid. This patch renames device_area_is_valid to device_area_is_invalid and inverts the logic so that one invalid device is sufficient to raise an error. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d952b3441913..aa60526075d7 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -343,10 +343,10 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) } /* - * If possible, this checks an area of a destination device is valid. + * If possible, this checks an area of a destination device is invalid. */ -static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; @@ -357,16 +357,16 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, char b[BDEVNAME_SIZE]; if (!dev_size) - return 1; + return 0; if ((start >= dev_size) || (start + len > dev_size)) { DMWARN("%s: %s too small for target", dm_device_name(ti->table->md), bdevname(bdev, b)); - return 0; + return 1; } if (logical_block_size_sectors <= 1) - return 1; + return 0; if (start & (logical_block_size_sectors - 1)) { DMWARN("%s: start=%llu not aligned to h/w " @@ -374,7 +374,7 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, dm_device_name(ti->table->md), (unsigned long long)start, limits->logical_block_size, bdevname(bdev, b)); - return 0; + return 1; } if (len & (logical_block_size_sectors - 1)) { @@ -383,10 +383,10 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, dm_device_name(ti->table->md), (unsigned long long)len, limits->logical_block_size, bdevname(bdev, b)); - return 0; + return 1; } - return 1; + return 0; } /* @@ -1000,8 +1000,8 @@ int dm_calculate_queue_limits(struct dm_table *table, * Check each device area is consistent with the target's * overall queue limits. */ - if (!ti->type->iterate_devices(ti, device_area_is_valid, - &ti_limits)) + if (ti->type->iterate_devices(ti, device_area_is_invalid, + &ti_limits)) return -EINVAL; combine_limits: From a963a956225eb0f8c4d3537f428153c30adf54b8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 4 Sep 2009 20:40:24 +0100 Subject: [PATCH 34/60] dm table: add more context to terse warning messages A couple of recent warning messages make it difficult for the reader to determine exactly what is wrong. This patch adds more information to those messages. The messages were added by these commits: 5dea271b6d87bd1d79a59c1d5baac2596a841c37 ("dm table: pass correct dev area size to device_area_is_valid") ea9df47cc92573b159ef3b4fda516c32cba9c4fd ("dm table: fix blk_stack_limits arg to use bytes not sectors") The patch also corrects references to logical_block_size in printk format strings from %hu to %u. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index aa60526075d7..c90e662d2802 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -360,8 +360,12 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, return 0; if ((start >= dev_size) || (start + len > dev_size)) { - DMWARN("%s: %s too small for target", - dm_device_name(ti->table->md), bdevname(bdev, b)); + DMWARN("%s: %s too small for target: " + "start=%llu, len=%llu, dev_size=%llu", + dm_device_name(ti->table->md), bdevname(bdev, b), + (unsigned long long)start, + (unsigned long long)len, + (unsigned long long)dev_size); return 1; } @@ -370,7 +374,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, if (start & (logical_block_size_sectors - 1)) { DMWARN("%s: start=%llu not aligned to h/w " - "logical block size %hu of %s", + "logical block size %u of %s", dm_device_name(ti->table->md), (unsigned long long)start, limits->logical_block_size, bdevname(bdev, b)); @@ -379,7 +383,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, if (len & (logical_block_size_sectors - 1)) { DMWARN("%s: len=%llu not aligned to h/w " - "logical block size %hu of %s", + "logical block size %u of %s", dm_device_name(ti->table->md), (unsigned long long)len, limits->logical_block_size, bdevname(bdev, b)); @@ -496,8 +500,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, } if (blk_stack_limits(limits, &q->limits, start << 9) < 0) - DMWARN("%s: target device %s is misaligned", - dm_device_name(ti->table->md), bdevname(bdev, b)); + DMWARN("%s: target device %s is misaligned: " + "physical_block_size=%u, logical_block_size=%u, " + "alignment_offset=%u, start=%llu", + dm_device_name(ti->table->md), bdevname(bdev, b), + q->limits.physical_block_size, + q->limits.logical_block_size, + q->limits.alignment_offset, + (unsigned long long) start << 9); + /* * Check if merge fn is supported. @@ -698,7 +709,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table, if (remaining) { DMWARN("%s: table line %u (start sect %llu len %llu) " - "not aligned to h/w logical block size %hu", + "not aligned to h/w logical block size %u", dm_device_name(table->md), i, (unsigned long long) ti->begin, (unsigned long long) ti->len, From 40bea431274c247425e7f5970d796ff7b37a2b22 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 4 Sep 2009 20:40:25 +0100 Subject: [PATCH 35/60] dm stripe: expose correct io hints Set sensible I/O hints for striped DM devices in the topology infrastructure added for 2.6.31 for userspace tools to obtain via sysfs. Add .io_hints to 'struct target_type' to allow the I/O hints portion (io_min and io_opt) of the 'struct queue_limits' to be set by each target and implement this for dm-stripe. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-stripe.c | 13 ++++++++++++- drivers/md/dm-table.c | 4 ++++ include/linux/device-mapper.h | 4 ++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 4e0e5937e42a..3e563d251733 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -329,9 +329,19 @@ static int stripe_iterate_devices(struct dm_target *ti, return ret; } +static void stripe_io_hints(struct dm_target *ti, + struct queue_limits *limits) +{ + struct stripe_c *sc = ti->private; + unsigned chunk_size = (sc->chunk_mask + 1) << 9; + + blk_limits_io_min(limits, chunk_size); + limits->io_opt = chunk_size * sc->stripes; +} + static struct target_type stripe_target = { .name = "striped", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, @@ -339,6 +349,7 @@ static struct target_type stripe_target = { .end_io = stripe_end_io, .status = stripe_status, .iterate_devices = stripe_iterate_devices, + .io_hints = stripe_io_hints, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c90e662d2802..1a6cb3c7822e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1007,6 +1007,10 @@ int dm_calculate_queue_limits(struct dm_table *table, ti->type->iterate_devices(ti, dm_set_device_limits, &ti_limits); + /* Set I/O hints portion of queue limits */ + if (ti->type->io_hints) + ti->type->io_hints(ti, &ti_limits); + /* * Check each device area is consistent with the target's * overall queue limits. diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 655e7721580a..df7607e6dce8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -91,6 +91,9 @@ typedef int (*dm_iterate_devices_fn) (struct dm_target *ti, iterate_devices_callout_fn fn, void *data); +typedef void (*dm_io_hints_fn) (struct dm_target *ti, + struct queue_limits *limits); + /* * Returns: * 0: The target can handle the next I/O immediately. @@ -151,6 +154,7 @@ struct target_type { dm_merge_fn merge; dm_busy_fn busy; dm_iterate_devices_fn iterate_devices; + dm_io_hints_fn io_hints; /* For internal device-mapper use. */ struct list_head list; From 4142a969175302bc843d1505133488bfdbfa4732 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 4 Sep 2009 20:40:28 +0100 Subject: [PATCH 36/60] dm log: fix userspace status output Fix 'dmsetup table' output. There is a missing ' ' at the end of the string causing two words to run together. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-log-userspace-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index e69b96560997..2f2a244e1109 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -577,7 +577,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, break; case STATUSTYPE_TABLE: sz = 0; - DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1, + DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc + 1, lc->uuid, lc->usr_argv_str); break; } From b8313b6da7e2e7c7f47d93d8561969a3ff9ba0ea Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 4 Sep 2009 20:40:30 +0100 Subject: [PATCH 37/60] dm log: remove incorrect field from userspace table output The output of 'dmsetup table' includes an internal field that should not be there. This patch removes it. To make the fix simpler, we first reorder a constructor argument The 'device size' argument is generated internally. Currently it is placed as the last space-separated word of the constructor string. However, we need to use a version of the string without this word, so we move it to the beginning instead so it is trivial to skip past it. We keep a copy of the arguments passed to userspace for creating a log, just in case we need to resend them. These are the same arguments that are desired in the STATUSTYPE_TABLE request, except for one. When creating the userspace log, the userspace daemon must know the size of the mirror, so that is added to the arguments given in the constructor table. We were printing this extra argument out as well, which is a mistake. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-log-userspace-base.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index 2f2a244e1109..c49da0a41c8e 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -111,10 +111,9 @@ static int build_constructor_string(struct dm_target *ti, return -ENOMEM; } - for (i = 0, str_size = 0; i < argc; i++) - str_size += sprintf(str + str_size, "%s ", argv[i]); - str_size += sprintf(str + str_size, "%llu", - (unsigned long long)ti->len); + str_size = sprintf(str, "%llu", (unsigned long long)ti->len); + for (i = 0; i < argc; i++) + str_size += sprintf(str + str_size, " %s", argv[i]); *ctr_str = str; return str_size; @@ -561,6 +560,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, char *result, unsigned maxlen) { int r = 0; + char *table_args; size_t sz = (size_t)maxlen; struct log_c *lc = log->context; @@ -577,8 +577,12 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, break; case STATUSTYPE_TABLE: sz = 0; - DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc + 1, - lc->uuid, lc->usr_argv_str); + table_args = strstr(lc->usr_argv_str, " "); + BUG_ON(!table_args); /* There will always be a ' ' */ + table_args++; + + DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc, + lc->uuid, table_args); break; } return (r) ? 0 : (int)sz; From d2b698644c97cb033261536a4f2010924a00eac9 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 4 Sep 2009 20:40:32 +0100 Subject: [PATCH 38/60] dm raid1: do not allow log_failure variable to unset after being set This patch fixes a bug which was triggering a case where the primary leg could not be changed on failure even when the mirror was in-sync. The case involves the failure of the primary device along with the transient failure of the log device. The problem is that bios can be put on the 'failures' list (due to log failure) before 'fail_mirror' is called due to the primary device failure. Normally, this is fine, but if the log device failure is transient, a subsequent iteration of the work thread, 'do_mirror', will reset 'log_failure'. The 'do_failures' function then resets the 'in_sync' variable when processing bios on the failures list. The 'in_sync' variable is what is used to determine if the primary device can be switched in the event of a failure. Since this has been reset, the primary device is incorrectly assumed to be not switchable. The case has been seen in the cluster mirror context, where one machine realizes the log device is dead before the other machines. As the responsibilities of the server migrate from one node to another (because the mirror is being reconfigured due to the failure), the new server may think for a moment that the log device is fine - thus resetting the 'log_failure' variable. In any case, it is inappropiate for us to reset the 'log_failure' variable. The above bug simply illustrates that it can actually hurt us. Cc: stable@kernel.org Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid1.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 9726577cde49..33f179e66bf5 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -648,7 +648,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) */ dm_rh_inc_pending(ms->rh, &sync); dm_rh_inc_pending(ms->rh, &nosync); - ms->log_failure = dm_rh_flush(ms->rh) ? 1 : 0; + + /* + * If the flush fails on a previous call and succeeds here, + * we must not reset the log_failure variable. We need + * userspace interaction to do that. + */ + ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure; /* * Dispatch io. From 7ec23d50949d5062b5b749638dd9380ed75e58e5 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 4 Sep 2009 20:40:34 +0100 Subject: [PATCH 39/60] dm log: userspace add luid to distinguish between concurrent log instances Device-mapper userspace logs (like the clustered log) are identified by a universally unique identifier (UUID). This identifier is used to associate requests from the kernel to a specific log in userspace. The UUID must be unique everywhere, since multiple machines may use this identifier when communicating about a particular log, as is the case for cluster logs. Sometimes, device-mapper/LVM may re-use a UUID. This is the case during pvmoves, when moving from one segment of an LV to another, or when resizing a mirror, etc. In these cases, a new log is created with the same UUID and loaded in the "inactive" slot. When a device-mapper "resume" is issued, the "live" table is deactivated and the new "inactive" table becomes "live". (The "inactive" table can also be removed via a device-mapper 'clear' command.) The above two issues were colliding. More than one log was being created with the same UUID, and there was no way to distinguish between them. So, sometimes the wrong log would be swapped out during the exchange. The solution is to create a locally unique identifier, 'luid', to go along with the UUID. This new identifier is used to determine exactly which log is being referenced by the kernel when the log exchange is made. The identifier is not universally safe, but it does not need to be, since create/destroy/suspend/resume operations are bound to a specific machine; and these are the operations that make up the exchange. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-log-userspace-base.c | 23 ++++++++++++++--------- drivers/md/dm-log-userspace-transfer.c | 6 ++++-- drivers/md/dm-log-userspace-transfer.h | 2 +- include/linux/dm-log-userspace.h | 13 ++++++++++++- 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index c49da0a41c8e..6e186b1a062d 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -21,6 +21,7 @@ struct log_c { struct dm_target *ti; uint32_t region_size; region_t region_count; + uint64_t luid; char uuid[DM_UUID_LEN]; char *usr_argv_str; @@ -63,7 +64,7 @@ static int userspace_do_request(struct log_c *lc, const char *uuid, * restored. */ retry: - r = dm_consult_userspace(uuid, request_type, data, + r = dm_consult_userspace(uuid, lc->luid, request_type, data, data_size, rdata, rdata_size); if (r != -ESRCH) @@ -74,14 +75,15 @@ static int userspace_do_request(struct log_c *lc, const char *uuid, set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(2*HZ); DMWARN("Attempting to contact userspace log server..."); - r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str, + r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR, + lc->usr_argv_str, strlen(lc->usr_argv_str) + 1, NULL, NULL); if (!r) break; } DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); - r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL, + r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL, 0, NULL, NULL); if (!r) goto retry; @@ -153,6 +155,9 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, return -ENOMEM; } + /* The ptr value is sufficient for local unique id */ + lc->luid = (uint64_t)lc; + lc->ti = ti; if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { @@ -172,7 +177,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, } /* Send table string */ - r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, ctr_str, str_size, NULL, NULL); if (r == -ESRCH) { @@ -182,7 +187,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, /* Since the region size does not change, get it now */ rdata_size = sizeof(rdata); - r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE, NULL, 0, (char *)&rdata, &rdata_size); if (r) { @@ -211,7 +216,7 @@ static void userspace_dtr(struct dm_dirty_log *log) int r; struct log_c *lc = log->context; - r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, NULL, 0, NULL, NULL); @@ -226,7 +231,7 @@ static int userspace_presuspend(struct dm_dirty_log *log) int r; struct log_c *lc = log->context; - r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND, NULL, 0, NULL, NULL); @@ -238,7 +243,7 @@ static int userspace_postsuspend(struct dm_dirty_log *log) int r; struct log_c *lc = log->context; - r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND, NULL, 0, NULL, NULL); @@ -251,7 +256,7 @@ static int userspace_resume(struct dm_dirty_log *log) struct log_c *lc = log->context; lc->in_sync_hint = 0; - r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME, NULL, 0, NULL, NULL); diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c index 8ce74d95ae4d..ba0edad2d048 100644 --- a/drivers/md/dm-log-userspace-transfer.c +++ b/drivers/md/dm-log-userspace-transfer.c @@ -147,7 +147,8 @@ static void cn_ulog_callback(void *data) /** * dm_consult_userspace - * @uuid: log's uuid (must be DM_UUID_LEN in size) + * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size) + * @luid: log's local unique identifier * @request_type: found in include/linux/dm-log-userspace.h * @data: data to tx to the server * @data_size: size of data in bytes @@ -163,7 +164,7 @@ static void cn_ulog_callback(void *data) * * Returns: 0 on success, -EXXX on failure **/ -int dm_consult_userspace(const char *uuid, int request_type, +int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type, char *data, size_t data_size, char *rdata, size_t *rdata_size) { @@ -190,6 +191,7 @@ int dm_consult_userspace(const char *uuid, int request_type, memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size); memcpy(tfr->uuid, uuid, DM_UUID_LEN); + tfr->luid = luid; tfr->seq = dm_ulog_seq++; /* diff --git a/drivers/md/dm-log-userspace-transfer.h b/drivers/md/dm-log-userspace-transfer.h index c26d8e4e2710..04ee874f9153 100644 --- a/drivers/md/dm-log-userspace-transfer.h +++ b/drivers/md/dm-log-userspace-transfer.h @@ -11,7 +11,7 @@ int dm_ulog_tfr_init(void); void dm_ulog_tfr_exit(void); -int dm_consult_userspace(const char *uuid, int request_type, +int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type, char *data, size_t data_size, char *rdata, size_t *rdata_size); diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h index 642e3017b51f..8a1f972c0fe9 100644 --- a/include/linux/dm-log-userspace.h +++ b/include/linux/dm-log-userspace.h @@ -371,7 +371,18 @@ (DM_ULOG_REQUEST_MASK & (request_type)) struct dm_ulog_request { - char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */ + /* + * The local unique identifier (luid) and the universally unique + * identifier (uuid) are used to tie a request to a specific + * mirror log. A single machine log could probably make due with + * just the 'luid', but a cluster-aware log must use the 'uuid' and + * the 'luid'. The uuid is what is required for node to node + * communication concerning a particular log, but the 'luid' helps + * differentiate between logs that are being swapped and have the + * same 'uuid'. (Think "live" and "inactive" device-mapper tables.) + */ + uint64_t luid; + char uuid[DM_UUID_LEN]; char padding[7]; /* Padding because DM_UUID_LEN = 129 */ int32_t error; /* Used to report back processing errors */ From 02d2fd31defce6ff77146ad0fef4f19006055d86 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 4 Sep 2009 20:40:37 +0100 Subject: [PATCH 40/60] dm snapshot: refactor zero_disk_area to use chunk_io Refactor chunk_io to prepare for the fix in the following patch. Pass an area pointer to chunk_io and simplify zero_disk_area to use chunk_io. No functional change. Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap-persistent.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 6e3fe4f14934..2a3d626a98d9 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -188,7 +188,8 @@ static void do_metadata(struct work_struct *work) /* * Read or write a chunk aligned and sized block of data from a device. */ -static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) +static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, + int metadata) { struct dm_io_region where = { .bdev = ps->store->cow->bdev, @@ -198,7 +199,7 @@ static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) struct dm_io_request io_req = { .bi_rw = rw, .mem.type = DM_IO_VMA, - .mem.ptr.vma = ps->area, + .mem.ptr.vma = area, .client = ps->io_client, .notify.fn = NULL, }; @@ -240,7 +241,7 @@ static int area_io(struct pstore *ps, int rw) chunk = area_location(ps, ps->current_area); - r = chunk_io(ps, chunk, rw, 0); + r = chunk_io(ps, ps->area, chunk, rw, 0); if (r) return r; @@ -254,20 +255,7 @@ static void zero_memory_area(struct pstore *ps) static int zero_disk_area(struct pstore *ps, chunk_t area) { - struct dm_io_region where = { - .bdev = ps->store->cow->bdev, - .sector = ps->store->chunk_size * area_location(ps, area), - .count = ps->store->chunk_size, - }; - struct dm_io_request io_req = { - .bi_rw = WRITE, - .mem.type = DM_IO_VMA, - .mem.ptr.vma = ps->zero_area, - .client = ps->io_client, - .notify.fn = NULL, - }; - - return dm_io(&io_req, 1, &where, NULL); + return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0); } static int read_header(struct pstore *ps, int *new_snapshot) @@ -297,7 +285,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) if (r) return r; - r = chunk_io(ps, 0, READ, 1); + r = chunk_io(ps, ps->area, 0, READ, 1); if (r) goto bad; @@ -359,7 +347,7 @@ static int write_header(struct pstore *ps) dh->version = cpu_to_le32(ps->version); dh->chunk_size = cpu_to_le32(ps->store->chunk_size); - return chunk_io(ps, 0, WRITE, 1); + return chunk_io(ps, ps->area, 0, WRITE, 1); } /* From 61578dcd3fafe6babd72e8db32110cc0b630a432 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 4 Sep 2009 20:40:39 +0100 Subject: [PATCH 41/60] dm snapshot: fix header corruption race on invalidation If a persistent snapshot fills up, a race can corrupt the on-disk header which causes a crash on any future attempt to activate the snapshot (typically while booting). This patch fixes the race. When the snapshot overflows, __invalidate_snapshot is called, which calls snapshot store method drop_snapshot. It goes to persistent_drop_snapshot that calls write_header. write_header constructs the new header in the "area" location. Concurrently, an existing kcopyd job may finish, call copy_callback and commit_exception method, that goes to persistent_commit_exception. persistent_commit_exception doesn't do locking, relying on the fact that callbacks are single-threaded, but it can race with snapshot invalidation and overwrite the header that is just being written while the snapshot is being invalidated. The result of this race is a corrupted header being written that can lead to a crash on further reactivation (if chunk_size is zero in the corrupted header). The fix is to use separate memory areas for each. See the bug: https://bugzilla.redhat.com/show_bug.cgi?id=461506 Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap-persistent.c | 44 +++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 2a3d626a98d9..5d1a97580cb7 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -105,6 +105,13 @@ struct pstore { */ void *zero_area; + /* + * An area used for header. The header can be written + * concurrently with metadata (when invalidating the snapshot), + * so it needs a separate buffer. + */ + void *header_area; + /* * Used to keep track of which metadata area the data in * 'chunk' refers to. @@ -148,16 +155,27 @@ static int alloc_area(struct pstore *ps) */ ps->area = vmalloc(len); if (!ps->area) - return r; + goto err_area; ps->zero_area = vmalloc(len); - if (!ps->zero_area) { - vfree(ps->area); - return r; - } + if (!ps->zero_area) + goto err_zero_area; memset(ps->zero_area, 0, len); + ps->header_area = vmalloc(len); + if (!ps->header_area) + goto err_header_area; + return 0; + +err_header_area: + vfree(ps->zero_area); + +err_zero_area: + vfree(ps->area); + +err_area: + return r; } static void free_area(struct pstore *ps) @@ -169,6 +187,10 @@ static void free_area(struct pstore *ps) if (ps->zero_area) vfree(ps->zero_area); ps->zero_area = NULL; + + if (ps->header_area) + vfree(ps->header_area); + ps->header_area = NULL; } struct mdata_req { @@ -285,11 +307,11 @@ static int read_header(struct pstore *ps, int *new_snapshot) if (r) return r; - r = chunk_io(ps, ps->area, 0, READ, 1); + r = chunk_io(ps, ps->header_area, 0, READ, 1); if (r) goto bad; - dh = (struct disk_header *) ps->area; + dh = ps->header_area; if (le32_to_cpu(dh->magic) == 0) { *new_snapshot = 1; @@ -339,15 +361,15 @@ static int write_header(struct pstore *ps) { struct disk_header *dh; - memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT); + memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT); - dh = (struct disk_header *) ps->area; + dh = ps->header_area; dh->magic = cpu_to_le32(SNAP_MAGIC); dh->valid = cpu_to_le32(ps->valid); dh->version = cpu_to_le32(ps->version); dh->chunk_size = cpu_to_le32(ps->store->chunk_size); - return chunk_io(ps, ps->area, 0, WRITE, 1); + return chunk_io(ps, ps->header_area, 0, WRITE, 1); } /* @@ -667,6 +689,8 @@ static int persistent_ctr(struct dm_exception_store *store, ps->valid = 1; ps->version = SNAPSHOT_DISK_VERSION; ps->area = NULL; + ps->zero_area = NULL; + ps->header_area = NULL; ps->next_free = 2; /* skipping the header and first area */ ps->current_committed = 0; From 2defcc3fb4661e7351cb2ac48d843efc4c64db13 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 4 Sep 2009 20:40:41 +0100 Subject: [PATCH 42/60] dm exception store: split set_chunk_size Break the function set_chunk_size to two functions in preparation for the fix in the following patch. Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 8 ++++++++ drivers/md/dm-exception-store.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 3710ff88fc10..4c01c7535fb5 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -171,6 +171,14 @@ static int set_chunk_size(struct dm_exception_store *store, */ chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9); + return dm_exception_store_set_chunk_size(store, chunk_size_ulong, + error); +} + +int dm_exception_store_set_chunk_size(struct dm_exception_store *store, + unsigned long chunk_size_ulong, + char **error) +{ /* Check chunk_size is a power of 2 */ if (!is_power_of_2(chunk_size_ulong)) { *error = "Chunk size is not a power of 2"; diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 2442c8c07898..812c71872ba0 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -168,6 +168,10 @@ static inline chunk_t sector_to_chunk(struct dm_exception_store *store, int dm_exception_store_type_register(struct dm_exception_store_type *type); int dm_exception_store_type_unregister(struct dm_exception_store_type *type); +int dm_exception_store_set_chunk_size(struct dm_exception_store *store, + unsigned long chunk_size_ulong, + char **error); + int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, unsigned *args_used, struct dm_exception_store **store); From ae0b7448e91353ea5f821601a055aca6b58042cd Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 4 Sep 2009 20:40:43 +0100 Subject: [PATCH 43/60] dm snapshot: fix on disk chunk size validation Fix some problems seen in the chunk size processing when activating a pre-existing snapshot. For a new snapshot, the chunk size can either be supplied by the creator or a default value can be used. For an existing snapshot, the chunk size in the snapshot header on disk should always be used. If someone attempts to load an existing snapshot and has the 'default chunk size' option set, the kernel uses its default value even when it is incorrect for the snapshot being loaded. This patch ensures the correct on-disk value is always used. Secondly, when the code does use the chunk size stored on the disk it is prudent to revalidate it, so the code can exit cleanly if it got corrupted as happened in https://bugzilla.redhat.com/show_bug.cgi?id=461506 . Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 5 +++++ drivers/md/dm-snap-persistent.c | 22 ++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 4c01c7535fb5..556acff3952f 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -191,6 +191,11 @@ int dm_exception_store_set_chunk_size(struct dm_exception_store *store, return -EINVAL; } + if (chunk_size_ulong > INT_MAX >> SECTOR_SHIFT) { + *error = "Chunk size is too high"; + return -EINVAL; + } + store->chunk_size = chunk_size_ulong; store->chunk_mask = chunk_size_ulong - 1; store->chunk_shift = ffs(chunk_size_ulong) - 1; diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 5d1a97580cb7..d5b2e08750d5 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -286,6 +286,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) struct disk_header *dh; chunk_t chunk_size; int chunk_size_supplied = 1; + char *chunk_err; /* * Use default chunk size (or hardsect_size, if larger) if none supplied @@ -329,20 +330,25 @@ static int read_header(struct pstore *ps, int *new_snapshot) ps->version = le32_to_cpu(dh->version); chunk_size = le32_to_cpu(dh->chunk_size); - if (!chunk_size_supplied || ps->store->chunk_size == chunk_size) + if (ps->store->chunk_size == chunk_size) return 0; - DMWARN("chunk size %llu in device metadata overrides " - "table chunk size of %llu.", - (unsigned long long)chunk_size, - (unsigned long long)ps->store->chunk_size); + if (chunk_size_supplied) + DMWARN("chunk size %llu in device metadata overrides " + "table chunk size of %llu.", + (unsigned long long)chunk_size, + (unsigned long long)ps->store->chunk_size); /* We had a bogus chunk_size. Fix stuff up. */ free_area(ps); - ps->store->chunk_size = chunk_size; - ps->store->chunk_mask = chunk_size - 1; - ps->store->chunk_shift = ffs(chunk_size) - 1; + r = dm_exception_store_set_chunk_size(ps->store, chunk_size, + &chunk_err); + if (r) { + DMERR("invalid on-disk chunk size %llu: %s.", + (unsigned long long)chunk_size, chunk_err); + return r; + } r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size), ps->io_client); From 8379e7c46cc48f51197dd663fc6676f47f2a1e71 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Fri, 4 Sep 2009 11:12:01 -0700 Subject: [PATCH 44/60] ocfs2: ocfs2_write_begin_nolock() should handle len=0 Bug introduced by mainline commit e7432675f8ca868a4af365759a8d4c3779a3d922 The bug causes ocfs2_write_begin_nolock() to oops when len=0. Signed-off-by: Sunil Mushran Cc: stable@kernel.org Signed-off-by: Joel Becker --- fs/ocfs2/aops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index b401654011a2..8a1e61545f41 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1747,8 +1747,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * we know zeros will only be needed in the first and/or last cluster. */ if (clusters_to_alloc || extents_to_split || - wc->w_desc[0].c_needs_zero || - wc->w_desc[wc->w_clen - 1].c_needs_zero) + (wc->w_clen && (wc->w_desc[0].c_needs_zero || + wc->w_desc[wc->w_clen - 1].c_needs_zero))) cluster_of_pages = 1; else cluster_of_pages = 0; From 1821bc19d54009b6f5e6462dd79074d728080839 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 5 Sep 2009 13:23:49 +0200 Subject: [PATCH 45/60] firewire: core: fix crash in iso resource management This fixes a regression due to post 2.6.30 commit "firewire: core: do not DMA-map stack addresses" 6fdc03709433ccc2005f0f593ae9d9dd04f7b485. As David Moore noted, a previously correct sizeof() expression became wrong since the commit changed its argument from an array to a pointer. This resulted in an oops in ohci_cancel_packet in the shared workqueue thread's context when an isochronous resource was to be freed. Reported-by: Jonathan Cameron Signed-off-by: Stefan Richter --- drivers/firewire/core-iso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c index 110e731f5574..1c0b504a42f3 100644 --- a/drivers/firewire/core-iso.c +++ b/drivers/firewire/core-iso.c @@ -196,7 +196,7 @@ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation, switch (fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP, irm_id, generation, SCODE_100, CSR_REGISTER_BASE + CSR_BANDWIDTH_AVAILABLE, - data, sizeof(data))) { + data, 8)) { case RCODE_GENERATION: /* A generation change frees all bandwidth. */ return allocate ? -EAGAIN : bandwidth; @@ -233,7 +233,7 @@ static int manage_channel(struct fw_card *card, int irm_id, int generation, data[1] = old ^ c; switch (fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP, irm_id, generation, SCODE_100, - offset, data, sizeof(data))) { + offset, data, 8)) { case RCODE_GENERATION: /* A generation change frees all channels. */ return allocate ? -EAGAIN : i; From fc383796a8cc5df0a0c8633a16dd2e9528a16a63 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 28 Aug 2009 13:25:15 +0200 Subject: [PATCH 46/60] firewire: ohci: fix Agere FW643 and multiple cameras An Agere FW643 OHCI 1.1 card works fine for video reception from one camera but fails early if receiving from two cameras. After a short while, no IR IRQ events occur and the context control register does not react anymore. This happens regardless whether both IR DMA contexts are dual-buffer or one is dual-buffer and the other packet-per-buffer. This can be worked around by disabling dual buffer DMA mode entirely. http://sourceforge.net/mailarchive/message.php?msg_name=4A7C0594.2020208%40gmail.com (Reported by Samuel Audet.) In another report (by Jonathan Cameron), an FW643 works OK with two cameras in dual buffer mode. Whether this is due to different chip revisions or different usage patterns (different video formats) is not yet clear. However, as far as the current capabilities of firewire-core's isochronous I/O interface are concerned, simply switching off dual-buffer on non-working and working FW643s alike is not a problem in practice. We only need to revisit this issue if we are going to enhance the interface, e.g. so that applications can explicitly choose modes. Reported-by: Samuel Audet Reported-by: Jonathan Cameron Signed-off-by: Stefan Richter --- drivers/firewire/ohci.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index ecddd11b797a..3486bc49c177 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -2372,6 +2373,9 @@ static void ohci_pmac_off(struct pci_dev *dev) #define ohci_pmac_off(dev) #endif /* CONFIG_PPC_PMAC */ +#define PCI_VENDOR_ID_AGERE PCI_VENDOR_ID_ATT +#define PCI_DEVICE_ID_AGERE_FW643 0x5901 + static int __devinit pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) { @@ -2422,6 +2426,11 @@ static int __devinit pci_probe(struct pci_dev *dev, version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff; ohci->use_dualbuffer = version >= OHCI_VERSION_1_1; + /* dual-buffer mode is broken if more than one IR context is active */ + if (dev->vendor == PCI_VENDOR_ID_AGERE && + dev->device == PCI_DEVICE_ID_AGERE_FW643) + ohci->use_dualbuffer = false; + /* x86-32 currently doesn't use highmem for dma_alloc_coherent */ #if !defined(CONFIG_X86_32) /* dual-buffer mode is broken with descriptor addresses above 2G */ From 4fe0badd5882c64dc2dcd8893f9b85db63339736 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 28 Aug 2009 13:26:03 +0200 Subject: [PATCH 47/60] firewire: ohci: fix Ricoh R5C832, video reception In dual-buffer DMA mode, no video frames are ever received from R5C832 by libdc1394. Fallback to packet-per-buffer DMA works reliably. http://thread.gmane.org/gmane.linux.kernel.firewire.devel/13393/focus=13476 Reported-by: Jonathan Cameron Signed-off-by: Stefan Richter --- drivers/firewire/ohci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 3486bc49c177..76b321bb73f9 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2431,6 +2431,11 @@ static int __devinit pci_probe(struct pci_dev *dev, dev->device == PCI_DEVICE_ID_AGERE_FW643) ohci->use_dualbuffer = false; + /* dual-buffer mode is broken */ + if (dev->vendor == PCI_VENDOR_ID_RICOH && + dev->device == PCI_DEVICE_ID_RICOH_R5C832) + ohci->use_dualbuffer = false; + /* x86-32 currently doesn't use highmem for dma_alloc_coherent */ #if !defined(CONFIG_X86_32) /* dual-buffer mode is broken with descriptor addresses above 2G */ From baed6b82d9f160184c1c14cdb4accb08f3eb6b87 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 3 Sep 2009 23:07:35 +0200 Subject: [PATCH 48/60] firewire: sbp2: fix freeing of unallocated memory If a target writes invalid status (typically status of a command that already timed out), firewire-sbp2 attempts to put away an ORB that doesn't exist. https://bugzilla.redhat.com/show_bug.cgi?id=519772 Signed-off-by: Stefan Richter --- drivers/firewire/sbp2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index d27cb058da82..05f0c0c55f4a 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -456,12 +456,12 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request, } spin_unlock_irqrestore(&card->lock, flags); - if (&orb->link != &lu->orb_list) + if (&orb->link != &lu->orb_list) { orb->callback(orb, &status); - else + kref_put(&orb->kref, free_orb); + } else { fw_error("status write for unknown orb\n"); - - kref_put(&orb->kref, free_orb); + } fw_send_response(card, request, RCODE_COMPLETE); } From 4e49627b9bc29a14b393c480e8c979e3bc922ef7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 5 Sep 2009 11:17:06 -0700 Subject: [PATCH 49/60] workqueues: introduce __cancel_delayed_work() cancel_delayed_work() has to use del_timer_sync() to guarantee the timer function is not running after return. But most users doesn't actually need this, and del_timer_sync() has problems: it is not useable from interrupt, and it depends on every lock which could be taken from irq. Introduce __cancel_delayed_work() which calls del_timer() instead. The immediate reason for this patch is http://bugzilla.kernel.org/show_bug.cgi?id=13757 but hopefully this helper makes sense anyway. As for 13757 bug, actually we need requeue_delayed_work(), but its semantics are not yet clear. Merge this patch early to resolves cross-tree interdependencies between input and infiniband. Signed-off-by: Oleg Nesterov Cc: Dmitry Torokhov Cc: Roland Dreier Cc: Stefan Richter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 13e1adf55c4c..6273fa97b527 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -240,6 +240,21 @@ static inline int cancel_delayed_work(struct delayed_work *work) return ret; } +/* + * Like above, but uses del_timer() instead of del_timer_sync(). This means, + * if it returns 0 the timer function may be running and the queueing is in + * progress. + */ +static inline int __cancel_delayed_work(struct delayed_work *work) +{ + int ret; + + ret = del_timer(&work->timer); + if (ret) + work_clear_pending(&work->work); + return ret; +} + extern int cancel_delayed_work_sync(struct delayed_work *work); /* Obsolete. use cancel_delayed_work_sync() */ From a190887b58c32d19c2eee007c5eb8faa970a69ba Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 5 Sep 2009 11:17:07 -0700 Subject: [PATCH 50/60] nommu: fix error handling in do_mmap_pgoff() Fix the error handling in do_mmap_pgoff(). If do_mmap_shared_file() or do_mmap_private() fail, we jump to the error_put_region label at which point we cann __put_nommu_region() on the region - but we haven't yet added the region to the tree, and so __put_nommu_region() may BUG because the region tree is empty or it may corrupt the region tree. To get around this, we can afford to add the region to the region tree before calling do_mmap_shared_file() or do_mmap_private() as we keep nommu_region_sem write-locked, so no-one can race with us by seeing a transient region. Signed-off-by: David Howells Acked-by: Pekka Enberg Acked-by: Paul Mundt Cc: Mel Gorman Acked-by: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/nommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/nommu.c b/mm/nommu.c index 4bde489ec431..66e81e7e9fe9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1352,6 +1352,7 @@ unsigned long do_mmap_pgoff(struct file *file, } vma->vm_region = region; + add_nommu_region(region); /* set up the mapping */ if (file && vma->vm_flags & VM_SHARED) @@ -1361,8 +1362,6 @@ unsigned long do_mmap_pgoff(struct file *file, if (ret < 0) goto error_put_region; - add_nommu_region(region); - /* okay... we have a mapping; now we have to register it */ result = vma->vm_start; From dd5d241ea955006122d76af88af87de73fec25b4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Sat, 5 Sep 2009 11:17:11 -0700 Subject: [PATCH 51/60] page-allocator: always change pageblock ownership when anti-fragmentation is disabled On low-memory systems, anti-fragmentation gets disabled as fragmentation cannot be avoided on a sufficiently large boundary to be worthwhile. Once disabled, there is a period of time when all the pageblocks are marked MOVABLE and the expectation is that they get marked UNMOVABLE at each call to __rmqueue_fallback(). However, when MAX_ORDER is large the pageblocks do not change ownership because the normal criteria are not met. This has the effect of prematurely breaking up too many large contiguous blocks. This is most serious on NOMMU systems which depend on high-order allocations to boot. This patch causes pageblocks to change ownership on every fallback when anti-fragmentation is disabled. This prevents the large blocks being prematurely broken up. This is a fix to commit 49255c619fbd482d704289b5eb2795f8e3b7ff2e [page allocator: move check for disabled anti-fragmentation out of fastpath] and the problem affects 2.6.31-rc8. Signed-off-by: Mel Gorman Tested-by: Paul Mundt Cc: David Howells Cc: Pekka Enberg Acked-by: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5cc986eb9f6f..a0de15f46987 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -817,13 +817,15 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) * agressive about taking ownership of free pages */ if (unlikely(current_order >= (pageblock_order >> 1)) || - start_migratetype == MIGRATE_RECLAIMABLE) { + start_migratetype == MIGRATE_RECLAIMABLE || + page_group_by_mobility_disabled) { unsigned long pages; pages = move_freepages_block(zone, page, start_migratetype); /* Claim the whole block if over half of it is free */ - if (pages >= (1 << (pageblock_order-1))) + if (pages >= (1 << (pageblock_order-1)) || + page_group_by_mobility_disabled) set_pageblock_migratetype(page, start_migratetype); From a2a8474c3fff88d8dd52d05cb450563fb26fd26c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 5 Sep 2009 11:17:13 -0700 Subject: [PATCH 52/60] exec: do not sleep in TASK_TRACED under ->cred_guard_mutex Tom Horsley reports that his debugger hangs when it tries to read /proc/pid_of_tracee/maps, this happens since "mm_for_maps: take ->cred_guard_mutex to fix the race with exec" 04b836cbf19e885f8366bccb2e4b0474346c02d commit in 2.6.31. But the root of the problem lies in the fact that do_execve() path calls tracehook_report_exec() which can stop if the tracer sets PT_TRACE_EXEC. The tracee must not sleep in TASK_TRACED holding this mutex. Even if we remove ->cred_guard_mutex from mm_for_maps() and proc_pid_attr_write(), another task doing PTRACE_ATTACH should not hang until it is killed or the tracee resumes. With this patch do_execve() does not use ->cred_guard_mutex directly and we do not hold it throughout, instead: - introduce prepare_bprm_creds() helper, it locks the mutex and calls prepare_exec_creds() to initialize bprm->cred. - install_exec_creds() drops the mutex after commit_creds(), and thus before tracehook_report_exec()->ptrace_stop(). or, if exec fails, free_bprm() drops this mutex when bprm->cred != NULL which indicates install_exec_creds() was not called. Reported-by: Tom Horsley Signed-off-by: Oleg Nesterov Acked-by: David Howells Cc: Roland McGrath Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 17 +++-------- fs/exec.c | 63 +++++++++++++++++++++++++---------------- include/linux/binfmts.h | 1 + 3 files changed, 43 insertions(+), 38 deletions(-) diff --git a/fs/compat.c b/fs/compat.c index 94502dab972a..6d6f98fe64a0 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1485,20 +1485,15 @@ int compat_do_execve(char * filename, if (!bprm) goto out_files; - retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + retval = prepare_bprm_creds(bprm); + if (retval) goto out_free; - current->in_execve = 1; - - retval = -ENOMEM; - bprm->cred = prepare_exec_creds(); - if (!bprm->cred) - goto out_unlock; retval = check_unsafe_exec(bprm); if (retval < 0) - goto out_unlock; + goto out_free; clear_in_exec = retval; + current->in_execve = 1; file = open_exec(filename); retval = PTR_ERR(file); @@ -1547,7 +1542,6 @@ int compat_do_execve(char * filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; - mutex_unlock(¤t->cred_guard_mutex); acct_update_integrals(current); free_bprm(bprm); if (displaced) @@ -1567,10 +1561,7 @@ int compat_do_execve(char * filename, out_unmark: if (clear_in_exec) current->fs->in_exec = 0; - -out_unlock: current->in_execve = 0; - mutex_unlock(¤t->cred_guard_mutex); out_free: free_bprm(bprm); diff --git a/fs/exec.c b/fs/exec.c index fb4f3cdda78c..172ceb6edde4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1015,6 +1015,35 @@ int flush_old_exec(struct linux_binprm * bprm) EXPORT_SYMBOL(flush_old_exec); +/* + * Prepare credentials and lock ->cred_guard_mutex. + * install_exec_creds() commits the new creds and drops the lock. + * Or, if exec fails before, free_bprm() should release ->cred and + * and unlock. + */ +int prepare_bprm_creds(struct linux_binprm *bprm) +{ + if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + return -ERESTARTNOINTR; + + bprm->cred = prepare_exec_creds(); + if (likely(bprm->cred)) + return 0; + + mutex_unlock(¤t->cred_guard_mutex); + return -ENOMEM; +} + +void free_bprm(struct linux_binprm *bprm) +{ + free_arg_pages(bprm); + if (bprm->cred) { + mutex_unlock(¤t->cred_guard_mutex); + abort_creds(bprm->cred); + } + kfree(bprm); +} + /* * install the new credentials for this executable */ @@ -1024,12 +1053,13 @@ void install_exec_creds(struct linux_binprm *bprm) commit_creds(bprm->cred); bprm->cred = NULL; - - /* cred_guard_mutex must be held at least to this point to prevent + /* + * cred_guard_mutex must be held at least to this point to prevent * ptrace_attach() from altering our determination of the task's - * credentials; any time after this it may be unlocked */ - + * credentials; any time after this it may be unlocked. + */ security_bprm_committed_creds(bprm); + mutex_unlock(¤t->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); @@ -1246,14 +1276,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) EXPORT_SYMBOL(search_binary_handler); -void free_bprm(struct linux_binprm *bprm) -{ - free_arg_pages(bprm); - if (bprm->cred) - abort_creds(bprm->cred); - kfree(bprm); -} - /* * sys_execve() executes a new program. */ @@ -1277,20 +1299,15 @@ int do_execve(char * filename, if (!bprm) goto out_files; - retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + retval = prepare_bprm_creds(bprm); + if (retval) goto out_free; - current->in_execve = 1; - - retval = -ENOMEM; - bprm->cred = prepare_exec_creds(); - if (!bprm->cred) - goto out_unlock; retval = check_unsafe_exec(bprm); if (retval < 0) - goto out_unlock; + goto out_free; clear_in_exec = retval; + current->in_execve = 1; file = open_exec(filename); retval = PTR_ERR(file); @@ -1340,7 +1357,6 @@ int do_execve(char * filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; - mutex_unlock(¤t->cred_guard_mutex); acct_update_integrals(current); free_bprm(bprm); if (displaced) @@ -1360,10 +1376,7 @@ int do_execve(char * filename, out_unmark: if (clear_in_exec) current->fs->in_exec = 0; - -out_unlock: current->in_execve = 0; - mutex_unlock(¤t->cred_guard_mutex); out_free: free_bprm(bprm); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 61ee18c1bdb4..2046b5b8af48 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -117,6 +117,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); +extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); extern int set_binfmt(struct linux_binfmt *new); From 37f81fa1f63ad38e16125526bb2769ae0ea8d332 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 5 Sep 2009 12:46:07 -0700 Subject: [PATCH 53/60] n_tty: do O_ONLCR translation as a single write When translating CR to CRNL in the n_tty line discipline, we did it as two tty_put_char() calls. Which works, but is stupid, and has caused problems before too with bad interactions with the write_room() logic. The generic USB serial driver had that problem, for example. Now the pty layer had similar issues after being moved to the generic tty buffering code (in commit d945cb9cce20ac7143c2de8d88b187f62db99bdc: "pty: Rework the pty layer to use the normal buffering logic"). So stop doing the silly separate two writes, and do it as a single write instead. That's what the n_tty layer already does for the space expansion of tabs (XTABS), and it means that we'll now always have just a single write for the CRNL to match the single 'tty_write_room()' test, which hopefully means that the next time somebody screws up buffering, it won't cause weeks of debugging. Signed-off-by: Linus Torvalds --- drivers/char/n_tty.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index 973be2f44195..4e28b35024ec 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -300,8 +300,7 @@ static int do_output_char(unsigned char c, struct tty_struct *tty, int space) if (space < 2) return -1; tty->canon_column = tty->column = 0; - tty_put_char(tty, '\r'); - tty_put_char(tty, c); + tty->ops->write(tty, "\r\n", 2); return 2; } tty->canon_column = tty->column; From ac89a9174decf343de049a06fad75681f71890eb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 5 Sep 2009 13:27:10 -0700 Subject: [PATCH 54/60] pty: don't limit the writes to 'pty_space()' inside 'pty_write()' The whole write-room thing is something that is up to the _caller_ to worry about, not the pty layer itself. The total buffer space will still be limited by the buffering routines themselves, so there is no advantage or need in having pty_write() artificially limit the size somehow. And what happened was that the caller (the n_tty line discipline, in this case) may have verified that there is room for 2 bytes to be written (for NL -> CRNL expansion), and it used to then do those writes as two single-byte writes. And if the first byte written (CR) then caused a new tty buffer to be allocated, pty_space() may have returned zero when trying to write the second byte (LF), and then incorrectly failed the write - leading to a lost newline character. This should finally fix http://bugzilla.kernel.org/show_bug.cgi?id=14015 Reported-by: Mikael Pettersson Acked-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/pty.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/char/pty.c b/drivers/char/pty.c index d083c73d784a..b33d6688e910 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -109,21 +109,13 @@ static int pty_space(struct tty_struct *to) * the other side of the pty/tty pair. */ -static int pty_write(struct tty_struct *tty, const unsigned char *buf, - int count) +static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c) { struct tty_struct *to = tty->link; - int c; if (tty->stopped) return 0; - /* This isn't locked but our 8K is quite sloppy so no - big deal */ - - c = pty_space(to); - if (c > count) - c = count; if (c > 0) { /* Stuff the data into the input queue of the other end */ c = tty_insert_flip_string(to, buf, c); From 9de6886ec6e37f45807266a702bb7621498395ad Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sat, 5 Sep 2009 00:25:37 -0400 Subject: [PATCH 55/60] ext2: fix unbalanced kmap()/kunmap() In ext2_rename(), dir_page is acquired through ext2_dotdot(). It is then released through ext2_set_link() but only if old_dir != new_dir. Failing that, the pkmap reference count is never decremented and the page remains pinned forever. Repeat that a couple times with highmem pages and all pkmap slots get exhausted, and every further kmap() calls end up stalling on the pkmap_map_wait queue at which point the whole system comes to a halt. Signed-off-by: Nicolas Pitre Acked-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/ext2/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index e1dedb0f7873..78d9b925fc94 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -362,6 +362,10 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, if (dir_de) { if (old_dir != new_dir) ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); + else { + kunmap(dir_page); + page_cache_release(dir_page); + } inode_dec_link_count(old_dir); } return 0; From 74a01180db4bbfd61304ae0ba1f60af55ffc803d Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Thu, 3 Sep 2009 09:57:23 -0600 Subject: [PATCH 56/60] powerpc: Fix i8259 interrupt driver kernel crash on ML510 This patch fixes a null pointer exception caused by removal of 'ack()' for level interrupts in the Xilinx interrupt driver. A recent change to the xilinx interrupt controller removed the ack hook for level irqs. Signed-off-by: Roderick Colenbrander Signed-off-by: Grant Likely Acked-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/xilinx_intc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 3ee1fd37bbfc..40edad520770 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -234,7 +234,6 @@ static void xilinx_i8259_cascade(unsigned int irq, struct irq_desc *desc) generic_handle_irq(cascade_irq); /* Let xilinx_intc end the interrupt */ - desc->chip->ack(irq); desc->chip->unmask(irq); } From e07cccf4046978df10f2e13fe2b99b2f9b3a65db Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 5 Sep 2009 16:38:12 -0700 Subject: [PATCH 57/60] Linux 2.6.31-rc9 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 25c615e57302..7d3415c0709c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 31 -EXTRAVERSION = -rc8 +EXTRAVERSION = -rc9 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* From d9d8e0418ffd3d189345c435861e254c17ae06e5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 6 Sep 2009 01:41:02 -0700 Subject: [PATCH 58/60] gianfar: Fix build. Reported by Michael Guntsche -------------------- Commit 38bddf04bcfe661fbdab94888c3b72c32f6873b3 gianfar: gfar_remove needs to call unregister_netdev() breaks the build of the gianfar driver because "dev" is undefined in this function. To quickly test rc9 I changed this to priv->ndev but I do not know if this is the correct one. -------------------- Signed-off-by: David S. Miller --- drivers/net/gianfar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 24f7ca5e17de..a00ec639c380 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -491,7 +491,7 @@ static int gfar_remove(struct of_device *ofdev) dev_set_drvdata(&ofdev->dev, NULL); - unregister_netdev(dev); + unregister_netdev(priv->ndev); iounmap(priv->regs); free_netdev(priv->ndev); From acd0c935178649f72c44ec49ca83bee35ce1f79e Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Fri, 4 Sep 2009 13:08:46 -0400 Subject: [PATCH 59/60] IMA: update ima_counts_put - As ima_counts_put() may be called after the inode has been freed, verify that the inode is not NULL, before dereferencing it. - Maintain the IMA file counters in may_open() properly, decrementing any counter increments on subsequent errors. Reported-by: Ciprian Docan Reported-by: J.R. Okajima Signed-off-by: Mimi Zohar Acked-by: Eric Paris --- fs/namei.c | 22 +++++++++++++++------- security/integrity/ima/ima_main.c | 6 +++++- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f3c5b278895a..1f13751693a5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1542,28 +1542,31 @@ int may_open(struct path *path, int acc_mode, int flag) * An append-only file must be opened in append mode for writing. */ if (IS_APPEND(inode)) { + error = -EPERM; if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) - return -EPERM; + goto err_out; if (flag & O_TRUNC) - return -EPERM; + goto err_out; } /* O_NOATIME can only be set by the owner or superuser */ if (flag & O_NOATIME) - if (!is_owner_or_cap(inode)) - return -EPERM; + if (!is_owner_or_cap(inode)) { + error = -EPERM; + goto err_out; + } /* * Ensure there are no outstanding leases on the file. */ error = break_lease(inode, flag); if (error) - return error; + goto err_out; if (flag & O_TRUNC) { error = get_write_access(inode); if (error) - return error; + goto err_out; /* * Refuse to truncate files with mandatory locks held on them. @@ -1581,12 +1584,17 @@ int may_open(struct path *path, int acc_mode, int flag) } put_write_access(inode); if (error) - return error; + goto err_out; } else if (flag & FMODE_WRITE) vfs_dq_init(inode); return 0; +err_out: + ima_counts_put(path, acc_mode ? + acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : + ACC_MODE(flag) & (MAY_READ | MAY_WRITE)); + return error; } /* diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 4732f5e5d127..b85e61bcf246 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -249,7 +249,11 @@ void ima_counts_put(struct path *path, int mask) struct inode *inode = path->dentry->d_inode; struct ima_iint_cache *iint; - if (!ima_initialized || !S_ISREG(inode->i_mode)) + /* The inode may already have been freed, freeing the iint + * with it. Verify the inode is not NULL before dereferencing + * it. + */ + if (!ima_initialized || !inode || !S_ISREG(inode->i_mode)) return; iint = ima_iint_find_insert_get(inode); if (!iint) From a54775c8758a754186bc6adbfc518b1e9f8f1e4e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 7 Sep 2009 15:26:19 +1000 Subject: [PATCH 60/60] drm/radeon/kms: add LTE/GTE discard + rv515 two sided stencil register. This adds some rv350+ register for LTE/GTE discard, and enables the rv515 two sided stencil register. It also disables the DEPTHXY_OFFSET register which can be used to workaround the CS checker. Moves rs690 to proper place in rs600 and uses correct table on rs600. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300.c | 2 +- drivers/gpu/drm/radeon/radeon_asic.h | 6 +-- drivers/gpu/drm/radeon/rs600.c | 65 ++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/rs690.c | 64 --------------------------- drivers/gpu/drm/radeon/rv515.c | 2 +- 5 files changed, 70 insertions(+), 69 deletions(-) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 053f4ec397f7..051bca6e3a4f 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -995,7 +995,7 @@ static const unsigned r300_reg_safe_bm[159] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x0003FC01, 0xFFFFFFF8, 0xFE800B19, + 0x0003FC01, 0xFFFFFCF8, 0xFF800B19, }; static int r300_packet0_check(struct radeon_cs_parser *p, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 7ca6c13569b5..93d8f8889302 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -266,6 +266,7 @@ static struct radeon_asic rs400_asic = { /* * rs600. */ +int rs600_init(struct radeon_device *dev); void rs600_errata(struct radeon_device *rdev); void rs600_vram_info(struct radeon_device *rdev); int rs600_mc_init(struct radeon_device *rdev); @@ -281,7 +282,7 @@ uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg); void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); void rs600_bandwidth_update(struct radeon_device *rdev); static struct radeon_asic rs600_asic = { - .init = &r300_init, + .init = &rs600_init, .errata = &rs600_errata, .vram_info = &rs600_vram_info, .gpu_reset = &r300_gpu_reset, @@ -316,7 +317,6 @@ static struct radeon_asic rs600_asic = { /* * rs690,rs740 */ -int rs690_init(struct radeon_device *rdev); void rs690_errata(struct radeon_device *rdev); void rs690_vram_info(struct radeon_device *rdev); int rs690_mc_init(struct radeon_device *rdev); @@ -325,7 +325,7 @@ uint32_t rs690_mc_rreg(struct radeon_device *rdev, uint32_t reg); void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); void rs690_bandwidth_update(struct radeon_device *rdev); static struct radeon_asic rs690_asic = { - .init = &rs690_init, + .init = &rs600_init, .errata = &rs690_errata, .vram_info = &rs690_vram_info, .gpu_reset = &r300_gpu_reset, diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 7e8ce983a908..02fd11aad6a2 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -409,3 +409,68 @@ void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) ((reg) & RS600_MC_ADDR_MASK)); WREG32(RS600_MC_DATA, v); } + +static const unsigned rs600_reg_safe_bm[219] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF, + 0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000, + 0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, + 0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF, + 0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, + 0x00000000, 0x0000C100, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0003FC01, 0xFFFFFCF8, 0xFF800B19, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, +}; + +int rs600_init(struct radeon_device *rdev) +{ + rdev->config.r300.reg_safe_bm = rs600_reg_safe_bm; + rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(rs600_reg_safe_bm); + return 0; +} diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index bc6b7c5339bc..879882533e45 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -653,67 +653,3 @@ void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) WREG32(RS690_MC_INDEX, RS690_MC_INDEX_WR_ACK); } -static const unsigned rs690_reg_safe_bm[219] = { - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0x17FF1FFF,0xFFFFFFFC,0xFFFFFFFF,0xFF30FFBF, - 0xFFFFFFF8,0xC3E6FFFF,0xFFFFF6DF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFF03F, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFEFCE,0xF00EBFFF,0x007C0000, - 0xF0000078,0xFF000009,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFF7FF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFC78,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF, - 0x38FF8F50,0xFFF88082,0xF000000C,0xFAE009FF, - 0x0000FFFF,0xFFFFFFFF,0xFFFFFFFF,0x00000000, - 0x00000000,0x0000C100,0x00000000,0x00000000, - 0x00000000,0x00000000,0x00000000,0x00000000, - 0x00000000,0xFFFF0000,0xFFFFFFFF,0xFF80FFFF, - 0x00000000,0x00000000,0x00000000,0x00000000, - 0x0003FC01,0xFFFFFFF8,0xFE800B19,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, - 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, -}; - -int rs690_init(struct radeon_device *rdev) -{ - rdev->config.r300.reg_safe_bm = rs690_reg_safe_bm; - rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(rs690_reg_safe_bm); - return 0; -} diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 31a7f668ae5a..0566fb67e460 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -508,7 +508,7 @@ static const unsigned r500_reg_safe_bm[219] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF80FFFF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x0003FC01, 0x3FFFFCF8, 0xFE800B19, 0xFFFFFFFF, + 0x0003FC01, 0x3FFFFCF8, 0xFF800B19, 0xFFDFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,