From 81772fea4110f7ce8083d52503c9c4ddaa50f75b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 10 Feb 2008 23:57:36 +0100 Subject: [PATCH 01/16] x86: remove over noisy debug printk pageattr-test.c contains a noisy debug printk that people reported. The condition under which it prints (randomly tapping into a mem_map[] hole and not being able to c_p_a() there) is valid behavior and not interesting to report. Remove it. Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/x86/mm/pageattr-test.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index ed8201600354..75f1b109aae8 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -40,7 +40,6 @@ struct split_state { static int print_split(struct split_state *s) { long i, expected, missed = 0; - int printed = 0; int err = 0; s->lpg = s->gpg = s->spg = s->exec = 0; @@ -53,12 +52,6 @@ static int print_split(struct split_state *s) pte = lookup_address(addr, &level); if (!pte) { - if (!printed) { - dump_pagetable(addr); - printk(KERN_INFO "CPA %lx no pte level %d\n", - addr, level); - printed = 1; - } missed++; i++; continue; From 7585eb1b7cf4bbace37ce18500809140c8eeccc3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 Feb 2008 10:18:53 +0900 Subject: [PATCH 02/16] pata_via: fix SATA cable detection on cx700 The first port of cx700 is SATA. Fix cable detection. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/pata_via.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 39627ab684bf..d119a68c388f 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -84,6 +84,7 @@ enum { VIA_BAD_ID = 0x100, /* Has wrong vendor ID (0x1107) */ VIA_BAD_AST = 0x200, /* Don't touch Address Setup Timing */ VIA_NO_ENABLES = 0x400, /* Has no enablebits */ + VIA_SATA_PATA = 0x800, /* SATA/PATA combined configuration */ }; /* @@ -100,7 +101,7 @@ static const struct via_isa_bridge { { "vx800", PCI_DEVICE_ID_VIA_VX800, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, - { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, + { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA }, { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES}, { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, @@ -172,6 +173,9 @@ static int via_cable_detect(struct ata_port *ap) { if (via_cable_override(pdev)) return ATA_CBL_PATA40_SHORT; + if ((config->flags & VIA_SATA_PATA) && ap->port_no == 0) + return ATA_CBL_SATA; + /* Early chips are 40 wire */ if ((config->flags & VIA_UDMA) < VIA_UDMA_66) return ATA_CBL_PATA40; From 4055dee7f525a702a060ea08a3fb9f045317355f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 Feb 2008 10:34:08 +0900 Subject: [PATCH 03/16] libata: ignore deverr on SETXFER if mode is configured Some controllers (VIA CX700) raise device error on SETXFER even after mode configuration succeeded. Update ata_dev_set_mode() such that device error is ignored if transfer mode is configured correctly. To implement this, device is revalidated even after device error on SETXFER. This fixes kernel bugzilla bug 8563. Signed-off-by: Tejun Heo Cc: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 48 +++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 3011919f3ec8..004dae4ea5bc 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3048,6 +3048,8 @@ int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel) static int ata_dev_set_mode(struct ata_device *dev) { struct ata_eh_context *ehc = &dev->link->eh_context; + const char *dev_err_whine = ""; + int ign_dev_err = 0; unsigned int err_mask; int rc; @@ -3057,41 +3059,57 @@ static int ata_dev_set_mode(struct ata_device *dev) err_mask = ata_dev_set_xfermode(dev); + if (err_mask & ~AC_ERR_DEV) + goto fail; + + /* revalidate */ + ehc->i.flags |= ATA_EHI_POST_SETMODE; + rc = ata_dev_revalidate(dev, ATA_DEV_UNKNOWN, 0); + ehc->i.flags &= ~ATA_EHI_POST_SETMODE; + if (rc) + return rc; + /* Old CFA may refuse this command, which is just fine */ if (dev->xfer_shift == ATA_SHIFT_PIO && ata_id_is_cfa(dev->id)) - err_mask &= ~AC_ERR_DEV; + ign_dev_err = 1; /* Some very old devices and some bad newer ones fail any kind of SET_XFERMODE request but support PIO0-2 timings and no IORDY */ if (dev->xfer_shift == ATA_SHIFT_PIO && !ata_id_has_iordy(dev->id) && dev->pio_mode <= XFER_PIO_2) - err_mask &= ~AC_ERR_DEV; + ign_dev_err = 1; /* Early MWDMA devices do DMA but don't allow DMA mode setting. Don't fail an MWDMA0 set IFF the device indicates it is in MWDMA0 */ if (dev->xfer_shift == ATA_SHIFT_MWDMA && dev->dma_mode == XFER_MW_DMA_0 && (dev->id[63] >> 8) & 1) - err_mask &= ~AC_ERR_DEV; + ign_dev_err = 1; - if (err_mask) { - ata_dev_printk(dev, KERN_ERR, "failed to set xfermode " - "(err_mask=0x%x)\n", err_mask); - return -EIO; - } + /* if the device is actually configured correctly, ignore dev err */ + if (dev->xfer_mode == ata_xfer_mask2mode(ata_id_xfermask(dev->id))) + ign_dev_err = 1; - ehc->i.flags |= ATA_EHI_POST_SETMODE; - rc = ata_dev_revalidate(dev, ATA_DEV_UNKNOWN, 0); - ehc->i.flags &= ~ATA_EHI_POST_SETMODE; - if (rc) - return rc; + if (err_mask & AC_ERR_DEV) { + if (!ign_dev_err) + goto fail; + else + dev_err_whine = " (device error ignored)"; + } DPRINTK("xfer_shift=%u, xfer_mode=0x%x\n", dev->xfer_shift, (int)dev->xfer_mode); - ata_dev_printk(dev, KERN_INFO, "configured for %s\n", - ata_mode_string(ata_xfer_mode2mask(dev->xfer_mode))); + ata_dev_printk(dev, KERN_INFO, "configured for %s%s\n", + ata_mode_string(ata_xfer_mode2mask(dev->xfer_mode)), + dev_err_whine); + return 0; + + fail: + ata_dev_printk(dev, KERN_ERR, "failed to set xfermode " + "(err_mask=0x%x)\n", err_mask); + return -EIO; } /** From 8f71efe25f8718200027b547a3e749ae3300fe60 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Feb 2008 15:06:17 -0800 Subject: [PATCH 04/16] sata_mv: fix loop with last port commit f351b2d638c3cb0b95adde3549b7bfaf3f991dfa sata_mv: Support SoC controllers cause panic: scsi 4:0:0:0: Direct-Access ATA HITACHI HDS7225S V44O PQ: 0 ANSI: 5 sd 4:0:0:0: [sde] 488390625 512-byte hardware sectors (250056 MB) sd 4:0:0:0: [sde] Write Protect is off sd 4:0:0:0: [sde] Mode Sense: 00 3a 00 00 sd 4:0:0:0: [sde] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA sd 4:0:0:0: [sde] 488390625 512-byte hardware sectors (250056 MB) sd 4:0:0:0: [sde] Write Protect is off sd 4:0:0:0: [sde] Mode Sense: 00 3a 00 00 sd 4:0:0:0: [sde] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA sde:<1>BUG: unable to handle kernel NULL pointer dereference at 000000000000001a IP: [] mv_interrupt+0x21c/0x4cc PGD 0 Oops: 0000 [1] SMP CPU 3 Modules linked in: Pid: 0, comm: swapper Not tainted 2.6.24-smp-08636-g0afc2ed-dirty #26 RIP: 0010:[] [] mv_interrupt+0x21c/0x4cc RSP: 0000:ffff8102050bbec8 EFLAGS: 00010297 RAX: 0000000000000008 RBX: 0000000000000000 RCX: 0000000000000003 RDX: 0000000000008000 RSI: 0000000000000286 RDI: ffff8102035180e0 RBP: 0000000000000001 R08: 0000000000000003 R09: ffff8102036613e0 R10: 0000000000000002 R11: ffffffff8061474c R12: ffff8102035bf828 R13: 0000000000000008 R14: ffff81020348ece8 R15: ffffc20002cb2000 FS: 0000000000000000(0000) GS:ffff810405025700(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 000000000000001a CR3: 0000000000201000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 0, threadinfo ffff810405094000, task ffff8102050b28c0) Stack: 000000010000000c 0002040000220400 0000001100000002 ffff81020348eda8 0000000000000001 ffff8102035f2cc0 0000000000000000 0000000000000000 0000000000000018 0000000000000000 0000000000000000 ffffffff80269ee8 Call Trace: [] ? handle_IRQ_event+0x25/0x53 [] ? handle_fasteoi_irq+0x90/0xc8 [] ? do_IRQ+0xf1/0x15f [] ? default_idle+0x0/0x55 [] ? ret_from_intr+0x0/0xa [] ? lapic_next_event+0x0/0xa [] ? default_idle+0x31/0x55 [] ? default_idle+0x2c/0x55 [] ? default_idle+0x0/0x55 [] ? cpu_idle+0x92/0xb8 Code: 41 14 85 c0 89 44 24 14 0f 84 9d 02 00 00 f7 d0 01 d6 41 89 d5 89 41 14 8b 41 14 89 34 24 e9 7e 02 00 00 49 63 c5 49 8b 5c c6 48 43 1a 80 4c 8b a3 20 37 00 00 0f 85 62 02 00 00 31 c9 41 83 RIP [] mv_interrupt+0x21c/0x4cc RSP CR2: 000000000000001a ---[ end trace 2583b5f7a5350584 ]--- Kernel panic - not syncing: Aiee, killing interrupt handler! last_port already include port0 base. this patch change use last_port directly, and move pp assignment later. Signed-off-by: Yinghai Lu Signed-off-by: Jeff Garzik --- drivers/ata/sata_mv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 080b8362f8d6..f5333cec2dfa 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -1716,14 +1716,16 @@ static void mv_host_intr(struct ata_host *host, u32 relevant, unsigned int hc) VPRINTK("ENTER, hc%u relevant=0x%08x HC IRQ cause=0x%08x\n", hc, relevant, hc_irq_cause); - for (port = port0; port < port0 + last_port; port++) { + for (port = port0; port < last_port; port++) { struct ata_port *ap = host->ports[port]; - struct mv_port_priv *pp = ap->private_data; + struct mv_port_priv *pp; int have_err_bits, hard_port, shift; if ((!ap) || (ap->flags & ATA_FLAG_DISABLED)) continue; + pp = ap->private_data; + shift = port << 1; /* (port * 2) */ if (port >= MV_PORTS_PER_HC) { shift++; /* skip bit 8 in the HC Main IRQ reg */ From c9544bcb4c7df07555e4b22d297c5705738da09d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 8 Feb 2008 15:22:39 +0000 Subject: [PATCH 05/16] pata_amd: Note in the module description it handles Nvidia This has confused a few people so fix it Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/pata_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c index 761a66608d7b..ea567e2b1703 100644 --- a/drivers/ata/pata_amd.c +++ b/drivers/ata/pata_amd.c @@ -772,7 +772,7 @@ static void __exit amd_exit(void) } MODULE_AUTHOR("Alan Cox"); -MODULE_DESCRIPTION("low-level driver for AMD PATA IDE"); +MODULE_DESCRIPTION("low-level driver for AMD and Nvidia PATA IDE"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(pci, amd); MODULE_VERSION(DRV_VERSION); From 8397248d4662d77296889529c911e2182151afa9 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 8 Feb 2008 15:23:38 +0000 Subject: [PATCH 06/16] pata_legacy: typo fix Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/pata_legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 333dc15f8ccf..6c59969fd50b 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -127,7 +127,7 @@ static int opti82c611a; /* Opti82c611A on primary 1, sec 2, both 3 */ static int opti82c46x; /* Opti 82c465MV present(pri/sec autodetect) */ static int qdi; /* Set to probe QDI controllers */ static int winbond; /* Set to probe Winbond controllers, - give I/O port if non stdanard */ + give I/O port if non standard */ static int autospeed; /* Chip present which snoops speed changes */ static int pio_mask = 0x1F; /* PIO range for autospeed devices */ static int iordy_mask = 0xFFFFFFFF; /* Use iordy if available */ From 4194645079ca15679bf7e5b00e71561cf6864761 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 8 Feb 2008 15:25:10 +0000 Subject: [PATCH 07/16] pata_ninja32: setup changes Forcibly set more of the configuration at init time. This seems to fix at least one problem reported. We don't know what most of these bits do, but we do know what windows stuffs there. Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/pata_ninja32.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c index 1c1b83541d13..15dd649f89ee 100644 --- a/drivers/ata/pata_ninja32.c +++ b/drivers/ata/pata_ninja32.c @@ -17,6 +17,7 @@ * Base + 0x00 IRQ Status * Base + 0x01 IRQ control * Base + 0x02 Chipset control + * Base + 0x03 Unknown * Base + 0x04 VDMA and reset control + wait bits * Base + 0x08 BMIMBA * Base + 0x0C DMA Length @@ -174,8 +175,12 @@ static int ninja32_init_one(struct pci_dev *dev, const struct pci_device_id *id) ata_std_ports(&ap->ioaddr); iowrite8(0x05, base + 0x01); /* Enable interrupt lines */ - iowrite8(0xB3, base + 0x02); /* Burst, ?? setup */ - iowrite8(0x00, base + 0x04); /* WAIT0 ? */ + iowrite8(0xBE, base + 0x02); /* Burst, ?? setup */ + iowrite8(0x01, base + 0x03); /* Unknown */ + iowrite8(0x20, base + 0x04); /* WAIT0 */ + iowrite8(0x8f, base + 0x05); /* Unknown */ + iowrite8(0xa4, base + 0x1c); /* Unknown */ + iowrite8(0x83, base + 0x1d); /* BMDMA control: WAIT0 */ /* FIXME: Should we disable them at remove ? */ return ata_host_activate(host, dev->irq, ata_interrupt, IRQF_SHARED, &ninja32_sht); From fbf14e2f2d674e6a2ff0fb2aa569e7f6687483a3 Mon Sep 17 00:00:00 2001 From: Byron Bradley Date: Sun, 10 Feb 2008 21:17:30 +0000 Subject: [PATCH 08/16] sata_mv: platform driver allocs dma without create When the sata_mv driver is used as a platform driver, mv_create_dma_pools() is never called so it fails when trying to alloc in mv_pool_start(). Signed-off-by: Byron Bradley Acked-by: Mark Lord Signed-off-by: Jeff Garzik --- drivers/ata/sata_mv.c | 44 +++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index f5333cec2dfa..04b571764aff 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -2881,6 +2881,26 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx) return rc; } +static int mv_create_dma_pools(struct mv_host_priv *hpriv, struct device *dev) +{ + hpriv->crqb_pool = dmam_pool_create("crqb_q", dev, MV_CRQB_Q_SZ, + MV_CRQB_Q_SZ, 0); + if (!hpriv->crqb_pool) + return -ENOMEM; + + hpriv->crpb_pool = dmam_pool_create("crpb_q", dev, MV_CRPB_Q_SZ, + MV_CRPB_Q_SZ, 0); + if (!hpriv->crpb_pool) + return -ENOMEM; + + hpriv->sg_tbl_pool = dmam_pool_create("sg_tbl", dev, MV_SG_TBL_SZ, + MV_SG_TBL_SZ, 0); + if (!hpriv->sg_tbl_pool) + return -ENOMEM; + + return 0; +} + /** * mv_platform_probe - handle a positive probe of an soc Marvell * host @@ -2934,6 +2954,10 @@ static int mv_platform_probe(struct platform_device *pdev) hpriv->base = ioremap(res->start, res->end - res->start + 1); hpriv->base -= MV_SATAHC0_REG_BASE; + rc = mv_create_dma_pools(hpriv, &pdev->dev); + if (rc) + return rc; + /* initialize adapter */ rc = mv_init_host(host, chip_soc); if (rc) @@ -3070,26 +3094,6 @@ static void mv_print_info(struct ata_host *host) scc_s, (MV_HP_FLAG_MSI & hpriv->hp_flags) ? "MSI" : "INTx"); } -static int mv_create_dma_pools(struct mv_host_priv *hpriv, struct device *dev) -{ - hpriv->crqb_pool = dmam_pool_create("crqb_q", dev, MV_CRQB_Q_SZ, - MV_CRQB_Q_SZ, 0); - if (!hpriv->crqb_pool) - return -ENOMEM; - - hpriv->crpb_pool = dmam_pool_create("crpb_q", dev, MV_CRPB_Q_SZ, - MV_CRPB_Q_SZ, 0); - if (!hpriv->crpb_pool) - return -ENOMEM; - - hpriv->sg_tbl_pool = dmam_pool_create("sg_tbl", dev, MV_SG_TBL_SZ, - MV_SG_TBL_SZ, 0); - if (!hpriv->sg_tbl_pool) - return -ENOMEM; - - return 0; -} - /** * mv_pci_init_one - handle a positive probe of a PCI Marvell host * @pdev: PCI device found From 10d0aa3c0a03dd04227ab3a4958563d84276d02e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 11 Feb 2008 13:23:46 -0800 Subject: [PATCH 09/16] [IA64] Fix build for sim_defconfig Commit bdc807871d58285737d50dc6163d0feb72cb0dc2 broke the build for this config because the sim_defconfig selects CONFIG_HZ=250 but include/asm-ia64/param.h has an ifdef for the simulator to force HZ to 32. So we ended up with a kernel/timeconst.h set for HZ=250 ... which then failed the check for the right HZ value and died with: Drop the #ifdef magic from param.h and make force CONFIG_HZ=32 directly for the simulator. Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 7 +++++++ include/asm-ia64/param.h | 10 +--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 2d4fcd01bc91..dff9edfc7465 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -232,7 +232,14 @@ config PGTABLE_4 endchoice +if IA64_HP_SIM +config HZ + default 32 +endif + +if !IA64_HP_SIM source kernel/Kconfig.hz +endif config IA64_BRL_EMU bool diff --git a/include/asm-ia64/param.h b/include/asm-ia64/param.h index 49c62dd5eccf..0964c32c1358 100644 --- a/include/asm-ia64/param.h +++ b/include/asm-ia64/param.h @@ -19,15 +19,7 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ #ifdef __KERNEL__ -# ifdef CONFIG_IA64_HP_SIM - /* - * Yeah, simulating stuff is slow, so let us catch some breath between - * timer interrupts... - */ -# define HZ 32 -# else -# define HZ CONFIG_HZ -# endif +# define HZ CONFIG_HZ # define USER_HZ HZ # define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ #else From 29c271123dc7895a9f77d3e61e747b2a052d0a2a Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Sun, 10 Feb 2008 20:22:57 -0600 Subject: [PATCH 10/16] mlx4_core: Fix build break (missing include) Commit 313abe55 ("mlx4_core: For 64-bit systems, vmap() kernel queue buffers") caused this to pop up on powerpc allyesconfig, looks like a missing include file: drivers/net/mlx4/alloc.c: In function 'mlx4_buf_alloc': drivers/net/mlx4/alloc.c:162: error: implicit declaration of function 'vmap' drivers/net/mlx4/alloc.c:162: error: 'VM_MAP' undeclared (first use in this function) drivers/net/mlx4/alloc.c:162: error: (Each undeclared identifier is reported only once drivers/net/mlx4/alloc.c:162: error: for each function it appears in.) drivers/net/mlx4/alloc.c:162: warning: assignment makes pointer from integer without a cast drivers/net/mlx4/alloc.c: In function 'mlx4_buf_free': drivers/net/mlx4/alloc.c:187: error: implicit declaration of function 'vunmap' Signed-off-by: Olof Johansson Signed-off-by: Roland Dreier --- drivers/net/mlx4/alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index 521dc0322ee4..75ef9d0d974d 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "mlx4.h" From 271cad6d7e91ff8eea18976311692f99cd667ad3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Feb 2008 20:03:17 +0100 Subject: [PATCH 11/16] Make topology fallback macros reference their arguments. This avoids warnings with unreferenced variables in the !NUMA case. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-generic/topology.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 5d9d70cd17fc..342a2a0105c4 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -30,19 +30,19 @@ /* Other architectures wishing to use this simple topology API should fill in the below functions as appropriate in their own file. */ #ifndef cpu_to_node -#define cpu_to_node(cpu) (0) +#define cpu_to_node(cpu) ((void)(cpu),0) #endif #ifndef parent_node -#define parent_node(node) (0) +#define parent_node(node) ((void)(node),0) #endif #ifndef node_to_cpumask -#define node_to_cpumask(node) (cpu_online_map) +#define node_to_cpumask(node) ((void)node, cpu_online_map) #endif #ifndef node_to_first_cpu -#define node_to_first_cpu(node) (0) +#define node_to_first_cpu(node) ((void)(node),0) #endif #ifndef pcibus_to_node -#define pcibus_to_node(node) (-1) +#define pcibus_to_node(bus) ((void)(bus), -1) #endif #ifndef pcibus_to_cpumask From c76d118ecc5fcac7c823fb428676860dba0fdd20 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 11 Feb 2008 23:52:47 +0200 Subject: [PATCH 12/16] Add Matt to MAINTAINERS as a SLAB allocator maintainer Matt is already the maintainer of SLOB which is one of the "SLAB" allocators in the kernel so add him to MAINTAINERS. Signed-off-by: Pekka Enberg Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c40f0ae96552..6680ec44779e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3561,6 +3561,8 @@ P: Christoph Lameter M: clameter@sgi.com P: Pekka Enberg M: penberg@cs.helsinki.fi +P: Matt Mackall +M: mpm@selenic.com L: linux-mm@kvack.org S: Maintained From 900cf086fd2fbad07f72f4575449e0d0958f860f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 11 Feb 2008 16:17:33 -0700 Subject: [PATCH 13/16] Be more robust about bad arguments in get_user_pages() So I spent a while pounding my head against my monitor trying to figure out the vmsplice() vulnerability - how could a failure to check for *read* access turn into a root exploit? It turns out that it's a buffer overflow problem which is made easy by the way get_user_pages() is coded. In particular, "len" is a signed int, and it is only checked at the *end* of a do {} while() loop. So, if it is passed in as zero, the loop will execute once and decrement len to -1. At that point, the loop will proceed until the next invalid address is found; in the process, it will likely overflow the pages array passed in to get_user_pages(). I think that, if get_user_pages() has been asked to grab zero pages, that's what it should do. Thus this patch; it is, among other things, enough to block the (already fixed) root exploit and any others which might be lurking in similar code. I also think that the number of pages should be unsigned, but changing the prototype of this function probably requires some more careful review. Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- mm/memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index e5628a5fd678..717aa0e3be2d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -989,6 +989,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int i; unsigned int vm_flags; + if (len <= 0) + return 0; /* * Require read or write permissions. * If 'force' is set, we only require the "MAY" flags. From 31f1de46b90ad360a16e7af3e277d104961df923 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 12 Feb 2008 13:30:22 +0900 Subject: [PATCH 14/16] mempolicy: silently restrict nodemask to allowed nodes Kosaki Motohito noted that "numactl --interleave=all ..." failed in the presence of memoryless nodes. This patch attempts to fix that problem. Some background: numactl --interleave=all calls set_mempolicy(2) with a fully populated [out to MAXNUMNODES] nodemask. set_mempolicy() [in do_set_mempolicy()] calls contextualize_policy() which requires that the nodemask be a subset of the current task's mems_allowed; else EINVAL will be returned. A task's mems_allowed will always be a subset of node_states[N_HIGH_MEMORY] i.e., nodes with memory. So, a fully populated nodemask will be declared invalid if it includes memoryless nodes. NOTE: the same thing will occur when running in a cpuset with restricted mem_allowed--for the same reason: node mask contains dis-allowed nodes. mbind(2), on the other hand, just masks off any nodes in the nodemask that are not included in the caller's mems_allowed. In each case [mbind() and set_mempolicy()], mpol_check_policy() will complain [again, resulting in EINVAL] if the nodemask contains any memoryless nodes. This is somewhat redundant as mpol_new() will remove memoryless nodes for interleave policy, as will bind_zonelist()--called by mpol_new() for BIND policy. Proposed fix: 1) modify contextualize_policy logic to: a) remember whether the incoming node mask is empty. b) if not, restrict the nodemask to allowed nodes, as is currently done in-line for mbind(). This guarantees that the resulting mask includes only nodes with memory. NOTE: this is a [benign, IMO] change in behavior for set_mempolicy(). Dis-allowed nodes will be silently ignored, rather than returning an error. c) fold this code into mpol_check_policy(), replace 2 calls to contextualize_policy() to call mpol_check_policy() directly and remove contextualize_policy(). 2) In existing mpol_check_policy() logic, after "contextualization": a) MPOL_DEFAULT: require that in coming mask "was_empty" b) MPOL_{BIND|INTERLEAVE}: require that contextualized nodemask contains at least one node. c) add a case for MPOL_PREFERRED: if in coming was not empty and resulting mask IS empty, user specified invalid nodes. Return EINVAL. c) remove the now redundant check for memoryless nodes 3) remove the now redundant masking of policy nodes for interleave policy from mpol_new(). 4) Now that mpol_check_policy() contextualizes the nodemask, remove the in-line nodes_and() from sys_mbind(). I believe that this restores mbind() to the behavior before the memoryless-nodes patch series. E.g., we'll no longer treat an invalid nodemask with MPOL_PREFERRED as local allocation. [ Patch history: v1 -> v2: - Communicate whether or not incoming node mask was empty to mpol_check_policy() for better error checking. - As suggested by David Rientjes, remove the now unused cpuset_nodes_subset_current_mems_allowed() from cpuset.h v2 -> v3: - As suggested by Kosaki Motohito, fold the "contextualization" of policy nodemask into mpol_check_policy(). Looks a little cleaner. ] Signed-off-by: Lee Schermerhorn Signed-off-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro Acked-by: David Rientjes Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 3 --- mm/mempolicy.c | 61 +++++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index f8c9a2752f06..0a26be353cb3 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -26,8 +26,6 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); void cpuset_update_task_memory_state(void); -#define cpuset_nodes_subset_current_mems_allowed(nodes) \ - nodes_subset((nodes), current->mems_allowed) int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask); @@ -103,7 +101,6 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) #define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY]) static inline void cpuset_init_current_mems_allowed(void) {} static inline void cpuset_update_task_memory_state(void) {} -#define cpuset_nodes_subset_current_mems_allowed(nodes) (1) static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 83c69f8a64c2..8d246c3b340f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -116,22 +116,51 @@ static void mpol_rebind_policy(struct mempolicy *pol, /* Do sanity checking on a policy */ static int mpol_check_policy(int mode, nodemask_t *nodes) { - int empty = nodes_empty(*nodes); + int was_empty, is_empty; + + if (!nodes) + return 0; + + /* + * "Contextualize" the in-coming nodemast for cpusets: + * Remember whether in-coming nodemask was empty, If not, + * restrict the nodes to the allowed nodes in the cpuset. + * This is guaranteed to be a subset of nodes with memory. + */ + cpuset_update_task_memory_state(); + is_empty = was_empty = nodes_empty(*nodes); + if (!was_empty) { + nodes_and(*nodes, *nodes, cpuset_current_mems_allowed); + is_empty = nodes_empty(*nodes); /* after "contextualization" */ + } switch (mode) { case MPOL_DEFAULT: - if (!empty) + /* + * require caller to specify an empty nodemask + * before "contextualization" + */ + if (!was_empty) return -EINVAL; break; case MPOL_BIND: case MPOL_INTERLEAVE: - /* Preferred will only use the first bit, but allow - more for now. */ - if (empty) + /* + * require at least 1 valid node after "contextualization" + */ + if (is_empty) + return -EINVAL; + break; + case MPOL_PREFERRED: + /* + * Did caller specify invalid nodes? + * Don't silently accept this as "local allocation". + */ + if (!was_empty && is_empty) return -EINVAL; break; } - return nodes_subset(*nodes, node_states[N_HIGH_MEMORY]) ? 0 : -EINVAL; + return 0; } /* Generate a custom zonelist for the BIND policy. */ @@ -188,8 +217,6 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes) switch (mode) { case MPOL_INTERLEAVE: policy->v.nodes = *nodes; - nodes_and(policy->v.nodes, policy->v.nodes, - node_states[N_HIGH_MEMORY]); if (nodes_weight(policy->v.nodes) == 0) { kmem_cache_free(policy_cache, policy); return ERR_PTR(-EINVAL); @@ -421,18 +448,6 @@ static int mbind_range(struct vm_area_struct *vma, unsigned long start, return err; } -static int contextualize_policy(int mode, nodemask_t *nodes) -{ - if (!nodes) - return 0; - - cpuset_update_task_memory_state(); - if (!cpuset_nodes_subset_current_mems_allowed(*nodes)) - return -EINVAL; - return mpol_check_policy(mode, nodes); -} - - /* * Update task->flags PF_MEMPOLICY bit: set iff non-default * mempolicy. Allows more rapid checking of this (combined perhaps @@ -468,7 +483,7 @@ static long do_set_mempolicy(int mode, nodemask_t *nodes) { struct mempolicy *new; - if (contextualize_policy(mode, nodes)) + if (mpol_check_policy(mode, nodes)) return -EINVAL; new = mpol_new(mode, nodes); if (IS_ERR(new)) @@ -915,10 +930,6 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len, err = get_nodes(&nodes, nmask, maxnode); if (err) return err; -#ifdef CONFIG_CPUSETS - /* Restrict the nodes to the allowed nodes in the cpuset */ - nodes_and(nodes, nodes, current->mems_allowed); -#endif return do_mbind(start, len, mode, &nodes, flags); } From 2c1582699872d38682b136b1446953ee351bc7e1 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 11 Feb 2008 14:38:51 -0800 Subject: [PATCH 15/16] x86: vdso_install fix The makefile magic for installing the 32-bit vdso images on disk had a little error. A single-line change would fix that bug, but this does a little more to reduce the error-prone duplication of this bit of makefile variable magic. Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- arch/x86/vdso/Makefile | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index d28dda574700..f385a4b4a484 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -7,7 +7,7 @@ VDSO32-$(CONFIG_X86_32) := y VDSO32-$(CONFIG_COMPAT) := y vdso-install-$(VDSO64-y) += vdso.so -vdso-install-$(VDSO32-y) += $(vdso32-y:=.so) +vdso-install-$(VDSO32-y) += $(vdso32-images) # files to link into the vdso @@ -63,6 +63,8 @@ vdso32.so-$(CONFIG_X86_32) += int80 vdso32.so-$(CONFIG_COMPAT) += syscall vdso32.so-$(VDSO32-y) += sysenter +vdso32-images = $(vdso32.so-y:%=vdso32-%.so) + CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1 @@ -71,21 +73,21 @@ VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds -targets += $(vdso32.so-y:%=vdso32-%.so.dbg) $(vdso32.so-y:%=vdso32-%.so) +targets += $(vdso32-images) $(vdso32-images:=.dbg) targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o) -extra-y += $(vdso32.so-y:%=vdso32-%.so) +extra-y += $(vdso32-images) -$(obj)/vdso32.o: $(vdso32.so-y:%=$(obj)/vdso32-%.so) +$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) -$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): asflags-$(CONFIG_X86_64) += -m32 +$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) +$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 -$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ - $(obj)/vdso32/vdso32.lds \ - $(obj)/vdso32/note.o \ - $(obj)/vdso32/%.o +$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ + $(obj)/vdso32/vdso32.lds \ + $(obj)/vdso32/note.o \ + $(obj)/vdso32/%.o $(call if_changed,vdso) # Make vdso32-*-syms.lds from each image, and then make sure they match. From 96b5a46e2a72dc1829370c87053e0cd558d58bc0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 11 Feb 2008 20:52:01 -0800 Subject: [PATCH 16/16] WMI: initialize wmi_blocks.list even if ACPI is disabled Even if we don't want to register the WMI driver, we should initialize the wmi_blocks list to be empty, since we don't want the wmi helper functions to oops just because that basic list has not even been set up. With this, "find_guid()" will happily return "not found" rather than oopsing all over the place, and the callers will then just automatically return false or AE_NOT_FOUND as appropriate. Signed-off-by: Linus Torvalds --- drivers/acpi/wmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/wmi.c b/drivers/acpi/wmi.c index 36b84ab418dd..457ed3d3f51c 100644 --- a/drivers/acpi/wmi.c +++ b/drivers/acpi/wmi.c @@ -673,11 +673,11 @@ static int __init acpi_wmi_init(void) { acpi_status result; + INIT_LIST_HEAD(&wmi_blocks.list); + if (acpi_disabled) return -ENODEV; - INIT_LIST_HEAD(&wmi_blocks.list); - result = acpi_bus_register_driver(&acpi_wmi_driver); if (result < 0) {