From b7283945c5ed8e78f0fe2c3313d8d1cdbc19b4b3 Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 1 Nov 2010 22:48:11 +0000 Subject: [PATCH 01/69] OMAP2: Devkit8000: Fix mmc regulator failure This patch fixes the following error: >regulator: VMMC1: 1850 <--> 3150 mV at 3000 mV normal standby >twl_reg twl_reg.6: can't register VMMC1, -22 >twl_reg: probe of twl_reg.6 failed with error -22 Signed-off-by: Thomas Weber Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-devkit8000.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c index 067f4379c87f..53ac762518bd 100644 --- a/arch/arm/mach-omap2/board-devkit8000.c +++ b/arch/arm/mach-omap2/board-devkit8000.c @@ -242,9 +242,6 @@ static int devkit8000_twl_gpio_setup(struct device *dev, mmc[0].gpio_cd = gpio + 0; omap2_hsmmc_init(mmc); - /* link regulators to MMC adapters */ - devkit8000_vmmc1_supply.dev = mmc[0].dev; - /* TWL4030_GPIO_MAX + 1 == ledB, PMU_STAT (out, active low LED) */ gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1; From e860e6da96f5a320a752da232e03d7bf885710b7 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 25 Oct 2010 14:35:24 +0000 Subject: [PATCH 02/69] omap: dma: Add read-back to DMA interrupt handler to avoid spurious interrupts Flush the writes to IRQSTATUS_L0 register in the DMA interrupt handler by reading the register directly after write.
This prevents the spurious DMA interrupts noted when using VDD_OPP 1 Signed-off-by: Mathias Nyman Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index f5c5b8da9a87..2c2826571d45 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -1983,6 +1983,8 @@ static int omap2_dma_handle_ch(int ch) dma_write(OMAP2_DMA_CSR_CLEAR_MASK, CSR(ch)); dma_write(1 << ch, IRQSTATUS_L0); + /* read back the register to flush the write */ + dma_read(IRQSTATUS_L0); /* If the ch is not chained then chain_id will be -1 */ if (dma_chan[ch].chain_id != -1) { From 1cff502d8b22272addc4f5f57346d598b4755d9e Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 2 Nov 2010 14:04:01 +0000 Subject: [PATCH 03/69] OMAP1: camera.h: add missing include #include directive is required to compile the dependant boards (board-ams-delta for now). Signed-off-by: Janusz Krzysztofik [tony@atomide.com: updated comments] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/include/mach/camera.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap1/include/mach/camera.h b/arch/arm/mach-omap1/include/mach/camera.h index fd54b452eb22..847d00f0bb0a 100644 --- a/arch/arm/mach-omap1/include/mach/camera.h +++ b/arch/arm/mach-omap1/include/mach/camera.h @@ -1,6 +1,8 @@ #ifndef __ASM_ARCH_CAMERA_H_ #define __ASM_ARCH_CAMERA_H_ +#include + void omap1_camera_init(void *); static inline void omap1_set_camera_info(struct omap1_cam_platform_data *info) From 3f25cb042ca77ac52546ae9f0039cfd0a243698c Mon Sep 17 00:00:00 2001 From: Tony SIM Date: Mon, 8 Nov 2010 04:07:47 +0000 Subject: [PATCH 04/69] ARM: mach-shmobile: intc-sh7372: fix interrupt number Signed-off-by: Tony SIM Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/intc-sh7372.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c index 4cd3cae38e72..30b2f400666a 100644 --- a/arch/arm/mach-shmobile/intc-sh7372.c +++ b/arch/arm/mach-shmobile/intc-sh7372.c @@ -98,7 +98,7 @@ static struct intc_vect intca_vectors[] __initdata = { INTC_VECT(IRQ14A, 0x03c0), INTC_VECT(IRQ15A, 0x03e0), INTC_VECT(IRQ16A, 0x3200), INTC_VECT(IRQ17A, 0x3220), INTC_VECT(IRQ18A, 0x3240), INTC_VECT(IRQ19A, 0x3260), - INTC_VECT(IRQ20A, 0x3280), INTC_VECT(IRQ31A, 0x32a0), + INTC_VECT(IRQ20A, 0x3280), INTC_VECT(IRQ21A, 0x32a0), INTC_VECT(IRQ22A, 0x32c0), INTC_VECT(IRQ23A, 0x32e0), INTC_VECT(IRQ24A, 0x3300), INTC_VECT(IRQ25A, 0x3320), INTC_VECT(IRQ26A, 0x3340), INTC_VECT(IRQ27A, 0x3360), From 2e351ec61c35fac01ed1fb1ce35c183bf85e780c Mon Sep 17 00:00:00 2001 From: Yusuke Goda Date: Mon, 8 Nov 2010 05:45:09 +0000 Subject: [PATCH 05/69] ARM: mach-shmobile: ap4evb: Mark NOR boot loader partitions read-only. This makes the loader and bootenv partitions read-only under MTD for the on-board NOR flash. Signed-off-by: Yusuke Goda Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 32d9e2816e56..63c2fcac8e1e 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -163,11 +163,13 @@ static struct mtd_partition nor_flash_partitions[] = { .name = "loader", .offset = 0x00000000, .size = 512 * 1024, + .mask_flags = MTD_WRITEABLE, }, { .name = "bootenv", .offset = MTDPART_OFS_APPEND, .size = 512 * 1024, + .mask_flags = MTD_WRITEABLE, }, { .name = "kernel_ro", From 899be96db75451ba98cb217109ef4cf2ee6de927 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 8 Nov 2010 13:35:10 +0800 Subject: [PATCH 06/69] rtc: rtc-sh - fix a memory leak request_mem_region() will call kzalloc to allocate memory for struct resource. 
release_resource() unregisters the resource but does not free the allocated memory, thus use release_mem_region() instead to fix the memory leak. Signed-off-by: Axel Lin Signed-off-by: Paul Mundt --- drivers/rtc/rtc-sh.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index 5efbd5990ff8..06e41ed93230 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -761,7 +761,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev) clk_put(rtc->clk); iounmap(rtc->regbase); err_badmap: - release_resource(rtc->res); + release_mem_region(rtc->res->start, rtc->regsize); err_badres: kfree(rtc); @@ -786,7 +786,7 @@ static int __exit sh_rtc_remove(struct platform_device *pdev) } iounmap(rtc->regbase); - release_resource(rtc->res); + release_mem_region(rtc->res->start, rtc->regsize); clk_disable(rtc->clk); clk_put(rtc->clk); From c1e44756fdb7b363fd22cb5514dced40752e36c5 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 8 Nov 2010 15:01:02 +0100 Subject: [PATCH 07/69] cfq-iosched: do cleanup Some functions should return boolean. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9eba291eb6fd..b8174bb4a6a1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -637,11 +637,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) static inline bool cfq_slice_used(struct cfq_queue *cfqq) { if (cfq_cfqq_slice_new(cfqq)) - return 0; + return false; if (time_before(jiffies, cfqq->slice_end)) - return 0; + return false; - return 1; + return true; } /* @@ -1892,10 +1892,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) * in their service tree. */ if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) - return 1; + return true; cfq_log_cfqq(cfqd, cfqq, "Not idling. 
st->count:%d", service_tree->count); - return 0; + return false; } static void cfq_arm_slice_timer(struct cfq_data *cfqd) @@ -2359,12 +2359,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, { /* the queue hasn't finished any request, can't estimate */ if (cfq_cfqq_slice_new(cfqq)) - return 1; + return true; if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, cfqq->slice_end)) - return 1; + return true; - return 0; + return false; } static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) From d2d59e18a1ea8ecdd1c0a52af320e9a7f5391cc4 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 8 Nov 2010 15:01:03 +0100 Subject: [PATCH 08/69] cfq-iosched: schedule dispatch for noidle queue A queue is idle at cfq_dispatch_requests(), but it gets noidle later. Unless another task explicitly does an unplug or all requests are drained, we will not deliver requests to the disk even if cfq_arm_slice_timer doesn't make the queue idle. For example, cfq_should_idle() returns true because of service_tree->count == 1, and then other queues are added. Note, I didn't see obvious performance impacts so far with the patch, but just thought this could be a problem.
Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index b8174bb4a6a1..986865e3fbc5 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3255,6 +3255,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) return true; + /* An idle queue should not be idle now for some reason */ + if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq)) + return true; + if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) return false; @@ -3508,8 +3512,25 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) } } - if (!cfqd->rq_in_driver) + if (!cfqd->rq_in_driver) { + cfq_schedule_dispatch(cfqd); + return; + } + /* + * A queue is idle at cfq_dispatch_requests(), but it gets noidle + * later. We schedule a dispatch if the queue has no requests, + * otherwise the disk is actually in idle till all requests + * are finished even cfq_arm_slice_timer doesn't make the queue idle + * */ + cfqq = cfqd->active_queue; + if (!cfqq) + return; + + if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq) && + (!cfqd->cfq_group_idle || cfqq->cfqg->nr_cfqq > 1)) { + cfq_del_timer(cfqd, cfqq); cfq_schedule_dispatch(cfqd); + } } /* From 8e1ac6655104bc6e1e79d67e2df88cc8fa9b6e07 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 8 Nov 2010 15:01:04 +0100 Subject: [PATCH 09/69] cfq-iosched: don't idle if a deep seek queue is slow If a deep seek queue slowly delivers requests but the disk is much faster, idling for the queue just wastes disk throughput. If the queue delivers all requests before half its slice is used, the patch disables idling for it. In my test, the application delivers 32 requests at a time, the disk can accept at most 128 requests, and the disk is fast.
without the patch, the throughput is just around 30m/s, while with it, the speed is about 80m/s. The disk is a SSD, but is detected as a rotational disk. I can configure it as SSD, but I thought the deep seek queue logic should be fixed too, for example, considering a fast raid. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 986865e3fbc5..ca4d19907243 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2285,6 +2285,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) goto keep_queue; } + /* + * This is a deep seek queue, but the device is much faster than + * the queue can deliver, don't idle + **/ + if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && + (cfq_cfqq_slice_new(cfqq) || + (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { + cfq_clear_cfqq_deep(cfqq); + cfq_clear_cfqq_idle_window(cfqq); + } + if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { cfqq = NULL; goto keep_queue; From dfcccd3aaba15e4e8ffae65fb2a757b3e49470de Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 8 Nov 2010 06:48:00 +0000 Subject: [PATCH 10/69] arm: omap1: devices: need to return with a value Get rid of the following warning: arch/arm/mach-omap1/devices.c: In function 'omap_init_wdt': arch/arm/mach-omap1/devices.c:298: warning: 'return' with no value, in function returning non-void while at that, also change: platform_device_register(); return 0; into: return platform_device_register(); Signed-off-by: Felipe Balbi Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/devices.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c index ea0d80a89da7..e7f9ee63dce5 100644 --- a/arch/arm/mach-omap1/devices.c +++ b/arch/arm/mach-omap1/devices.c @@ -321,10 +321,9 @@ static struct platform_device omap_wdt_device = { static int 
__init omap_init_wdt(void) { if (!cpu_is_omap16xx()) - return; + return -ENODEV; - platform_device_register(&omap_wdt_device); - return 0; + return platform_device_register(&omap_wdt_device); } subsys_initcall(omap_init_wdt); #endif From 99870bd784ff9eb2405eab060125c0ded74968cd Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 8 Nov 2010 17:02:26 +0900 Subject: [PATCH 11/69] sh: intc: Fix up initializers for gcc 4.5. The _INTC_ARRAY() initializer presently does a NULL test which blows up as a non-constant initializer under gcc 4.5. This switches over to a type test to account for NULL initializers explicitly. Signed-off-by: Paul Mundt --- include/linux/sh_intc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index f656d1a43dc0..5812fefbcedf 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -79,7 +79,7 @@ struct intc_hw_desc { unsigned int nr_subgroups; }; -#define _INTC_ARRAY(a) a, a == NULL ? 0 : sizeof(a)/sizeof(*a) +#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 0 : sizeof(a)/sizeof(*a) #define INTC_HW_DESC(vectors, groups, mask_regs, \ prio_regs, sense_regs, ack_regs) \ From 2b9408a45978dcda77407859148deeccf403c372 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 9 Nov 2010 14:51:13 +0100 Subject: [PATCH 12/69] cfq-iosched: don't schedule a dispatch for a non-idle queue Vivek suggests we don't need schedule a dispatch when an idle queue becomes nonidle. And he is right, cfq_should_preempt already covers the logic. 
Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ca4d19907243..f90519430be6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3523,25 +3523,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) } } - if (!cfqd->rq_in_driver) { + if (!cfqd->rq_in_driver) cfq_schedule_dispatch(cfqd); - return; - } - /* - * A queue is idle at cfq_dispatch_requests(), but it gets noidle - * later. We schedule a dispatch if the queue has no requests, - * otherwise the disk is actually in idle till all requests - * are finished even cfq_arm_slice_timer doesn't make the queue idle - * */ - cfqq = cfqd->active_queue; - if (!cfqq) - return; - - if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq) && - (!cfqd->cfq_group_idle || cfqq->cfqg->nr_cfqq > 1)) { - cfq_del_timer(cfqd, cfqq); - cfq_schedule_dispatch(cfqd); - } } /* From 91d63f8a306722dbf1b400d4afb11f69512977ad Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 4 Nov 2010 11:05:55 +0000 Subject: [PATCH 13/69] fbdev: sh_mobile_hdmi: properly clean up modedb on monitor unplug Even though this is not a problem currently, it is better to clear the freed pointer and nullify the length of the freed memory. 
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_hdmi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c index 55b3077ff6ff..d7df10315d8d 100644 --- a/drivers/video/sh_mobile_hdmi.c +++ b/drivers/video/sh_mobile_hdmi.c @@ -1071,6 +1071,10 @@ static void sh_hdmi_edid_work_fn(struct work_struct *work) if (!hdmi->info) goto out; + hdmi->monspec.modedb_len = 0; + fb_destroy_modedb(hdmi->monspec.modedb); + hdmi->monspec.modedb = NULL; + acquire_console_sem(); /* HDMI disconnect */ @@ -1078,7 +1082,6 @@ static void sh_hdmi_edid_work_fn(struct work_struct *work) release_console_sem(); pm_runtime_put(hdmi->dev); - fb_destroy_modedb(hdmi->monspec.modedb); } out: From 5ae0cf82df212253857326a6706018eccb658683 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 4 Nov 2010 11:06:01 +0000 Subject: [PATCH 14/69] fbdev: sh_mobile_lcdc: use the standard CEA-861 720p timing sh_mobile_lcdcfb.c has a hard-coded 720p video mode, used as default, if none is explicitly specified by the platform. Adjust its timing to match the CEA standard. Also add an explicit refresh rate value, which is needed, when used with HDMI, to be able to recognise the default 720p mode as a pre-programmed VIC #4. 
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_lcdcfb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index 50963739a409..a87dace49cb7 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -115,15 +115,16 @@ static const struct fb_videomode default_720p = { .xres = 1280, .yres = 720, - .left_margin = 200, - .right_margin = 88, - .hsync_len = 48, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 40, .upper_margin = 20, .lower_margin = 5, .vsync_len = 5, .pixclock = 13468, + .refresh = 60, .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, }; From 5fd284e6cd39f731db86dfd2440553365d5fad4d Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 4 Nov 2010 11:06:11 +0000 Subject: [PATCH 15/69] fbdev: sh_mobile_lcdc: use correct number of modes, when using the default Fix zero mode number, when using the default 720p mode. 
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_lcdcfb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index a87dace49cb7..9b1364723c65 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -1198,6 +1198,7 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) const struct fb_videomode *mode = cfg->lcd_cfg; unsigned long max_size = 0; int k; + int num_cfg; ch->info = framebuffer_alloc(0, &pdev->dev); if (!ch->info) { @@ -1233,8 +1234,14 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) info->fix = sh_mobile_lcdc_fix; info->fix.smem_len = max_size * (cfg->bpp / 8) * 2; - if (!mode) + if (!mode) { mode = &default_720p; + num_cfg = 1; + } else { + num_cfg = ch->cfg.num_cfg; + } + + fb_videomode_to_modelist(mode, num_cfg, &info->modelist); fb_videomode_to_var(var, mode); /* Default Y virtual resolution is 2x panel size */ @@ -1282,10 +1289,6 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) for (i = 0; i < j; i++) { struct sh_mobile_lcdc_chan *ch = priv->ch + i; - const struct fb_videomode *mode = ch->cfg.lcd_cfg; - - if (!mode) - mode = &default_720p; info = ch->info; @@ -1298,7 +1301,6 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) } } - fb_videomode_to_modelist(mode, ch->cfg.num_cfg, &info->modelist); error = register_framebuffer(info); if (error < 0) goto err1; From ed10b490ea6498f76284043565d42ca3649ccca1 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 10 Nov 2010 18:02:25 +0900 Subject: [PATCH 16/69] sh: clkfwk: fix up compiler warnings. 
CC drivers/sh/clk/core.o drivers/sh/clk/core.c: In function 'clk_round_parent': drivers/sh/clk/core.c:574: warning: format '%lu' expects type 'long unsigned int', but argument 2 has type 'unsigned int' drivers/sh/clk/core.c:594: warning: format '%lu' expects type 'long unsigned int', but argument 2 has type 'unsigned int' Signed-off-by: Paul Mundt --- drivers/sh/clk/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c index 09615b51d591..cb12a8e1466b 100644 --- a/drivers/sh/clk/core.c +++ b/drivers/sh/clk/core.c @@ -571,7 +571,7 @@ long clk_round_parent(struct clk *clk, unsigned long target, *best_freq = freq_max; } - pr_debug("too low freq %lu, error %lu\n", freq->frequency, + pr_debug("too low freq %u, error %lu\n", freq->frequency, target - freq_max); if (!error) @@ -591,7 +591,7 @@ long clk_round_parent(struct clk *clk, unsigned long target, *best_freq = freq_min; } - pr_debug("too high freq %lu, error %lu\n", freq->frequency, + pr_debug("too high freq %u, error %lu\n", freq->frequency, freq_min - target); if (!error) From bea278278f0bb9af3ce6234acece9772d401a252 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 10 Nov 2010 18:07:43 +0900 Subject: [PATCH 17/69] MAINTAINERS: update the sh git tree entry. Reflect the recent tree restructuring. 
Signed-off-by: Paul Mundt --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0094224ca79b..39267ace999f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5705,7 +5705,7 @@ M: Paul Mundt L: linux-sh@vger.kernel.org W: http://www.linux-sh.org Q: http://patchwork.kernel.org/project/linux-sh/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6.git sh-latest S: Supported F: Documentation/sh/ F: arch/sh/ From d22c0e5088912a9f05760c597e34876f58d1cee6 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 10 Nov 2010 18:09:14 +0900 Subject: [PATCH 18/69] MAINTAINERS: update the ARM SH-Mobile git tree entry. Reflect the recent tree restructuring. Signed-off-by: Paul Mundt --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0094224ca79b..f5ec964865c7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -945,7 +945,7 @@ M: Magnus Damm L: linux-sh@vger.kernel.org W: http://oss.renesas.com Q: http://patchwork.kernel.org/project/linux-sh/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/genesis-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6.git rmobile-latest S: Supported F: arch/arm/mach-shmobile/ F: drivers/sh/ From 32ed3036c56284a720c0c00d92ee14bf609f497d Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 10 Nov 2010 13:04:19 +0200 Subject: [PATCH 19/69] sisfb: limit POST memory test according to PCI resource length If the POST memory test fails, the driver may access illegal memory areas. Instead of hard coding the maximum size, set it according to the PCI resource length (an additional check is needed in sisfb_post_map_vram() to ensure it's big enough). DRAM sizing will later adjust video_size to the correct value. 
Signed-off-by: Aaro Koskinen Cc: Thomas Winischhofer Signed-off-by: Paul Mundt --- drivers/video/sis/sis_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c index b52f8e4ef1fd..3dde12b0ab06 100644 --- a/drivers/video/sis/sis_main.c +++ b/drivers/video/sis/sis_main.c @@ -4181,6 +4181,9 @@ static void __devinit sisfb_post_map_vram(struct sis_video_info *ivideo, unsigned int *mapsize, unsigned int min) { + if (*mapsize < (min << 20)) + return; + ivideo->video_vbase = ioremap(ivideo->video_base, (*mapsize)); if(!ivideo->video_vbase) { @@ -4514,7 +4517,7 @@ sisfb_post_sis300(struct pci_dev *pdev) } else { #endif /* Need to map max FB size for finding out about RAM size */ - mapsize = 64 << 20; + mapsize = ivideo->video_size; sisfb_post_map_vram(ivideo, &mapsize, 4); if(ivideo->video_vbase) { @@ -4680,7 +4683,7 @@ sisfb_post_xgi_ramsize(struct sis_video_info *ivideo) orSISIDXREG(SISSR, 0x20, (0x80 | 0x04)); /* Need to map max FB size for finding out about RAM size */ - mapsize = 256 << 20; + mapsize = ivideo->video_size; sisfb_post_map_vram(ivideo, &mapsize, 32); if(!ivideo->video_vbase) { @@ -5936,6 +5939,7 @@ sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } ivideo->video_base = pci_resource_start(pdev, 0); + ivideo->video_size = pci_resource_len(pdev, 0); ivideo->mmio_base = pci_resource_start(pdev, 1); ivideo->mmio_size = pci_resource_len(pdev, 1); ivideo->SiS_Pr.RelIO = pci_resource_start(pdev, 2) + 0x30; From 108409a8a4e325db38f27258da68d7207a0ad433 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 10 Nov 2010 11:45:18 +0200 Subject: [PATCH 20/69] OMAP: VRAM: improve VRAM error prints Improve the error prints to give more information about the offending address & size. 
Signed-off-by: Tomi Valkeinen Signed-off-by: Paul Mundt --- drivers/video/omap2/vram.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/video/omap2/vram.c b/drivers/video/omap2/vram.c index fed2a72bc6b6..bb5ee0663e65 100644 --- a/drivers/video/omap2/vram.c +++ b/drivers/video/omap2/vram.c @@ -554,9 +554,15 @@ void __init omap_vram_reserve_sdram_memblock(void) size = PAGE_ALIGN(size); if (paddr) { - if ((paddr & ~PAGE_MASK) || - !memblock_is_region_memory(paddr, size)) { - pr_err("Illegal SDRAM region for VRAM\n"); + if (paddr & ~PAGE_MASK) { + pr_err("VRAM start address 0x%08x not page aligned\n", + paddr); + return; + } + + if (!memblock_is_region_memory(paddr, size)) { + pr_err("Illegal SDRAM region 0x%08x..0x%08x for VRAM\n", + paddr, paddr + size - 1); return; } From 88abf44d3d8d4fefcbf3d57584d471e38cb51627 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 10 Nov 2010 11:45:19 +0200 Subject: [PATCH 21/69] OMAP: VRAM: Fix boot-time memory allocation Use memblock_free() and memblock_remove() to remove the allocated or reserved VRAM area from normal kernel memory. This is a slightly modified version of patches from Felipe Contreras and Namhyung Kim. 
Reported-by: Felipe Contreras Reported-by: Namhyung Kim Signed-off-by: Tomi Valkeinen Signed-off-by: Paul Mundt --- drivers/video/omap2/vram.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/omap2/vram.c b/drivers/video/omap2/vram.c index bb5ee0663e65..2fd7e5271be9 100644 --- a/drivers/video/omap2/vram.c +++ b/drivers/video/omap2/vram.c @@ -576,9 +576,12 @@ void __init omap_vram_reserve_sdram_memblock(void) return; } } else { - paddr = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_REAL_LIMIT); + paddr = memblock_alloc(size, PAGE_SIZE); } + memblock_free(paddr, size); + memblock_remove(paddr, size); + omap_vram_add_region(paddr, size); pr_info("Reserving %u bytes SDRAM for VRAM\n", size); From 0bf3d5a0fb569b13fc5a05f7d5a240d2db70ac61 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 10 Nov 2010 11:45:20 +0200 Subject: [PATCH 22/69] OMAP: DSS: Fix documentation regarding 'vram' kernel parameter The DSS documentation didn't mention the option to give the VRAM start address. Signed-off-by: Tomi Valkeinen Signed-off-by: Paul Mundt --- Documentation/arm/OMAP/DSS | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/arm/OMAP/DSS b/Documentation/arm/OMAP/DSS index 0af0e9eed5d6..888ae7b83ae4 100644 --- a/Documentation/arm/OMAP/DSS +++ b/Documentation/arm/OMAP/DSS @@ -255,9 +255,10 @@ framebuffer parameters. Kernel boot arguments --------------------- -vram= - - Amount of total VRAM to preallocate. For example, "10M". omapfb - allocates memory for framebuffers from VRAM. +vram=[,] + - Amount of total VRAM to preallocate and optionally a physical start + memory address. For example, "10M". omapfb allocates memory for + framebuffers from VRAM. omapfb.mode=:[,...] - Default video mode for specified displays. 
For example, From 34db1d595ef6f183fbc1e42cda45a3dfa0035258 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 11 Nov 2010 09:58:57 +0100 Subject: [PATCH 23/69] block: export 'ro' sysfs attribute for partitions We already export 'ro' for the disk. This adds the same attribute for partitions. Cc: Karel Zak Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- fs/partitions/check.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 79fbf3f390f0..861ae84fcee5 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -237,6 +237,13 @@ ssize_t part_size_show(struct device *dev, return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); } +ssize_t part_ro_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + return sprintf(buf, "%d\n", p->policy ? 1 : 0); +} + ssize_t part_alignment_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -312,6 +319,7 @@ ssize_t part_fail_store(struct device *dev, static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); +static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, NULL); @@ -326,6 +334,7 @@ static struct attribute *part_attrs[] = { &dev_attr_partition.attr, &dev_attr_start.attr, &dev_attr_size.attr, + &dev_attr_ro.attr, &dev_attr_alignment_offset.attr, &dev_attr_discard_alignment.attr, &dev_attr_stat.attr, From ac3abf2c37a9b0be604ea9825705a8510a9a6ba3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 8 Nov 2010 23:20:27 -0500 Subject: [PATCH 24/69] PCI hotplug: ibmphp: Add check to prevent reading beyond mapped area While testing various randconfigs with ktest.pl, I hit the following 
panic: BUG: unable to handle kernel paging request at f7e54b03 IP: [] ibmphp_access_ebda+0x101/0x19bb Adding printks, I found that the loop that reads the ebda blocks can move out of the mapped section. ibmphp_access_ebda: start=f7e44c00 size=5120 end=f7e46000 ibmphp_access_ebda: io_mem=f7e44d80 offset=384 ibmphp_access_ebda: io_mem=f7e54b03 offset=65283 The start of the iomap was at f7e44c00 and had a size of 5120, making the end f7e46000. We start with an offset of 0x180 or 384, giving the first read at 0xf7e44d80. Reading that location yields 65283, which is much bigger than the 5120 that was allocated and makes the next read at f7e54b03 which is outside the mapped area. Perhaps this is a bug in the driver, or buggy hardware, but this patch is more about not crashing my box on start up and just giving a warning if it detects this error. This patch at least lets my box boot with just a warning. Cc: Chandru Siddalingappa Signed-off-by: Steven Rostedt Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/ibmphp_ebda.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index 5becbdee4027..2850e64dedae 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -276,6 +276,12 @@ int __init ibmphp_access_ebda (void) for (;;) { offset = next_offset; + + /* Make sure what we read is still in the mapped section */ + if (WARN(offset > (ebda_sz * 1024 - 4), + "ibmphp_ebda: next read is beyond ebda_sz\n")) + break; + next_offset = readw (io_mem + offset); /* offset of next blk */ offset += 2; From 4723d0f2f96e6c910f951d595067eb31e0dd2d01 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 22 Sep 2010 11:09:19 -0600 Subject: [PATCH 25/69] x86/PCI: coalesce overlapping host bridge windows Some BIOSes provide PCI host bridge windows that overlap, e.g., pci_root PNP0A03:00: host bridge window [mem 0xb0000000-0xffffffff] pci_root PNP0A03:00: host bridge window [mem 
0xafffffff-0xdfffffff] pci_root PNP0A03:00: host bridge window [mem 0xf0000000-0xffffffff] If we simply insert these as children of iomem_resource, the second window fails because it conflicts with the first, and the third is inserted as a child of the first, i.e., b0000000-ffffffff PCI Bus 0000:00 f0000000-ffffffff PCI Bus 0000:00 When we claim PCI device resources, this can cause collisions like this if we put them in the first window: pci 0000:00:01.0: address space collision: [mem 0xff300000-0xff4fffff] conflicts with PCI Bus 0000:00 [mem 0xf0000000-0xffffffff] Host bridge windows are top-level resources by definition, so it doesn't make sense to make the third window a child of the first. This patch coalesces any host bridge windows that overlap. For the example above, the result is this single window: pci_root PNP0A03:00: host bridge window [mem 0xafffffff-0xffffffff] This fixes a 2.6.34 regression. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=17011 Reported-and-tested-by: Anisse Astier Reported-and-tested-by: Pramod Dematagoda Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 103 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 20 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 15466c096ba5..0972315c3860 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -138,7 +138,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) struct acpi_resource_address64 addr; acpi_status status; unsigned long flags; - struct resource *root, *conflict; u64 start, end; status = resource_to_addr(acpi_res, &addr); @@ -146,12 +145,10 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; if (addr.resource_type == ACPI_MEMORY_RANGE) { - root = &iomem_resource; flags = IORESOURCE_MEM; if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY) flags |= IORESOURCE_PREFETCH; } else if (addr.resource_type == ACPI_IO_RANGE) { - root = &ioport_resource; 
flags = IORESOURCE_IO; } else return AE_OK; @@ -172,25 +169,90 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; } - conflict = insert_resource_conflict(root, res); - if (conflict) { - dev_err(&info->bridge->dev, - "address space collision: host bridge window %pR " - "conflicts with %s %pR\n", - res, conflict->name, conflict); - } else { - pci_bus_add_resource(info->bus, res, 0); - info->res_num++; - if (addr.translation_offset) - dev_info(&info->bridge->dev, "host bridge window %pR " - "(PCI address [%#llx-%#llx])\n", - res, res->start - addr.translation_offset, - res->end - addr.translation_offset); + info->res_num++; + if (addr.translation_offset) + dev_info(&info->bridge->dev, "host bridge window %pR " + "(PCI address [%#llx-%#llx])\n", + res, res->start - addr.translation_offset, + res->end - addr.translation_offset); + else + dev_info(&info->bridge->dev, "host bridge window %pR\n", res); + + return AE_OK; +} + +static bool resource_contains(struct resource *res, resource_size_t point) +{ + if (res->start <= point && point <= res->end) + return true; + return false; +} + +static void coalesce_windows(struct pci_root_info *info, int type) +{ + int i, j; + struct resource *res1, *res2; + + for (i = 0; i < info->res_num; i++) { + res1 = &info->res[i]; + if (!(res1->flags & type)) + continue; + + for (j = i + 1; j < info->res_num; j++) { + res2 = &info->res[j]; + if (!(res2->flags & type)) + continue; + + /* + * I don't like throwing away windows because then + * our resources no longer match the ACPI _CRS, but + * the kernel resource tree doesn't allow overlaps. 
+ */ + if (resource_contains(res1, res2->start) || + resource_contains(res1, res2->end) || + resource_contains(res2, res1->start) || + resource_contains(res2, res1->end)) { + res1->start = min(res1->start, res2->start); + res1->end = max(res1->end, res2->end); + dev_info(&info->bridge->dev, + "host bridge window expanded to %pR; %pR ignored\n", + res1, res2); + res2->flags = 0; + } + } + } +} + +static void add_resources(struct pci_root_info *info) +{ + int i; + struct resource *res, *root, *conflict; + + if (!pci_use_crs) + return; + + coalesce_windows(info, IORESOURCE_MEM); + coalesce_windows(info, IORESOURCE_IO); + + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + else if (res->flags & IORESOURCE_IO) + root = &ioport_resource; else - dev_info(&info->bridge->dev, - "host bridge window %pR\n", res); + continue; + + conflict = insert_resource_conflict(root, res); + if (conflict) + dev_err(&info->bridge->dev, + "address space collision: host bridge window %pR " + "conflicts with %s %pR\n", + res, conflict->name, conflict); + else + pci_bus_add_resource(info->bus, res, 0); } - return AE_OK; } static void @@ -224,6 +286,7 @@ get_current_resources(struct acpi_device *device, int busnum, acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, &info); + add_resources(&info); return; name_alloc_fail: From 3b519e4ea618b6943a82931630872907f9ac2c2b Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 10 Nov 2010 11:03:21 +0100 Subject: [PATCH 26/69] PCI: fix size checks for mmap() on /proc/bus/pci files The checks for valid mmaps of PCI resources made through /proc/bus/pci files that were introduced in 9eff02e2042f96fb2aedd02e032eca1c5333d767 have several problems: 1. mmap() calls on /proc/bus/pci files are made with real file offsets > 0, whereas under /sys/bus/pci/devices, the start of the resource corresponds to offset 0. 
This may lead to false negatives in pci_mmap_fits(), which implicitly assumes the /sys/bus/pci/devices layout. 2. The loop in proc_bus_pci_mmap doesn't skip empty resources. This leads to false positives, because pci_mmap_fits() doesn't treat empty resources correctly (the calculated size is 1 << (8*sizeof(resource_size_t)-PAGE_SHIFT) in this case!). 3. If a user maps resources with BAR > 0, pci_mmap_fits will emit bogus WARNINGS for the first resources that don't fit until the correct one is found. On many controllers the first 2-4 BARs are used, and the others are empty. In this case, an mmap attempt will first fail on the non-empty BARs (including the "right" BAR because of 1.) and emit bogus WARNINGS because of 3., and finally succeed on the first empty BAR because of 2. This is certainly not the intended behaviour. This patch addresses all 3 issues. Updated with an enum type for the additional parameter for pci_mmap_fits(). Cc: stable@kernel.org Signed-off-by: Martin Wilck Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 22 ++++++++++++++++------ drivers/pci/pci.h | 7 ++++++- drivers/pci/proc.c | 2 +- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index b5a7d9bfcb24..25accc9dda3b 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -705,17 +705,21 @@ void pci_remove_legacy_files(struct pci_bus *b) #ifdef HAVE_PCI_MMAP -int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma) +int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma, + enum pci_mmap_api mmap_api) { - unsigned long nr, start, size; + unsigned long nr, start, size, pci_start; + if (pci_resource_len(pdev, resno) == 0) + return 0; nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; start = vma->vm_pgoff; size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1; - if (start < size && size - start >= nr) + pci_start = (mmap_api == PCI_MMAP_SYSFS) ? 
+ pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0; + if (start >= pci_start && start < pci_start + size && + start + nr <= pci_start + size) return 1; - WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n", - current->comm, start, start+nr, pci_name(pdev), resno, size); return 0; } @@ -745,8 +749,14 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, if (i >= PCI_ROM_RESOURCE) return -ENODEV; - if (!pci_mmap_fits(pdev, i, vma)) + if (!pci_mmap_fits(pdev, i, vma, PCI_MMAP_SYSFS)) { + WARN(1, "process \"%s\" tried to map 0x%08lx bytes " + "at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n", + current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff, + pci_name(pdev), i, + pci_resource_start(pdev, i), pci_resource_len(pdev, i)); return -EINVAL; + } /* pci_mmap_page_range() expects the same kind of entry as coming * from /proc/bus/pci/ which is a "user visible" value. If this is diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f5c7c382765f..7d33f6673868 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -22,8 +22,13 @@ extern void pci_remove_firmware_label_files(struct pci_dev *pdev); #endif extern void pci_cleanup_rom(struct pci_dev *dev); #ifdef HAVE_PCI_MMAP +enum pci_mmap_api { + PCI_MMAP_SYSFS, /* mmap on /sys/bus/pci/devices//resource */ + PCI_MMAP_PROCFS /* mmap on /proc/bus/pci/ */ +}; extern int pci_mmap_fits(struct pci_dev *pdev, int resno, - struct vm_area_struct *vma); + struct vm_area_struct *vmai, + enum pci_mmap_api mmap_api); #endif int pci_probe_reset_function(struct pci_dev *dev); diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 297b72c880a1..ea00647f4732 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -257,7 +257,7 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) /* Make sure the caller is mapping a real resource for this device */ for (i = 0; i < PCI_ROM_RESOURCE; i++) { - if (pci_mmap_fits(dev, i, vma)) + if 
(pci_mmap_fits(dev, i, vma, PCI_MMAP_PROCFS)) break; } From 97c145f7c87453cec90e91238fba5fe2c1561b32 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 5 Nov 2010 15:16:36 -0400 Subject: [PATCH 27/69] PCI: read current power state at enable time When we enable a PCI device, we avoid doing a lot of the initial setup work if the device's enable count is non-zero. If we don't fetch the power state though, we may later fail to set up MSI due to the unknown status. So pick it up before we short circuit the rest due to a pre-existing enable or mismatched enable/disable pair (as happens with VGA devices, which are special in a special way). Tested-by: Jesse Brandeburg Reported-by: Dave Airlie Tested-by: Dave Airlie Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e98c8104297b..710c8a29be0d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1007,6 +1007,18 @@ static int __pci_enable_device_flags(struct pci_dev *dev, int err; int i, bars = 0; + /* + * Power state could be unknown at this point, either due to a fresh + * boot or a device removal call. So get the current power state + * so that things like MSI message writing will behave as expected + * (e.g. if the device really is in D0 at enable time). 
+ */ + if (dev->pm_cap) { + u16 pmcsr; + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); + } + if (atomic_add_return(1, &dev->enable_cnt) > 1) return 0; /* already enabled */ From 82e3e767c21fef2b1b38868e20eb4e470a1e38e3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 10 Nov 2010 10:26:07 -0700 Subject: [PATCH 28/69] PCI: fix pci_bus_alloc_resource() hang, prefer positive decode When a PCI bus has two resources with the same start/end, e.g., pci_bus 0000:04: resource 2 [mem 0xd0000000-0xd7ffffff pref] pci_bus 0000:04: resource 7 [mem 0xd0000000-0xd7ffffff] the previous pci_bus_find_resource_prev() implementation would alternate between them forever: pci_bus_find_resource_prev(... [mem 0xd0000000-0xd7ffffff pref]) returns [mem 0xd0000000-0xd7ffffff] pci_bus_find_resource_prev(... [mem 0xd0000000-0xd7ffffff]) returns [mem 0xd0000000-0xd7ffffff pref] pci_bus_find_resource_prev(... [mem 0xd0000000-0xd7ffffff pref]) returns [mem 0xd0000000-0xd7ffffff] ... This happened because there was no ordering between two resources with the same start and end. A resource that had the same start and end as the cursor, but was not itself the cursor, was considered to be before the cursor. This patch fixes the hang by making a fixed ordering between any two resources. In addition, it tries to allocate from positively decoded regions before using any subtractively decoded resources. This means we will use a positive decode region before a subtractive decode one, even if it means using a smaller address. 
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=22062 Reported-by: Borislav Petkov Tested-by: Borislav Petkov Acked-by: Linus Torvalds Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 70 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 5624db8c9ad0..003170ea2e39 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -64,17 +64,57 @@ void pci_bus_remove_resources(struct pci_bus *bus) } } +static bool pci_bus_resource_better(struct resource *res1, bool pos1, + struct resource *res2, bool pos2) +{ + /* If exactly one is positive decode, always prefer that one */ + if (pos1 != pos2) + return pos1 ? true : false; + + /* Prefer the one that contains the highest address */ + if (res1->end != res2->end) + return (res1->end > res2->end) ? true : false; + + /* Otherwise, prefer the one with highest "center of gravity" */ + if (res1->start != res2->start) + return (res1->start > res2->start) ? true : false; + + /* Otherwise, choose one arbitrarily (but consistently) */ + return (res1 > res2) ? true : false; +} + +static bool pci_bus_resource_positive(struct pci_bus *bus, struct resource *res) +{ + struct pci_bus_resource *bus_res; + + /* + * This relies on the fact that pci_bus.resource[] refers to P2P or + * CardBus bridge base/limit registers, which are always positively + * decoded. The pci_bus.resources list contains host bridge or + * subtractively decoded resources. + */ + list_for_each_entry(bus_res, &bus->resources, list) { + if (bus_res->res == res) + return (bus_res->flags & PCI_SUBTRACTIVE_DECODE) ? + false : true; + } + return true; +} + /* - * Find the highest-address bus resource below the cursor "res". If the - * cursor is NULL, return the highest resource. + * Find the next-best bus resource after the cursor "res". If the cursor is + * NULL, return the best resource. 
"Best" means that we prefer positive + * decode regions over subtractive decode, then those at higher addresses. */ static struct resource *pci_bus_find_resource_prev(struct pci_bus *bus, unsigned int type, struct resource *res) { + bool res_pos, r_pos, prev_pos = false; struct resource *r, *prev = NULL; int i; + res_pos = pci_bus_resource_positive(bus, res); pci_bus_for_each_resource(bus, r, i) { if (!r) continue; @@ -82,26 +122,14 @@ static struct resource *pci_bus_find_resource_prev(struct pci_bus *bus, if ((r->flags & IORESOURCE_TYPE_BITS) != type) continue; - /* If this resource is at or past the cursor, skip it */ - if (res) { - if (r == res) - continue; - if (r->end > res->end) - continue; - if (r->end == res->end && r->start > res->start) - continue; + r_pos = pci_bus_resource_positive(bus, r); + if (!res || pci_bus_resource_better(res, res_pos, r, r_pos)) { + if (!prev || pci_bus_resource_better(r, r_pos, + prev, prev_pos)) { + prev = r; + prev_pos = r_pos; + } } - - if (!prev) - prev = r; - - /* - * A small resource is higher than a large one that ends at - * the same address. 
- */ - if (r->end > prev->end || - (r->end == prev->end && r->start > prev->start)) - prev = r; } return prev; From c39d8d558dd79ce3444bfd590f5d8f0b8ad85879 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Sat, 13 Nov 2010 16:01:59 +0900 Subject: [PATCH 29/69] ARM: SAMSUNG: Fix HAVE_S3C2410_WATCHDOG warnings Fix build warnings warning: (ARCH_S3C64XX && && WATCHDOG || ARCH_S5P64X0 && && WATCHDOG || ARCH_S5P6442 && && WATCHDOG || ARCH_S5PC100 && || ARCH_S5PV210 && || ARCH_S5PV310 && || MACH_SMDK6410 && ARCH_S3C64XX) selects HAVE_S3C2410_WATCHDOG which has unmet direct dependencies (WATCHDOG) Signed-off-by: Kyungmin Park [kgene.kim@samsung.com: Added fix same warning(mach-s3c64xx/Kconfig)] Signed-off-by: Kukjin Kim --- arch/arm/Kconfig | 12 ++++++------ arch/arm/mach-s3c64xx/Kconfig | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a19a5266d5fc..a30271385e70 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -677,7 +677,7 @@ config ARCH_S3C64XX select USB_ARCH_HAS_OHCI select SAMSUNG_GPIOLIB_4BIT select HAVE_S3C2410_I2C - select HAVE_S3C2410_WATCHDOG + select HAVE_S3C2410_WATCHDOG if WATCHDOG help Samsung S3C64XX series based systems @@ -686,7 +686,7 @@ config ARCH_S5P64X0 select CPU_V6 select GENERIC_GPIO select HAVE_CLK - select HAVE_S3C2410_WATCHDOG + select HAVE_S3C2410_WATCHDOG if WATCHDOG select ARCH_USES_GETTIMEOFFSET select HAVE_S3C2410_I2C select HAVE_S3C_RTC @@ -700,7 +700,7 @@ config ARCH_S5P6442 select GENERIC_GPIO select HAVE_CLK select ARCH_USES_GETTIMEOFFSET - select HAVE_S3C2410_WATCHDOG + select HAVE_S3C2410_WATCHDOG if WATCHDOG help Samsung S5P6442 CPU based systems @@ -713,7 +713,7 @@ config ARCH_S5PC100 select ARCH_USES_GETTIMEOFFSET select HAVE_S3C2410_I2C select HAVE_S3C_RTC - select HAVE_S3C2410_WATCHDOG + select HAVE_S3C2410_WATCHDOG if WATCHDOG help Samsung S5PC100 series based systems @@ -728,7 +728,7 @@ config ARCH_S5PV210 select ARCH_USES_GETTIMEOFFSET select 
HAVE_S3C2410_I2C select HAVE_S3C_RTC - select HAVE_S3C2410_WATCHDOG + select HAVE_S3C2410_WATCHDOG if WATCHDOG help Samsung S5PV210/S5PC110 series based systems @@ -741,7 +741,7 @@ config ARCH_S5PV310 select GENERIC_CLOCKEVENTS select HAVE_S3C_RTC select HAVE_S3C2410_I2C - select HAVE_S3C2410_WATCHDOG + select HAVE_S3C2410_WATCHDOG if WATCHDOG help Samsung S5PV310 series based systems diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig index 1ca7bdc6485c..579d2f0f4dd0 100644 --- a/arch/arm/mach-s3c64xx/Kconfig +++ b/arch/arm/mach-s3c64xx/Kconfig @@ -143,7 +143,7 @@ config MACH_SMDK6410 select S3C_DEV_USB_HSOTG select S3C_DEV_WDT select SAMSUNG_DEV_KEYPAD - select HAVE_S3C2410_WATCHDOG + select HAVE_S3C2410_WATCHDOG if WATCHDOG select S3C64XX_SETUP_SDHCI select S3C64XX_SETUP_I2C1 select S3C64XX_SETUP_IDE From 20676c15ed91b5862e17a29b05ec977af483700f Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Sat, 13 Nov 2010 16:08:32 +0900 Subject: [PATCH 30/69] ARM: SAMSUNG: Fix HAVE_S3C2410_I2C warnings This patch fixes following warnings. 
warning: (ARCH_S3C2410 && || ARCH_S3C64XX && || ARCH_S5P64X0 && || ARCH_S5PC100 && || ARCH_S5PV210 && || ARCH_S5PV310 && ) selects HAVE_S3C2410_I2C which has unmet direct dependencies (I2C) Signed-off-by: Kukjin Kim --- arch/arm/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a30271385e70..30ef76a18ef7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -646,7 +646,7 @@ config ARCH_S3C2410 select ARCH_HAS_CPUFREQ select HAVE_CLK select ARCH_USES_GETTIMEOFFSET - select HAVE_S3C2410_I2C + select HAVE_S3C2410_I2C if I2C help Samsung S3C2410X CPU based systems, such as the Simtec Electronics BAST (), the IPAQ 1940 or @@ -676,7 +676,7 @@ config ARCH_S3C64XX select S3C_DEV_NAND select USB_ARCH_HAS_OHCI select SAMSUNG_GPIOLIB_4BIT - select HAVE_S3C2410_I2C + select HAVE_S3C2410_I2C if I2C select HAVE_S3C2410_WATCHDOG if WATCHDOG help Samsung S3C64XX series based systems @@ -688,7 +688,7 @@ config ARCH_S5P64X0 select HAVE_CLK select HAVE_S3C2410_WATCHDOG if WATCHDOG select ARCH_USES_GETTIMEOFFSET - select HAVE_S3C2410_I2C + select HAVE_S3C2410_I2C if I2C select HAVE_S3C_RTC help Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440, @@ -711,7 +711,7 @@ config ARCH_S5PC100 select CPU_V7 select ARM_L1_CACHE_SHIFT_6 select ARCH_USES_GETTIMEOFFSET - select HAVE_S3C2410_I2C + select HAVE_S3C2410_I2C if I2C select HAVE_S3C_RTC select HAVE_S3C2410_WATCHDOG if WATCHDOG help @@ -726,7 +726,7 @@ config ARCH_S5PV210 select ARM_L1_CACHE_SHIFT_6 select ARCH_HAS_CPUFREQ select ARCH_USES_GETTIMEOFFSET - select HAVE_S3C2410_I2C + select HAVE_S3C2410_I2C if I2C select HAVE_S3C_RTC select HAVE_S3C2410_WATCHDOG if WATCHDOG help @@ -740,7 +740,7 @@ config ARCH_S5PV310 select HAVE_CLK select GENERIC_CLOCKEVENTS select HAVE_S3C_RTC - select HAVE_S3C2410_I2C + select HAVE_S3C2410_I2C if I2C select HAVE_S3C2410_WATCHDOG if WATCHDOG help Samsung S5PV310 series based systems From 
754961a8e1ef49ee2d304d2ab086cf0aa6809214 Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Sat, 13 Nov 2010 16:11:46 +0900 Subject: [PATCH 31/69] ARM: SAMSUNG: Fix HAVE_S3C_RTC warnings This patch fixes following build warnings. warning: (ARCH_S5P64X0 && || ARCH_S5PC100 && || ARCH_S5PV210 && || ARCH_S5PV310 && ) selects HAVE_S3C_RTC which has unmet direct dependencies (RTC_CLASS) Signed-off-by: Kukjin Kim --- arch/arm/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 30ef76a18ef7..bb854cde3f81 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -689,7 +689,7 @@ config ARCH_S5P64X0 select HAVE_S3C2410_WATCHDOG if WATCHDOG select ARCH_USES_GETTIMEOFFSET select HAVE_S3C2410_I2C if I2C - select HAVE_S3C_RTC + select HAVE_S3C_RTC if RTC_CLASS help Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440, SMDK6450. @@ -712,7 +712,7 @@ config ARCH_S5PC100 select ARM_L1_CACHE_SHIFT_6 select ARCH_USES_GETTIMEOFFSET select HAVE_S3C2410_I2C if I2C - select HAVE_S3C_RTC + select HAVE_S3C_RTC if RTC_CLASS select HAVE_S3C2410_WATCHDOG if WATCHDOG help Samsung S5PC100 series based systems @@ -727,7 +727,7 @@ config ARCH_S5PV210 select ARCH_HAS_CPUFREQ select ARCH_USES_GETTIMEOFFSET select HAVE_S3C2410_I2C if I2C - select HAVE_S3C_RTC + select HAVE_S3C_RTC if RTC_CLASS select HAVE_S3C2410_WATCHDOG if WATCHDOG help Samsung S5PV210/S5PC110 series based systems @@ -739,7 +739,7 @@ config ARCH_S5PV310 select GENERIC_GPIO select HAVE_CLK select GENERIC_CLOCKEVENTS - select HAVE_S3C_RTC + select HAVE_S3C_RTC if RTC_CLASS select HAVE_S3C2410_I2C if I2C select HAVE_S3C2410_WATCHDOG if WATCHDOG help From 731edacb7567c15f25b1fccac18a203bd432ae5e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Nov 2010 11:55:16 +0100 Subject: [PATCH 32/69] mtd: fix bdev exclusive open bugs in block2mtd::add_device() There are two bdev exclusive open bugs. * open_bdev_exclusive() must not be called with NULL holder. 
Use dev as the holder. * open_by_devnum() doesn't open the bdev exclusively but block2mtd_free_device() always assumes it. Explicitly claim the bdev. The latter is rather clumsy but will be simplified with future blkdev_get/put() cleanups. - Updated to use local variable @mode to cache FMODE_* masks as suggested by Artem Bityutskiy. Signed-off-by: Tejun Heo Cc: linux-mtd@lists.infradead.org Cc: Artem Bityutskiy --- drivers/mtd/devices/block2mtd.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 2cf0cc6a4189..a9e2d3b38aeb 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -234,6 +234,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) /* FIXME: ensure that mtd->size % erase_size == 0 */ static struct block2mtd_dev *add_device(char *devname, int erase_size) { + const fmode_t mode = FMODE_READ | FMODE_WRITE; struct block_device *bdev; struct block2mtd_dev *dev; char *name; @@ -246,7 +247,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) return NULL; /* Get a handle on the device */ - bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, NULL); + bdev = open_bdev_exclusive(devname, mode, dev); #ifndef MODULE if (IS_ERR(bdev)) { @@ -255,7 +256,15 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) dev_t devt = name_to_dev_t(devname); if (devt) { - bdev = open_by_devnum(devt, FMODE_WRITE | FMODE_READ); + bdev = open_by_devnum(devt, mode); + if (!IS_ERR(bdev)) { + int ret; + ret = bd_claim(bdev, dev); + if (ret) { + blkdev_put(bdev, mode); + bdev = ERR_PTR(ret); + } + } } } #endif From 37004c42f7240035bc2726c340c4efa726b4818e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Nov 2010 11:55:17 +0100 Subject: [PATCH 33/69] btrfs: close_bdev_exclusive() should use the same @flags as the matching open_bdev_exclusive() In the failure path of __btrfs_open_devices(), 
close_bdev_exclusive() is called with @flags which doesn't match the one used during open_bdev_exclusive(). Fix it. Signed-off-by: Tejun Heo Cc: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cc04dc1445d6..d39596224d21 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -638,7 +638,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, error_brelse: brelse(bh); error_close: - close_bdev_exclusive(bdev, FMODE_READ); + close_bdev_exclusive(bdev, flags); error: continue; } From e09b457bdb7e8d23fc54dcef0930ac697d8de895 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Nov 2010 11:55:17 +0100 Subject: [PATCH 34/69] block: simplify holder symlink handling Code to manage symlinks in /sys/block/*/{holders|slaves} are overly complex with multiple holder considerations, redundant extra references to all involved kobjects, unused generic kobject holder support and unnecessary mixup with bd_claim/release functionalities. Strip it down to what's necessary (single gendisk holder) and make it use a separate interface. This is a step for cleaning up bd_claim/release. This patch makes dm-table slightly more complex but it will be simplified again with further changes. 
Signed-off-by: Tejun Heo Acked-by: Neil Brown Acked-by: Mike Snitzer Cc: dm-devel@redhat.com --- drivers/md/dm-table.c | 23 ++- drivers/md/md.c | 4 +- fs/block_dev.c | 322 ++++++------------------------------------ include/linux/fs.h | 16 ++- 4 files changed, 74 insertions(+), 291 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 90267f8d64ee..2c876ffc63df 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -328,12 +328,22 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev, bdev = open_by_devnum(dev, d->dm_dev.mode); if (IS_ERR(bdev)) return PTR_ERR(bdev); - r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); - if (r) + + r = bd_claim(bdev, _claim_ptr); + if (r) { blkdev_put(bdev, d->dm_dev.mode); - else - d->dm_dev.bdev = bdev; - return r; + return r; + } + + r = bd_link_disk_holder(bdev, dm_disk(md)); + if (r) { + bd_release(bdev); + blkdev_put(bdev, d->dm_dev.mode); + return r; + } + + d->dm_dev.bdev = bdev; + return 0; } /* @@ -344,7 +354,8 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) if (!d->dm_dev.bdev) return; - bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); + bd_unlink_disk_holder(d->dm_dev.bdev); + bd_release(d->dm_dev.bdev); blkdev_put(d->dm_dev.bdev, d->dm_dev.mode); d->dm_dev.bdev = NULL; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 4e957f3140a8..c47644fca1a1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1880,7 +1880,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); list_add_rcu(&rdev->same_set, &mddev->disks); - bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); + bd_link_disk_holder(rdev->bdev, mddev->gendisk); /* May as well allow recovery to be retried once */ mddev->recovery_disabled = 0; @@ -1907,7 +1907,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) MD_BUG(); return; } - bd_release_from_disk(rdev->bdev, 
rdev->mddev->gendisk); + bd_unlink_disk_holder(rdev->bdev); list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; diff --git a/fs/block_dev.c b/fs/block_dev.c index 06e8ff12b97c..9329068684d2 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -426,9 +426,6 @@ static void init_once(void *foo) mutex_init(&bdev->bd_mutex); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); -#ifdef CONFIG_SYSFS - INIT_LIST_HEAD(&bdev->bd_holder_list); -#endif inode_init_once(&ei->vfs_inode); /* Initialize mutex for freeze. */ mutex_init(&bdev->bd_fsfreeze_mutex); @@ -881,314 +878,83 @@ void bd_release(struct block_device *bdev) EXPORT_SYMBOL(bd_release); #ifdef CONFIG_SYSFS -/* - * Functions for bd_claim_by_kobject / bd_release_from_kobject - * - * If a kobject is passed to bd_claim_by_kobject() - * and the kobject has a parent directory, - * following symlinks are created: - * o from the kobject to the claimed bdev - * o from "holders" directory of the bdev to the parent of the kobject - * bd_release_from_kobject() removes these symlinks. - * - * Example: - * If /dev/dm-0 maps to /dev/sda, kobject corresponding to - * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then: - * /sys/block/dm-0/slaves/sda --> /sys/block/sda - * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 - */ - static int add_symlink(struct kobject *from, struct kobject *to) { - if (!from || !to) - return 0; return sysfs_create_link(from, to, kobject_name(to)); } static void del_symlink(struct kobject *from, struct kobject *to) { - if (!from || !to) - return; sysfs_remove_link(from, kobject_name(to)); } -/* - * 'struct bd_holder' contains pointers to kobjects symlinked by - * bd_claim_by_kobject. - * It's connected to bd_holder_list which is protected by bdev->bd_sem. 
- */ -struct bd_holder { - struct list_head list; /* chain of holders of the bdev */ - int count; /* references from the holder */ - struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */ - struct kobject *hdev; /* e.g. "/block/dm-0" */ - struct kobject *hdir; /* e.g. "/block/sda/holders" */ - struct kobject *sdev; /* e.g. "/block/sda" */ -}; - -/* - * Get references of related kobjects at once. - * Returns 1 on success. 0 on failure. - * - * Should call bd_holder_release_dirs() after successful use. - */ -static int bd_holder_grab_dirs(struct block_device *bdev, - struct bd_holder *bo) -{ - if (!bdev || !bo) - return 0; - - bo->sdir = kobject_get(bo->sdir); - if (!bo->sdir) - return 0; - - bo->hdev = kobject_get(bo->sdir->parent); - if (!bo->hdev) - goto fail_put_sdir; - - bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); - if (!bo->sdev) - goto fail_put_hdev; - - bo->hdir = kobject_get(bdev->bd_part->holder_dir); - if (!bo->hdir) - goto fail_put_sdev; - - return 1; - -fail_put_sdev: - kobject_put(bo->sdev); -fail_put_hdev: - kobject_put(bo->hdev); -fail_put_sdir: - kobject_put(bo->sdir); - - return 0; -} - -/* Put references of related kobjects at once. 
*/ -static void bd_holder_release_dirs(struct bd_holder *bo) -{ - kobject_put(bo->hdir); - kobject_put(bo->sdev); - kobject_put(bo->hdev); - kobject_put(bo->sdir); -} - -static struct bd_holder *alloc_bd_holder(struct kobject *kobj) -{ - struct bd_holder *bo; - - bo = kzalloc(sizeof(*bo), GFP_KERNEL); - if (!bo) - return NULL; - - bo->count = 1; - bo->sdir = kobj; - - return bo; -} - -static void free_bd_holder(struct bd_holder *bo) -{ - kfree(bo); -} - /** - * find_bd_holder - find matching struct bd_holder from the block device + * bd_link_disk_holder - create symlinks between holding disk and slave bdev + * @bdev: the claimed slave bdev + * @disk: the holding disk * - * @bdev: struct block device to be searched - * @bo: target struct bd_holder + * This functions creates the following sysfs symlinks. * - * Returns matching entry with @bo in @bdev->bd_holder_list. - * If found, increment the reference count and return the pointer. - * If not found, returns NULL. - */ -static struct bd_holder *find_bd_holder(struct block_device *bdev, - struct bd_holder *bo) -{ - struct bd_holder *tmp; - - list_for_each_entry(tmp, &bdev->bd_holder_list, list) - if (tmp->sdir == bo->sdir) { - tmp->count++; - return tmp; - } - - return NULL; -} - -/** - * add_bd_holder - create sysfs symlinks for bd_claim() relationship + * - from "slaves" directory of the holder @disk to the claimed @bdev + * - from "holders" directory of the @bdev to the holder @disk * - * @bdev: block device to be bd_claimed - * @bo: preallocated and initialized by alloc_bd_holder() + * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is + * passed to bd_link_disk_holder(), then: * - * Add @bo to @bdev->bd_holder_list, create symlinks. + * /sys/block/dm-0/slaves/sda --> /sys/block/sda + * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 * - * Returns 0 if symlinks are created. - * Returns -ve if something fails. 
- */ -static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) -{ - int err; - - if (!bo) - return -EINVAL; - - if (!bd_holder_grab_dirs(bdev, bo)) - return -EBUSY; - - err = add_symlink(bo->sdir, bo->sdev); - if (err) - return err; - - err = add_symlink(bo->hdir, bo->hdev); - if (err) { - del_symlink(bo->sdir, bo->sdev); - return err; - } - - list_add_tail(&bo->list, &bdev->bd_holder_list); - return 0; -} - -/** - * del_bd_holder - delete sysfs symlinks for bd_claim() relationship - * - * @bdev: block device to be bd_claimed - * @kobj: holder's kobject - * - * If there is matching entry with @kobj in @bdev->bd_holder_list - * and no other bd_claim() from the same kobject, - * remove the struct bd_holder from the list, delete symlinks for it. - * - * Returns a pointer to the struct bd_holder when it's removed from the list - * and ready to be freed. - * Returns NULL if matching claim isn't found or there is other bd_claim() - * by the same kobject. - */ -static struct bd_holder *del_bd_holder(struct block_device *bdev, - struct kobject *kobj) -{ - struct bd_holder *bo; - - list_for_each_entry(bo, &bdev->bd_holder_list, list) { - if (bo->sdir == kobj) { - bo->count--; - BUG_ON(bo->count < 0); - if (!bo->count) { - list_del(&bo->list); - del_symlink(bo->sdir, bo->sdev); - del_symlink(bo->hdir, bo->hdev); - bd_holder_release_dirs(bo); - return bo; - } - break; - } - } - - return NULL; -} - -/** - * bd_claim_by_kobject - bd_claim() with additional kobject signature - * - * @bdev: block device to be claimed - * @holder: holder's signature - * @kobj: holder's kobject + * The caller must have claimed @bdev before calling this function and + * ensure that both @bdev and @disk are valid during the creation and + * lifetime of these symlinks. * - * Do bd_claim() and if it succeeds, create sysfs symlinks between - * the bdev and the holder's kobject. - * Use bd_release_from_kobject() when relesing the claimed bdev. + * CONTEXT: + * Might sleep. 
* - * Returns 0 on success. (same as bd_claim()) - * Returns errno on failure. + * RETURNS: + * 0 on success, -errno on failure. */ -static int bd_claim_by_kobject(struct block_device *bdev, void *holder, - struct kobject *kobj) +int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) { - int err; - struct bd_holder *bo, *found; - - if (!kobj) - return -EINVAL; - - bo = alloc_bd_holder(kobj); - if (!bo) - return -ENOMEM; + int ret = 0; mutex_lock(&bdev->bd_mutex); - err = bd_claim(bdev, holder); - if (err) - goto fail; + WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk); - found = find_bd_holder(bdev, bo); - if (found) - goto fail; + /* FIXME: remove the following once add_disk() handles errors */ + if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) + goto out_unlock; - err = add_bd_holder(bdev, bo); - if (err) - bd_release(bdev); - else - bo = NULL; -fail: - mutex_unlock(&bdev->bd_mutex); - free_bd_holder(bo); - return err; -} + ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + if (ret) + goto out_unlock; -/** - * bd_release_from_kobject - bd_release() with additional kobject signature - * - * @bdev: block device to be released - * @kobj: holder's kobject - * - * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject(). 
- */ -static void bd_release_from_kobject(struct block_device *bdev, - struct kobject *kobj) -{ - if (!kobj) - return; + ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); + if (ret) { + del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + goto out_unlock; + } - mutex_lock(&bdev->bd_mutex); - bd_release(bdev); - free_bd_holder(del_bd_holder(bdev, kobj)); + bdev->bd_holder_disk = disk; +out_unlock: mutex_unlock(&bdev->bd_mutex); + return ret; } +EXPORT_SYMBOL_GPL(bd_link_disk_holder); -/** - * bd_claim_by_disk - wrapper function for bd_claim_by_kobject() - * - * @bdev: block device to be claimed - * @holder: holder's signature - * @disk: holder's gendisk - * - * Call bd_claim_by_kobject() with getting @disk->slave_dir. - */ -int bd_claim_by_disk(struct block_device *bdev, void *holder, - struct gendisk *disk) +void bd_unlink_disk_holder(struct block_device *bdev) { - return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); -} -EXPORT_SYMBOL_GPL(bd_claim_by_disk); + struct gendisk *disk = bdev->bd_holder_disk; -/** - * bd_release_from_disk - wrapper function for bd_release_from_kobject() - * - * @bdev: block device to be claimed - * @disk: holder's gendisk - * - * Call bd_release_from_kobject() and put @disk->slave_dir. 
- */ -void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk) -{ - bd_release_from_kobject(bdev, disk->slave_dir); - kobject_put(disk->slave_dir); + bdev->bd_holder_disk = NULL; + if (!disk) + return; + + del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); } -EXPORT_SYMBOL_GPL(bd_release_from_disk); +EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); #endif /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 334d68a17108..66b7f2c5d7e9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -663,7 +663,7 @@ struct block_device { void * bd_holder; int bd_holders; #ifdef CONFIG_SYSFS - struct list_head bd_holder_list; + struct gendisk * bd_holder_disk; /* for sysfs slave linkng */ #endif struct block_device * bd_contains; unsigned bd_block_size; @@ -2042,11 +2042,17 @@ extern int blkdev_put(struct block_device *, fmode_t); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); #ifdef CONFIG_SYSFS -extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); -extern void bd_release_from_disk(struct block_device *, struct gendisk *); +extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +extern void bd_unlink_disk_holder(struct block_device *bdev); #else -#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) -#define bd_release_from_disk(bdev, disk) bd_release(bdev) +static inline int bd_link_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ + return 0; +} +static inline void bd_unlink_disk_holder(struct block_device *bdev) +{ +} #endif #endif From e525fd89d380c4a94c0d63913a1dd1a593ed25e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Nov 2010 11:55:17 +0100 Subject: [PATCH 35/69] block: make blkdev_get/put() handle exclusive access Over time, block layer has accumulated a set of APIs dealing with bdev open, close, claim and release. 
* blkdev_get/put() are the primary open and close functions. * bd_claim/release() deal with exclusive open. * open/close_bdev_exclusive() are combinations of open and claim and the other way around, respectively. * bd_link/unlink_disk_holder() to create and remove holder/slave symlinks. * open_by_devnum() wraps bdget() + blkdev_get(). The interface is a bit confusing and the decoupling of open and claim makes it impossible to properly guarantee exclusive access as in-kernel open + claim sequence can disturb the existing exclusive open even before the block layer knows the current open is for another exclusive access. Reorganize the interface such that, * blkdev_get() is extended to include exclusive access management. @holder argument is added and, if @FMODE_EXCL is specified, it will gain exclusive access atomically w.r.t. other exclusive accesses. * blkdev_put() is similarly extended. It now takes @mode argument and if @FMODE_EXCL is set, it releases an exclusive access. Also, when the last exclusive claim is released, the holder/slave symlinks are removed automatically. * bd_claim/release() and close_bdev_exclusive() are no longer necessary and either made static or removed. * bd_link_disk_holder() remains the same but bd_unlink_disk_holder() is no longer necessary and removed. * open_bdev_exclusive() becomes a simple wrapper around lookup_bdev() and blkdev_get(). It also has an unexpected extra bdev_read_only() test which probably should be moved into blkdev_get(). * open_by_devnum() is modified to take @holder argument and pass it to blkdev_get(). Most of bdev open/close operations are unified into blkdev_get/put() and most exclusive accesses are tested atomically at the open time (as it should). This cleans up code and removes some, both valid and invalid, but unnecessary all the same, corner cases. open_bdev_exclusive() and open_by_devnum() can use further cleanup - rename to blkdev_get_by_path() and blkdev_get_by_devt() and drop special features.
Well, let's leave them for another day. Most conversions are straight-forward. drbd conversion is a bit more involved as there was some reordering, but the logic should stay the same. Signed-off-by: Tejun Heo Acked-by: Neil Brown Acked-by: Ryusuke Konishi Acked-by: Mike Snitzer Acked-by: Philipp Reisner Cc: Peter Osterlund Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Jan Kara Cc: Andrew Morton Cc: Andreas Dilger Cc: "Theodore Ts'o" Cc: Mark Fasheh Cc: Joel Becker Cc: Alex Elder Cc: Christoph Hellwig Cc: dm-devel@redhat.com Cc: drbd-dev@lists.linbit.com Cc: Leo Chen Cc: Scott Branden Cc: Chris Mason Cc: Steven Whitehouse Cc: Dave Kleikamp Cc: Joern Engel Cc: reiserfs-devel@vger.kernel.org Cc: Alexander Viro --- block/ioctl.c | 5 +- drivers/block/drbd/drbd_int.h | 2 - drivers/block/drbd/drbd_main.c | 7 +- drivers/block/drbd/drbd_nl.c | 103 ++++++++-------------- drivers/block/pktcdvd.c | 22 ++--- drivers/char/raw.c | 14 +-- drivers/md/dm-table.c | 15 +--- drivers/md/md.c | 14 +-- drivers/mtd/devices/block2mtd.c | 17 +--- drivers/s390/block/dasd_genhd.c | 2 +- fs/block_dev.c | 149 +++++++++++--------------------- fs/btrfs/volumes.c | 14 +-- fs/ext3/super.c | 12 +-- fs/ext4/super.c | 12 +-- fs/gfs2/ops_fstype.c | 4 +- fs/jfs/jfs_logmgr.c | 17 ++-- fs/logfs/dev_bdev.c | 4 +- fs/nilfs2/super.c | 4 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/partitions/check.c | 2 +- fs/reiserfs/journal.c | 17 ++-- fs/super.c | 14 +-- fs/xfs/linux-2.6/xfs_super.c | 2 +- include/linux/fs.h | 14 +-- kernel/power/swap.c | 5 +- mm/swapfile.c | 7 +- 26 files changed, 162 insertions(+), 318 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index d724ceb1d465..cc46d499fd27 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -294,11 +294,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return -EINVAL; if (get_user(n, (int __user *) arg)) return -EFAULT; - if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0) + if (!(mode & FMODE_EXCL) && + blkdev_get(bdev, mode | 
FMODE_EXCL, &bdev) < 0) return -EBUSY; ret = set_blocksize(bdev, n); if (!(mode & FMODE_EXCL)) - bd_release(bdev); + blkdev_put(bdev, mode | FMODE_EXCL); return ret; case BLKPG: ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9bdcf4393c0a..0590b9f67ec6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -923,8 +923,6 @@ struct drbd_md { struct drbd_backing_dev { struct block_device *backing_bdev; struct block_device *md_bdev; - struct file *lo_file; - struct file *md_file; struct drbd_md md; struct disk_conf dc; /* The user provided config... */ sector_t known_size; /* last known size of that backing device */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 25c7a73c5062..7ec1a82064a9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3361,11 +3361,8 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) if (ldev == NULL) return; - bd_release(ldev->backing_bdev); - bd_release(ldev->md_bdev); - - fput(ldev->lo_file); - fput(ldev->md_file); + blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); kfree(ldev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 87925e97e613..fd0346090289 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -855,7 +855,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp sector_t max_possible_sectors; sector_t min_md_device_sectors; struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ - struct inode *inode, *inode2; + struct block_device *bdev; struct lru_cache *resync_lru = NULL; union drbd_state ns, os; unsigned int max_seg_s; @@ -902,46 +902,40 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } } - nbc->lo_file = 
filp_open(nbc->dc.backing_dev, O_RDWR, 0); - if (IS_ERR(nbc->lo_file)) { + bdev = open_bdev_exclusive(nbc->dc.backing_dev, + FMODE_READ | FMODE_WRITE, mdev); + if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, - PTR_ERR(nbc->lo_file)); - nbc->lo_file = NULL; + PTR_ERR(bdev)); retcode = ERR_OPEN_DISK; goto fail; } + nbc->backing_bdev = bdev; - inode = nbc->lo_file->f_dentry->d_inode; - - if (!S_ISBLK(inode->i_mode)) { - retcode = ERR_DISK_NOT_BDEV; - goto fail; - } - - nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); - if (IS_ERR(nbc->md_file)) { + /* + * meta_dev_idx >= 0: external fixed size, possibly multiple + * drbd sharing one meta device. TODO in that case, paranoia + * check that [md_bdev, meta_dev_idx] is not yet used by some + * other drbd minor! (if you use drbd.conf + drbdadm, that + * should check it for you already; but if you don't, or + * someone fooled it, we need to double check here) + */ + bdev = open_bdev_exclusive(nbc->dc.meta_dev, + FMODE_READ | FMODE_WRITE, + (nbc->dc.meta_dev_idx < 0) ? 
+ (void *)mdev : (void *)drbd_m_holder); + if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, - PTR_ERR(nbc->md_file)); - nbc->md_file = NULL; + PTR_ERR(bdev)); retcode = ERR_OPEN_MD_DISK; goto fail; } + nbc->md_bdev = bdev; - inode2 = nbc->md_file->f_dentry->d_inode; - - if (!S_ISBLK(inode2->i_mode)) { - retcode = ERR_MD_NOT_BDEV; - goto fail; - } - - nbc->backing_bdev = inode->i_bdev; - if (bd_claim(nbc->backing_bdev, mdev)) { - printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n", - nbc->backing_bdev, mdev, - nbc->backing_bdev->bd_holder, - nbc->backing_bdev->bd_contains->bd_holder, - nbc->backing_bdev->bd_holders); - retcode = ERR_BDCLAIM_DISK; + if ((nbc->backing_bdev == nbc->md_bdev) != + (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || + nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { + retcode = ERR_MD_IDX_INVALID; goto fail; } @@ -950,28 +944,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp offsetof(struct bm_extent, lce)); if (!resync_lru) { retcode = ERR_NOMEM; - goto release_bdev_fail; - } - - /* meta_dev_idx >= 0: external fixed size, - * possibly multiple drbd sharing one meta device. - * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is - * not yet used by some other drbd minor! - * (if you use drbd.conf + drbdadm, - * that should check it for you already; but if you don't, or someone - * fooled it, we need to double check here) */ - nbc->md_bdev = inode2->i_bdev; - if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? 
(void *)mdev - : (void *) drbd_m_holder)) { - retcode = ERR_BDCLAIM_MD_DISK; - goto release_bdev_fail; - } - - if ((nbc->backing_bdev == nbc->md_bdev) != - (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || - nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { - retcode = ERR_MD_IDX_INVALID; - goto release_bdev2_fail; + goto fail; } /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ @@ -982,7 +955,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp (unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) nbc->dc.disk_size); retcode = ERR_DISK_TO_SMALL; - goto release_bdev2_fail; + goto fail; } if (nbc->dc.meta_dev_idx < 0) { @@ -999,7 +972,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp dev_warn(DEV, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) min_md_device_sectors); - goto release_bdev2_fail; + goto fail; } /* Make sure the new disk is big enough @@ -1007,7 +980,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (drbd_get_max_capacity(nbc) < drbd_get_capacity(mdev->this_bdev)) { retcode = ERR_DISK_TO_SMALL; - goto release_bdev2_fail; + goto fail; } nbc->known_size = drbd_get_capacity(nbc->backing_bdev); @@ -1030,7 +1003,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); drbd_resume_io(mdev); if (retcode < SS_SUCCESS) - goto release_bdev2_fail; + goto fail; if (!get_ldev_if_state(mdev, D_ATTACHING)) goto force_diskless; @@ -1264,18 +1237,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp force_diskless: drbd_force_state(mdev, NS(disk, D_DISKLESS)); drbd_md_sync(mdev); - release_bdev2_fail: - if (nbc) - bd_release(nbc->md_bdev); - release_bdev_fail: - if (nbc) - bd_release(nbc->backing_bdev); fail: if (nbc) { - if 
(nbc->lo_file) - fput(nbc->lo_file); - if (nbc->md_file) - fput(nbc->md_file); + if (nbc->backing_bdev) + blkdev_put(nbc->backing_bdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL); + if (nbc->md_bdev) + blkdev_put(nbc->md_bdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL); kfree(nbc); } lc_destroy(resync_lru); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 19b3568e9326..77d70eebb6b2 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2296,15 +2296,12 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * so bdget() can't fail. */ bdget(pd->bdev->bd_dev); - if ((ret = blkdev_get(pd->bdev, FMODE_READ))) + if ((ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd))) goto out; - if ((ret = bd_claim(pd->bdev, pd))) - goto out_putdev; - if ((ret = pkt_get_last_written(pd, &lba))) { printk(DRIVER_NAME": pkt_get_last_written failed\n"); - goto out_unclaim; + goto out_putdev; } set_capacity(pd->disk, lba << 2); @@ -2314,7 +2311,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) q = bdev_get_queue(pd->bdev); if (write) { if ((ret = pkt_open_write(pd))) - goto out_unclaim; + goto out_putdev; /* * Some CDRW drives can not handle writes larger than one packet, * even if the size is a multiple of the packet size. 
@@ -2329,23 +2326,21 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) } if ((ret = pkt_set_segment_merging(pd, q))) - goto out_unclaim; + goto out_putdev; if (write) { if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { printk(DRIVER_NAME": not enough memory for buffers\n"); ret = -ENOMEM; - goto out_unclaim; + goto out_putdev; } printk(DRIVER_NAME": %lukB available on disc\n", lba << 1); } return 0; -out_unclaim: - bd_release(pd->bdev); out_putdev: - blkdev_put(pd->bdev, FMODE_READ); + blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); out: return ret; } @@ -2362,8 +2357,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_lock_door(pd, 0); pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); - bd_release(pd->bdev); - blkdev_put(pd->bdev, FMODE_READ); + blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); pkt_shrink_pktlist(pd); } @@ -2733,7 +2727,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) bdev = bdget(dev); if (!bdev) return -ENOMEM; - ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY); + ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); if (ret) return ret; diff --git a/drivers/char/raw.c b/drivers/char/raw.c index bfe25ea9766b..b4b9d5a47885 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -65,15 +65,12 @@ static int raw_open(struct inode *inode, struct file *filp) if (!bdev) goto out; igrab(bdev->bd_inode); - err = blkdev_get(bdev, filp->f_mode); + err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open); if (err) goto out; - err = bd_claim(bdev, raw_open); - if (err) - goto out1; err = set_blocksize(bdev, bdev_logical_block_size(bdev)); if (err) - goto out2; + goto out1; filp->f_flags |= O_DIRECT; filp->f_mapping = bdev->bd_inode->i_mapping; if (++raw_devices[minor].inuse == 1) @@ -83,10 +80,8 @@ static int raw_open(struct inode *inode, struct file *filp) mutex_unlock(&raw_mutex); return 0; -out2: - bd_release(bdev); out1: - blkdev_put(bdev, filp->f_mode); + blkdev_put(bdev, filp->f_mode | 
FMODE_EXCL); out: mutex_unlock(&raw_mutex); return err; @@ -110,8 +105,7 @@ static int raw_release(struct inode *inode, struct file *filp) } mutex_unlock(&raw_mutex); - bd_release(bdev); - blkdev_put(bdev, filp->f_mode); + blkdev_put(bdev, filp->f_mode | FMODE_EXCL); return 0; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2c876ffc63df..9e88ca0c55e9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -325,20 +325,13 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev, BUG_ON(d->dm_dev.bdev); - bdev = open_by_devnum(dev, d->dm_dev.mode); + bdev = open_by_devnum(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); if (IS_ERR(bdev)) return PTR_ERR(bdev); - r = bd_claim(bdev, _claim_ptr); - if (r) { - blkdev_put(bdev, d->dm_dev.mode); - return r; - } - r = bd_link_disk_holder(bdev, dm_disk(md)); if (r) { - bd_release(bdev); - blkdev_put(bdev, d->dm_dev.mode); + blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL); return r; } @@ -354,9 +347,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) if (!d->dm_dev.bdev) return; - bd_unlink_disk_holder(d->dm_dev.bdev); - bd_release(d->dm_dev.bdev); - blkdev_put(d->dm_dev.bdev, d->dm_dev.mode); + blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); d->dm_dev.bdev = NULL; } diff --git a/drivers/md/md.c b/drivers/md/md.c index c47644fca1a1..6af951ffe0bb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1907,7 +1907,6 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) MD_BUG(); return; } - bd_unlink_disk_holder(rdev->bdev); list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; @@ -1935,19 +1934,13 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + shared ? 
(mdk_rdev_t *)lock_rdev : rdev); if (IS_ERR(bdev)) { printk(KERN_ERR "md: could not open %s.\n", __bdevname(dev, b)); return PTR_ERR(bdev); } - err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev); - if (err) { - printk(KERN_ERR "md: could not bd_claim %s.\n", - bdevname(bdev, b)); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); - return err; - } if (!shared) set_bit(AllReserved, &rdev->flags); rdev->bdev = bdev; @@ -1960,8 +1953,7 @@ static void unlock_rdev(mdk_rdev_t *rdev) rdev->bdev = NULL; if (!bdev) MD_BUG(); - bd_release(bdev); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } void md_autodetect_dev(dev_t dev); diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index a9e2d3b38aeb..aa557beb8f51 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -224,7 +224,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) if (dev->blkdev) { invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, 0, -1); - close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE); + blkdev_put(dev->blkdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } kfree(dev); @@ -234,7 +234,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) /* FIXME: ensure that mtd->size % erase_size == 0 */ static struct block2mtd_dev *add_device(char *devname, int erase_size) { - const fmode_t mode = FMODE_READ | FMODE_WRITE; + const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; struct block_device *bdev; struct block2mtd_dev *dev; char *name; @@ -255,17 +255,8 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) to resolve the device name by other means. 
*/ dev_t devt = name_to_dev_t(devname); - if (devt) { - bdev = open_by_devnum(devt, mode); - if (!IS_ERR(bdev)) { - int ret; - ret = bd_claim(bdev, dev); - if (ret) { - blkdev_put(bdev, mode); - bdev = ERR_PTR(ret); - } - } - } + if (devt) + bdev = open_by_devnum(devt, mode, dev); } #endif diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 30a1ca3d08b7..5505bc07e1e7 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -103,7 +103,7 @@ int dasd_scan_partitions(struct dasd_block *block) struct block_device *bdev; bdev = bdget_disk(block->gdp, 0); - if (!bdev || blkdev_get(bdev, FMODE_READ) < 0) + if (!bdev || blkdev_get(bdev, FMODE_READ, NULL) < 0) return -ENODEV; /* * See fs/partition/check.c:register_disk,rescan_partitions diff --git a/fs/block_dev.c b/fs/block_dev.c index 9329068684d2..fc48912354d1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -660,7 +660,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, else if (bdev->bd_contains == bdev) return true; /* is a whole device which isn't held */ - else if (whole->bd_holder == bd_claim) + else if (whole->bd_holder == bd_may_claim) return true; /* is a partition of a device that is being partitioned */ else if (whole->bd_holder != NULL) return false; /* is a partition of a held device */ @@ -807,10 +807,10 @@ static void __bd_claim(struct block_device *bdev, struct block_device *whole, { /* note that for a whole device bd_holders * will be incremented twice, and bd_holder will - * be set to bd_claim before being set to holder + * be set to bd_may_claim before being set to holder */ whole->bd_holders++; - whole->bd_holder = bd_claim; + whole->bd_holder = bd_may_claim; bdev->bd_holders++; bdev->bd_holder = holder; } @@ -835,37 +835,7 @@ static void bd_finish_claiming(struct block_device *bdev, __bd_abort_claiming(whole, holder); /* not actually an abort */ } -/** - * bd_claim - claim a block device - * @bdev: block 
device to claim - * @holder: holder trying to claim @bdev - * - * Try to claim @bdev which must have been opened successfully. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * 0 if successful, -EBUSY if @bdev is already claimed. - */ -int bd_claim(struct block_device *bdev, void *holder) -{ - struct block_device *whole = bdev->bd_contains; - int res; - - might_sleep(); - - spin_lock(&bdev_lock); - res = bd_prepare_to_claim(bdev, whole, holder); - if (res == 0) - __bd_claim(bdev, whole, holder); - spin_unlock(&bdev_lock); - - return res; -} -EXPORT_SYMBOL(bd_claim); - -void bd_release(struct block_device *bdev) +static void bd_release(struct block_device *bdev) { spin_lock(&bdev_lock); if (!--bdev->bd_contains->bd_holders) @@ -875,8 +845,6 @@ void bd_release(struct block_device *bdev) spin_unlock(&bdev_lock); } -EXPORT_SYMBOL(bd_release); - #ifdef CONFIG_SYSFS static int add_symlink(struct kobject *from, struct kobject *to) { @@ -943,7 +911,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) } EXPORT_SYMBOL_GPL(bd_link_disk_holder); -void bd_unlink_disk_holder(struct block_device *bdev) +static void bd_unlink_disk_holder(struct block_device *bdev) { struct gendisk *disk = bdev->bd_holder_disk; @@ -954,7 +922,9 @@ void bd_unlink_disk_holder(struct block_device *bdev) del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); } -EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); +#else +static inline void bd_unlink_disk_holder(struct block_device *bdev) +{ } #endif /* @@ -964,12 +934,12 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); * to be used for internal purposes. If you ever need it - reconsider * your API. 
*/ -struct block_device *open_by_devnum(dev_t dev, fmode_t mode) +struct block_device *open_by_devnum(dev_t dev, fmode_t mode, void *holder) { struct block_device *bdev = bdget(dev); int err = -ENOMEM; if (bdev) - err = blkdev_get(bdev, mode); + err = blkdev_get(bdev, mode, holder); return err ? ERR_PTR(err) : bdev; } @@ -1235,17 +1205,37 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } -int blkdev_get(struct block_device *bdev, fmode_t mode) +int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) { - return __blkdev_get(bdev, mode, 0); + struct block_device *whole = NULL; + int res; + + WARN_ON_ONCE((mode & FMODE_EXCL) && !holder); + + if ((mode & FMODE_EXCL) && holder) { + whole = bd_start_claiming(bdev, holder); + if (IS_ERR(whole)) { + bdput(bdev); + return PTR_ERR(whole); + } + } + + res = __blkdev_get(bdev, mode, 0); + + if (whole) { + if (res == 0) + bd_finish_claiming(bdev, whole, holder); + else + bd_abort_claiming(whole, holder); + } + + return res; } EXPORT_SYMBOL(blkdev_get); static int blkdev_open(struct inode * inode, struct file * filp) { - struct block_device *whole = NULL; struct block_device *bdev; - int res; /* * Preserve backwards compatibility and allow large file access @@ -1266,26 +1256,9 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (bdev == NULL) return -ENOMEM; - if (filp->f_mode & FMODE_EXCL) { - whole = bd_start_claiming(bdev, filp); - if (IS_ERR(whole)) { - bdput(bdev); - return PTR_ERR(whole); - } - } - filp->f_mapping = bdev->bd_inode->i_mapping; - res = blkdev_get(bdev, filp->f_mode); - - if (whole) { - if (res == 0) - bd_finish_claiming(bdev, whole, filp); - else - bd_abort_claiming(whole, filp); - } - - return res; + return blkdev_get(bdev, filp->f_mode, filp); } static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) @@ -1329,6 +1302,13 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) int 
blkdev_put(struct block_device *bdev, fmode_t mode) { + if (mode & FMODE_EXCL) { + mutex_lock(&bdev->bd_mutex); + bd_release(bdev); + if (!bdev->bd_holders) + bd_unlink_disk_holder(bdev); + mutex_unlock(&bdev->bd_mutex); + } return __blkdev_put(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_put); @@ -1336,8 +1316,7 @@ EXPORT_SYMBOL(blkdev_put); static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); - if (bdev->bd_holder == filp) - bd_release(bdev); + return blkdev_put(bdev, filp->f_mode); } @@ -1494,55 +1473,27 @@ EXPORT_SYMBOL(lookup_bdev); */ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) { - struct block_device *bdev, *whole; + struct block_device *bdev; int error; bdev = lookup_bdev(path); if (IS_ERR(bdev)) return bdev; - whole = bd_start_claiming(bdev, holder); - if (IS_ERR(whole)) { - bdput(bdev); - return whole; - } - - error = blkdev_get(bdev, mode); + error = blkdev_get(bdev, mode | FMODE_EXCL, holder); if (error) - goto out_abort_claiming; + return ERR_PTR(error); - error = -EACCES; - if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) - goto out_blkdev_put; + if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { + blkdev_put(bdev, mode); + return ERR_PTR(-EACCES); + } - bd_finish_claiming(bdev, whole, holder); return bdev; - -out_blkdev_put: - blkdev_put(bdev, mode); -out_abort_claiming: - bd_abort_claiming(whole, holder); - return ERR_PTR(error); } EXPORT_SYMBOL(open_bdev_exclusive); -/** - * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() - * - * @bdev: blockdevice to close - * @mode: mode, must match that used to open. - * - * This is the counterpart to open_bdev_exclusive(). 
- */ -void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) -{ - bd_release(bdev); - blkdev_put(bdev, mode); -} - -EXPORT_SYMBOL(close_bdev_exclusive); - int __invalidate_device(struct block_device *bdev) { struct super_block *sb = get_super(bdev); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d39596224d21..f1b729d3b883 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -489,7 +489,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) continue; if (device->bdev) { - close_bdev_exclusive(device->bdev, device->mode); + blkdev_put(device->bdev, device->mode | FMODE_EXCL); device->bdev = NULL; fs_devices->open_devices--; } @@ -523,7 +523,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev) { - close_bdev_exclusive(device->bdev, device->mode); + blkdev_put(device->bdev, device->mode | FMODE_EXCL); fs_devices->open_devices--; } if (device->writeable) { @@ -638,7 +638,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, error_brelse: brelse(bh); error_close: - close_bdev_exclusive(bdev, flags); + blkdev_put(bdev, flags | FMODE_EXCL); error: continue; } @@ -716,7 +716,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, brelse(bh); error_close: - close_bdev_exclusive(bdev, flags); + blkdev_put(bdev, flags | FMODE_EXCL); error: mutex_unlock(&uuid_mutex); return ret; @@ -1244,7 +1244,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->latest_bdev = next_device->bdev; if (device->bdev) { - close_bdev_exclusive(device->bdev, device->mode); + blkdev_put(device->bdev, device->mode | FMODE_EXCL); device->bdev = NULL; device->fs_devices->open_devices--; } @@ -1287,7 +1287,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) brelse(bh); error_close: if (bdev) - close_bdev_exclusive(bdev, FMODE_READ); + blkdev_put(bdev, 
FMODE_READ | FMODE_EXCL); out: mutex_unlock(&root->fs_info->volume_mutex); mutex_unlock(&uuid_mutex); @@ -1565,7 +1565,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) mutex_unlock(&root->fs_info->volume_mutex); return ret; error: - close_bdev_exclusive(bdev, 0); + blkdev_put(bdev, FMODE_EXCL); if (seeding_dev) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 2fedaf8b5012..23e7513dba9c 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -347,7 +347,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) goto fail; return bdev; @@ -364,8 +364,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) */ static int ext3_blkdev_put(struct block_device *bdev) { - bd_release(bdev); - return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } static int ext3_blkdev_remove(struct ext3_sb_info *sbi) @@ -2136,13 +2135,6 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, if (bdev == NULL) return NULL; - if (bd_claim(bdev, sb)) { - ext3_msg(sb, KERN_ERR, - "error: failed to claim external journal device"); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); - return NULL; - } - blocksize = sb->s_blocksize; hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 61182fe6254e..5dd0b3e76fa8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -647,7 +647,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) goto 
fail; return bdev; @@ -663,8 +663,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) */ static int ext4_blkdev_put(struct block_device *bdev) { - bd_release(bdev); - return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } static int ext4_blkdev_remove(struct ext4_sb_info *sbi) @@ -3758,13 +3757,6 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if (bdev == NULL) return NULL; - if (bd_claim(bdev, sb)) { - ext4_msg(sb, KERN_ERR, - "failed to claim external journal device"); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); - return NULL; - } - blocksize = sb->s_blocksize; hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3eb1393f7b81..c1f0763a022b 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1298,7 +1298,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, goto error_bdev; if (s->s_root) - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); memset(&args, 0, sizeof(args)); args.ar_quota = GFS2_QUOTA_DEFAULT; @@ -1342,7 +1342,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, deactivate_locked_super(s); return ERR_PTR(error); error_bdev: - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); return ERR_PTR(error); } diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index e1b8493b9aaa..5a290f22dcc3 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1120,16 +1120,13 @@ int lmLogOpen(struct super_block *sb) * file systems to log may have n-to-1 relationship; */ - bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); + bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + log); if (IS_ERR(bdev)) { rc = -PTR_ERR(bdev); goto free; } - if ((rc = bd_claim(bdev, log))) { - goto close; - } - log->bdev = bdev; memcpy(log->uuid, sbi->loguuid, 
sizeof(log->uuid)); @@ -1137,7 +1134,7 @@ int lmLogOpen(struct super_block *sb) * initialize log: */ if ((rc = lmLogInit(log))) - goto unclaim; + goto close; list_add(&log->journal_list, &jfs_external_logs); @@ -1163,11 +1160,8 @@ int lmLogOpen(struct super_block *sb) list_del(&log->journal_list); lbmLogShutdown(log); - unclaim: - bd_release(bdev); - close: /* close external log device */ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1512,8 +1506,7 @@ int lmLogClose(struct super_block *sb) bdev = log->bdev; rc = lmLogShutdown(log); - bd_release(bdev); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); kfree(log); diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 92ca6fbe09bd..734b9025858e 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -300,7 +300,7 @@ static int bdev_write_sb(struct super_block *sb, struct page *page) static void bdev_put_device(struct logfs_super *s) { - close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } static int bdev_can_write_buf(struct super_block *sb, u64 ofs) @@ -331,7 +331,7 @@ int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type, if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { int mtdnr = MINOR(bdev->bd_dev); - close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); return logfs_get_sb_mtd(p, mtdnr); } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f804d41ec9d3..756a6798d7c8 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1233,7 +1233,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } if (!s_new) - close_bdev_exclusive(sd.bdev, mode); + blkdev_put(sd.bdev, mode | FMODE_EXCL); return root_dentry; @@ -1242,7 +1242,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, 
failed: if (!s_new) - close_bdev_exclusive(sd.bdev, mode); + blkdev_put(sd.bdev, mode | FMODE_EXCL); return ERR_PTR(err); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 52c7557f3e25..d0a2721eaceb 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1674,7 +1674,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, goto out; reg->hr_bdev = I_BDEV(filp->f_mapping->host); - ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); + ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); if (ret) { reg->hr_bdev = NULL; goto out; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 0a8b0ad0c7e2..2e6501d034ab 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -549,7 +549,7 @@ void register_disk(struct gendisk *disk) goto exit; bdev->bd_invalidated = 1; - err = blkdev_get(bdev, FMODE_READ); + err = blkdev_get(bdev, FMODE_READ, NULL); if (err < 0) goto exit; blkdev_put(bdev, FMODE_READ); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 076c8b194682..b488136f5ace 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2552,8 +2552,6 @@ static int release_journal_dev(struct super_block *super, result = 0; if (journal->j_dev_bd != NULL) { - if (journal->j_dev_bd->bd_dev != super->s_dev) - bd_release(journal->j_dev_bd); result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); journal->j_dev_bd = NULL; } @@ -2571,7 +2569,7 @@ static int journal_init_dev(struct super_block *super, { int result; dev_t jdev; - fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; + fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; char b[BDEVNAME_SIZE]; result = 0; @@ -2585,7 +2583,9 @@ static int journal_init_dev(struct super_block *super, /* there is no "jdev" option and journal is on separate device */ if ((!jdev_name || !jdev_name[0])) { - journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); + if (jdev == super->s_dev) + blkdev_mode &= 
~FMODE_EXCL; + journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode, journal); journal->j_dev_mode = blkdev_mode; if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); @@ -2594,15 +2594,8 @@ static int journal_init_dev(struct super_block *super, "cannot init journal device '%s': %i", __bdevname(jdev, b), result); return result; - } else if (jdev != super->s_dev) { - result = bd_claim(journal->j_dev_bd, journal); - if (result) { - blkdev_put(journal->j_dev_bd, blkdev_mode); - return result; - } - + } else if (jdev != super->s_dev) set_blocksize(journal->j_dev_bd, super->s_blocksize); - } return 0; } diff --git a/fs/super.c b/fs/super.c index ca696155cd9a..22374bf0ba87 100644 --- a/fs/super.c +++ b/fs/super.c @@ -801,13 +801,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, /* * s_umount nests inside bd_mutex during - * __invalidate_device(). close_bdev_exclusive() - * acquires bd_mutex and can't be called under - * s_umount. Drop s_umount temporarily. This is safe - * as we're holding an active reference. + * __invalidate_device(). blkdev_put() acquires + * bd_mutex and can't be called under s_umount. Drop + * s_umount temporarily. This is safe as we're + * holding an active reference. 
*/ up_write(&s->s_umount); - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); down_write(&s->s_umount); } else { char b[BDEVNAME_SIZE]; @@ -831,7 +831,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); error: return ERR_PTR(error); } @@ -862,7 +862,7 @@ void kill_block_super(struct super_block *sb) bdev->bd_super = NULL; generic_shutdown_super(sb); sync_blockdev(bdev); - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); } EXPORT_SYMBOL(kill_block_super); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9f3a78fe6ae4..a1a6e5ceea67 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -623,7 +623,7 @@ xfs_blkdev_put( struct block_device *bdev) { if (bdev) - close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 66b7f2c5d7e9..1a033e8ebe4c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2006,7 +2006,8 @@ extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); -extern struct block_device *open_by_devnum(dev_t, fmode_t); +extern struct block_device *open_by_devnum(dev_t dev, fmode_t mode, + void *holder); extern void invalidate_bdev(struct block_device *); extern int sync_blockdev(struct block_device *bdev); extern struct super_block *freeze_bdev(struct block_device *); @@ -2037,22 +2038,16 @@ extern const struct file_operations def_fifo_fops; extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned 
long); -extern int blkdev_get(struct block_device *, fmode_t); -extern int blkdev_put(struct block_device *, fmode_t); -extern int bd_claim(struct block_device *, void *); -extern void bd_release(struct block_device *); +extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); +extern int blkdev_put(struct block_device *bdev, fmode_t mode); #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -extern void bd_unlink_disk_holder(struct block_device *bdev); #else static inline int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) { return 0; } -static inline void bd_unlink_disk_holder(struct block_device *bdev) -{ -} #endif #endif @@ -2089,7 +2084,6 @@ extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); -extern void close_bdev_exclusive(struct block_device *, fmode_t); extern void blkdev_show(struct seq_file *,off_t); #else diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a0e4a86ccf94..513a77f1a0b3 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -223,7 +223,7 @@ static int swsusp_swap_check(void) return res; root_swap = res; - res = blkdev_get(hib_resume_bdev, FMODE_WRITE); + res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL); if (res) return res; @@ -907,7 +907,8 @@ int swsusp_check(void) { int error; - hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); + hib_resume_bdev = open_by_devnum(swsusp_resume_device, + FMODE_READ, NULL); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); diff --git a/mm/swapfile.c b/mm/swapfile.c index 67ddaaf98c74..b6adcfbf6f48 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1677,7 +1677,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) if 
(S_ISBLK(inode->i_mode)) { struct block_device *bdev = I_BDEV(inode); set_blocksize(bdev, p->old_block_size); - bd_release(bdev); + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } else { mutex_lock(&inode->i_mutex); inode->i_flags &= ~S_SWAPFILE; @@ -1939,7 +1939,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = -EINVAL; if (S_ISBLK(inode->i_mode)) { bdev = I_BDEV(inode); - error = bd_claim(bdev, sys_swapon); + error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, + sys_swapon); if (error < 0) { bdev = NULL; error = -EINVAL; @@ -2136,7 +2137,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) bad_swap: if (bdev) { set_blocksize(bdev, p->old_block_size); - bd_release(bdev); + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } destroy_swap_extents(p); swap_cgroup_swapoff(type); From 6a027eff62f6ae32d49f2ae5dadd6f4eee1ddae2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Nov 2010 11:55:17 +0100 Subject: [PATCH 36/69] block: reorganize claim/release implementation With claim/release rolled into blkdev_get/put(), there's no reason to keep bd_abort/finish_claim(), __bd_claim() and bd_release() as separate functions. It only makes the code difficult to follow. Collapse them into blkdev_get/put(). This will ease future changes around claim/release. 
Signed-off-by: Tejun Heo --- fs/block_dev.c | 127 +++++++++++++++++++------------------------------ 1 file changed, 48 insertions(+), 79 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index fc48912354d1..269bfbbd10fc 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -772,79 +772,6 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, } } -/* releases bdev_lock */ -static void __bd_abort_claiming(struct block_device *whole, void *holder) -{ - BUG_ON(whole->bd_claiming != holder); - whole->bd_claiming = NULL; - wake_up_bit(&whole->bd_claiming, 0); - - spin_unlock(&bdev_lock); - bdput(whole); -} - -/** - * bd_abort_claiming - abort claiming a block device - * @whole: whole block device returned by bd_start_claiming() - * @holder: holder trying to claim @bdev - * - * Abort a claiming block started by bd_start_claiming(). Note that - * @whole is not the block device to be claimed but the whole device - * returned by bd_start_claiming(). - * - * CONTEXT: - * Grabs and releases bdev_lock. - */ -static void bd_abort_claiming(struct block_device *whole, void *holder) -{ - spin_lock(&bdev_lock); - __bd_abort_claiming(whole, holder); /* releases bdev_lock */ -} - -/* increment holders when we have a legitimate claim. requires bdev_lock */ -static void __bd_claim(struct block_device *bdev, struct block_device *whole, - void *holder) -{ - /* note that for a whole device bd_holders - * will be incremented twice, and bd_holder will - * be set to bd_may_claim before being set to holder - */ - whole->bd_holders++; - whole->bd_holder = bd_may_claim; - bdev->bd_holders++; - bdev->bd_holder = holder; -} - -/** - * bd_finish_claiming - finish claiming a block device - * @bdev: block device of interest (passed to bd_start_claiming()) - * @whole: whole block device returned by bd_start_claiming() - * @holder: holder trying to claim @bdev - * - * Finish a claiming block started by bd_start_claiming(). - * - * CONTEXT: - * Grabs and releases bdev_lock. 
- */ -static void bd_finish_claiming(struct block_device *bdev, - struct block_device *whole, void *holder) -{ - spin_lock(&bdev_lock); - BUG_ON(!bd_may_claim(bdev, whole, holder)); - __bd_claim(bdev, whole, holder); - __bd_abort_claiming(whole, holder); /* not actually an abort */ -} - -static void bd_release(struct block_device *bdev) -{ - spin_lock(&bdev_lock); - if (!--bdev->bd_contains->bd_holders) - bdev->bd_contains->bd_holder = NULL; - if (!--bdev->bd_holders) - bdev->bd_holder = NULL; - spin_unlock(&bdev_lock); -} - #ifdef CONFIG_SYSFS static int add_symlink(struct kobject *from, struct kobject *to) { @@ -1223,10 +1150,30 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) res = __blkdev_get(bdev, mode, 0); if (whole) { - if (res == 0) - bd_finish_claiming(bdev, whole, holder); - else - bd_abort_claiming(whole, holder); + /* finish claiming */ + spin_lock(&bdev_lock); + + if (res == 0) { + BUG_ON(!bd_may_claim(bdev, whole, holder)); + /* + * Note that for a whole device bd_holders + * will be incremented twice, and bd_holder + * will be set to bd_may_claim before being + * set to holder + */ + whole->bd_holders++; + whole->bd_holder = bd_may_claim; + bdev->bd_holders++; + bdev->bd_holder = holder; + } + + /* tell others that we're done */ + BUG_ON(whole->bd_claiming != holder); + whole->bd_claiming = NULL; + wake_up_bit(&whole->bd_claiming, 0); + + spin_unlock(&bdev_lock); + bdput(whole); } return res; @@ -1272,6 +1219,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_part_count--; if (!--bdev->bd_openers) { + WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); } @@ -1303,10 +1251,31 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) int blkdev_put(struct block_device *bdev, fmode_t mode) { if (mode & FMODE_EXCL) { + bool bdev_free; + + /* + * Release a claim on the device. The holder fields + * are protected with bdev_lock. 
bd_mutex is to + * synchronize disk_holder unlinking. + */ mutex_lock(&bdev->bd_mutex); - bd_release(bdev); - if (!bdev->bd_holders) + spin_lock(&bdev_lock); + + WARN_ON_ONCE(--bdev->bd_holders < 0); + WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0); + + /* bd_contains might point to self, check in a separate step */ + if ((bdev_free = !bdev->bd_holders)) + bdev->bd_holder = NULL; + if (!bdev->bd_contains->bd_holders) + bdev->bd_contains->bd_holder = NULL; + + spin_unlock(&bdev_lock); + + /* if this was the last claim, holder link should go too */ + if (bdev_free) bd_unlink_disk_holder(bdev); + mutex_unlock(&bdev->bd_mutex); } return __blkdev_put(bdev, mode, 0); From 75f1dc0d076d1c1168f2115f1941ea627d38bd5a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Nov 2010 11:55:17 +0100 Subject: [PATCH 37/69] block: check bdev_read_only() from blkdev_get() bdev read-only status can be queried using bdev_read_only() and may change while the device is being opened. Enforce it by checking it from blkdev_get() after open succeeds. This makes bdev_read_only() check in open_bdev_exclusive() and fsg_lun_open() unnecessary. Drop them. 
Signed-off-by: Tejun Heo Cc: David Brownell Cc: linux-usb@vger.kernel.org --- drivers/usb/gadget/storage_common.c | 7 ++----- fs/block_dev.c | 11 ++++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c index 3b513bafaf2a..b015561fd602 100644 --- a/drivers/usb/gadget/storage_common.c +++ b/drivers/usb/gadget/storage_common.c @@ -543,7 +543,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename) ro = curlun->initially_ro; if (!ro) { filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0); - if (-EROFS == PTR_ERR(filp)) + if (PTR_ERR(filp) == -EROFS || PTR_ERR(filp) == -EACCES) ro = 1; } if (ro) @@ -558,10 +558,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (filp->f_path.dentry) inode = filp->f_path.dentry->d_inode; - if (inode && S_ISBLK(inode->i_mode)) { - if (bdev_read_only(inode->i_bdev)) - ro = 1; - } else if (!inode || !S_ISREG(inode->i_mode)) { + if (!inode || (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) { LINFO(curlun, "invalid file type: %s\n", filename); goto out; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 269bfbbd10fc..606a5259f87f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1149,6 +1149,12 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) res = __blkdev_get(bdev, mode, 0); + /* __blkdev_get() may alter read only status, check it afterwards */ + if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { + __blkdev_put(bdev, mode, 0); + res = -EACCES; + } + if (whole) { /* finish claiming */ spin_lock(&bdev_lock); @@ -1453,11 +1459,6 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h if (error) return ERR_PTR(error); - if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { - blkdev_put(bdev, mode); - return ERR_PTR(-EACCES); - } - return bdev; } From d4d77629953eabd3c14f6fa5746f6b28babfc55f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 
13 Nov 2010 11:55:18 +0100 Subject: [PATCH 38/69] block: clean up blkdev_get() wrappers and their users After recent blkdev_get() modifications, open_by_devnum() and open_bdev_exclusive() are simple wrappers around blkdev_get(). Replace them with blkdev_get_by_dev() and blkdev_get_by_path(). blkdev_get_by_dev() is identical to open_by_devnum(). blkdev_get_by_path() is slightly different in that it doesn't automatically add %FMODE_EXCL to @mode. All users are converted. Most conversions are mechanical and don't introduce any behavior difference. There are several exceptions. * btrfs now sets FMODE_EXCL in btrfs_device->mode, so there's no reason to OR it explicitly on blkdev_put(). * gfs2, nilfs2 and the generic mount_bdev() now set FMODE_EXCL in sb->s_mode. * With the above changes, sb->s_mode should now always contain FMODE_EXCL. WARN_ON_ONCE() is added to kill_block_super() to detect errors. The new blkdev_get_*() functions come with proper docbook comments. While at it, add a function description to blkdev_get() too. 
Signed-off-by: Tejun Heo Cc: Philipp Reisner Cc: Neil Brown Cc: Mike Snitzer Cc: Joern Engel Cc: Chris Mason Cc: Jan Kara Cc: "Theodore Ts'o" Cc: KONISHI Ryusuke Cc: reiserfs-devel@vger.kernel.org Cc: xfs-masters@oss.sgi.com Cc: Alexander Viro --- drivers/block/drbd/drbd_nl.c | 12 +-- drivers/md/dm-table.c | 2 +- drivers/md/md.c | 4 +- drivers/mtd/devices/block2mtd.c | 4 +- fs/block_dev.c | 139 +++++++++++++++++++++----------- fs/btrfs/volumes.c | 24 +++--- fs/btrfs/volumes.h | 2 +- fs/ext3/super.c | 2 +- fs/ext4/super.c | 2 +- fs/gfs2/ops_fstype.c | 8 +- fs/jfs/jfs_logmgr.c | 4 +- fs/logfs/dev_bdev.c | 3 +- fs/nilfs2/super.c | 8 +- fs/reiserfs/journal.c | 6 +- fs/super.c | 9 ++- fs/xfs/linux-2.6/xfs_super.c | 3 +- include/linux/fs.h | 7 +- kernel/power/swap.c | 4 +- 18 files changed, 149 insertions(+), 94 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fd0346090289..650e43ba4f7c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -902,8 +902,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } } - bdev = open_bdev_exclusive(nbc->dc.backing_dev, - FMODE_READ | FMODE_WRITE, mdev); + bdev = blkdev_get_by_path(nbc->dc.backing_dev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, PTR_ERR(bdev)); @@ -920,10 +920,10 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * should check it for you already; but if you don't, or * someone fooled it, we need to double check here) */ - bdev = open_bdev_exclusive(nbc->dc.meta_dev, - FMODE_READ | FMODE_WRITE, - (nbc->dc.meta_dev_idx < 0) ? - (void *)mdev : (void *)drbd_m_holder); + bdev = blkdev_get_by_path(nbc->dc.meta_dev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, + (nbc->dc.meta_dev_idx < 0) ? 
+ (void *)mdev : (void *)drbd_m_holder); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, PTR_ERR(bdev)); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9e88ca0c55e9..67150c32986c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -325,7 +325,7 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev, BUG_ON(d->dm_dev.bdev); - bdev = open_by_devnum(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); + bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); if (IS_ERR(bdev)) return PTR_ERR(bdev); diff --git a/drivers/md/md.c b/drivers/md/md.c index 6af951ffe0bb..5aaa6bfbe638 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1934,8 +1934,8 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - shared ? (mdk_rdev_t *)lock_rdev : rdev); + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + shared ? 
(mdk_rdev_t *)lock_rdev : rdev); if (IS_ERR(bdev)) { printk(KERN_ERR "md: could not open %s.\n", __bdevname(dev, b)); diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index aa557beb8f51..f29a6f9df6e7 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -247,7 +247,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) return NULL; /* Get a handle on the device */ - bdev = open_bdev_exclusive(devname, mode, dev); + bdev = blkdev_get_by_path(devname, mode, dev); #ifndef MODULE if (IS_ERR(bdev)) { @@ -256,7 +256,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) dev_t devt = name_to_dev_t(devname); if (devt) - bdev = open_by_devnum(devt, mode, dev); + bdev = blkdev_get_by_dev(devt, mode, dev); } #endif diff --git a/fs/block_dev.c b/fs/block_dev.c index 606a5259f87f..c1c1b8c3fb99 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -854,24 +854,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev) { } #endif -/* - * Tries to open block device by device number. Use it ONLY if you - * really do not have anything better - i.e. when you are behind a - * truly sucky interface and all you are given is a device number. _Never_ - * to be used for internal purposes. If you ever need it - reconsider - * your API. - */ -struct block_device *open_by_devnum(dev_t dev, fmode_t mode, void *holder) -{ - struct block_device *bdev = bdget(dev); - int err = -ENOMEM; - if (bdev) - err = blkdev_get(bdev, mode, holder); - return err ? ERR_PTR(err) : bdev; -} - -EXPORT_SYMBOL(open_by_devnum); - /** * flush_disk - invalidates all buffer-cache entries on a disk * @@ -1132,6 +1114,25 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } +/** + * blkdev_get - open a block device + * @bdev: block_device to open + * @mode: FMODE_* mask + * @holder: exclusive holder identifier + * + * Open @bdev with @mode. 
If @mode includes %FMODE_EXCL, @bdev is + * open with exclusive access. Specifying %FMODE_EXCL with %NULL + * @holder is invalid. Exclusive opens may nest for the same @holder. + * + * On success, the reference count of @bdev is unchanged. On failure, + * @bdev is put. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * 0 on success, -errno on failure. + */ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) { struct block_device *whole = NULL; @@ -1186,6 +1187,80 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) } EXPORT_SYMBOL(blkdev_get); +/** + * blkdev_get_by_path - open a block device by name + * @path: path to the block device to open + * @mode: FMODE_* mask + * @holder: exclusive holder identifier + * + * Open the blockdevice described by the device file at @path. @mode + * and @holder are identical to blkdev_get(). + * + * On success, the returned block_device has reference count of one. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * Pointer to block_device on success, ERR_PTR(-errno) on failure. + */ +struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, + void *holder) +{ + struct block_device *bdev; + int err; + + bdev = lookup_bdev(path); + if (IS_ERR(bdev)) + return bdev; + + err = blkdev_get(bdev, mode, holder); + if (err) + return ERR_PTR(err); + + return bdev; +} +EXPORT_SYMBOL(blkdev_get_by_path); + +/** + * blkdev_get_by_dev - open a block device by device number + * @dev: device number of block device to open + * @mode: FMODE_* mask + * @holder: exclusive holder identifier + * + * Open the blockdevice described by device number @dev. @mode and + * @holder are identical to blkdev_get(). + * + * Use it ONLY if you really do not have anything better - i.e. when + * you are behind a truly sucky interface and all you are given is a + * device number. _Never_ to be used for internal purposes. If you + * ever need it - reconsider your API. 
+ * + * On success, the returned block_device has reference count of one. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * Pointer to block_device on success, ERR_PTR(-errno) on failure. + */ +struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) +{ + struct block_device *bdev; + int err; + + bdev = bdget(dev); + if (!bdev) + return ERR_PTR(-ENOMEM); + + err = blkdev_get(bdev, mode, holder); + if (err) + return ERR_PTR(err); + + return bdev; +} +EXPORT_SYMBOL(blkdev_get_by_dev); + static int blkdev_open(struct inode * inode, struct file * filp) { struct block_device *bdev; @@ -1436,34 +1511,6 @@ struct block_device *lookup_bdev(const char *pathname) } EXPORT_SYMBOL(lookup_bdev); -/** - * open_bdev_exclusive - open a block device by name and set it up for use - * - * @path: special file representing the block device - * @mode: FMODE_... combination to pass be used - * @holder: owner for exclusion - * - * Open the blockdevice described by the special file at @path, claim it - * for the @holder. 
- */ -struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) -{ - struct block_device *bdev; - int error; - - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) - return bdev; - - error = blkdev_get(bdev, mode | FMODE_EXCL, holder); - if (error) - return ERR_PTR(error); - - return bdev; -} - -EXPORT_SYMBOL(open_bdev_exclusive); - int __invalidate_device(struct block_device *bdev) { struct super_block *sb = get_super(bdev); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f1b729d3b883..95324e9f9280 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -489,7 +489,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) continue; if (device->bdev) { - blkdev_put(device->bdev, device->mode | FMODE_EXCL); + blkdev_put(device->bdev, device->mode); device->bdev = NULL; fs_devices->open_devices--; } @@ -523,7 +523,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev) { - blkdev_put(device->bdev, device->mode | FMODE_EXCL); + blkdev_put(device->bdev, device->mode); fs_devices->open_devices--; } if (device->writeable) { @@ -580,13 +580,15 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int seeding = 1; int ret = 0; + flags |= FMODE_EXCL; + list_for_each_entry(device, head, dev_list) { if (device->bdev) continue; if (!device->name) continue; - bdev = open_bdev_exclusive(device->name, flags, holder); + bdev = blkdev_get_by_path(device->name, flags, holder); if (IS_ERR(bdev)) { printk(KERN_INFO "open %s failed\n", device->name); goto error; @@ -638,7 +640,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, error_brelse: brelse(bh); error_close: - blkdev_put(bdev, flags | FMODE_EXCL); + blkdev_put(bdev, flags); error: continue; } @@ -684,7 +686,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, mutex_lock(&uuid_mutex); - bdev = 
open_bdev_exclusive(path, flags, holder); + flags |= FMODE_EXCL; + bdev = blkdev_get_by_path(path, flags, holder); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); @@ -716,7 +719,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, brelse(bh); error_close: - blkdev_put(bdev, flags | FMODE_EXCL); + blkdev_put(bdev, flags); error: mutex_unlock(&uuid_mutex); return ret; @@ -1179,8 +1182,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) goto out; } } else { - bdev = open_bdev_exclusive(device_path, FMODE_READ, - root->fs_info->bdev_holder); + bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL, + root->fs_info->bdev_holder); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); goto out; @@ -1244,7 +1247,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->latest_bdev = next_device->bdev; if (device->bdev) { - blkdev_put(device->bdev, device->mode | FMODE_EXCL); + blkdev_put(device->bdev, device->mode); device->bdev = NULL; device->fs_devices->open_devices--; } @@ -1439,7 +1442,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) return -EINVAL; - bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); + bdev = blkdev_get_by_path(device_path, FMODE_EXCL, + root->fs_info->bdev_holder); if (IS_ERR(bdev)) return PTR_ERR(bdev); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2b638b6e4eea..856e75770304 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -49,7 +49,7 @@ struct btrfs_device { struct block_device *bdev; - /* the mode sent to open_bdev_exclusive */ + /* the mode sent to blkdev_get */ fmode_t mode; char *name; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 23e7513dba9c..123720ba786d 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -347,7 +347,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) struct block_device *bdev; 
char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) goto fail; return bdev; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5dd0b3e76fa8..bd63e6927219 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -647,7 +647,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) goto fail; return bdev; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index c1f0763a022b..bc56ccf98ffd 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1268,7 +1268,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ; + fmode_t mode = FMODE_READ | FMODE_EXCL; int error; struct gfs2_args args; struct gfs2_sbd *sdp; @@ -1276,7 +1276,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; - bdev = open_bdev_exclusive(dev_name, mode, fs_type); + bdev = blkdev_get_by_path(dev_name, mode, fs_type); if (IS_ERR(bdev)) return ERR_CAST(bdev); @@ -1298,7 +1298,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, goto error_bdev; if (s->s_root) - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, mode); memset(&args, 0, sizeof(args)); args.ar_quota = GFS2_QUOTA_DEFAULT; @@ -1342,7 +1342,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, deactivate_locked_super(s); return ERR_PTR(error); error_bdev: - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, mode); return ERR_PTR(error); } diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 
5a290f22dcc3..278e3fb40b71 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1120,8 +1120,8 @@ int lmLogOpen(struct super_block *sb) * file systems to log may have n-to-1 relationship; */ - bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - log); + bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + log); if (IS_ERR(bdev)) { rc = -PTR_ERR(bdev); goto free; diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 734b9025858e..723bc5bca09a 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -325,7 +325,8 @@ int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type, { struct block_device *bdev; - bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, type); + bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + type); if (IS_ERR(bdev)) return PTR_ERR(bdev); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 756a6798d7c8..0030640e2d72 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1147,14 +1147,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags, { struct nilfs_super_data sd; struct super_block *s; - fmode_t mode = FMODE_READ; + fmode_t mode = FMODE_READ | FMODE_EXCL; struct dentry *root_dentry; int err, s_new = false; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; - sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); + sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); if (IS_ERR(sd.bdev)) return ERR_CAST(sd.bdev); @@ -1233,7 +1233,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } if (!s_new) - blkdev_put(sd.bdev, mode | FMODE_EXCL); + blkdev_put(sd.bdev, mode); return root_dentry; @@ -1242,7 +1242,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, failed: if (!s_new) - blkdev_put(sd.bdev, mode | FMODE_EXCL); + blkdev_put(sd.bdev, mode); return ERR_PTR(err); } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index b488136f5ace..e2fce519c0f2 100644 --- a/fs/reiserfs/journal.c +++ 
b/fs/reiserfs/journal.c @@ -2585,7 +2585,8 @@ static int journal_init_dev(struct super_block *super, if ((!jdev_name || !jdev_name[0])) { if (jdev == super->s_dev) blkdev_mode &= ~FMODE_EXCL; - journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode, journal); + journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, + journal); journal->j_dev_mode = blkdev_mode; if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); @@ -2601,8 +2602,7 @@ static int journal_init_dev(struct super_block *super, } journal->j_dev_mode = blkdev_mode; - journal->j_dev_bd = open_bdev_exclusive(jdev_name, - blkdev_mode, journal); + journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; diff --git a/fs/super.c b/fs/super.c index 22374bf0ba87..5d9a4497849a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -766,13 +766,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ; + fmode_t mode = FMODE_READ | FMODE_EXCL; int error = 0; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; - bdev = open_bdev_exclusive(dev_name, mode, fs_type); + bdev = blkdev_get_by_path(dev_name, mode, fs_type); if (IS_ERR(bdev)) return ERR_CAST(bdev); @@ -807,7 +807,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, * holding an active reference. 
*/ up_write(&s->s_umount); - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, mode); down_write(&s->s_umount); } else { char b[BDEVNAME_SIZE]; @@ -831,7 +831,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, mode); error: return ERR_PTR(error); } @@ -862,6 +862,7 @@ void kill_block_super(struct super_block *sb) bdev->bd_super = NULL; generic_shutdown_super(sb); sync_blockdev(bdev); + WARN_ON_ONCE(!(mode & FMODE_EXCL)); blkdev_put(bdev, mode | FMODE_EXCL); } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a1a6e5ceea67..9209cd199c47 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -609,7 +609,8 @@ xfs_blkdev_get( { int error = 0; - *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); + *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + mp); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); printk("XFS: Invalid device [%s], error=%d\n", name, error); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1a033e8ebe4c..f48501563917 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2006,8 +2006,6 @@ extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); -extern struct block_device *open_by_devnum(dev_t dev, fmode_t mode, - void *holder); extern void invalidate_bdev(struct block_device *); extern int sync_blockdev(struct block_device *bdev); extern struct super_block *freeze_bdev(struct block_device *); @@ -2039,6 +2037,10 @@ extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device 
*bdev, fmode_t mode, void *holder); +extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, + void *holder); +extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, + void *holder); extern int blkdev_put(struct block_device *bdev, fmode_t mode); #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); @@ -2083,7 +2085,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name) extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); -extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); extern void blkdev_show(struct seq_file *,off_t); #else diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 513a77f1a0b3..b019609d1b45 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -907,8 +907,8 @@ int swsusp_check(void) { int error; - hib_resume_bdev = open_by_devnum(swsusp_resume_device, - FMODE_READ, NULL); + hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, + FMODE_READ, NULL); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); From 1c66b360fe26204e2aa14e45086b4a6b8890b1a2 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 13 Nov 2010 16:22:02 +0800 Subject: [PATCH 39/69] ocfs2: Change some lock status member in ocfs2_lock_res to char. Commit 83fd9c7 changes l_level, l_requested and l_blocking of ocfs2_lock_res from int to unsigned char. But they are actually initialized to -1 (in ocfs2_lock_res_init_common), which corresponds to 255 for unsigned char. So the whole dlm lock mechanism doesn't work now which means a disaster to ocfs2.
Cc: Goldwyn Rodrigues Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/ocfs2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d8408217e3bd..1efea3615589 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -159,7 +159,9 @@ struct ocfs2_lock_res { char l_name[OCFS2_LOCK_ID_MAX_LEN]; unsigned int l_ro_holders; unsigned int l_ex_holders; - unsigned char l_level; + char l_level; + char l_requested; + char l_blocking; /* Data packed - type enum ocfs2_lock_type */ unsigned char l_type; @@ -169,8 +171,6 @@ struct ocfs2_lock_res { unsigned char l_action; /* Data packed - enum type ocfs2_unlock_action */ unsigned char l_unlock_action; - unsigned char l_requested; - unsigned char l_blocking; unsigned int l_pending_gen; spinlock_t l_lock; From 68cee4f118c21a1c67e5764a91d766661db5b360 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 28 Oct 2010 13:50:37 +0400 Subject: [PATCH 40/69] slub: Fix slub_lock down/up imbalance There are two places, that do not release the slub_lock. Respective bugs were introduced by sysfs changes ab4d5ed5 (slub: Enable sysfs support for !CONFIG_SLUB_DEBUG) and 2bce6485 ( slub: Allow removal of slab caches during boot). 
Acked-by: Christoph Lameter Signed-off-by: Pavel Emelyanov Signed-off-by: Pekka Enberg --- mm/slub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 8fd5401bb071..981fb730aa04 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3273,9 +3273,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, kfree(n); kfree(s); } +err: up_write(&slub_lock); -err: if (flags & SLAB_PANIC) panic("Cannot create slabcache %s\n", name); else @@ -3862,6 +3862,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, x += sprintf(buf + x, " N%d=%lu", node, nodes[node]); #endif + up_read(&slub_lock); kfree(nodes); return x + sprintf(buf + x, "\n"); } From fc5fef5ef4f94c2d9ad0cc50871a3e2da5d8bc18 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 15 Nov 2010 13:25:31 +0900 Subject: [PATCH 41/69] MAINTAINERS: Add an fbdev git tree entry. Now that there's an fbdev git tree (this is also what is pulled in to -next), stub it in to the MAINTAINERS entry. Signed-off-by: Paul Mundt --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0094224ca79b..0f9f26caeb81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2435,6 +2435,7 @@ F: drivers/net/wan/sdla.c FRAMEBUFFER LAYER L: linux-fbdev@vger.kernel.org W: http://linux-fbdev.sourceforge.net/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/fbdev-2.6.git S: Orphan F: Documentation/fb/ F: drivers/video/fb* From dce1431cb36338bda1167591689ab1f77ccf8934 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sat, 13 Nov 2010 02:06:27 -0500 Subject: [PATCH 42/69] fsl-diu-fb: drop dead ioctl define The fsl-diu-fb driver no longer uses this define, and we have a common one to cover this already (FBIO_WAITFORVSYNC). 
Signed-off-by: Mike Frysinger Signed-off-by: Paul Mundt --- include/linux/fsl-diu-fb.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h index fc295d7ea463..781d4671415f 100644 --- a/include/linux/fsl-diu-fb.h +++ b/include/linux/fsl-diu-fb.h @@ -54,7 +54,6 @@ struct aoi_display_offset { }; #define MFB_SET_CHROMA_KEY _IOW('M', 1, struct mfb_chroma_key) -#define MFB_WAIT_FOR_VSYNC _IOW('F', 0x20, u_int32_t) #define MFB_SET_BRIGHTNESS _IOW('M', 3, __u8) #define MFB_SET_ALPHA 0x80014d00 From 6318af900ca7cb2c94b27d3c358762e6ac187e25 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 15 Nov 2010 14:30:30 +0900 Subject: [PATCH 43/69] sh: intc: Fix up build failure introduced by radix tree changes. The radix tree retry logic got a bit of an overhaul and subsequently broke the virtual IRQ subgroup build. Simply switch over to radix_tree_deref_retry() as per the filemap changes, which the virq lookup logic was modelled after in the first place. 
Signed-off-by: Paul Mundt --- drivers/sh/intc/virq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c index e5bf5d3c698e..4e0ff7181164 100644 --- a/drivers/sh/intc/virq.c +++ b/drivers/sh/intc/virq.c @@ -215,7 +215,7 @@ static void __init intc_subgroup_map(struct intc_desc_int *d) entry = radix_tree_deref_slot((void **)entries[i]); if (unlikely(!entry)) continue; - if (unlikely(entry == RADIX_TREE_RETRY)) + if (radix_tree_deref_retry(entry)) goto restart; irq = create_irq(); From 574490e30a2a07cd7bc7ee7f63f1e61375a0359b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Nov 2010 03:11:09 +0000 Subject: [PATCH 44/69] ARM: mach-shmobile: ap4evb: add fsib 44100Hz rate Tested-by: Tony SIM Tested-by: TAKEI Mitsuharu Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 63c2fcac8e1e..d3260542b943 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -583,6 +583,10 @@ static int fsi_set_rate(int is_porta, int rate) return -EINVAL; switch (rate) { + case 44100: + clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 11283000)); + ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; + break; case 48000: clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 85428000)); clk_set_rate(fdiv_clk, clk_round_rate(fdiv_clk, 12204000)); From 044b9414c7caf9a26192c73a5b88fa1a8a32a1c1 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 3 Nov 2010 20:01:07 +0000 Subject: [PATCH 45/69] GFS2: Fix inode deallocation race This area of the code has always been a bit delicate due to the subtleties of lock ordering. The problem is that for "normal" alloc/dealloc, we always grab the inode locks first and the rgrp lock later. 
In order to ensure no races in looking up the unlinked, but still allocated inodes, we need to hold the rgrp lock when we do the lookup, which means that we can't take the inode glock. The solution is to borrow the technique already used by NFS to solve what is essentially the same problem (given an inode number, look up the inode carefully, checking that it really is in the expected state). We cannot do that directly from the allocation code (lock ordering again) so we give the job to the pre-existing delete workqueue and carry on with the allocation as normal. If we find there is no space, we do a journal flush (required anyway if space from a deallocation is to be released) which should block against the pending deallocations, so we should always get the space back. Signed-off-by: Steven Whitehouse --- fs/gfs2/export.c | 46 ++------------ fs/gfs2/glock.c | 21 ++++--- fs/gfs2/inode.c | 152 +++++++++++------------------------------------ fs/gfs2/inode.h | 4 +- fs/gfs2/rgrp.c | 91 ++++++++++++++-------------- 5 files changed, 98 insertions(+), 216 deletions(-) diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 06d582732d34..5ab3839dfcb9 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -138,10 +138,8 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, struct gfs2_inum_host *inum) { struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_holder i_gh; struct inode *inode; struct dentry *dentry; - int error; inode = gfs2_ilookup(sb, inum->no_addr); if (inode) { @@ -152,52 +150,16 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, goto out_inode; } - error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, - LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (error) - return ERR_PTR(error); - - error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE); - if (error) - goto fail; - - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); - goto fail; - } - - error = 
gfs2_inode_refresh(GFS2_I(inode)); - if (error) { - iput(inode); - goto fail; - } - - /* Pick up the works we bypass in gfs2_inode_lookup */ - if (inode->i_state & I_NEW) - gfs2_set_iop(inode); - - if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { - iput(inode); - goto fail; - } - - error = -EIO; - if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) { - iput(inode); - goto fail; - } - - gfs2_glock_dq_uninit(&i_gh); + inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino, + GFS2_BLKST_DINODE); + if (IS_ERR(inode)) + return ERR_CAST(inode); out_inode: dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) dentry->d_op = &gfs2_dops; return dentry; -fail: - gfs2_glock_dq_uninit(&i_gh); - return ERR_PTR(error); } static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 87778857f099..f92c17704169 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -686,21 +686,20 @@ static void delete_work_func(struct work_struct *work) { struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); struct gfs2_sbd *sdp = gl->gl_sbd; - struct gfs2_inode *ip = NULL; + struct gfs2_inode *ip; struct inode *inode; - u64 no_addr = 0; + u64 no_addr = gl->gl_name.ln_number; + + ip = gl->gl_object; + /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ - spin_lock(&gl->gl_spin); - ip = (struct gfs2_inode *)gl->gl_object; if (ip) - no_addr = ip->i_no_addr; - spin_unlock(&gl->gl_spin); - if (ip) { inode = gfs2_ilookup(sdp->sd_vfs, no_addr); - if (inode) { - d_prune_aliases(inode); - iput(inode); - } + else + inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); + if (inode && !IS_ERR(inode)) { + d_prune_aliases(inode); + iput(inode); } gfs2_glock_put(gl); } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 06370f8bd8cf..e1213f7f9217 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -73,49 +73,6 @@ static struct inode *gfs2_iget(struct super_block 
*sb, u64 no_addr) return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); } -struct gfs2_skip_data { - u64 no_addr; - int skipped; -}; - -static int iget_skip_test(struct inode *inode, void *opaque) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_skip_data *data = opaque; - - if (ip->i_no_addr == data->no_addr) { - if (inode->i_state & (I_FREEING|I_WILL_FREE)){ - data->skipped = 1; - return 0; - } - return 1; - } - return 0; -} - -static int iget_skip_set(struct inode *inode, void *opaque) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_skip_data *data = opaque; - - if (data->skipped) - return 1; - inode->i_ino = (unsigned long)(data->no_addr); - ip->i_no_addr = data->no_addr; - return 0; -} - -static struct inode *gfs2_iget_skip(struct super_block *sb, - u64 no_addr) -{ - struct gfs2_skip_data data; - unsigned long hash = (unsigned long)no_addr; - - data.no_addr = no_addr; - data.skipped = 0; - return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); -} - /** * GFS2 lookup code fills in vfs inode contents based on info obtained * from directory entry inside gfs2_inode_lookup(). This has caused issues @@ -243,93 +200,54 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, return ERR_PTR(error); } -/** - * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation - * and try to reclaim it by doing iput. - * - * This function assumes no rgrp locks are currently held. 
- * - * @sb: The super block - * no_addr: The inode number - * - */ - -void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) +struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, + u64 *no_formal_ino, unsigned int blktype) { - struct gfs2_sbd *sdp; - struct gfs2_inode *ip; - struct gfs2_glock *io_gl = NULL; - int error; - struct gfs2_holder gh; + struct super_block *sb = sdp->sd_vfs; + struct gfs2_holder i_gh; struct inode *inode; + int error; - inode = gfs2_iget_skip(sb, no_addr); - - if (!inode) - return; - - /* If it's not a new inode, someone's using it, so leave it alone. */ - if (!(inode->i_state & I_NEW)) { - iput(inode); - return; - } - - ip = GFS2_I(inode); - sdp = GFS2_SB(inode); - ip->i_no_formal_ino = -1; + error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, + LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return ERR_PTR(error); - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); - if (unlikely(error)) + error = gfs2_check_blk_type(sdp, no_addr, blktype); + if (error) goto fail; - ip->i_gl->gl_object = ip; - error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); - if (unlikely(error)) - goto fail_put; - - set_bit(GIF_INVALID, &ip->i_flags); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT, - &ip->i_iopen_gh); - if (unlikely(error)) - goto fail_iopen; + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); + if (IS_ERR(inode)) + goto fail; - ip->i_iopen_gh.gh_gl->gl_object = ip; - gfs2_glock_put(io_gl); - io_gl = NULL; + error = gfs2_inode_refresh(GFS2_I(inode)); + if (error) + goto fail_iput; - inode->i_mode = DT2IF(DT_UNKNOWN); + /* Pick up the works we bypass in gfs2_inode_lookup */ + if (inode->i_state & I_NEW) + gfs2_set_iop(inode); - /* - * We must read the inode in order to work out its type in - * this case. Note that this doesn't happen often as we normally - * know the type beforehand. 
This code path only occurs during - * unlinked inode recovery (where it is safe to do this glock, - * which is not true in the general case). - */ - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY, - &gh); - if (unlikely(error)) - goto fail_glock; + /* Two extra checks for NFS only */ + if (no_formal_ino) { + error = -ESTALE; + if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino) + goto fail_iput; - /* Inode is now uptodate */ - gfs2_glock_dq_uninit(&gh); - gfs2_set_iop(inode); + error = -EIO; + if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) + goto fail_iput; - /* The iput will cause it to be deleted. */ - iput(inode); - return; + error = 0; + } -fail_glock: - gfs2_glock_dq(&ip->i_iopen_gh); -fail_iopen: - if (io_gl) - gfs2_glock_put(io_gl); -fail_put: - ip->i_gl->gl_object = NULL; - gfs2_glock_put(ip->i_gl); fail: - iget_failed(inode); - return; + gfs2_glock_dq_uninit(&i_gh); + return error ? ERR_PTR(error) : inode; +fail_iput: + iput(inode); + goto fail; } static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 6720d7d5fbc6..d8499fadcc53 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -99,7 +99,9 @@ static inline int gfs2_check_internal_file_size(struct inode *inode, extern void gfs2_set_iop(struct inode *inode); extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, u64 no_addr, u64 no_formal_ino); -extern void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr); +extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, + u64 *no_formal_ino, + unsigned int blktype); extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); extern int gfs2_inode_refresh(struct gfs2_inode *ip); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bef3ab6cf5c1..33c8407b876f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -963,17 +963,18 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) * The inode, 
if one has been found, in inode. */ -static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, - u64 skip) +static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) { u32 goal = 0, block; u64 no_addr; struct gfs2_sbd *sdp = rgd->rd_sbd; unsigned int n; + struct gfs2_glock *gl; + struct gfs2_inode *ip; + int error; + int found = 0; - for(;;) { - if (goal >= rgd->rd_data) - break; + while (goal < rgd->rd_data) { down_write(&sdp->sd_log_flush_lock); n = 1; block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, @@ -990,11 +991,32 @@ static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, if (no_addr == skip) continue; *last_unlinked = no_addr; - return no_addr; + + error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); + if (error) + continue; + + /* If the inode is already in cache, we can ignore it here + * because the existing inode disposal code will deal with + * it when all refs have gone away. Accessing gl_object like + * this is not safe in general. Here it is ok because we do + * not dereference the pointer, and we only need an approx + * answer to whether it is NULL or not. + */ + ip = gl->gl_object; + + if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) + gfs2_glock_put(gl); + else + found++; + + /* Limit reclaim to sensible number of tasks */ + if (found > 2*NR_CPUS) + return; } rgd->rd_flags &= ~GFS2_RDF_CHECK; - return 0; + return; } /** @@ -1075,11 +1097,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd) * Try to acquire rgrp in way which avoids contending with others. 
* * Returns: errno - * unlinked: the block address of an unlinked block to be reclaimed */ -static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, - u64 *last_unlinked) +static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd, *begin = NULL; @@ -1089,7 +1109,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, int loops = 0; int error, rg_locked; - *unlinked = 0; rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); while (rgd) { @@ -1106,17 +1125,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, case 0: if (try_rgrp_fit(rgd, al)) goto out; - /* If the rg came in already locked, there's no - way we can recover from a failed try_rgrp_unlink - because that would require an iput which can only - happen after the rgrp is unlocked. */ - if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) - *unlinked = try_rgrp_unlink(rgd, last_unlinked, - ip->i_no_addr); + if (rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); if (!rg_locked) gfs2_glock_dq_uninit(&al->al_rgd_gh); - if (*unlinked) - return -EAGAIN; /* fall through */ case GLR_TRYFAILED: rgd = recent_rgrp_next(rgd); @@ -1145,13 +1157,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, case 0: if (try_rgrp_fit(rgd, al)) goto out; - if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) - *unlinked = try_rgrp_unlink(rgd, last_unlinked, - ip->i_no_addr); + if (rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); if (!rg_locked) gfs2_glock_dq_uninit(&al->al_rgd_gh); - if (*unlinked) - return -EAGAIN; break; case GLR_TRYFAILED: @@ -1204,12 +1213,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_alloc *al = ip->i_alloc; int error = 0; - u64 last_unlinked = NO_BLOCK, unlinked; + u64 last_unlinked = NO_BLOCK; + int tries = 0; if (gfs2_assert_warn(sdp, 
al->al_requested)) return -EINVAL; -try_again: if (hold_rindex) { /* We need to hold the rindex unless the inode we're using is the rindex itself, in which case it's already held. */ @@ -1218,31 +1227,23 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */ error = gfs2_ri_update_special(ip); + if (error) + return error; } - if (error) - return error; + do { + error = get_local_rgrp(ip, &last_unlinked); + /* If there is no space, flushing the log may release some */ + if (error) + gfs2_log_flush(sdp, NULL); + } while (error && tries++ < 3); - /* Find an rgrp suitable for allocation. If it encounters any unlinked - dinodes along the way, error will equal -EAGAIN and unlinked will - contains it block address. We then need to look up that inode and - try to free it, and try the allocation again. */ - error = get_local_rgrp(ip, &unlinked, &last_unlinked); if (error) { if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) gfs2_glock_dq_uninit(&al->al_ri_gh); - if (error != -EAGAIN) - return error; - - gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked); - /* regardless of whether or not gfs2_process_unlinked_inode - was successful, we don't want to repeat it again. */ - last_unlinked = unlinked; - gfs2_log_flush(sdp, NULL); - error = 0; - - goto try_again; + return error; } + /* no error, so we have the rgrp set in the inode's allocation. */ al->al_file = file; al->al_line = line; From e25cd062b16ed1d41a157aec5a108abd6ff2e9f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 13 Nov 2010 08:44:33 -0800 Subject: [PATCH 46/69] PCI: sysfs: fix printk warnings Cast pci_resource_start() and pci_resource_len() to u64 for printk. 
drivers/pci/pci-sysfs.c:753: warning: format '%16Lx' expects type 'long long unsigned int', but argument 9 has type 'resource_size_t' drivers/pci/pci-sysfs.c:753: warning: format '%16Lx' expects type 'long long unsigned int', but argument 10 has type 'resource_size_t' Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 25accc9dda3b..95712a375cd5 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -754,7 +754,8 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, "at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n", current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff, pci_name(pdev), i, - pci_resource_start(pdev, i), pci_resource_len(pdev, i)); + (u64)pci_resource_start(pdev, i), + (u64)pci_resource_len(pdev, i)); return -EINVAL; } From bdc85df7a8417b9893443ff5520804699416b6f3 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 15 Nov 2010 19:37:36 +0100 Subject: [PATCH 47/69] blk-cgroup: Allow creation of hierarchical cgroups o Allow hierarchical cgroup creation for blkio controller o Currently we disallow it as both the io controller policies (throttling as well as proportion bandwidth) do not support hierarhical accounting and control. But the flip side is that blkio controller can not be used with libvirt as libvirt creates a cgroup hierarchy deeper than 1 level. //libvirt/qemu/ o So this patch will allow creation of cgroup hierarhcy but at the backend everything will be treated as flat. So if somebody created a an hierarchy like as follows. root / \ test1 test2 | test3 CFQ and throttling will practically treat all groups at same level. 
pivot / | \ \ root test1 test2 test3 o Once we have actual support for hierarchical accounting and control then we can introduce another cgroup tunable file "blkio.use_hierarchy" which will be 0 by default but if user wants to enforce hierarchical control then it can be set to 1. This way there should not be any ABI problems down the line. o The only not so pretty part is introduction of extra file "use_hierarchy" down the line. Kame-san had mentioned that hierarchical accounting is expensive in memory controller hence they keep it off by default. I suspect same will be the case for IO controller also as for each IO completion we shall have to account IO through hierarchy up to the root. If yes, then it probably is not a very bad idea to introduce this extra file so that it will be used only when somebody needs it and some people might enable hierarchy only in part of the hierarchy. o This is how basically memory controller also uses "use_hierarchy" and they also allowed creation of hierarchies when actual backend support was not available. Signed-off-by: Vivek Goyal Acked-by: Balbir Singh Reviewed-by: Gui Jianfeng Reviewed-by: Ciju Rajan K Tested-by: Ciju Rajan K Signed-off-by: Jens Axboe --- Documentation/cgroups/blkio-controller.txt | 27 ++++++++++++++++++++++ block/blk-cgroup.c | 4 ---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index d6da611f8f63..4ed7b5ceeed2 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt @@ -89,6 +89,33 @@ Throttling/Upper Limit policy Limits for writes can be put using blkio.write_bps_device file. +Hierarchical Cgroups +==================== +- Currently none of the IO control policies support hierarchical groups. But + cgroup interface does allow creation of hierarchical cgroups and internally + IO policies treat them as flat hierarchy.
+ + So this patch will allow creation of cgroup hierarhcy but at the backend + everything will be treated as flat. So if somebody created a hierarchy like + as follows. + + root + / \ + test1 test2 + | + test3 + + CFQ and throttling will practically treat all groups at same level. + + pivot + / | \ \ + root test1 test2 test3 + + Down the line we can implement hierarchical accounting/control support + and also introduce a new cgroup file "use_hierarchy" which will control + whether cgroup hierarchy is viewed as flat or hierarchical by the policy.. + This is how memory controller also has implemented the things. + Various user visible config options =================================== CONFIG_BLK_CGROUP diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b1febd0f6d2a..455768a3eb9e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) goto done; } - /* Currently we do not support hierarchy deeper than two level (0,1) */ - if (parent != cgroup->top_cgroup) - return ERR_PTR(-EPERM); - blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); if (!blkcg) return ERR_PTR(-ENOMEM); From 2aa25c22c445df63b5961883f28767643122f935 Mon Sep 17 00:00:00 2001 From: "Hans J. Koch" Date: Mon, 15 Nov 2010 21:38:56 +0100 Subject: [PATCH 48/69] hwmon: Change mail address of Hans J. Koch My old mail address doesn't exist anymore. This changes all occurrences to my new address. Signed-off-by: Hans J. Koch Signed-off-by: Jean Delvare --- drivers/hwmon/amc6821.c | 2 +- drivers/hwmon/lm93.c | 4 ++-- drivers/hwmon/max6650.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c index fa9708c2d723..4033974d1bb3 100644 --- a/drivers/hwmon/amc6821.c +++ b/drivers/hwmon/amc6821.c @@ -4,7 +4,7 @@ Copyright (C) 2009 T. Mertelj Based on max6650.c: - Copyright (C) 2007 Hans J. Koch + Copyright (C) 2007 Hans J. 
Koch This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c index 6669255aadcf..c9ed14eba5a6 100644 --- a/drivers/hwmon/lm93.c +++ b/drivers/hwmon/lm93.c @@ -20,7 +20,7 @@ Adapted to 2.6.20 by Carsten Emde Copyright (c) 2006 Carsten Emde, Open Source Automation Development Lab - Modified for mainline integration by Hans J. Koch + Modified for mainline integration by Hans J. Koch Copyright (c) 2007 Hans J. Koch, Linutronix GmbH This program is free software; you can redistribute it and/or modify @@ -2629,7 +2629,7 @@ static void __exit lm93_exit(void) } MODULE_AUTHOR("Mark M. Hoffman , " - "Hans J. Koch "); MODULE_DESCRIPTION("LM93 driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c index a0160ee5caef..9a11532ecae8 100644 --- a/drivers/hwmon/max6650.c +++ b/drivers/hwmon/max6650.c @@ -2,7 +2,7 @@ * max6650.c - Part of lm_sensors, Linux kernel modules for hardware * monitoring. * - * (C) 2007 by Hans J. Koch + * (C) 2007 by Hans J. 
Koch * * based on code written by John Morris * Copyright (c) 2003 Spirent Communications From 61ec2da506ec6544873f0aba026164e4bdd21751 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 21:38:56 +0100 Subject: [PATCH 49/69] hwmon: (lm95241) Check validity of input values This clears the following build-time warnings I was seeing: drivers/hwmon/lm95241.c: In function "set_interval": drivers/hwmon/lm95241.c:132:15: warning: ignoring return value of "strict_strtol", declared with attribute warn_unused_result drivers/hwmon/lm95241.c: In function "set_max2": drivers/hwmon/lm95241.c:278:1: warning: ignoring return value of "strict_strtol", declared with attribute warn_unused_result drivers/hwmon/lm95241.c: In function "set_max1": drivers/hwmon/lm95241.c:277:1: warning: ignoring return value of "strict_strtol", declared with attribute warn_unused_result drivers/hwmon/lm95241.c: In function "set_min2": drivers/hwmon/lm95241.c:249:1: warning: ignoring return value of "strict_strtol", declared with attribute warn_unused_result drivers/hwmon/lm95241.c: In function "set_min1": drivers/hwmon/lm95241.c:248:1: warning: ignoring return value of "strict_strtol", declared with attribute warn_unused_result drivers/hwmon/lm95241.c: In function "set_type2": drivers/hwmon/lm95241.c:220:1: warning: ignoring return value of "strict_strtol", declared with attribute warn_unused_result drivers/hwmon/lm95241.c: In function "set_type1": drivers/hwmon/lm95241.c:219:1: warning: ignoring return value of "strict_strtol", declared with attribute warn_unused_result This also fixes a small race in set_interval() as a side effect: by working with a temporary local variable we prevent data->interval from being accessed at a time it contains the interval value in the wrong unit. 
Signed-off-by: Jean Delvare Cc: Davide Rizzo --- drivers/hwmon/lm95241.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/lm95241.c b/drivers/hwmon/lm95241.c index 464340f25496..4546d82f024a 100644 --- a/drivers/hwmon/lm95241.c +++ b/drivers/hwmon/lm95241.c @@ -128,9 +128,12 @@ static ssize_t set_interval(struct device *dev, struct device_attribute *attr, { struct i2c_client *client = to_i2c_client(dev); struct lm95241_data *data = i2c_get_clientdata(client); + unsigned long val; - strict_strtol(buf, 10, &data->interval); - data->interval = data->interval * HZ / 1000; + if (strict_strtoul(buf, 10, &val) < 0) + return -EINVAL; + + data->interval = val * HZ / 1000; return count; } @@ -188,7 +191,9 @@ static ssize_t set_type##flag(struct device *dev, \ struct lm95241_data *data = i2c_get_clientdata(client); \ \ long val; \ - strict_strtol(buf, 10, &val); \ +\ + if (strict_strtol(buf, 10, &val) < 0) \ + return -EINVAL; \ \ if ((val == 1) || (val == 2)) { \ \ @@ -227,7 +232,9 @@ static ssize_t set_min##flag(struct device *dev, \ struct lm95241_data *data = i2c_get_clientdata(client); \ \ long val; \ - strict_strtol(buf, 10, &val); \ +\ + if (strict_strtol(buf, 10, &val) < 0) \ + return -EINVAL;\ \ mutex_lock(&data->update_lock); \ \ @@ -256,7 +263,9 @@ static ssize_t set_max##flag(struct device *dev, \ struct lm95241_data *data = i2c_get_clientdata(client); \ \ long val; \ - strict_strtol(buf, 10, &val); \ +\ + if (strict_strtol(buf, 10, &val) < 0) \ + return -EINVAL; \ \ mutex_lock(&data->update_lock); \ \ From ae51cd9bcd9ca841bf45c0ba33823c56ac1ce81e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 21:38:56 +0100 Subject: [PATCH 50/69] hwmon: (w83795) Fix fan control mode attributes There were two bugs: * Speed cruise mode was improperly reported for all fans but fan1. * Fan control method (PWM vs. DC) was mixed with the control mode. 
It will be added back as a separate attribute, as per the standard sysfs interface. Signed-off-by: Jean Delvare Acked-by: Guenter Roeck --- drivers/hwmon/w83795.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 1d840aa83782..fd96e72adde0 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -857,20 +857,20 @@ show_pwm_enable(struct device *dev, struct device_attribute *attr, char *buf) int index = sensor_attr->index; u8 tmp; - if (1 == (data->pwm_fcms[0] & (1 << index))) { + /* Speed cruise mode */ + if (data->pwm_fcms[0] & (1 << index)) { tmp = 2; goto out; } + /* Thermal cruise or SmartFan IV mode */ for (tmp = 0; tmp < 6; tmp++) { if (data->pwm_tfmr[tmp] & (1 << index)) { tmp = 3; goto out; } } - if (data->pwm_fomc & (1 << index)) - tmp = 0; - else - tmp = 1; + /* Manual mode */ + tmp = 1; out: return sprintf(buf, "%u\n", tmp); @@ -890,23 +890,21 @@ store_pwm_enable(struct device *dev, struct device_attribute *attr, if (strict_strtoul(buf, 10, &val) < 0) return -EINVAL; - if (val > 2) + if (val < 1 || val > 2) return -EINVAL; mutex_lock(&data->update_lock); switch (val) { - case 0: case 1: + /* Clear speed cruise mode bits */ data->pwm_fcms[0] &= ~(1 << index); w83795_write(client, W83795_REG_FCMS1, data->pwm_fcms[0]); + /* Clear thermal cruise mode bits */ for (i = 0; i < 6; i++) { data->pwm_tfmr[i] &= ~(1 << index); w83795_write(client, W83795_REG_TFMR(i), data->pwm_tfmr[i]); } - data->pwm_fomc |= 1 << index; - data->pwm_fomc ^= val << index; - w83795_write(client, W83795_REG_FOMC, data->pwm_fomc); break; case 2: data->pwm_fcms[0] |= (1 << index); From d5ab845a13de7ff2d195917dad8879acfb6d8ff9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 21:38:56 +0100 Subject: [PATCH 51/69] hwmon: (w83795) Expose fan control method Expose fan control method (DC vs. PWM) using the standard sysfs attributes. 
I've made it read-only as the board should be wired for a given mode, the BIOS should have set up the chip for this mode, and you shouldn't have to change it. But it would be easy enough to make it changeable if someone comes up with a use case. Signed-off-by: Jean Delvare Acked-by: Guenter Roeck --- drivers/hwmon/w83795.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index fd96e72adde0..95b1f860c14e 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -915,6 +915,21 @@ store_pwm_enable(struct device *dev, struct device_attribute *attr, return count; } +static ssize_t +show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct w83795_data *data = w83795_update_pwm_config(dev); + int index = to_sensor_dev_attr_2(attr)->index; + unsigned int mode; + + if (data->pwm_fomc & (1 << index)) + mode = 0; /* DC */ + else + mode = 1; /* PWM */ + + return sprintf(buf, "%u\n", mode); +} + static ssize_t show_temp_src(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1551,6 +1566,8 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, show_pwm, store_pwm, PWM_FREQ, index - 1), \ SENSOR_ATTR_2(pwm##index##_enable, S_IWUSR | S_IRUGO, \ show_pwm_enable, store_pwm_enable, NOT_USED, index - 1), \ + SENSOR_ATTR_2(pwm##index##_mode, S_IRUGO, \ + show_pwm_mode, NULL, NOT_USED, index - 1), \ SENSOR_ATTR_2(fan##index##_target, S_IWUSR | S_IRUGO, \ show_fanin, store_fanin, FANIN_TARGET, index - 1) } @@ -1698,7 +1715,7 @@ static const struct sensor_device_attribute_2 w83795_dts[][8] = { SENSOR_ATTR_DTS(14), }; -static const struct sensor_device_attribute_2 w83795_pwm[][7] = { +static const struct sensor_device_attribute_2 w83795_pwm[][8] = { SENSOR_ATTR_PWM(1), SENSOR_ATTR_PWM(2), SENSOR_ATTR_PWM(3), From edff2f8d81ce976ad6895f1d649fcb164be80e3d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 21:38:56 +0100 
Subject: [PATCH 52/69] hwmon: (w83795) List all usable temperature sources Temperature sources are not correlated directly with temperature channels. A look-up table is required to find out which temperature sources can be used depending on which temperature channels (both analog and digital) are enabled. Signed-off-by: Jean Delvare Acked-by: Guenter Roeck --- drivers/hwmon/w83795.c | 57 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 95b1f860c14e..c941d3eb249e 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -178,6 +178,14 @@ static const u8 IN_LSB_SHIFT_IDX[][2] = { #define W83795_REG_TSS(index) (0x209 + (index)) +#define TSS_MAP_RESERVED 0xff +static const u8 tss_map[4][6] = { + { 0, 1, 2, 3, 4, 5}, + { 6, 7, 8, 9, 0, 1}, + {10, 11, 12, 13, 2, 3}, + { 4, 5, 4, 5, TSS_MAP_RESERVED, TSS_MAP_RESERVED}, +}; + #define PWM_OUTPUT 0 #define PWM_FREQ 1 #define PWM_START 2 @@ -930,6 +938,27 @@ show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf) return sprintf(buf, "%u\n", mode); } +/* + * Check whether a given temperature source can ever be useful. + * Returns the number of selectable temperature channels which are + * enabled. + */ +static int w83795_tss_useful(const struct w83795_data *data, int tsrc) +{ + int useful = 0, i; + + for (i = 0; i < 4; i++) { + if (tss_map[i][tsrc] == TSS_MAP_RESERVED) + continue; + if (tss_map[i][tsrc] < 6) /* Analog */ + useful += (data->has_temp >> tss_map[i][tsrc]) & 1; + else /* Digital */ + useful += (data->has_dts >> (tss_map[i][tsrc] - 6)) & 1; + } + + return useful; +} + static ssize_t show_temp_src(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1608,8 +1637,6 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, SENSOR_ATTR_2(temp##index##_beep, S_IWUSR | S_IRUGO, \ show_alarm_beep, store_beep, BEEP_ENABLE, \ index + (index > 4 ? 
11 : 17)), \ - SENSOR_ATTR_2(temp##index##_source_sel, S_IWUSR | S_IRUGO, \ - show_temp_src, store_temp_src, NOT_USED, index - 1), \ SENSOR_ATTR_2(temp##index##_pwm_enable, S_IWUSR | S_IRUGO, \ show_temp_pwm_enable, store_temp_pwm_enable, \ TEMP_PWM_ENABLE, index - 1), \ @@ -1695,7 +1722,7 @@ static const struct sensor_device_attribute_2 w83795_fan[][4] = { SENSOR_ATTR_FAN(14), }; -static const struct sensor_device_attribute_2 w83795_temp[][29] = { +static const struct sensor_device_attribute_2 w83795_temp[][28] = { SENSOR_ATTR_TEMP(1), SENSOR_ATTR_TEMP(2), SENSOR_ATTR_TEMP(3), @@ -1726,6 +1753,21 @@ static const struct sensor_device_attribute_2 w83795_pwm[][8] = { SENSOR_ATTR_PWM(8), }; +static const struct sensor_device_attribute_2 w83795_tss[6] = { + SENSOR_ATTR_2(temp1_source_sel, S_IWUSR | S_IRUGO, + show_temp_src, store_temp_src, NOT_USED, 0), + SENSOR_ATTR_2(temp2_source_sel, S_IWUSR | S_IRUGO, + show_temp_src, store_temp_src, NOT_USED, 1), + SENSOR_ATTR_2(temp3_source_sel, S_IWUSR | S_IRUGO, + show_temp_src, store_temp_src, NOT_USED, 2), + SENSOR_ATTR_2(temp4_source_sel, S_IWUSR | S_IRUGO, + show_temp_src, store_temp_src, NOT_USED, 3), + SENSOR_ATTR_2(temp5_source_sel, S_IWUSR | S_IRUGO, + show_temp_src, store_temp_src, NOT_USED, 4), + SENSOR_ATTR_2(temp6_source_sel, S_IWUSR | S_IRUGO, + show_temp_src, store_temp_src, NOT_USED, 5), +}; + static const struct sensor_device_attribute_2 sda_single_files[] = { SENSOR_ATTR_2(intrusion0_alarm, S_IWUSR | S_IRUGO, show_alarm_beep, store_chassis_clear, ALARM_STATUS, 46), @@ -1890,6 +1932,15 @@ static int w83795_handle_files(struct device *dev, int (*fn)(struct device *, } } + for (i = 0; i < ARRAY_SIZE(w83795_tss); i++) { + j = w83795_tss_useful(data, i); + if (!j) + continue; + err = fn(dev, &w83795_tss[i].dev_attr); + if (err) + return err; + } + for (i = 0; i < ARRAY_SIZE(sda_single_files); i++) { err = fn(dev, &sda_single_files[i].dev_attr); if (err) From 2a2d27da00250c9f117e35653ed5a6a3212e5d77 Mon Sep 17 
00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 21:38:56 +0100 Subject: [PATCH 53/69] hwmon: (w83795) Print the actual temperature channels as sources Don't expose raw register values to user-space. Decode and encode temperature channels selected as temperature sources as needed. Signed-off-by: Jean Delvare Acked-by: Guenter Roeck --- drivers/hwmon/w83795.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index c941d3eb249e..400558d97f3d 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -966,17 +966,18 @@ show_temp_src(struct device *dev, struct device_attribute *attr, char *buf) to_sensor_dev_attr_2(attr); struct w83795_data *data = w83795_update_pwm_config(dev); int index = sensor_attr->index; - u8 val = index / 2; - u8 tmp = data->temp_src[val]; + u8 tmp = data->temp_src[index / 2]; if (index & 1) - val = 4; + tmp >>= 4; /* Pick high nibble */ else - val = 0; - tmp >>= val; - tmp &= 0x0f; + tmp &= 0x0f; /* Pick low nibble */ - return sprintf(buf, "%u\n", tmp); + /* Look-up the actual temperature channel number */ + if (tmp >= 4 || tss_map[tmp][index] == TSS_MAP_RESERVED) + return -EINVAL; /* Shouldn't happen */ + + return sprintf(buf, "%u\n", (unsigned int)tss_map[tmp][index] + 1); } static ssize_t @@ -988,12 +989,21 @@ store_temp_src(struct device *dev, struct device_attribute *attr, struct sensor_device_attribute_2 *sensor_attr = to_sensor_dev_attr_2(attr); int index = sensor_attr->index; - unsigned long tmp; + int tmp; + unsigned long channel; u8 val = index / 2; - if (strict_strtoul(buf, 10, &tmp) < 0) + if (strict_strtoul(buf, 10, &channel) < 0 || + channel < 1 || channel > 14) + return -EINVAL; + + /* Check if request can be fulfilled */ + for (tmp = 0; tmp < 4; tmp++) { + if (tss_map[tmp][index] == channel - 1) + break; + } + if (tmp == 4) /* No match */ return -EINVAL; - tmp = SENSORS_LIMIT(tmp, 0, 15); 
mutex_lock(&data->update_lock); if (index & 1) { From cf6b9ea661ef4f20b4a4cba1a232a732339aae2c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 21:38:56 +0100 Subject: [PATCH 54/69] hwmon: (w83795) Read the intrusion state properly We can't read the intrusion state from the real-time alarm registers as we do for all other alarm flags, because real-time alarm bits don't stick (by definition) and the intrusion state has to stick until explicitly cleared (otherwise it has little value.) So we have to use the interrupt status register instead, which is read from the same address but with a configuration bit flipped in another register. Signed-off-by: Jean Delvare Acked-by: Guenter Roeck --- drivers/hwmon/w83795.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 400558d97f3d..600b2adbbd49 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -165,10 +165,11 @@ static const u8 IN_LSB_SHIFT_IDX[][2] = { #define W83795_REG_VID_CTRL 0x6A +#define W83795_REG_ALARM_CTRL 0x40 +#define ALARM_CTRL_RTSACS (1 << 7) #define W83795_REG_ALARM(index) (0x41 + (index)) -#define W83795_REG_BEEP(index) (0x50 + (index)) - #define W83795_REG_CLR_CHASSIS 0x4D +#define W83795_REG_BEEP(index) (0x50 + (index)) #define W83795_REG_FCMS1 0x201 @@ -585,6 +586,7 @@ static struct w83795_data *w83795_update_device(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct w83795_data *data = i2c_get_clientdata(client); u16 tmp; + u8 intrusion; int i; mutex_lock(&data->update_lock); @@ -656,9 +658,24 @@ static struct w83795_data *w83795_update_device(struct device *dev) w83795_read(client, W83795_REG_PWM(i, PWM_OUTPUT)); } - /* update alarm */ + /* Update intrusion and alarms + * It is important to read intrusion first, because reading from + * register SMI STS6 clears the interrupt status temporarily. 
*/ + tmp = w83795_read(client, W83795_REG_ALARM_CTRL); + /* Switch to interrupt status for intrusion if needed */ + if (tmp & ALARM_CTRL_RTSACS) + w83795_write(client, W83795_REG_ALARM_CTRL, + tmp & ~ALARM_CTRL_RTSACS); + intrusion = w83795_read(client, W83795_REG_ALARM(5)) & (1 << 6); + /* Switch to real-time alarms */ + w83795_write(client, W83795_REG_ALARM_CTRL, tmp | ALARM_CTRL_RTSACS); for (i = 0; i < ARRAY_SIZE(data->alarms); i++) data->alarms[i] = w83795_read(client, W83795_REG_ALARM(i)); + data->alarms[5] |= intrusion; + /* Restore original configuration if needed */ + if (!(tmp & ALARM_CTRL_RTSACS)) + w83795_write(client, W83795_REG_ALARM_CTRL, + tmp & ~ALARM_CTRL_RTSACS); data->last_updated = jiffies; data->valid = 1; From 793c51d5fdfa76043f1221fdaa022f50146e8386 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 21:38:57 +0100 Subject: [PATCH 55/69] hwmon: (w83795) Clear intrusion alarm immediately When asked to clear the intrusion alarm, do so immediately. We have to invalidate the cache to make sure the new status will be read. But we also have to read from the status register once to clear the pending alarm, as writing to CLR_CHS surprisingly won't clear it automatically.
Signed-off-by: Jean Delvare Acked-by: Guenter Roeck --- drivers/hwmon/w83795.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 600b2adbbd49..90f4ffb357b7 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -755,6 +755,10 @@ store_chassis_clear(struct device *dev, val = w83795_read(client, W83795_REG_CLR_CHASSIS); val |= 0x80; w83795_write(client, W83795_REG_CLR_CHASSIS, val); + + /* Clear status and force cache refresh */ + w83795_read(client, W83795_REG_ALARM(5)); + data->valid = 0; mutex_unlock(&data->update_lock); return count; } From 52d159eecced3e4ead901e2a14347f5c11ea4bab Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 21:38:57 +0100 Subject: [PATCH 56/69] hwmon: (w83795) Check for BEEP pin availability On the W83795ADG, there's a single pin for BEEP and OVT#, so you can't have both. Check the configuration and don't create beep attributes when BEEP pin is not available. The W83795G has a dedicated BEEP pin so the functionality is always available there. 
Signed-off-by: Jean Delvare Acked-by: Guenter Roeck --- drivers/hwmon/w83795.c | 58 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 90f4ffb357b7..cdbc7448491e 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -171,6 +171,9 @@ static const u8 IN_LSB_SHIFT_IDX[][2] = { #define W83795_REG_CLR_CHASSIS 0x4D #define W83795_REG_BEEP(index) (0x50 + (index)) +#define W83795_REG_OVT_CFG 0x58 +#define OVT_CFG_SEL (1 << 7) + #define W83795_REG_FCMS1 0x201 #define W83795_REG_FCMS2 0x208 @@ -378,6 +381,7 @@ struct w83795_data { u8 setup_pwm[3]; /* Register value */ u8 alarms[6]; /* Register value */ + u8 enable_beep; u8 beeps[6]; /* Register value */ char valid; @@ -508,8 +512,11 @@ static void w83795_update_limits(struct i2c_client *client) } /* Read beep settings */ - for (i = 0; i < ARRAY_SIZE(data->beeps); i++) - data->beeps[i] = w83795_read(client, W83795_REG_BEEP(i)); + if (data->enable_beep) { + for (i = 0; i < ARRAY_SIZE(data->beeps); i++) + data->beeps[i] = + w83795_read(client, W83795_REG_BEEP(i)); + } data->valid_limits = 1; } @@ -1588,7 +1595,7 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, #define NOT_USED -1 -/* Don't change the attribute order, _max and _min are accessed by index +/* Don't change the attribute order, _max, _min and _beep are accessed by index * somewhere else in the code */ #define SENSOR_ATTR_IN(index) { \ SENSOR_ATTR_2(in##index##_input, S_IRUGO, show_in, NULL, \ @@ -1603,6 +1610,8 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, show_alarm_beep, store_beep, BEEP_ENABLE, \ index + ((index > 14) ? 
1 : 0)) } +/* Don't change the attribute order, _beep is accessed by index + * somewhere else in the code */ #define SENSOR_ATTR_FAN(index) { \ SENSOR_ATTR_2(fan##index##_input, S_IRUGO, show_fan, \ NULL, FAN_INPUT, index - 1), \ @@ -1631,6 +1640,8 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, SENSOR_ATTR_2(fan##index##_target, S_IWUSR | S_IRUGO, \ show_fanin, store_fanin, FANIN_TARGET, index - 1) } +/* Don't change the attribute order, _beep is accessed by index + * somewhere else in the code */ #define SENSOR_ATTR_DTS(index) { \ SENSOR_ATTR_2(temp##index##_type, S_IRUGO , \ show_dts_mode, NULL, NOT_USED, index - 7), \ @@ -1649,6 +1660,8 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, SENSOR_ATTR_2(temp##index##_beep, S_IWUSR | S_IRUGO, \ show_alarm_beep, store_beep, BEEP_ENABLE, index + 17) } +/* Don't change the attribute order, _beep is accessed by index + * somewhere else in the code */ #define SENSOR_ATTR_TEMP(index) { \ SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 4 ? 
S_IWUSR : 0), \ show_temp_mode, store_temp_mode, NOT_USED, index - 1), \ @@ -1802,10 +1815,6 @@ static const struct sensor_device_attribute_2 w83795_tss[6] = { static const struct sensor_device_attribute_2 sda_single_files[] = { SENSOR_ATTR_2(intrusion0_alarm, S_IWUSR | S_IRUGO, show_alarm_beep, store_chassis_clear, ALARM_STATUS, 46), - SENSOR_ATTR_2(intrusion0_beep, S_IWUSR | S_IRUGO, show_alarm_beep, - store_beep, BEEP_ENABLE, 46), - SENSOR_ATTR_2(beep_enable, S_IWUSR | S_IRUGO, show_alarm_beep, - store_beep, BEEP_ENABLE, 47), #ifdef CONFIG_SENSORS_W83795_FANCTRL SENSOR_ATTR_2(speed_cruise_tolerance, S_IWUSR | S_IRUGO, show_fanin, store_fanin, FANIN_TOL, NOT_USED), @@ -1818,6 +1827,13 @@ static const struct sensor_device_attribute_2 sda_single_files[] = { #endif }; +static const struct sensor_device_attribute_2 sda_beep_files[] = { + SENSOR_ATTR_2(intrusion0_beep, S_IWUSR | S_IRUGO, show_alarm_beep, + store_beep, BEEP_ENABLE, 46), + SENSOR_ATTR_2(beep_enable, S_IWUSR | S_IRUGO, show_alarm_beep, + store_beep, BEEP_ENABLE, 47), +}; + /* * Driver interface */ @@ -1947,6 +1963,8 @@ static int w83795_handle_files(struct device *dev, int (*fn)(struct device *, if (!(data->has_in & (1 << i))) continue; for (j = 0; j < ARRAY_SIZE(w83795_in[0]); j++) { + if (j == 4 && !data->enable_beep) + continue; err = fn(dev, &w83795_in[i][j].dev_attr); if (err) return err; @@ -1957,6 +1975,8 @@ static int w83795_handle_files(struct device *dev, int (*fn)(struct device *, if (!(data->has_fan & (1 << i))) continue; for (j = 0; j < ARRAY_SIZE(w83795_fan[0]); j++) { + if (j == 3 && !data->enable_beep) + continue; err = fn(dev, &w83795_fan[i][j].dev_attr); if (err) return err; @@ -1978,6 +1998,14 @@ static int w83795_handle_files(struct device *dev, int (*fn)(struct device *, return err; } + if (data->enable_beep) { + for (i = 0; i < ARRAY_SIZE(sda_beep_files); i++) { + err = fn(dev, &sda_beep_files[i].dev_attr); + if (err) + return err; + } + } + #ifdef CONFIG_SENSORS_W83795_FANCTRL for 
(i = 0; i < data->has_pwm; i++) { for (j = 0; j < ARRAY_SIZE(w83795_pwm[0]); j++) { @@ -1996,6 +2024,8 @@ static int w83795_handle_files(struct device *dev, int (*fn)(struct device *, #else for (j = 0; j < 8; j++) { #endif + if (j == 7 && !data->enable_beep) + continue; err = fn(dev, &w83795_temp[i][j].dev_attr); if (err) return err; @@ -2007,6 +2037,8 @@ static int w83795_handle_files(struct device *dev, int (*fn)(struct device *, if (!(data->has_dts & (1 << i))) continue; for (j = 0; j < ARRAY_SIZE(w83795_dts[0]); j++) { + if (j == 7 && !data->enable_beep) + continue; err = fn(dev, &w83795_dts[i][j].dev_attr); if (err) return err; @@ -2146,6 +2178,18 @@ static int w83795_probe(struct i2c_client *client, else data->has_pwm = 2; + /* Check if BEEP pin is available */ + if (data->chip_type == w83795g) { + /* The W83795G has a dedicated BEEP pin */ + data->enable_beep = 1; + } else { + /* The W83795ADG has a shared pin for OVT# and BEEP, so you + * can't have both */ + tmp = w83795_read(client, W83795_REG_OVT_CFG); + if ((tmp & OVT_CFG_SEL) == 0) + data->enable_beep = 1; + } + err = w83795_handle_files(dev, device_create_file); if (err) goto exit_remove; From 22d3243de86bc92d874abb7c5b185d5c47aba323 Mon Sep 17 00:00:00 2001 From: Jim Bos Date: Mon, 15 Nov 2010 21:22:37 +0100 Subject: [PATCH 57/69] Fix gcc 4.5.1 miscompiling drivers/char/i8k.c (again) The fix in commit 6b4e81db2552 ("i8k: Tell gcc that *regs gets clobbered") to work around the gcc miscompiling i8k.c to add "+m (*regs)" caused register pressure problems and a build failure. Changing the 'asm' statement to 'asm volatile' instead should prevent that and works around the gcc bug as well, so we can remove the "+m". [ Background on the gcc bug: a memory clobber fails to mark the function the asm resides in as non-pure (aka "__attribute__((const))"), so if the function does nothing else that triggers the non-pure logic, gcc will think that that function has no side effects at all. 
As a result, callers will be mis-compiled. Adding the "+m" made gcc see that it's not a pure function, and so does "asm volatile". The problem was never really the need to mark "*regs" as changed, since the memory clobber did that part - the problem was just a bug in the gcc "pure" function analysis - Linus ] Signed-off-by: Jim Bos Acked-by: Jakub Jelinek Cc: Andi Kleen Cc: Andreas Schwab Signed-off-by: Linus Torvalds --- drivers/char/i8k.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index f0863bec186f..d72433f2d310 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -120,7 +120,7 @@ static int i8k_smm(struct smm_regs *regs) int eax = regs->eax; #if defined(CONFIG_X86_64) - asm("pushq %%rax\n\t" + asm volatile("pushq %%rax\n\t" "movl 0(%%rax),%%edx\n\t" "pushq %%rdx\n\t" "movl 4(%%rax),%%ebx\n\t" @@ -142,11 +142,11 @@ static int i8k_smm(struct smm_regs *regs) "lahf\n\t" "shrl $8,%%eax\n\t" "andl $1,%%eax\n" - :"=a"(rc), "+m" (*regs) + :"=a"(rc) : "a"(regs) : "%ebx", "%ecx", "%edx", "%esi", "%edi", "memory"); #else - asm("pushl %%eax\n\t" + asm volatile("pushl %%eax\n\t" "movl 0(%%eax),%%edx\n\t" "push %%edx\n\t" "movl 4(%%eax),%%ebx\n\t" @@ -168,7 +168,7 @@ static int i8k_smm(struct smm_regs *regs) "lahf\n\t" "shrl $8,%%eax\n\t" "andl $1,%%eax\n" - :"=a"(rc), "+m" (*regs) + :"=a"(rc) : "a"(regs) : "%ebx", "%ecx", "%edx", "%esi", "%edi", "memory"); #endif From 968ab1838a5d48f02f5b471aa1d0e59e2cc2ccbc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 15 Nov 2010 13:37:37 -0800 Subject: [PATCH 58/69] include/linux/kernel.h: Move logging bits to include/linux/printk.h Move the logging bits from kernel.h into printk.h so that there is a bit more logical separation of the generic from the printk logging specific parts. 
Signed-off-by: Joe Perches Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 245 +--------------------------------------- include/linux/printk.h | 248 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+), 244 deletions(-) create mode 100644 include/linux/printk.h diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fc3da9e4da19..b6de9a6f7018 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -17,13 +17,11 @@ #include #include #include +#include #include #include #include -extern const char linux_banner[]; -extern const char linux_proc_banner[]; - #define USHRT_MAX ((u16)(~0U)) #define SHRT_MAX ((s16)(USHRT_MAX>>1)) #define SHRT_MIN ((s16)(-SHRT_MAX - 1)) @@ -110,31 +108,6 @@ extern const char linux_proc_banner[]; */ #define lower_32_bits(n) ((u32)(n)) -#define KERN_EMERG "<0>" /* system is unusable */ -#define KERN_ALERT "<1>" /* action must be taken immediately */ -#define KERN_CRIT "<2>" /* critical conditions */ -#define KERN_ERR "<3>" /* error conditions */ -#define KERN_WARNING "<4>" /* warning conditions */ -#define KERN_NOTICE "<5>" /* normal but significant condition */ -#define KERN_INFO "<6>" /* informational */ -#define KERN_DEBUG "<7>" /* debug-level messages */ - -/* Use the default kernel loglevel */ -#define KERN_DEFAULT "" -/* - * Annotation for a "continued" line of log printout (only done after a - * line that had no enclosing \n). Only to be used by core/arch code - * during early bootup (a continued line is not SMP-safe otherwise). 
- */ -#define KERN_CONT "" - -extern int console_printk[]; - -#define console_loglevel (console_printk[0]) -#define default_message_loglevel (console_printk[1]) -#define minimum_console_loglevel (console_printk[2]) -#define default_console_loglevel (console_printk[3]) - struct completion; struct pt_regs; struct user; @@ -187,11 +160,6 @@ static inline void might_fault(void) } #endif -struct va_format { - const char *fmt; - va_list *va; -}; - extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(int state); NORET_TYPE void panic(const char * fmt, ...) @@ -245,115 +213,8 @@ extern int func_ptr_is_kernel_text(void *ptr); struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); -/* - * FW_BUG - * Add this to a message where you are sure the firmware is buggy or behaves - * really stupid or out of spec. Be aware that the responsible BIOS developer - * should be able to fix this issue or at least get a concrete idea of the - * problem by reading your message without the need of looking at the kernel - * code. - * - * Use it for definite and high priority BIOS bugs. - * - * FW_WARN - * Use it for not that clear (e.g. could the kernel messed up things already?) - * and medium priority BIOS bugs. - * - * FW_INFO - * Use this one if you want to tell the user or vendor about something - * suspicious, but generally harmless related to the firmware. - * - * Use it for information or very low priority BIOS bugs. - */ -#define FW_BUG "[Firmware Bug]: " -#define FW_WARN "[Firmware Warn]: " -#define FW_INFO "[Firmware Info]: " - -/* - * HW_ERR - * Add this to a message for hardware errors, so that user can report - * it to hardware vendor instead of LKML or software vendor. - */ -#define HW_ERR "[Hardware Error]: " - -#ifdef CONFIG_PRINTK -asmlinkage int vprintk(const char *fmt, va_list args) - __attribute__ ((format (printf, 1, 0))); -asmlinkage int printk(const char * fmt, ...) 
- __attribute__ ((format (printf, 1, 2))) __cold; - -/* - * Please don't use printk_ratelimit(), because it shares ratelimiting state - * with all other unrelated printk_ratelimit() callsites. Instead use - * printk_ratelimited() or plain old __ratelimit(). - */ -extern int __printk_ratelimit(const char *func); -#define printk_ratelimit() __printk_ratelimit(__func__) -extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, - unsigned int interval_msec); - -extern int printk_delay_msec; -extern int dmesg_restrict; - -/* - * Print a one-time message (analogous to WARN_ONCE() et al): - */ -#define printk_once(x...) ({ \ - static bool __print_once; \ - \ - if (!__print_once) { \ - __print_once = true; \ - printk(x); \ - } \ -}) - -void log_buf_kexec_setup(void); -#else -static inline int vprintk(const char *s, va_list args) - __attribute__ ((format (printf, 1, 0))); -static inline int vprintk(const char *s, va_list args) { return 0; } -static inline int printk(const char *s, ...) - __attribute__ ((format (printf, 1, 2))); -static inline int __cold printk(const char *s, ...) { return 0; } -static inline int printk_ratelimit(void) { return 0; } -static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ - unsigned int interval_msec) \ - { return false; } - -/* No effect, but we still get type checking even in the !PRINTK case: */ -#define printk_once(x...) printk(x) - -static inline void log_buf_kexec_setup(void) -{ -} -#endif - -/* - * Dummy printk for disabled debugging statements to use whilst maintaining - * gcc's format and side-effect checking. - */ -static inline __attribute__ ((format (printf, 1, 2))) -int no_printk(const char *s, ...) 
{ return 0; } - -extern int printk_needs_cpu(int cpu); -extern void printk_tick(void); - -extern void asmlinkage __attribute__((format(printf, 1, 2))) - early_printk(const char *fmt, ...); - unsigned long int_sqrt(unsigned long); -static inline void console_silent(void) -{ - console_loglevel = 0; -} - -static inline void console_verbose(void) -{ - if (console_loglevel) - console_loglevel = 15; -} - extern void bust_spinlocks(int yes); extern void wake_up_klogd(void); extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ @@ -390,22 +251,6 @@ extern enum system_states { #define TAINT_CRAP 10 #define TAINT_FIRMWARE_WORKAROUND 11 -extern void dump_stack(void) __cold; - -enum { - DUMP_PREFIX_NONE, - DUMP_PREFIX_ADDRESS, - DUMP_PREFIX_OFFSET -}; -extern void hex_dump_to_buffer(const void *buf, size_t len, - int rowsize, int groupsize, - char *linebuf, size_t linebuflen, bool ascii); -extern void print_hex_dump(const char *level, const char *prefix_str, - int prefix_type, int rowsize, int groupsize, - const void *buf, size_t len, bool ascii); -extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, - const void *buf, size_t len); - extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] #define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] @@ -419,94 +264,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte) extern int hex_to_bin(char ch); -#ifndef pr_fmt -#define pr_fmt(fmt) fmt -#endif - -#define pr_emerg(fmt, ...) \ - printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) -#define pr_alert(fmt, ...) \ - printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) -#define pr_crit(fmt, ...) \ - printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) -#define pr_err(fmt, ...) \ - printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) -#define pr_warning(fmt, ...) \ - printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) -#define pr_warn pr_warning -#define pr_notice(fmt, ...) 
\ - printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) -#define pr_info(fmt, ...) \ - printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -#define pr_cont(fmt, ...) \ - printk(KERN_CONT fmt, ##__VA_ARGS__) - -/* pr_devel() should produce zero code unless DEBUG is defined */ -#ifdef DEBUG -#define pr_devel(fmt, ...) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) -#else -#define pr_devel(fmt, ...) \ - ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) -#endif - -/* If you are writing a driver, please use dev_dbg instead */ -#if defined(DEBUG) -#define pr_debug(fmt, ...) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) -#elif defined(CONFIG_DYNAMIC_DEBUG) -/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */ -#define pr_debug(fmt, ...) \ - dynamic_pr_debug(fmt, ##__VA_ARGS__) -#else -#define pr_debug(fmt, ...) \ - ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) -#endif - -/* - * ratelimited messages with local ratelimit_state, - * no local ratelimit_state used in the !PRINTK case - */ -#ifdef CONFIG_PRINTK -#define printk_ratelimited(fmt, ...) ({ \ - static DEFINE_RATELIMIT_STATE(_rs, \ - DEFAULT_RATELIMIT_INTERVAL, \ - DEFAULT_RATELIMIT_BURST); \ - \ - if (__ratelimit(&_rs)) \ - printk(fmt, ##__VA_ARGS__); \ -}) -#else -/* No effect, but we still get type checking even in the !PRINTK case: */ -#define printk_ratelimited printk -#endif - -#define pr_emerg_ratelimited(fmt, ...) \ - printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) -#define pr_alert_ratelimited(fmt, ...) \ - printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) -#define pr_crit_ratelimited(fmt, ...) \ - printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) -#define pr_err_ratelimited(fmt, ...) \ - printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) -#define pr_warning_ratelimited(fmt, ...) \ - printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) -#define pr_warn_ratelimited pr_warning_ratelimited -#define pr_notice_ratelimited(fmt, ...) 
\ - printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) -#define pr_info_ratelimited(fmt, ...) \ - printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -/* no pr_cont_ratelimited, don't do that... */ -/* If you are writing a driver, please use dev_dbg instead */ -#if defined(DEBUG) -#define pr_debug_ratelimited(fmt, ...) \ - printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) -#else -#define pr_debug_ratelimited(fmt, ...) \ - ({ if (0) printk_ratelimited(KERN_DEBUG pr_fmt(fmt), \ - ##__VA_ARGS__); 0; }) -#endif - /* * General tracing related utility functions - trace_printk(), * tracing_on/tracing_off and tracing_start()/tracing_stop diff --git a/include/linux/printk.h b/include/linux/printk.h new file mode 100644 index 000000000000..b772ca5fbdf0 --- /dev/null +++ b/include/linux/printk.h @@ -0,0 +1,248 @@ +#ifndef __KERNEL_PRINTK__ +#define __KERNEL_PRINTK__ + +extern const char linux_banner[]; +extern const char linux_proc_banner[]; + +#define KERN_EMERG "<0>" /* system is unusable */ +#define KERN_ALERT "<1>" /* action must be taken immediately */ +#define KERN_CRIT "<2>" /* critical conditions */ +#define KERN_ERR "<3>" /* error conditions */ +#define KERN_WARNING "<4>" /* warning conditions */ +#define KERN_NOTICE "<5>" /* normal but significant condition */ +#define KERN_INFO "<6>" /* informational */ +#define KERN_DEBUG "<7>" /* debug-level messages */ + +/* Use the default kernel loglevel */ +#define KERN_DEFAULT "<d>" +/* + * Annotation for a "continued" line of log printout (only done after a + * line that had no enclosing \n). Only to be used by core/arch code + * during early bootup (a continued line is not SMP-safe otherwise).
+ */ +#define KERN_CONT "<c>" + +extern int console_printk[]; + +#define console_loglevel (console_printk[0]) +#define default_message_loglevel (console_printk[1]) +#define minimum_console_loglevel (console_printk[2]) +#define default_console_loglevel (console_printk[3]) + +struct va_format { + const char *fmt; + va_list *va; +}; + +/* + * FW_BUG + * Add this to a message where you are sure the firmware is buggy or behaves + * really stupid or out of spec. Be aware that the responsible BIOS developer + * should be able to fix this issue or at least get a concrete idea of the + * problem by reading your message without the need of looking at the kernel + * code. + * + * Use it for definite and high priority BIOS bugs. + * + * FW_WARN + * Use it for not that clear (e.g. could the kernel messed up things already?) + * and medium priority BIOS bugs. + * + * FW_INFO + * Use this one if you want to tell the user or vendor about something + * suspicious, but generally harmless related to the firmware. + * + * Use it for information or very low priority BIOS bugs. + */ +#define FW_BUG "[Firmware Bug]: " +#define FW_WARN "[Firmware Warn]: " +#define FW_INFO "[Firmware Info]: " + +/* + * HW_ERR + * Add this to a message for hardware errors, so that user can report + * it to hardware vendor instead of LKML or software vendor. + */ +#define HW_ERR "[Hardware Error]: " + +#ifdef CONFIG_PRINTK +asmlinkage int vprintk(const char *fmt, va_list args) + __attribute__ ((format (printf, 1, 0))); +asmlinkage int printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))) __cold; + +/* + * Please don't use printk_ratelimit(), because it shares ratelimiting state + * with all other unrelated printk_ratelimit() callsites. Instead use + * printk_ratelimited() or plain old __ratelimit().
+ */ +extern int __printk_ratelimit(const char *func); +#define printk_ratelimit() __printk_ratelimit(__func__) +extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, + unsigned int interval_msec); + +extern int printk_delay_msec; +extern int dmesg_restrict; + +/* + * Print a one-time message (analogous to WARN_ONCE() et al): + */ +#define printk_once(x...) ({ \ + static bool __print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk(x); \ + } \ +}) + +void log_buf_kexec_setup(void); +#else +static inline int vprintk(const char *s, va_list args) + __attribute__ ((format (printf, 1, 0))); +static inline int vprintk(const char *s, va_list args) { return 0; } +static inline int printk(const char *s, ...) + __attribute__ ((format (printf, 1, 2))); +static inline int __cold printk(const char *s, ...) { return 0; } +static inline int printk_ratelimit(void) { return 0; } +static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ + unsigned int interval_msec) \ + { return false; } + +/* No effect, but we still get type checking even in the !PRINTK case: */ +#define printk_once(x...) printk(x) + +static inline void log_buf_kexec_setup(void) +{ +} +#endif + +/* + * Dummy printk for disabled debugging statements to use whilst maintaining + * gcc's format and side-effect checking. + */ +static inline __attribute__ ((format (printf, 1, 2))) +int no_printk(const char *s, ...) 
{ return 0; } + +extern int printk_needs_cpu(int cpu); +extern void printk_tick(void); + +extern void asmlinkage __attribute__((format(printf, 1, 2))) + early_printk(const char *fmt, ...); + +static inline void console_silent(void) +{ + console_loglevel = 0; +} + +static inline void console_verbose(void) +{ + if (console_loglevel) + console_loglevel = 15; +} + +extern void dump_stack(void) __cold; + +enum { + DUMP_PREFIX_NONE, + DUMP_PREFIX_ADDRESS, + DUMP_PREFIX_OFFSET +}; +extern void hex_dump_to_buffer(const void *buf, size_t len, + int rowsize, int groupsize, + char *linebuf, size_t linebuflen, bool ascii); +extern void print_hex_dump(const char *level, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, bool ascii); +extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, + const void *buf, size_t len); + +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#define pr_emerg(fmt, ...) \ + printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert(fmt, ...) \ + printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit(fmt, ...) \ + printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err(fmt, ...) \ + printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warning(fmt, ...) \ + printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn pr_warning +#define pr_notice(fmt, ...) \ + printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) \ + printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) + +/* pr_devel() should produce zero code unless DEBUG is defined */ +#ifdef DEBUG +#define pr_devel(fmt, ...) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel(fmt, ...) \ + ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) +#endif + +/* If you are writing a driver, please use dev_dbg instead */ +#if defined(DEBUG) +#define pr_debug(fmt, ...) 
\ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#elif defined(CONFIG_DYNAMIC_DEBUG) +/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */ +#define pr_debug(fmt, ...) \ + dynamic_pr_debug(fmt, ##__VA_ARGS__) +#else +#define pr_debug(fmt, ...) \ + ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) +#endif + +/* + * ratelimited messages with local ratelimit_state, + * no local ratelimit_state used in the !PRINTK case + */ +#ifdef CONFIG_PRINTK +#define printk_ratelimited(fmt, ...) ({ \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + \ + if (__ratelimit(&_rs)) \ + printk(fmt, ##__VA_ARGS__); \ +}) +#else +/* No effect, but we still get type checking even in the !PRINTK case: */ +#define printk_ratelimited printk +#endif + +#define pr_emerg_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warning_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn_ratelimited pr_warning_ratelimited +#define pr_notice_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +/* no pr_cont_ratelimited, don't do that... */ +/* If you are writing a driver, please use dev_dbg instead */ +#if defined(DEBUG) +#define pr_debug_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_debug_ratelimited(fmt, ...) 
\ + ({ if (0) printk_ratelimited(KERN_DEBUG pr_fmt(fmt), \ + ##__VA_ARGS__); 0; }) +#endif + +#endif From dc6641be0ea8819ef095fdcefc2b695611999a21 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Nov 2010 22:40:38 +0100 Subject: [PATCH 59/69] i2c: Remove obsolete cleanup for clientdata A few new i2c-drivers came into the kernel which clear the clientdata-pointer on exit. This is obsolete meanwhile, so fix it and hope the word will spread. Signed-off-by: Wolfram Sang Acked-by: Alan Cox Acked-by: Guennadi Liakhovetski Acked-by: Greg Kroah-Hartman Signed-off-by: Jean Delvare --- drivers/media/video/imx074.c | 2 -- drivers/media/video/ov6650.c | 2 -- drivers/misc/apds9802als.c | 1 - drivers/staging/olpc_dcon/olpc_dcon.c | 3 --- 4 files changed, 8 deletions(-) diff --git a/drivers/media/video/imx074.c b/drivers/media/video/imx074.c index 380e459f899d..27b5dfdfbb93 100644 --- a/drivers/media/video/imx074.c +++ b/drivers/media/video/imx074.c @@ -451,7 +451,6 @@ static int imx074_probe(struct i2c_client *client, ret = imx074_video_probe(icd, client); if (ret < 0) { icd->ops = NULL; - i2c_set_clientdata(client, NULL); kfree(priv); return ret; } @@ -468,7 +467,6 @@ static int imx074_remove(struct i2c_client *client) icd->ops = NULL; if (icl->free_bus) icl->free_bus(icl); - i2c_set_clientdata(client, NULL); client->driver = NULL; kfree(priv); diff --git a/drivers/media/video/ov6650.c b/drivers/media/video/ov6650.c index 31f19373bbae..cf93de988068 100644 --- a/drivers/media/video/ov6650.c +++ b/drivers/media/video/ov6650.c @@ -1174,7 +1174,6 @@ static int ov6650_probe(struct i2c_client *client, if (ret) { icd->ops = NULL; - i2c_set_clientdata(client, NULL); kfree(priv); } @@ -1185,7 +1184,6 @@ static int ov6650_remove(struct i2c_client *client) { struct ov6650 *priv = to_ov6650(client); - i2c_set_clientdata(client, NULL); kfree(priv); return 0; } diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c index 0ed09358027e..644d4cd071cc 100644 --- 
a/drivers/misc/apds9802als.c +++ b/drivers/misc/apds9802als.c @@ -251,7 +251,6 @@ static int apds9802als_probe(struct i2c_client *client, return res; als_error1: - i2c_set_clientdata(client, NULL); kfree(data); return res; } diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c index 75aa7a36307d..f286a4c56f66 100644 --- a/drivers/staging/olpc_dcon/olpc_dcon.c +++ b/drivers/staging/olpc_dcon/olpc_dcon.c @@ -733,7 +733,6 @@ static int dcon_probe(struct i2c_client *client, const struct i2c_device_id *id) edev: platform_device_unregister(dcon_device); dcon_device = NULL; - i2c_set_clientdata(client, NULL); eirq: free_irq(DCON_IRQ, &dcon_driver); einit: @@ -757,8 +756,6 @@ static int dcon_remove(struct i2c_client *client) platform_device_unregister(dcon_device); cancel_work_sync(&dcon_work); - i2c_set_clientdata(client, NULL); - return 0; } From dfdee5f00cc9ce21b0a7e786039bcfec26fbcb4b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 22:40:38 +0100 Subject: [PATCH 60/69] i2c: Delete unused adapter IDs Delete unused I2C adapter IDs. Special cases are: * I2C_HW_B_RIVA was still set in driver rivafb, however no other driver is ever looking for this value, so we can safely remove it. * I2C_HW_B_HDPVR is used in staging driver lirc_zilog, however no adapter ID is ever set to this value, so the code in question never runs. As the code additionally expects that I2C_HW_B_HDPVR may not be defined, we can delete it now and let the lirc_zilog driver maintainer rewrite this piece of code. 
Big thanks for Hans Verkuil for doing all the hard work :) Signed-off-by: Jean Delvare Acked-by: Jarod Wilson Acked-by: Mauro Carvalho Chehab Acked-by: Hans Verkuil --- drivers/video/riva/rivafb-i2c.c | 1 - include/linux/i2c-id.h | 22 ---------------------- 2 files changed, 23 deletions(-) diff --git a/drivers/video/riva/rivafb-i2c.c b/drivers/video/riva/rivafb-i2c.c index a0e22ac483a3..167400e2a182 100644 --- a/drivers/video/riva/rivafb-i2c.c +++ b/drivers/video/riva/rivafb-i2c.c @@ -94,7 +94,6 @@ static int __devinit riva_setup_i2c_bus(struct riva_i2c_chan *chan, strcpy(chan->adapter.name, name); chan->adapter.owner = THIS_MODULE; - chan->adapter.id = I2C_HW_B_RIVA; chan->adapter.class = i2c_class; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->par->pdev->dev; diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index e844a0b18695..4bef5c557160 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -32,28 +32,6 @@ */ /* --- Bit algorithm adapters */ -#define I2C_HW_B_BT848 0x010005 /* BT848 video boards */ -#define I2C_HW_B_RIVA 0x010010 /* Riva based graphics cards */ -#define I2C_HW_B_ZR36067 0x010019 /* Zoran-36057/36067 based boards */ #define I2C_HW_B_CX2388x 0x01001b /* connexant 2388x based tv cards */ -#define I2C_HW_B_EM28XX 0x01001f /* em28xx video capture cards */ -#define I2C_HW_B_CX2341X 0x010020 /* Conexant CX2341X MPEG encoder cards */ -#define I2C_HW_B_CX23885 0x010022 /* conexant 23885 based tv cards (bus1) */ -#define I2C_HW_B_AU0828 0x010023 /* auvitek au0828 usb bridge */ -#define I2C_HW_B_CX231XX 0x010024 /* Conexant CX231XX USB based cards */ -#define I2C_HW_B_HDPVR 0x010025 /* Hauppauge HD PVR */ - -/* --- SGI adapters */ -#define I2C_HW_SGI_VINO 0x160000 - -/* --- SMBus only adapters */ -#define I2C_HW_SMBUS_W9968CF 0x04000d -#define I2C_HW_SMBUS_OV511 0x04000e /* OV511(+) USB 1.1 webcam ICs */ -#define I2C_HW_SMBUS_OV518 0x04000f /* OV518(+) USB 1.1 webcam ICs */ -#define I2C_HW_SMBUS_CAFE 
0x040012 /* Marvell 88ALP01 "CAFE" cam */ - -/* --- Miscellaneous adapters */ -#define I2C_HW_SAA7146 0x060000 /* SAA7146 video decoder bus */ -#define I2C_HW_SAA7134 0x090000 /* SAA7134 video decoder bus */ #endif /* LINUX_I2C_ID_H */ From f3dc65dafa651bca6606ac0b41ead1be50d05652 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 22:40:38 +0100 Subject: [PATCH 61/69] i2c: Drivers shouldn't include <linux/i2c-id.h> Drivers don't need to include <linux/i2c-id.h>, especially not when they don't use anything that header file provides. Signed-off-by: Jean Delvare Cc: Michael Hunold Acked-by: Mauro Carvalho Chehab --- drivers/media/common/saa7146_i2c.c | 1 - drivers/media/video/ir-kbd-i2c.c | 1 - drivers/staging/olpc_dcon/olpc_dcon.c | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/media/common/saa7146_i2c.c b/drivers/media/common/saa7146_i2c.c index 3d88542612ea..74ee172b5bc9 100644 --- a/drivers/media/common/saa7146_i2c.c +++ b/drivers/media/common/saa7146_i2c.c @@ -391,7 +391,6 @@ static int saa7146_i2c_xfer(struct i2c_adapter* adapter, struct i2c_msg *msg, in /*****************************************************************************/ /* i2c-adapter helper functions */ -#include <linux/i2c-id.h> /* exported algorithm data */ static struct i2c_algorithm saa7146_algo = { diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c index 5a000c65ae98..ce4a75375909 100644 --- a/drivers/media/video/ir-kbd-i2c.c +++ b/drivers/media/video/ir-kbd-i2c.c @@ -44,7 +44,6 @@ #include #include #include -#include <linux/i2c-id.h> #include #include diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c index f286a4c56f66..4ca45ec7fd84 100644 --- a/drivers/staging/olpc_dcon/olpc_dcon.c +++ b/drivers/staging/olpc_dcon/olpc_dcon.c @@ -17,7 +17,6 @@ #include #include #include -#include <linux/i2c-id.h> #include #include #include From e1e18ee1cb58228a577668284c1dd03d859d7157 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 22:40:38 +0100 Subject: [PATCH 62/69] i2c:
Mark i2c_adapter.id as deprecated It's about time to make it clear that i2c_adapter.id is deprecated. Hopefully this will remind the last user to move over to a different strategy. Signed-off-by: Jean Delvare Acked-by: Jarod Wilson Acked-by: Mauro Carvalho Chehab Acked-by: Hans Verkuil --- Documentation/feature-removal-schedule.txt | 10 ++++++++++ drivers/i2c/i2c-mux.c | 1 - include/linux/i2c.h | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index d8f36f984faa..6c2f55e05f13 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -554,3 +554,13 @@ Why: This is a legacy interface which have been replaced by a more Who: NeilBrown ---------------------------- + +What: i2c_adapter.id +When: June 2011 +Why: This field is deprecated. I2C device drivers shouldn't change their + behavior based on the underlying I2C adapter. Instead, the I2C + adapter driver should instantiate the I2C devices and provide the + needed platform-specific information. 
+Who: Jean Delvare + +---------------------------- diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index d32a4843fc3a..d7a4833be416 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -120,7 +120,6 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, snprintf(priv->adap.name, sizeof(priv->adap.name), "i2c-%d-mux (chan_id %d)", i2c_adapter_id(parent), chan_id); priv->adap.owner = THIS_MODULE; - priv->adap.id = parent->id; priv->adap.algo = &priv->algo; priv->adap.algo_data = priv; priv->adap.dev.parent = &parent->dev; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 889b35abaeda..56cfe23ffb39 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -353,7 +353,7 @@ struct i2c_algorithm { */ struct i2c_adapter { struct module *owner; - unsigned int id; + unsigned int id __deprecated; unsigned int class; /* classes to allow probing for */ const struct i2c_algorithm *algo; /* the algorithm to access the bus */ void *algo_data; From 2236baa75f704851d3cd3310569058151acb1f06 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Nov 2010 22:40:38 +0100 Subject: [PATCH 63/69] i2c: Sanity checks on adapter registration Make sure I2C adapters being registered have the required struct fields set. If they don't, problems will happen later. 
Signed-off-by: Jean Delvare --- drivers/i2c/i2c-core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index d231f683f576..6b4cc567645b 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -848,6 +848,18 @@ static int i2c_register_adapter(struct i2c_adapter *adap) goto out_list; } + /* Sanity checks */ + if (unlikely(adap->name[0] == '\0')) { + pr_err("i2c-core: Attempt to register an adapter with " + "no name!\n"); + return -EINVAL; + } + if (unlikely(!adap->algo)) { + pr_err("i2c-core: Attempt to register adapter '%s' with " + "no algo!\n", adap->name); + return -EINVAL; + } + rt_mutex_init(&adap->bus_lock); mutex_init(&adap->userspace_clients_lock); INIT_LIST_HEAD(&adap->userspace_clients); From 12b3052c3ee8f508b2c7ee4ddd63ed03423409d8 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 15 Nov 2010 18:36:29 -0500 Subject: [PATCH 64/69] capabilities/syslog: open code cap_syslog logic to fix build failure The addition of CONFIG_SECURITY_DMESG_RESTRICT resulted in a build failure when CONFIG_PRINTK=n. This is because the capabilities code which used the new option was built even though the variable in question didn't exist. The patch here fixes this by moving the capabilities checks out of the LSM and into the caller. All (known) LSMs should have been calling the capabilities hook already so it actually makes the code organization better to eliminate the hook altogether. 
Signed-off-by: Eric Paris Acked-by: James Morris Signed-off-by: Linus Torvalds --- include/linux/security.h | 9 ++++----- kernel/printk.c | 15 ++++++++++++++- security/capability.c | 5 +++++ security/commoncap.c | 21 --------------------- security/security.c | 4 ++-- security/selinux/hooks.c | 6 +----- security/smack/smack_lsm.c | 8 ++------ 7 files changed, 28 insertions(+), 40 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index b8246a8df7d2..fd4d55fb8845 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -77,7 +77,6 @@ extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, extern int cap_task_setscheduler(struct task_struct *p); extern int cap_task_setioprio(struct task_struct *p, int ioprio); extern int cap_task_setnice(struct task_struct *p, int nice); -extern int cap_syslog(int type, bool from_file); extern int cap_vm_enough_memory(struct mm_struct *mm, long pages); struct msghdr; @@ -1388,7 +1387,7 @@ struct security_operations { int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); int (*quota_on) (struct dentry *dentry); - int (*syslog) (int type, bool from_file); + int (*syslog) (int type); int (*settime) (struct timespec *ts, struct timezone *tz); int (*vm_enough_memory) (struct mm_struct *mm, long pages); @@ -1671,7 +1670,7 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); int security_quota_on(struct dentry *dentry); -int security_syslog(int type, bool from_file); +int security_syslog(int type); int security_settime(struct timespec *ts, struct timezone *tz); int security_vm_enough_memory(long pages); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); @@ -1901,9 +1900,9 @@ static inline int security_quota_on(struct dentry *dentry) return 0; } -static 
inline int security_syslog(int type, bool from_file) +static inline int security_syslog(int type) { - return cap_syslog(type, from_file); + return 0; } static inline int security_settime(struct timespec *ts, struct timezone *tz) diff --git a/kernel/printk.c b/kernel/printk.c index 38e7d5868d60..9a2264fc42ca 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -274,7 +274,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) char c; int error = 0; - error = security_syslog(type, from_file); + /* + * If this is from /proc/kmsg we only do the capabilities checks + * at open time. + */ + if (type == SYSLOG_ACTION_OPEN || !from_file) { + if (dmesg_restrict && !capable(CAP_SYS_ADMIN)) + return -EPERM; + if ((type != SYSLOG_ACTION_READ_ALL && + type != SYSLOG_ACTION_SIZE_BUFFER) && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + } + + error = security_syslog(type); if (error) return error; diff --git a/security/capability.c b/security/capability.c index 30ae00fbecd5..c773635ca3a0 100644 --- a/security/capability.c +++ b/security/capability.c @@ -17,6 +17,11 @@ static int cap_sysctl(ctl_table *table, int op) return 0; } +static int cap_syslog(int type) +{ + return 0; +} + static int cap_quotactl(int cmds, int type, int id, struct super_block *sb) { return 0; diff --git a/security/commoncap.c b/security/commoncap.c index 04b80f9912bf..64c2ed9c9015 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -27,7 +27,6 @@ #include #include #include -#include /* * If a non-root user executes a setuid-root binary in @@ -883,26 +882,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, return error; } -/** - * cap_syslog - Determine whether syslog function is permitted - * @type: Function requested - * @from_file: Whether this request came from an open file (i.e. /proc) - * - * Determine whether the current process is permitted to use a particular - * syslog function, returning 0 if permission is granted, -ve if not. 
- */ -int cap_syslog(int type, bool from_file) -{ - if (type != SYSLOG_ACTION_OPEN && from_file) - return 0; - if (dmesg_restrict && !capable(CAP_SYS_ADMIN)) - return -EPERM; - if ((type != SYSLOG_ACTION_READ_ALL && - type != SYSLOG_ACTION_SIZE_BUFFER) && !capable(CAP_SYS_ADMIN)) - return -EPERM; - return 0; -} - /** * cap_vm_enough_memory - Determine whether a new virtual mapping is permitted * @mm: The VM space in which the new mapping is to be made diff --git a/security/security.c b/security/security.c index 3ef5e2a7a741..1b798d3df710 100644 --- a/security/security.c +++ b/security/security.c @@ -197,9 +197,9 @@ int security_quota_on(struct dentry *dentry) return security_ops->quota_on(dentry); } -int security_syslog(int type, bool from_file) +int security_syslog(int type) { - return security_ops->syslog(type, from_file); + return security_ops->syslog(type); } int security_settime(struct timespec *ts, struct timezone *tz) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d9154cf90ae1..65fa8bf596f5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1973,14 +1973,10 @@ static int selinux_quota_on(struct dentry *dentry) return dentry_has_perm(cred, NULL, dentry, FILE__QUOTAON); } -static int selinux_syslog(int type, bool from_file) +static int selinux_syslog(int type) { int rc; - rc = cap_syslog(type, from_file); - if (rc) - return rc; - switch (type) { case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */ case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */ diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index bc39f4067af6..489a85afa477 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -157,15 +157,11 @@ static int smack_ptrace_traceme(struct task_struct *ptp) * * Returns 0 on success, error code otherwise. 
*/ -static int smack_syslog(int type, bool from_file) +static int smack_syslog(int type) { - int rc; + int rc = 0; char *sp = current_security(); - rc = cap_syslog(type, from_file); - if (rc != 0) - return rc; - if (capable(CAP_MAC_OVERRIDE)) return 0; From e53beacd23d9cb47590da6a7a7f6d417b941a994 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 15 Nov 2010 18:31:02 -0800 Subject: [PATCH 65/69] Linux 2.6.37-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6619720f50dd..ab5359db3d17 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 37 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION* From d07335e51df0c6dec202d315fc4f1f7e100eec4e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 16 Nov 2010 12:52:38 +0100 Subject: [PATCH 66/69] block: Rename "block_remap" tracepoint to "block_bio_remap" to clarify the event. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 10 +++++----- drivers/md/dm.c | 4 ++-- include/trace/events/block.h | 6 +++--- kernel/trace/blktrace.c | 12 ++++++------ 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 4ce953f1b390..151070541e21 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -33,7 +33,7 @@ #include "blk.h" -EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); @@ -1329,9 +1329,9 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; - trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, - bdev->bd_dev, - bio->bi_sector -
p->start_sect); } } @@ -1500,7 +1500,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (old_sector != -1) - trace_block_remap(q, bio, old_dev, old_sector); + trace_block_bio_remap(q, bio, old_dev, old_sector); old_sector = bio->bi_sector; old_dev = bio->bi_bdev->bd_dev; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7cb1352f7e7a..0a2b5516bc21 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -990,8 +990,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ - trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, sector); + trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone, + tio->io->bio->bi_bdev->bd_dev, sector); generic_make_request(clone); } else if (r < 0 || r == DM_MAPIO_REQUEUE) { diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d8ce278515c3..b56c65dc105d 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -486,16 +486,16 @@ TRACE_EVENT(block_split, ); /** - * block_remap - map request for a partition to the raw device + * block_bio_remap - map request for a logical device to the raw device * @q: queue holding the operation * @bio: revised operation * @dev: device for the operation * @from: original sector for the operation * - * An operation for a partition on a block device has been mapped to the + * An operation for a logical device has been mapped to the * raw block device. 
*/ -TRACE_EVENT(block_remap, +TRACE_EVENT(block_bio_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, sector_t from), diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7b8ec0281548..2b8e2ee7c0ef 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -887,7 +887,7 @@ static void blk_add_trace_split(void *ignore, } /** - * blk_add_trace_remap - Add a trace for a remap operation + * blk_add_trace_bio_remap - Add a trace for a bio-remap operation * @ignore: trace callback data parameter (not used) * @q: queue the io is for * @bio: the source bio @@ -899,9 +899,9 @@ static void blk_add_trace_split(void *ignore, * it spans a stripe (or similar). Add a trace for that action. * **/ -static void blk_add_trace_remap(void *ignore, - struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from) +static void blk_add_trace_bio_remap(void *ignore, + struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from) { struct blk_trace *bt = q->blk_trace; struct blk_io_trace_remap r; @@ -1016,7 +1016,7 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_split(blk_add_trace_split, NULL); WARN_ON(ret); - ret = register_trace_block_remap(blk_add_trace_remap, NULL); + ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); WARN_ON(ret); ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); WARN_ON(ret); @@ -1025,7 +1025,7 @@ static void blk_register_tracepoints(void) static void blk_unregister_tracepoints(void) { unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); - unregister_trace_block_remap(blk_add_trace_remap, NULL); + unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); unregister_trace_block_split(blk_add_trace_split, NULL); unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); From b54ce60eb7f61f8e314b8b241b0469eda3bb1d42 Mon Sep 17 00:00:00 2001 From: 
Gui Jianfeng Date: Tue, 30 Nov 2010 20:52:46 +0100 Subject: [PATCH 67/69] cfq-iosched: Get rid of st->active When a cfq group is running, it won't be dequeued from service tree, so there's no need to store the active one in st->active. Just get rid of it. Signed-off-by: Gui Jianfeng Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 73a58628f54a..e18d316ae652 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -87,7 +87,6 @@ struct cfq_rb_root { unsigned count; unsigned total_weight; u64 min_vdisktime; - struct rb_node *active; }; #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ .count = 0, .min_vdisktime = 0, } @@ -563,11 +562,6 @@ static void update_min_vdisktime(struct cfq_rb_root *st) u64 vdisktime = st->min_vdisktime; struct cfq_group *cfqg; - if (st->active) { - cfqg = rb_entry_cfqg(st->active); - vdisktime = cfqg->vdisktime; - } - if (st->left) { cfqg = rb_entry_cfqg(st->left); vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); @@ -894,9 +888,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) { struct cfq_rb_root *st = &cfqd->grp_service_tree; - if (st->active == &cfqg->rb_node) - st->active = NULL; - BUG_ON(cfqg->nr_cfqq < 1); cfqg->nr_cfqq--; @@ -1095,7 +1086,7 @@ static void cfq_put_cfqg(struct cfq_group *cfqg) if (!atomic_dec_and_test(&cfqg->ref)) return; for_each_cfqg_st(cfqg, i, j, st) - BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL); + BUG_ON(!RB_EMPTY_ROOT(&st->rb)); kfree(cfqg); } @@ -1687,9 +1678,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfqq == cfqd->active_queue) cfqd->active_queue = NULL; - if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active) - cfqd->grp_service_tree.active = NULL; - if (cfqd->active_cic) { put_io_context(cfqd->active_cic->ioc); cfqd->active_cic = NULL; @@ -2199,7
+2187,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) if (RB_EMPTY_ROOT(&st->rb)) return NULL; cfqg = cfq_rb_first_group(st); - st->active = &cfqg->rb_node; update_min_vdisktime(st); return cfqg; } From 760701bfe14faee8ea0608a9cab2046071d98a39 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Tue, 30 Nov 2010 20:52:47 +0100 Subject: [PATCH 68/69] cfq-iosched: Get rid of on_st flag Whether a CFQ group is on a service tree can be determined by checking "cfqg->rb_node". There's no need to maintain an extra flag here. Signed-off-by: Gui Jianfeng Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e18d316ae652..5d0349d602fe 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -179,7 +179,6 @@ struct cfq_group { /* group service_tree key */ u64 vdisktime; unsigned int weight; - bool on_st; /* number of cfqq currently on this group */ int nr_cfqq; @@ -863,7 +862,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) struct rb_node *n; cfqg->nr_cfqq++; - if (cfqg->on_st) + if (!RB_EMPTY_NODE(&cfqg->rb_node)) return; /* @@ -879,7 +878,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) cfqg->vdisktime = st->min_vdisktime; __cfq_group_service_tree_add(st, cfqg); - cfqg->on_st = true; st->total_weight += cfqg->weight; } @@ -896,7 +894,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) return; cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); - cfqg->on_st = false; st->total_weight -= cfqg->weight; if (!RB_EMPTY_NODE(&cfqg->rb_node)) cfq_rb_erase(&cfqg->rb_node, st); From e4ea0c16a85d221ebcc3a21f32e321440459e0fc Mon Sep 17 00:00:00 2001 From: Shaohua Li writes Date: Mon, 13 Dec 2010 14:32:22 +0100 Subject: [PATCH 69/69] block cfq: select new workload if priority changed If priority is changed, continuing to check workload_expires and
service tree count of the previous workload does not make sense. We should always choose the workload with lowest key of new priority in such case. Signed-off-by: Shaohua Li Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5d0349d602fe..9b186fd6bf47 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2101,6 +2101,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) unsigned count; struct cfq_rb_root *st; unsigned group_slice; + enum wl_prio_t original_prio = cfqd->serving_prio; if (!cfqg) { cfqd->serving_prio = IDLE_WORKLOAD; @@ -2119,6 +2120,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) return; } + if (original_prio != cfqd->serving_prio) + goto new_workload; + /* * For RT and BE, we have to choose also the type * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload @@ -2133,6 +2137,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) if (count && !time_after(jiffies, cfqd->workload_expires)) return; +new_workload: /* otherwise select new workload type */ cfqd->serving_type = cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);