From f21e0e81d81b649ad309cedc7226f1bed72982e0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 24 May 2011 08:12:40 +0800 Subject: [PATCH 001/151] regulator: Do bulk enables of regulators in parallel In order to reduce the impact of ramp times rather than enabling the regulators for a device in series use async tasks to run the actual enables. This means that the delays which the enables implement can all run in parallel, though it does mean that the order in which the supplies come on may be unstable. For super bonus fun points if any of the regulators are shared between multiple supplies on the same device (as is rather likely) then this will test our locking. Note that in this case we only delay once for each physical regulator so the threads shouldn't block each other while delaying. It'd be even nicer if we could coalesce writes to a shared enable registers in PMICs but that's definitely future work, and it may also be useful and is certainly more achievable to optimise out the parallelism if none of the regulators implement ramp delays. 
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 32 ++++++++++++++++++++++++------ include/linux/regulator/consumer.h | 3 +++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index d3e38790906e..7b38af90a012 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -2264,6 +2265,13 @@ int regulator_bulk_get(struct device *dev, int num_consumers, } EXPORT_SYMBOL_GPL(regulator_bulk_get); +static void regulator_bulk_enable_async(void *data, async_cookie_t cookie) +{ + struct regulator_bulk_data *bulk = data; + + bulk->ret = regulator_enable(bulk->consumer); +} + /** * regulator_bulk_enable - enable multiple regulator consumers * @@ -2279,21 +2287,33 @@ EXPORT_SYMBOL_GPL(regulator_bulk_get); int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { + LIST_HEAD(async_domain); int i; - int ret; + int ret = 0; + + for (i = 0; i < num_consumers; i++) + async_schedule_domain(regulator_bulk_enable_async, + &consumers[i], &async_domain); + + async_synchronize_full_domain(&async_domain); + /* If any consumer failed we need to unwind any that succeeded */ for (i = 0; i < num_consumers; i++) { - ret = regulator_enable(consumers[i].consumer); - if (ret != 0) + if (consumers[i].ret != 0) { + ret = consumers[i].ret; goto err; + } } return 0; err: - pr_err("Failed to enable %s: %d\n", consumers[i].supply, ret); - for (--i; i >= 0; --i) - regulator_disable(consumers[i].consumer); + for (i = 0; i < num_consumers; i++) + if (consumers[i].ret == 0) + regulator_disable(consumers[i].consumer); + else + pr_err("Failed to enable %s: %d\n", + consumers[i].supply, consumers[i].ret); return ret; } diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 9e87c1cb7270..26f6ea4444e3 100644 --- a/include/linux/regulator/consumer.h +++ 
b/include/linux/regulator/consumer.h @@ -122,6 +122,9 @@ struct regulator; struct regulator_bulk_data { const char *supply; struct regulator *consumer; + + /* Internal use */ + int ret; }; #if defined(CONFIG_REGULATOR) From 2ae3636b79aee1a69b2e84eff68bb123090796d3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 24 May 2011 23:14:40 +0800 Subject: [PATCH 002/151] regulator: Use _cansleep() for WM8994 regulator GPIOs The WM8994 regulator driver is perfectly happy if the GPIO used to enable the regulator sleeps so call the appropriate GPIO API. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm8994-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c index 35b2958d5106..1a6a690f24db 100644 --- a/drivers/regulator/wm8994-regulator.c +++ b/drivers/regulator/wm8994-regulator.c @@ -43,7 +43,7 @@ static int wm8994_ldo_enable(struct regulator_dev *rdev) if (!ldo->enable) return 0; - gpio_set_value(ldo->enable, 1); + gpio_set_value_cansleep(ldo->enable, 1); ldo->is_enabled = true; return 0; @@ -57,7 +57,7 @@ static int wm8994_ldo_disable(struct regulator_dev *rdev) if (!ldo->enable) return -EINVAL; - gpio_set_value(ldo->enable, 0); + gpio_set_value_cansleep(ldo->enable, 0); ldo->is_enabled = false; return 0; From 7736f11dbadce33d3f12bf0e8114d0f1da5e8622 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Fri, 27 May 2011 12:25:27 -0700 Subject: [PATCH 003/151] regulator: twl-regulator: fix n_voltages for twl6030 variable LDOs The n_voltages initializer for the TWL6030_ADJUSTABLE_LDO macro is off by one, causing the highest supported voltage to be unreachable.
Setting the machine constraints to only allow the highest voltage causes errors: machine_constraints_voltage: VAUX3_6030: unsupportable voltage constraints twl_reg twl_reg.39: can't register VAUX3_6030, -22 twl_reg: probe of twl_reg.39 failed with error -22 This patch fixes the off by one error. Tested by setting VAUX3_6030 to 3.3V. Signed-off-by: Colin Cross Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/twl-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 87fe0f75a56e..503c2bc64c84 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -864,7 +864,7 @@ static struct regulator_ops twlsmps_ops = { .desc = { \ .name = #label, \ .id = TWL6030_REG_##label, \ - .n_voltages = (max_mVolts - min_mVolts)/100, \ + .n_voltages = (max_mVolts - min_mVolts)/100 + 1, \ .ops = &twl6030ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ From c3d4913cd4cd469cbf29d411293e937729e83f3a Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Tue, 31 May 2011 10:34:45 +0900 Subject: [PATCH 004/151] pch_dma: fix DMA issue(ch8-ch11) ISSUE: In case PCH_DMA with I2S communications with ch8~ch11, sometimes I2S data is not send correctly. CAUSE: The following patch I submitted before was not enough modification for supporting DMA ch8~ch11. The modification for status register of ch8~11 was not enough. pch_dma: Support I2S for ML7213 IOH author Tomoya MORINAGA Mon, 9 May 2011 07:09:38 +0000 (16:09 +0900) committer Vinod Koul Mon, 9 May 2011 11:42:23 +0000 (16:42 +0530) commit 194f5f2706c7472f9c6bb2d17fa788993606581f tree c9d4903ea02b18939a4f390956a48be1a3734517 parent 60092d0bde4c8741198da4a69b693d3709385bf1 This patch fixes the issue. We can confirm PCH_DMA with I2S communications with ch8~ch11 works well. 
Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 69 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index ff5b38f9d45b..65c32f893a57 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -45,7 +45,8 @@ #define DMA_STATUS_MASK_BITS 0x3 #define DMA_STATUS_SHIFT_BITS 16 #define DMA_STATUS_IRQ(x) (0x1 << (x)) -#define DMA_STATUS_ERR(x) (0x1 << ((x) + 8)) +#define DMA_STATUS0_ERR(x) (0x1 << ((x) + 8)) +#define DMA_STATUS2_ERR(x) (0x1 << (x)) #define DMA_DESC_WIDTH_SHIFT_BITS 12 #define DMA_DESC_WIDTH_1_BYTE (0x3 << DMA_DESC_WIDTH_SHIFT_BITS) @@ -133,6 +134,7 @@ struct pch_dma { #define PCH_DMA_CTL3 0x0C #define PCH_DMA_STS0 0x10 #define PCH_DMA_STS1 0x14 +#define PCH_DMA_STS2 0x18 #define dma_readl(pd, name) \ readl((pd)->membase + PCH_DMA_##name) @@ -183,13 +185,19 @@ static void pdc_enable_irq(struct dma_chan *chan, int enable) { struct pch_dma *pd = to_pd(chan->device); u32 val; + int pos; + + if (chan->chan_id < 8) + pos = chan->chan_id; + else + pos = chan->chan_id + 8; val = dma_readl(pd, CTL2); if (enable) - val |= 0x1 << chan->chan_id; + val |= 0x1 << pos; else - val &= ~(0x1 << chan->chan_id); + val &= ~(0x1 << pos); dma_writel(pd, CTL2, val); @@ -262,7 +270,7 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode) chan->chan_id, val); } -static u32 pdc_get_status(struct pch_dma_chan *pd_chan) +static u32 pdc_get_status0(struct pch_dma_chan *pd_chan) { struct pch_dma *pd = to_pd(pd_chan->chan.device); u32 val; @@ -272,9 +280,27 @@ static u32 pdc_get_status(struct pch_dma_chan *pd_chan) DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id)); } +static u32 pdc_get_status2(struct pch_dma_chan *pd_chan) +{ + struct pch_dma *pd = to_pd(pd_chan->chan.device); + u32 val; + + val = dma_readl(pd, STS2); + return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS + + DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 
8))); +} + static bool pdc_is_idle(struct pch_dma_chan *pd_chan) { - if (pdc_get_status(pd_chan) == DMA_STATUS_IDLE) + u32 sts; + + if (pd_chan->chan.chan_id < 8) + sts = pdc_get_status0(pd_chan); + else + sts = pdc_get_status2(pd_chan); + + + if (sts == DMA_STATUS_IDLE) return true; else return false; @@ -693,30 +719,45 @@ static irqreturn_t pd_irq(int irq, void *devid) struct pch_dma *pd = (struct pch_dma *)devid; struct pch_dma_chan *pd_chan; u32 sts0; + u32 sts2; int i; - int ret = IRQ_NONE; + int ret0 = IRQ_NONE; + int ret2 = IRQ_NONE; sts0 = dma_readl(pd, STS0); + sts2 = dma_readl(pd, STS2); dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0); for (i = 0; i < pd->dma.chancnt; i++) { pd_chan = &pd->channels[i]; - if (sts0 & DMA_STATUS_IRQ(i)) { - if (sts0 & DMA_STATUS_ERR(i)) - set_bit(0, &pd_chan->err_status); + if (i < 8) { + if (sts0 & DMA_STATUS_IRQ(i)) { + if (sts0 & DMA_STATUS0_ERR(i)) + set_bit(0, &pd_chan->err_status); - tasklet_schedule(&pd_chan->tasklet); - ret = IRQ_HANDLED; - } + tasklet_schedule(&pd_chan->tasklet); + ret0 = IRQ_HANDLED; + } + } else { + if (sts2 & DMA_STATUS_IRQ(i - 8)) { + if (sts2 & DMA_STATUS2_ERR(i)) + set_bit(0, &pd_chan->err_status); + tasklet_schedule(&pd_chan->tasklet); + ret2 = IRQ_HANDLED; + } + } } /* clear interrupt bits in status register */ - dma_writel(pd, STS0, sts0); + if (ret0) + dma_writel(pd, STS0, sts0); + if (ret2) + dma_writel(pd, STS2, sts2); - return ret; + return ret0 | ret2; } #ifdef CONFIG_PM From badc1446891c158f065c5a9726febdae74eb5ac5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 1 Jun 2011 21:25:47 +0100 Subject: [PATCH 005/151] [IA64] Hook up gpiolib support Allow people to use gpiolib on ia64, mostly for build coverage as it seems more useful to standardise on availability of the API than handle it being optional.
Signed-off-by: Mark Brown Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 4 +++ arch/ia64/include/asm/gpio.h | 55 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 arch/ia64/include/asm/gpio.h diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 38280ef4a2af..578701ea03d4 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -27,6 +27,7 @@ config IA64 select GENERIC_PENDING_IRQ if SMP select IRQ_PER_CPU select GENERIC_IRQ_SHOW + select ARCH_WANT_OPTIONAL_GPIOLIB default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -89,6 +90,9 @@ config GENERIC_TIME_VSYSCALL config HAVE_SETUP_PER_CPU_AREA def_bool y +config GENERIC_GPIO + def_bool y + config DMI bool default y diff --git a/arch/ia64/include/asm/gpio.h b/arch/ia64/include/asm/gpio.h new file mode 100644 index 000000000000..590a20debc4e --- /dev/null +++ b/arch/ia64/include/asm/gpio.h @@ -0,0 +1,55 @@ +/* + * Generic GPIO API implementation for IA-64. + * + * A straight copy of that for PowerPC which was: + * + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ASM_IA64_GPIO_H +#define _ASM_IA64_GPIO_H + +#include +#include + +#ifdef CONFIG_GPIOLIB + +/* + * We don't (yet) implement inlined/rapid versions for on-chip gpios. + * Just call gpiolib.
+ */ +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + +static inline int gpio_to_irq(unsigned int gpio) +{ + return __gpio_to_irq(gpio); +} + +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + +#endif /* CONFIG_GPIOLIB */ + +#endif /* _ASM_IA64_GPIO_H */ From 5fa29a17fabfe204fa9f20edd5fc81ab2364eb4b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:02 +0300 Subject: [PATCH 006/151] dmaengine: add ep93xx DMA support The ep93xx DMA controller has 10 independent memory to peripheral (M2P) channels, and 2 dedicated memory to memory (M2M) channels. M2M channels can also be used by SPI and IDE to perform DMA transfers to/from their memory mapped FIFOs. This driver supports both M2P and M2M channels with DMA_SLAVE, DMA_CYCLIC and DMA_MEMCPY (M2M only) capabilities. Signed-off-by: Mika Westerberg Signed-off-by: Ryan Mallon Acked-by: H Hartley Sweeten Acked-by: Vinod Koul Cc: Dan Williams Signed-off-by: Vinod Koul --- arch/arm/mach-ep93xx/include/mach/dma.h | 87 ++ drivers/dma/Kconfig | 7 + drivers/dma/Makefile | 1 + drivers/dma/ep93xx_dma.c | 1355 +++++++++++++++++++++++ 4 files changed, 1450 insertions(+) create mode 100644 drivers/dma/ep93xx_dma.c diff --git a/arch/arm/mach-ep93xx/include/mach/dma.h b/arch/arm/mach-ep93xx/include/mach/dma.h index 5e31b2b25da9..6e7049a796a4 100644 --- a/arch/arm/mach-ep93xx/include/mach/dma.h +++ b/arch/arm/mach-ep93xx/include/mach/dma.h @@ -15,6 +15,8 @@ #include #include +#include +#include /** * struct ep93xx_dma_buffer - Information about a buffer to be transferred @@ -146,4 +148,89 @@ void ep93xx_dma_m2p_submit_recursive(struct ep93xx_dma_m2p_client *m2p, */ void ep93xx_dma_m2p_flush(struct ep93xx_dma_m2p_client *m2p); +/* + * M2P channels. 
+ * + * Note that these values are also directly used for setting the PPALLOC + * register. + */ +#define EP93XX_DMA_I2S1 0 +#define EP93XX_DMA_I2S2 1 +#define EP93XX_DMA_AAC1 2 +#define EP93XX_DMA_AAC2 3 +#define EP93XX_DMA_AAC3 4 +#define EP93XX_DMA_I2S3 5 +#define EP93XX_DMA_UART1 6 +#define EP93XX_DMA_UART2 7 +#define EP93XX_DMA_UART3 8 +#define EP93XX_DMA_IRDA 9 +/* M2M channels */ +#define EP93XX_DMA_SSP 10 +#define EP93XX_DMA_IDE 11 + +/** + * struct ep93xx_dma_data - configuration data for the EP93xx dmaengine + * @port: peripheral which is requesting the channel + * @direction: TX/RX channel + * @name: optional name for the channel, this is displayed in /proc/interrupts + * + * This information is passed as private channel parameter in a filter + * function. Note that this is only needed for slave/cyclic channels. For + * memcpy channels %NULL data should be passed. + */ +struct ep93xx_dma_data { + int port; + enum dma_data_direction direction; + const char *name; +}; + +/** + * struct ep93xx_dma_chan_data - platform specific data for a DMA channel + * @name: name of the channel, used for getting the right clock for the channel + * @base: mapped registers + * @irq: interrupt number used by this channel + */ +struct ep93xx_dma_chan_data { + const char *name; + void __iomem *base; + int irq; +}; + +/** + * struct ep93xx_dma_platform_data - platform data for the dmaengine driver + * @channels: array of channels which are passed to the driver + * @num_channels: number of channels in the array + * + * This structure is passed to the DMA engine driver via platform data. For + * M2P channels, contract is that even channels are for TX and odd for RX. + * There is no requirement for the M2M channels. 
+ */ +struct ep93xx_dma_platform_data { + struct ep93xx_dma_chan_data *channels; + size_t num_channels; +}; + +static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) +{ + return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); +} + +/** + * ep93xx_dma_chan_direction - returns direction the channel can be used + * @chan: channel + * + * This function can be used in filter functions to find out whether the + * channel supports given DMA direction. Only M2P channels have such + * limitation, for M2M channels the direction is configurable. + */ +static inline enum dma_data_direction +ep93xx_dma_chan_direction(struct dma_chan *chan) +{ + if (!ep93xx_dma_chan_is_m2p(chan)) + return DMA_NONE; + + /* even channels are for TX, odd for RX */ + return (chan->chan_id % 2 == 0) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; +} + #endif /* __ASM_ARCH_DMA_H */ diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 25cf327cd1cb..2e3b3d38c465 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -237,6 +237,13 @@ config MXS_DMA Support the MXS DMA engine. This engine including APBH-DMA and APBX-DMA is integrated into Freescale i.MX23/28 chips. +config EP93XX_DMA + bool "Cirrus Logic EP93xx DMA support" + depends on ARCH_EP93XX + select DMA_ENGINE + help + Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. 
+ config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 836095ab3c5c..30cf3b1f0c5c 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -25,3 +25,4 @@ obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o obj-$(CONFIG_PL330_DMA) += pl330.o obj-$(CONFIG_PCH_DMA) += pch_dma.o obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o +obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c new file mode 100644 index 000000000000..0766c1e53b1d --- /dev/null +++ b/drivers/dma/ep93xx_dma.c @@ -0,0 +1,1355 @@ +/* + * Driver for the Cirrus Logic EP93xx DMA Controller + * + * Copyright (C) 2011 Mika Westerberg + * + * DMA M2P implementation is based on the original + * arch/arm/mach-ep93xx/dma-m2p.c which has following copyrights: + * + * Copyright (C) 2006 Lennert Buytenhek + * Copyright (C) 2006 Applied Data Systems + * Copyright (C) 2009 Ryan Mallon + * + * This driver is based on dw_dmac and amba-pl08x drivers. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +/* M2P registers */ +#define M2P_CONTROL 0x0000 +#define M2P_CONTROL_STALLINT BIT(0) +#define M2P_CONTROL_NFBINT BIT(1) +#define M2P_CONTROL_CH_ERROR_INT BIT(3) +#define M2P_CONTROL_ENABLE BIT(4) +#define M2P_CONTROL_ICE BIT(6) + +#define M2P_INTERRUPT 0x0004 +#define M2P_INTERRUPT_STALL BIT(0) +#define M2P_INTERRUPT_NFB BIT(1) +#define M2P_INTERRUPT_ERROR BIT(3) + +#define M2P_PPALLOC 0x0008 +#define M2P_STATUS 0x000c + +#define M2P_MAXCNT0 0x0020 +#define M2P_BASE0 0x0024 +#define M2P_MAXCNT1 0x0030 +#define M2P_BASE1 0x0034 + +#define M2P_STATE_IDLE 0 +#define M2P_STATE_STALL 1 +#define M2P_STATE_ON 2 +#define M2P_STATE_NEXT 3 + +/* M2M registers */ +#define M2M_CONTROL 0x0000 +#define M2M_CONTROL_DONEINT BIT(2) +#define M2M_CONTROL_ENABLE BIT(3) +#define M2M_CONTROL_START BIT(4) +#define M2M_CONTROL_DAH BIT(11) +#define M2M_CONTROL_SAH BIT(12) +#define M2M_CONTROL_PW_SHIFT 9 +#define M2M_CONTROL_PW_8 (0 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_16 (1 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_32 (2 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_MASK (3 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_TM_SHIFT 13 +#define M2M_CONTROL_TM_TX (1 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_TM_RX (2 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_RSS_SHIFT 22 +#define M2M_CONTROL_RSS_SSPRX (1 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_SSPTX (2 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_IDE (3 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_NO_HDSK BIT(24) +#define M2M_CONTROL_PWSC_SHIFT 25 + +#define M2M_INTERRUPT 0x0004 +#define M2M_INTERRUPT_DONEINT BIT(1) + +#define M2M_BCR0 0x0010 +#define M2M_BCR1 0x0014 +#define M2M_SAR_BASE0 0x0018 +#define M2M_SAR_BASE1 0x001c +#define M2M_DAR_BASE0 0x002c +#define M2M_DAR_BASE1 0x0030 + +#define DMA_MAX_CHAN_BYTES 0xffff +#define DMA_MAX_CHAN_DESCRIPTORS 32 + +struct ep93xx_dma_engine; + +/** + * struct 
ep93xx_dma_desc - EP93xx specific transaction descriptor + * @src_addr: source address of the transaction + * @dst_addr: destination address of the transaction + * @size: size of the transaction (in bytes) + * @complete: this descriptor is completed + * @txd: dmaengine API descriptor + * @tx_list: list of linked descriptors + * @node: link used for putting this into a channel queue + */ +struct ep93xx_dma_desc { + u32 src_addr; + u32 dst_addr; + size_t size; + bool complete; + struct dma_async_tx_descriptor txd; + struct list_head tx_list; + struct list_head node; +}; + +/** + * struct ep93xx_dma_chan - an EP93xx DMA M2P/M2M channel + * @chan: dmaengine API channel + * @edma: pointer to the engine device + * @regs: memory mapped registers + * @irq: interrupt number of the channel + * @clk: clock used by this channel + * @tasklet: channel specific tasklet used for callbacks + * @lock: lock protecting the fields following + * @flags: flags for the channel + * @buffer: which buffer to use next (0/1) + * @last_completed: last completed cookie value + * @active: flattened chain of descriptors currently being processed + * @queue: pending descriptors which are handled next + * @free_list: list of free descriptors which can be used + * @runtime_addr: physical address currently used as dest/src (M2M only). This + * is set via %DMA_SLAVE_CONFIG before slave operation is + * prepared + * @runtime_ctrl: M2M runtime values for the control register. + * + * As EP93xx DMA controller doesn't support real chained DMA descriptors we + * will have slightly different scheme here: @active points to a head of + * flattened DMA descriptor chain. + * + * @queue holds pending transactions. These are linked through the first + * descriptor in the chain. When a descriptor is moved to the @active queue, + * the first and chained descriptors are flattened into a single list.
+ * + * @chan.private holds pointer to &struct ep93xx_dma_data which contains + * necessary channel configuration information. For memcpy channels this must + * be %NULL. + */ +struct ep93xx_dma_chan { + struct dma_chan chan; + const struct ep93xx_dma_engine *edma; + void __iomem *regs; + int irq; + struct clk *clk; + struct tasklet_struct tasklet; + /* protects the fields following */ + spinlock_t lock; + unsigned long flags; +/* Channel is configured for cyclic transfers */ +#define EP93XX_DMA_IS_CYCLIC 0 + + int buffer; + dma_cookie_t last_completed; + struct list_head active; + struct list_head queue; + struct list_head free_list; + u32 runtime_addr; + u32 runtime_ctrl; +}; + +/** + * struct ep93xx_dma_engine - the EP93xx DMA engine instance + * @dma_dev: holds the dmaengine device + * @m2m: is this an M2M or M2P device + * @hw_setup: method which sets the channel up for operation + * @hw_shutdown: shuts the channel down and flushes whatever is left + * @hw_submit: pushes active descriptor(s) to the hardware + * @hw_interrupt: handle the interrupt + * @num_channels: number of channels for this instance + * @channels: array of channels + * + * There is one instance of this struct for the M2P channels and one for the + * M2M channels. hw_xxx() methods are used to perform operations which are + * different on M2M and M2P channels. These methods are called with channel + * lock held and interrupts disabled so they cannot sleep. 
+ */ +struct ep93xx_dma_engine { + struct dma_device dma_dev; + bool m2m; + int (*hw_setup)(struct ep93xx_dma_chan *); + void (*hw_shutdown)(struct ep93xx_dma_chan *); + void (*hw_submit)(struct ep93xx_dma_chan *); + int (*hw_interrupt)(struct ep93xx_dma_chan *); +#define INTERRUPT_UNKNOWN 0 +#define INTERRUPT_DONE 1 +#define INTERRUPT_NEXT_BUFFER 2 + + size_t num_channels; + struct ep93xx_dma_chan channels[]; +}; + +static inline struct device *chan2dev(struct ep93xx_dma_chan *edmac) +{ + return &edmac->chan.dev->device; +} + +static struct ep93xx_dma_chan *to_ep93xx_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct ep93xx_dma_chan, chan); +} + +/** + * ep93xx_dma_set_active - set new active descriptor chain + * @edmac: channel + * @desc: head of the new active descriptor chain + * + * Sets @desc to be the head of the new active descriptor chain. This is the + * chain which is processed next. The active list must be empty before calling + * this function. + * + * Called with @edmac->lock held and interrupts disabled. + */ +static void ep93xx_dma_set_active(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + BUG_ON(!list_empty(&edmac->active)); + + list_add_tail(&desc->node, &edmac->active); + + /* Flatten the @desc->tx_list chain into @edmac->active list */ + while (!list_empty(&desc->tx_list)) { + struct ep93xx_dma_desc *d = list_first_entry(&desc->tx_list, + struct ep93xx_dma_desc, node); + + /* + * We copy the callback parameters from the first descriptor + * to all the chained descriptors. This way we can call the + * callback without having to find out the first descriptor in + * the chain. Useful for cyclic transfers. 
+ */ + d->txd.callback = desc->txd.callback; + d->txd.callback_param = desc->txd.callback_param; + + list_move_tail(&d->node, &edmac->active); + } +} + +/* Called with @edmac->lock held and interrupts disabled */ +static struct ep93xx_dma_desc * +ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac) +{ + return list_first_entry(&edmac->active, struct ep93xx_dma_desc, node); +} + +/** + * ep93xx_dma_advance_active - advances to the next active descriptor + * @edmac: channel + * + * Function advances active descriptor to the next in the @edmac->active and + * returns %true if we still have descriptors in the chain to process. + * Otherwise returns %false. + * + * When the channel is in cyclic mode always returns %true. + * + * Called with @edmac->lock held and interrupts disabled. + */ +static bool ep93xx_dma_advance_active(struct ep93xx_dma_chan *edmac) +{ + list_rotate_left(&edmac->active); + + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + return true; + + /* + * If txd.cookie is set it means that we are back in the first + * descriptor in the chain and hence done with it. + */ + return !ep93xx_dma_get_active(edmac)->txd.cookie; +} + +/* + * M2P DMA implementation + */ + +static void m2p_set_control(struct ep93xx_dma_chan *edmac, u32 control) +{ + writel(control, edmac->regs + M2P_CONTROL); + /* + * EP93xx User's Guide states that we must perform a dummy read after + * write to the control register. 
+ */ + readl(edmac->regs + M2P_CONTROL); +} + +static int m2p_hw_setup(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control; + + writel(data->port & 0xf, edmac->regs + M2P_PPALLOC); + + control = M2P_CONTROL_CH_ERROR_INT | M2P_CONTROL_ICE + | M2P_CONTROL_ENABLE; + m2p_set_control(edmac, control); + + return 0; +} + +static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac) +{ + return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3; +} + +static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + u32 control; + + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + while (m2p_channel_state(edmac) >= M2P_STATE_ON) + cpu_relax(); + + m2p_set_control(edmac, 0); + + while (m2p_channel_state(edmac) == M2P_STATE_STALL) + cpu_relax(); +} + +static void m2p_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + u32 bus_addr; + + if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_TO_DEVICE) + bus_addr = desc->src_addr; + else + bus_addr = desc->dst_addr; + + if (edmac->buffer == 0) { + writel(desc->size, edmac->regs + M2P_MAXCNT0); + writel(bus_addr, edmac->regs + M2P_BASE0); + } else { + writel(desc->size, edmac->regs + M2P_MAXCNT1); + writel(bus_addr, edmac->regs + M2P_BASE1); + } + + edmac->buffer ^= 1; +} + +static void m2p_hw_submit(struct ep93xx_dma_chan *edmac) +{ + u32 control = readl(edmac->regs + M2P_CONTROL); + + m2p_fill_desc(edmac); + control |= M2P_CONTROL_STALLINT; + + if (ep93xx_dma_advance_active(edmac)) { + m2p_fill_desc(edmac); + control |= M2P_CONTROL_NFBINT; + } + + m2p_set_control(edmac, control); +} + +static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 irq_status = readl(edmac->regs + M2P_INTERRUPT); + u32 control; + + if (irq_status & M2P_INTERRUPT_ERROR) { + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + + /* Clear 
the error interrupt */ + writel(1, edmac->regs + M2P_INTERRUPT); + + /* + * It seems that there is no easy way of reporting errors back + * to client so we just report the error here and continue as + * usual. + * + * Revisit this when there is a mechanism to report back the + * errors. + */ + dev_err(chan2dev(edmac), + "DMA transfer failed! Details:\n" + "\tcookie : %d\n" + "\tsrc_addr : 0x%08x\n" + "\tdst_addr : 0x%08x\n" + "\tsize : %zu\n", + desc->txd.cookie, desc->src_addr, desc->dst_addr, + desc->size); + } + + switch (irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) { + case M2P_INTERRUPT_STALL: + /* Disable interrupts */ + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + return INTERRUPT_DONE; + + case M2P_INTERRUPT_NFB: + if (ep93xx_dma_advance_active(edmac)) + m2p_fill_desc(edmac); + + return INTERRUPT_NEXT_BUFFER; + } + + return INTERRUPT_UNKNOWN; +} + +/* + * M2M DMA implementation + * + * For the M2M transfers we don't use NFB at all. This is because it simply + * doesn't work well with memcpy transfers. When you submit both buffers it is + * extremely unlikely that you get an NFB interrupt, but it instead reports + * DONE interrupt and both buffers are already transferred which means that we + * weren't able to update the next buffer. + * + * So for now we "simulate" NFB by just submitting buffer after buffer + * without double buffering. + */ + +static int m2m_hw_setup(struct ep93xx_dma_chan *edmac) +{ + const struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = 0; + + if (!data) { + /* This is memcpy channel, nothing to configure */ + writel(control, edmac->regs + M2M_CONTROL); + return 0; + } + + switch (data->port) { + case EP93XX_DMA_SSP: + /* + * This was found via experimenting - anything less than 5 + * causes the channel to perform only a partial transfer which + * leads to problems since we don't get DONE interrupt then. 
+ */ + control = (5 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_NO_HDSK; + + if (data->direction == DMA_TO_DEVICE) { + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + control |= M2M_CONTROL_RSS_SSPTX; + } else { + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + control |= M2M_CONTROL_RSS_SSPRX; + } + break; + + case EP93XX_DMA_IDE: + /* + * This IDE part is totally untested. Values below are taken + * from the EP93xx User's Guide and might not be correct. + */ + if (data->direction == DMA_TO_DEVICE) { + /* Worst case from the UG */ + control = (3 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + } else { + control = (2 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + } + /* Common bits go last: the branches above assign control */ + control |= M2M_CONTROL_NO_HDSK; + control |= M2M_CONTROL_RSS_IDE; + control |= M2M_CONTROL_PW_16; + break; + + default: + return -EINVAL; + } + + writel(control, edmac->regs + M2M_CONTROL); + return 0; +} + +static void m2m_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + /* Just disable the channel */ + writel(0, edmac->regs + M2M_CONTROL); +} + +static void m2m_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + + if (edmac->buffer == 0) { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE0); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE0); + writel(desc->size, edmac->regs + M2M_BCR0); + } else { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE1); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE1); + writel(desc->size, edmac->regs + M2M_BCR1); + } + + edmac->buffer ^= 1; +} + +static void m2m_hw_submit(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = readl(edmac->regs + M2M_CONTROL); + + /* + * Since we allow clients to configure PW (peripheral width) we always + * clear PW bits here and then set them according to what is given in + * the runtime
configuration. + */ + control &= ~M2M_CONTROL_PW_MASK; + control |= edmac->runtime_ctrl; + + m2m_fill_desc(edmac); + control |= M2M_CONTROL_DONEINT; + + /* + * Now we can finally enable the channel. For M2M channel this must be + * done _after_ the BCRx registers are programmed. + */ + control |= M2M_CONTROL_ENABLE; + writel(control, edmac->regs + M2M_CONTROL); + + if (!data) { + /* + * For memcpy channels the software trigger must be asserted + * in order to start the memcpy operation. + */ + control |= M2M_CONTROL_START; + writel(control, edmac->regs + M2M_CONTROL); + } +} + +static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 control; + + if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_DONEINT)) + return INTERRUPT_UNKNOWN; + + /* Clear the DONE bit */ + writel(0, edmac->regs + M2M_INTERRUPT); + + /* Disable interrupts and the channel */ + control = readl(edmac->regs + M2M_CONTROL); + control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_ENABLE); + writel(control, edmac->regs + M2M_CONTROL); + + /* + * Since we only get DONE interrupt we have to find out ourselves + * whether there still is something to process. So we try to advance + * the chain an see whether it succeeds. 
+ */ + if (ep93xx_dma_advance_active(edmac)) { + edmac->edma->hw_submit(edmac); + return INTERRUPT_NEXT_BUFFER; + } + + return INTERRUPT_DONE; +} + +/* + * DMA engine API implementation + */ + +static struct ep93xx_dma_desc * +ep93xx_dma_desc_get(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc, *_desc; + struct ep93xx_dma_desc *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_for_each_entry_safe(desc, _desc, &edmac->free_list, node) { + if (async_tx_test_ack(&desc->txd)) { + list_del_init(&desc->node); + + /* Re-initialize the descriptor */ + desc->src_addr = 0; + desc->dst_addr = 0; + desc->size = 0; + desc->complete = false; + desc->txd.cookie = 0; + desc->txd.callback = NULL; + desc->txd.callback_param = NULL; + + ret = desc; + break; + } + } + spin_unlock_irqrestore(&edmac->lock, flags); + return ret; +} + +static void ep93xx_dma_desc_put(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + if (desc) { + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_splice_init(&desc->tx_list, &edmac->free_list); + list_add(&desc->node, &edmac->free_list); + spin_unlock_irqrestore(&edmac->lock, flags); + } +} + +/** + * ep93xx_dma_advance_work - start processing the next pending transaction + * @edmac: channel + * + * If we have pending transactions queued and we are currently idling, this + * function takes the next queued transaction from the @edmac->queue and + * pushes it to the hardware for execution. 
+ */ +static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *new; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + if (!list_empty(&edmac->active) || list_empty(&edmac->queue)) { + spin_unlock_irqrestore(&edmac->lock, flags); + return; + } + + /* Take the next descriptor from the pending queue */ + new = list_first_entry(&edmac->queue, struct ep93xx_dma_desc, node); + list_del_init(&new->node); + + ep93xx_dma_set_active(edmac, new); + + /* Push it to the hardware */ + edmac->edma->hw_submit(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); +} + +static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc) +{ + struct device *dev = desc->txd.chan->device->dev; + + if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(dev, desc->src_addr, desc->size, + DMA_TO_DEVICE); + else + dma_unmap_page(dev, desc->src_addr, desc->size, + DMA_TO_DEVICE); + } + if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(dev, desc->dst_addr, desc->size, + DMA_FROM_DEVICE); + else + dma_unmap_page(dev, desc->dst_addr, desc->size, + DMA_FROM_DEVICE); + } +} + +static void ep93xx_dma_tasklet(unsigned long data) +{ + struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data; + struct ep93xx_dma_desc *desc, *d; + dma_async_tx_callback callback; + void *callback_param; + LIST_HEAD(list); + + spin_lock_irq(&edmac->lock); + desc = ep93xx_dma_get_active(edmac); + if (desc->complete) { + edmac->last_completed = desc->txd.cookie; + list_splice_init(&edmac->active, &list); + } + spin_unlock_irq(&edmac->lock); + + /* Pick up the next descriptor from the queue */ + ep93xx_dma_advance_work(edmac); + + callback = desc->txd.callback; + callback_param = desc->txd.callback_param; + + /* Now we can release all the chained descriptors */ + list_for_each_entry_safe(desc, d, &list, node) { + /* 
+ * For the memcpy channels the API requires us to unmap the + * buffers unless requested otherwise. + */ + if (!edmac->chan.private) + ep93xx_dma_unmap_buffers(desc); + + ep93xx_dma_desc_put(edmac, desc); + } + + if (callback) + callback(callback_param); +} + +static irqreturn_t ep93xx_dma_interrupt(int irq, void *dev_id) +{ + struct ep93xx_dma_chan *edmac = dev_id; + irqreturn_t ret = IRQ_HANDLED; + + spin_lock(&edmac->lock); + + switch (edmac->edma->hw_interrupt(edmac)) { + case INTERRUPT_DONE: + ep93xx_dma_get_active(edmac)->complete = true; + tasklet_schedule(&edmac->tasklet); + break; + + case INTERRUPT_NEXT_BUFFER: + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + tasklet_schedule(&edmac->tasklet); + break; + + default: + dev_warn(chan2dev(edmac), "unknown interrupt!\n"); + ret = IRQ_NONE; + break; + } + + spin_unlock(&edmac->lock); + return ret; +} + +/** + * ep93xx_dma_tx_submit - set the prepared descriptor(s) to be executed + * @tx: descriptor to be executed + * + * Function will execute given descriptor on the hardware or if the hardware + * is busy, queue the descriptor to be executed later on. Returns cookie which + * can be used to poll the status of the descriptor. + */ +static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(tx->chan); + struct ep93xx_dma_desc *desc; + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + + cookie = edmac->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + desc = container_of(tx, struct ep93xx_dma_desc, txd); + + edmac->chan.cookie = cookie; + desc->txd.cookie = cookie; + + /* + * If nothing is currently prosessed, we push this descriptor + * directly to the hardware. Otherwise we put the descriptor + * to the pending queue. 
+ */ + if (list_empty(&edmac->active)) { + ep93xx_dma_set_active(edmac, desc); + edmac->edma->hw_submit(edmac); + } else { + list_add_tail(&desc->node, &edmac->queue); + } + + spin_unlock_irqrestore(&edmac->lock, flags); + return cookie; +} + +/** + * ep93xx_dma_alloc_chan_resources - allocate resources for the channel + * @chan: channel to allocate resources + * + * Function allocates necessary resources for the given DMA channel and + * returns number of allocated descriptors for the channel. Negative errno + * is returned in case of failure. + */ +static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_data *data = chan->private; + const char *name = dma_chan_name(chan); + int ret, i; + + /* Sanity check the channel parameters */ + if (!edmac->edma->m2m) { + if (!data) + return -EINVAL; + if (data->port < EP93XX_DMA_I2S1 || + data->port > EP93XX_DMA_IRDA) + return -EINVAL; + if (data->direction != ep93xx_dma_chan_direction(chan)) + return -EINVAL; + } else { + if (data) { + switch (data->port) { + case EP93XX_DMA_SSP: + case EP93XX_DMA_IDE: + if (data->direction != DMA_TO_DEVICE && + data->direction != DMA_FROM_DEVICE) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + } + + if (data && data->name) + name = data->name; + + ret = clk_enable(edmac->clk); + if (ret) + return ret; + + ret = request_irq(edmac->irq, ep93xx_dma_interrupt, 0, name, edmac); + if (ret) + goto fail_clk_disable; + + spin_lock_irq(&edmac->lock); + edmac->last_completed = 1; + edmac->chan.cookie = 1; + ret = edmac->edma->hw_setup(edmac); + spin_unlock_irq(&edmac->lock); + + if (ret) + goto fail_free_irq; + + for (i = 0; i < DMA_MAX_CHAN_DESCRIPTORS; i++) { + struct ep93xx_dma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + dev_warn(chan2dev(edmac), "not enough descriptors\n"); + break; + } + + INIT_LIST_HEAD(&desc->tx_list); + + 
dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.tx_submit = ep93xx_dma_tx_submit; + + ep93xx_dma_desc_put(edmac, desc); + } + + return i; + +fail_free_irq: + free_irq(edmac->irq, edmac); +fail_clk_disable: + clk_disable(edmac->clk); + + return ret; +} + +/** + * ep93xx_dma_free_chan_resources - release resources for the channel + * @chan: channel + * + * Function releases all the resources allocated for the given channel. + * The channel must be idle when this is called. + */ +static void ep93xx_dma_free_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *d; + unsigned long flags; + LIST_HEAD(list); + + BUG_ON(!list_empty(&edmac->active)); + BUG_ON(!list_empty(&edmac->queue)); + + spin_lock_irqsave(&edmac->lock, flags); + edmac->edma->hw_shutdown(edmac); + edmac->runtime_addr = 0; + edmac->runtime_ctrl = 0; + edmac->buffer = 0; + list_splice_init(&edmac->free_list, &list); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, d, &list, node) + kfree(desc); + + clk_disable(edmac->clk); + free_irq(edmac->irq, edmac); +} + +/** + * ep93xx_dma_prep_dma_memcpy - prepare a memcpy DMA operation + * @chan: channel + * @dest: destination bus address + * @src: source bus address + * @len: size of the transaction + * @flags: flags for the descriptor + * + * Returns a valid DMA descriptor or %NULL in case of failure. 
+ */
+struct dma_async_tx_descriptor *
+ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
+			   dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *first;
+	size_t bytes, offset;
+
+	/*
+	 * With a zero length the loop below never runs, @first stays NULL
+	 * and would be dereferenced when the cookie is set. Refuse such a
+	 * request instead.
+	 */
+	if (!len) {
+		dev_warn(chan2dev(edmac), "zero length memcpy requested\n");
+		return NULL;
+	}
+
+	/* Split the copy into chunks of at most DMA_MAX_CHAN_BYTES each */
+	first = NULL;
+	for (offset = 0; offset < len; offset += bytes) {
+		desc = ep93xx_dma_desc_get(edmac);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "couln't get descriptor\n");
+			goto fail;
+		}
+
+		bytes = min_t(size_t, len - offset, DMA_MAX_CHAN_BYTES);
+
+		desc->src_addr = src + offset;
+		desc->dst_addr = dest + offset;
+		desc->size = bytes;
+
+		/* The first descriptor heads the chain, the rest hang off it */
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+	first->txd.flags = flags;
+
+	return &first->txd;
+fail:
+	/* Returns @first and everything chained on its tx_list */
+	ep93xx_dma_desc_put(edmac, first);
+	return NULL;
+}
+
+/**
+ * ep93xx_dma_prep_slave_sg - prepare a slave DMA operation
+ * @chan: channel
+ * @sgl: list of buffers to transfer
+ * @sg_len: number of entries in @sgl
+ * @dir: direction of the DMA transfer
+ * @flags: flags for the descriptor
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			 unsigned int sg_len, enum dma_data_direction dir,
+			 unsigned long flags)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *first;
+	struct scatterlist *sg;
+	int i;
+
+	if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) {
+		dev_warn(chan2dev(edmac),
+			 "channel was configured with different direction\n");
+		return NULL;
+	}
+
+	if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) {
+		dev_warn(chan2dev(edmac),
+			 "channel is already used for cyclic transfers\n");
+		return NULL;
+	}
+
+	/*
+	 * With an empty list the loop below never runs, @first stays NULL
+	 * and would be dereferenced when the cookie is set.
+	 */
+	if (!sgl || !sg_len) {
+		dev_warn(chan2dev(edmac), "empty scatterlist\n");
+		return NULL;
+	}
+
+	first = NULL;
+	for_each_sg(sgl, sg, sg_len, i) {
+		/* Named so that it does not shadow the @sg_len parameter */
+		size_t len = sg_dma_len(sg);
+
+		if (len > DMA_MAX_CHAN_BYTES) {
+			dev_warn(chan2dev(edmac), "too big transfer size %zu\n",
+				 len);
+			goto fail;
+		}
+
+		desc = ep93xx_dma_desc_get(edmac);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "couln't get descriptor\n");
+			goto fail;
+		}
+
+		/* The peripheral side uses the address from slave config */
+		if (dir == DMA_TO_DEVICE) {
+			desc->src_addr = sg_dma_address(sg);
+			desc->dst_addr = edmac->runtime_addr;
+		} else {
+			desc->src_addr = edmac->runtime_addr;
+			desc->dst_addr = sg_dma_address(sg);
+		}
+		desc->size = len;
+
+		/* The first descriptor heads the chain, the rest hang off it */
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+	first->txd.flags = flags;
+
+	return &first->txd;
+
+fail:
+	/* Returns @first and everything chained on its tx_list */
+	ep93xx_dma_desc_put(edmac, first);
+	return NULL;
+}
+
+/**
+ * ep93xx_dma_prep_dma_cyclic - prepare a cyclic DMA operation
+ * @chan: channel
+ * @dma_addr: DMA mapped address of the buffer
+ * @buf_len: length of the buffer (in bytes)
+ * @period_len: length of a single period
+ * @dir: direction of the operation
+ *
+ * Prepares a descriptor for cyclic DMA operation. This means that once the
+ * descriptor is submitted, we will be submitting in a @period_len sized
+ * buffers and calling callback once the period has been elapsed.
Transfer
+ * terminates only when client calls dmaengine_terminate_all() for this
+ * channel.
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
+			   size_t buf_len, size_t period_len,
+			   enum dma_data_direction dir)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *first;
+	size_t offset;
+
+	if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) {
+		dev_warn(chan2dev(edmac),
+			 "channel was configured with different direction\n");
+		return NULL;
+	}
+
+	/*
+	 * Validate the lengths before the channel is marked cyclic. A zero
+	 * @period_len would never advance the loop below, and a zero
+	 * @buf_len would leave @first NULL when the cookie is set.
+	 */
+	if (!buf_len || !period_len) {
+		dev_warn(chan2dev(edmac), "zero buffer or period length\n");
+		return NULL;
+	}
+
+	if (period_len > DMA_MAX_CHAN_BYTES) {
+		dev_warn(chan2dev(edmac), "too big period length %zu\n",
+			 period_len);
+		return NULL;
+	}
+
+	if (test_and_set_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) {
+		dev_warn(chan2dev(edmac),
+			 "channel is already used for cyclic transfers\n");
+		return NULL;
+	}
+
+	/* Split the buffer into period size chunks */
+	first = NULL;
+	for (offset = 0; offset < buf_len; offset += period_len) {
+		desc = ep93xx_dma_desc_get(edmac);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "couln't get descriptor\n");
+			goto fail;
+		}
+
+		/* The peripheral side uses the address from slave config */
+		if (dir == DMA_TO_DEVICE) {
+			desc->src_addr = dma_addr + offset;
+			desc->dst_addr = edmac->runtime_addr;
+		} else {
+			desc->src_addr = edmac->runtime_addr;
+			desc->dst_addr = dma_addr + offset;
+		}
+
+		desc->size = period_len;
+
+		/* The first descriptor heads the chain, the rest hang off it */
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+
+	return &first->txd;
+
+fail:
+	/*
+	 * The cyclic flag was set by this call, so drop it again; otherwise
+	 * the channel would keep rejecting slave and cyclic requests until
+	 * the client terminates it.
+	 */
+	clear_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags);
+	ep93xx_dma_desc_put(edmac, first);
+	return NULL;
+}
+
+/**
+ * ep93xx_dma_terminate_all - terminate all transactions
+ * @edmac: channel
+ *
+ * Stops all DMA transactions. All descriptors are put back to the
+ * @edmac->free_list and callbacks are _not_ called.
+ */ +static int ep93xx_dma_terminate_all(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc, *_d; + unsigned long flags; + LIST_HEAD(list); + + spin_lock_irqsave(&edmac->lock, flags); + /* First we disable and flush the DMA channel */ + edmac->edma->hw_shutdown(edmac); + clear_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags); + list_splice_init(&edmac->active, &list); + list_splice_init(&edmac->queue, &list); + /* + * We then re-enable the channel. This way we can continue submitting + * the descriptors by just calling ->hw_submit() again. + */ + edmac->edma->hw_setup(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, _d, &list, node) + ep93xx_dma_desc_put(edmac, desc); + + return 0; +} + +static int ep93xx_dma_slave_config(struct ep93xx_dma_chan *edmac, + struct dma_slave_config *config) +{ + enum dma_slave_buswidth width; + unsigned long flags; + u32 addr, ctrl; + + if (!edmac->edma->m2m) + return -EINVAL; + + switch (config->direction) { + case DMA_FROM_DEVICE: + width = config->src_addr_width; + addr = config->src_addr; + break; + + case DMA_TO_DEVICE: + width = config->dst_addr_width; + addr = config->dst_addr; + break; + + default: + return -EINVAL; + } + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + ctrl = 0; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + ctrl = M2M_CONTROL_PW_16; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + ctrl = M2M_CONTROL_PW_32; + break; + default: + return -EINVAL; + } + + spin_lock_irqsave(&edmac->lock, flags); + edmac->runtime_addr = addr; + edmac->runtime_ctrl = ctrl; + spin_unlock_irqrestore(&edmac->lock, flags); + + return 0; +} + +/** + * ep93xx_dma_control - manipulate all pending operations on a channel + * @chan: channel + * @cmd: control command to perform + * @arg: optional argument + * + * Controls the channel. Function returns %0 in case of success or negative + * error in case of failure. 
+ */ +static int ep93xx_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct dma_slave_config *config; + + switch (cmd) { + case DMA_TERMINATE_ALL: + return ep93xx_dma_terminate_all(edmac); + + case DMA_SLAVE_CONFIG: + config = (struct dma_slave_config *)arg; + return ep93xx_dma_slave_config(edmac, config); + + default: + break; + } + + return -ENOSYS; +} + +/** + * ep93xx_dma_tx_status - check if a transaction is completed + * @chan: channel + * @cookie: transaction specific cookie + * @state: state of the transaction is stored here if given + * + * This function can be used to query state of a given transaction. + */ +static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + dma_cookie_t last_used, last_completed; + enum dma_status ret; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + last_used = chan->cookie; + last_completed = edmac->last_completed; + spin_unlock_irqrestore(&edmac->lock, flags); + + ret = dma_async_is_complete(cookie, last_completed, last_used); + dma_set_tx_state(state, last_completed, last_used, 0); + + return ret; +} + +/** + * ep93xx_dma_issue_pending - push pending transactions to the hardware + * @chan: channel + * + * When this function is called, all pending transactions are pushed to the + * hardware and executed. 
+ */ +static void ep93xx_dma_issue_pending(struct dma_chan *chan) +{ + ep93xx_dma_advance_work(to_ep93xx_dma_chan(chan)); +} + +static int __init ep93xx_dma_probe(struct platform_device *pdev) +{ + struct ep93xx_dma_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct ep93xx_dma_engine *edma; + struct dma_device *dma_dev; + size_t edma_size; + int ret, i; + + edma_size = pdata->num_channels * sizeof(struct ep93xx_dma_chan); + edma = kzalloc(sizeof(*edma) + edma_size, GFP_KERNEL); + if (!edma) + return -ENOMEM; + + dma_dev = &edma->dma_dev; + edma->m2m = platform_get_device_id(pdev)->driver_data; + edma->num_channels = pdata->num_channels; + + INIT_LIST_HEAD(&dma_dev->channels); + for (i = 0; i < pdata->num_channels; i++) { + const struct ep93xx_dma_chan_data *cdata = &pdata->channels[i]; + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + + edmac->chan.device = dma_dev; + edmac->regs = cdata->base; + edmac->irq = cdata->irq; + edmac->edma = edma; + + edmac->clk = clk_get(NULL, cdata->name); + if (IS_ERR(edmac->clk)) { + dev_warn(&pdev->dev, "failed to get clock for %s\n", + cdata->name); + continue; + } + + spin_lock_init(&edmac->lock); + INIT_LIST_HEAD(&edmac->active); + INIT_LIST_HEAD(&edmac->queue); + INIT_LIST_HEAD(&edmac->free_list); + tasklet_init(&edmac->tasklet, ep93xx_dma_tasklet, + (unsigned long)edmac); + + list_add_tail(&edmac->chan.device_node, + &dma_dev->channels); + } + + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); + + dma_dev->dev = &pdev->dev; + dma_dev->device_alloc_chan_resources = ep93xx_dma_alloc_chan_resources; + dma_dev->device_free_chan_resources = ep93xx_dma_free_chan_resources; + dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic; + dma_dev->device_control = ep93xx_dma_control; + dma_dev->device_issue_pending = ep93xx_dma_issue_pending; + dma_dev->device_tx_status = 
ep93xx_dma_tx_status; + + dma_set_max_seg_size(dma_dev->dev, DMA_MAX_CHAN_BYTES); + + if (edma->m2m) { + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_dev->device_prep_dma_memcpy = ep93xx_dma_prep_dma_memcpy; + + edma->hw_setup = m2m_hw_setup; + edma->hw_shutdown = m2m_hw_shutdown; + edma->hw_submit = m2m_hw_submit; + edma->hw_interrupt = m2m_hw_interrupt; + } else { + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + + edma->hw_setup = m2p_hw_setup; + edma->hw_shutdown = m2p_hw_shutdown; + edma->hw_submit = m2p_hw_submit; + edma->hw_interrupt = m2p_hw_interrupt; + } + + ret = dma_async_device_register(dma_dev); + if (unlikely(ret)) { + for (i = 0; i < edma->num_channels; i++) { + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + if (!IS_ERR_OR_NULL(edmac->clk)) + clk_put(edmac->clk); + } + kfree(edma); + } else { + dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n", + edma->m2m ? "M" : "P"); + } + + return ret; +} + +static struct platform_device_id ep93xx_dma_driver_ids[] = { + { "ep93xx-dma-m2p", 0 }, + { "ep93xx-dma-m2m", 1 }, + { }, +}; + +static struct platform_driver ep93xx_dma_driver = { + .driver = { + .name = "ep93xx-dma", + }, + .id_table = ep93xx_dma_driver_ids, +}; + +static int __init ep93xx_dma_module_init(void) +{ + return platform_driver_probe(&ep93xx_dma_driver, ep93xx_dma_probe); +} +subsys_initcall(ep93xx_dma_module_init); + +MODULE_AUTHOR("Mika Westerberg "); +MODULE_DESCRIPTION("EP93xx DMA driver"); +MODULE_LICENSE("GPL"); From f911d026e84a137e35701a4f23732f47ce40a6b8 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:03 +0300 Subject: [PATCH 007/151] ep93xx: add dmaengine platform code Add platform support code for the new EP93xx dmaengine driver. 
Signed-off-by: Mika Westerberg Signed-off-by: Ryan Mallon Acked-by: H Hartley Sweeten Signed-off-by: Vinod Koul --- arch/arm/mach-ep93xx/Makefile | 2 + arch/arm/mach-ep93xx/dma.c | 108 ++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 arch/arm/mach-ep93xx/dma.c diff --git a/arch/arm/mach-ep93xx/Makefile b/arch/arm/mach-ep93xx/Makefile index 33ee2c863d18..4920f7ae8330 100644 --- a/arch/arm/mach-ep93xx/Makefile +++ b/arch/arm/mach-ep93xx/Makefile @@ -6,6 +6,8 @@ obj-m := obj-n := obj- := +obj-$(CONFIG_EP93XX_DMA) += dma.o + obj-$(CONFIG_MACH_ADSSPHERE) += adssphere.o obj-$(CONFIG_MACH_EDB93XX) += edb93xx.o obj-$(CONFIG_MACH_GESBC9312) += gesbc9312.o diff --git a/arch/arm/mach-ep93xx/dma.c b/arch/arm/mach-ep93xx/dma.c new file mode 100644 index 000000000000..5a2570881255 --- /dev/null +++ b/arch/arm/mach-ep93xx/dma.c @@ -0,0 +1,108 @@ +/* + * arch/arm/mach-ep93xx/dma.c + * + * Platform support code for the EP93xx dmaengine driver. + * + * Copyright (C) 2011 Mika Westerberg + * + * This work is based on the original dma-m2p implementation with + * following copyrights: + * + * Copyright (C) 2006 Lennert Buytenhek + * Copyright (C) 2006 Applied Data Systems + * Copyright (C) 2009 Ryan Mallon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define DMA_CHANNEL(_name, _base, _irq) \ + { .name = (_name), .base = (_base), .irq = (_irq) } + +/* + * DMA M2P channels. + * + * On the EP93xx chip the following peripherals may be allocated to the 10 + * Memory to Internal Peripheral (M2P) channels (5 transmit + 5 receive). 
+ * + * I2S contains 3 Tx and 3 Rx DMA Channels + * AAC contains 3 Tx and 3 Rx DMA Channels + * UART1 contains 1 Tx and 1 Rx DMA Channels + * UART2 contains 1 Tx and 1 Rx DMA Channels + * UART3 contains 1 Tx and 1 Rx DMA Channels + * IrDA contains 1 Tx and 1 Rx DMA Channels + * + * Registers are mapped statically in ep93xx_map_io(). + */ +static struct ep93xx_dma_chan_data ep93xx_dma_m2p_channels[] = { + DMA_CHANNEL("m2p0", EP93XX_DMA_BASE + 0x0000, IRQ_EP93XX_DMAM2P0), + DMA_CHANNEL("m2p1", EP93XX_DMA_BASE + 0x0040, IRQ_EP93XX_DMAM2P1), + DMA_CHANNEL("m2p2", EP93XX_DMA_BASE + 0x0080, IRQ_EP93XX_DMAM2P2), + DMA_CHANNEL("m2p3", EP93XX_DMA_BASE + 0x00c0, IRQ_EP93XX_DMAM2P3), + DMA_CHANNEL("m2p4", EP93XX_DMA_BASE + 0x0240, IRQ_EP93XX_DMAM2P4), + DMA_CHANNEL("m2p5", EP93XX_DMA_BASE + 0x0200, IRQ_EP93XX_DMAM2P5), + DMA_CHANNEL("m2p6", EP93XX_DMA_BASE + 0x02c0, IRQ_EP93XX_DMAM2P6), + DMA_CHANNEL("m2p7", EP93XX_DMA_BASE + 0x0280, IRQ_EP93XX_DMAM2P7), + DMA_CHANNEL("m2p8", EP93XX_DMA_BASE + 0x0340, IRQ_EP93XX_DMAM2P8), + DMA_CHANNEL("m2p9", EP93XX_DMA_BASE + 0x0300, IRQ_EP93XX_DMAM2P9), +}; + +static struct ep93xx_dma_platform_data ep93xx_dma_m2p_data = { + .channels = ep93xx_dma_m2p_channels, + .num_channels = ARRAY_SIZE(ep93xx_dma_m2p_channels), +}; + +static struct platform_device ep93xx_dma_m2p_device = { + .name = "ep93xx-dma-m2p", + .id = -1, + .dev = { + .platform_data = &ep93xx_dma_m2p_data, + }, +}; + +/* + * DMA M2M channels. + * + * There are 2 M2M channels which support memcpy/memset and in addition simple + * hardware requests from/to SSP and IDE. We do not implement an external + * hardware requests. + * + * Registers are mapped statically in ep93xx_map_io(). 
+ */ +static struct ep93xx_dma_chan_data ep93xx_dma_m2m_channels[] = { + DMA_CHANNEL("m2m0", EP93XX_DMA_BASE + 0x0100, IRQ_EP93XX_DMAM2M0), + DMA_CHANNEL("m2m1", EP93XX_DMA_BASE + 0x0140, IRQ_EP93XX_DMAM2M1), +}; + +static struct ep93xx_dma_platform_data ep93xx_dma_m2m_data = { + .channels = ep93xx_dma_m2m_channels, + .num_channels = ARRAY_SIZE(ep93xx_dma_m2m_channels), +}; + +static struct platform_device ep93xx_dma_m2m_device = { + .name = "ep93xx-dma-m2m", + .id = -1, + .dev = { + .platform_data = &ep93xx_dma_m2m_data, + }, +}; + +static int __init ep93xx_dma_init(void) +{ + platform_device_register(&ep93xx_dma_m2p_device); + platform_device_register(&ep93xx_dma_m2m_device); + return 0; +} +arch_initcall(ep93xx_dma_init); From a103fc67c612bfc0f6388885fea7244967afaad4 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:04 +0300 Subject: [PATCH 008/151] ASoC: ep93xx: convert to use the DMA engine API Now that we have the EP93xx DMA engine driver in place, we convert the ASoC drivers (I2S, AC97 and PCM) to take advantage of this new API. There are no functional changes. 
Signed-off-by: Mika Westerberg Acked-by: H Hartley Sweeten Acked-by: Liam Girdwood Acked-by: Mark Brown Signed-off-by: Vinod Koul --- sound/soc/ep93xx/ep93xx-ac97.c | 4 +- sound/soc/ep93xx/ep93xx-i2s.c | 4 +- sound/soc/ep93xx/ep93xx-pcm.c | 137 ++++++++++++++++++--------------- 3 files changed, 81 insertions(+), 64 deletions(-) diff --git a/sound/soc/ep93xx/ep93xx-ac97.c b/sound/soc/ep93xx/ep93xx-ac97.c index 104e95cda0ad..c7417c76552b 100644 --- a/sound/soc/ep93xx/ep93xx-ac97.c +++ b/sound/soc/ep93xx/ep93xx-ac97.c @@ -106,12 +106,12 @@ static struct ep93xx_ac97_info *ep93xx_ac97_info; static struct ep93xx_pcm_dma_params ep93xx_ac97_pcm_out = { .name = "ac97-pcm-out", - .dma_port = EP93XX_DMA_M2P_PORT_AAC1, + .dma_port = EP93XX_DMA_AAC1, }; static struct ep93xx_pcm_dma_params ep93xx_ac97_pcm_in = { .name = "ac97-pcm-in", - .dma_port = EP93XX_DMA_M2P_PORT_AAC1, + .dma_port = EP93XX_DMA_AAC1, }; static inline unsigned ep93xx_ac97_read_reg(struct ep93xx_ac97_info *info, diff --git a/sound/soc/ep93xx/ep93xx-i2s.c b/sound/soc/ep93xx/ep93xx-i2s.c index 042f4e93746f..30df42568dbb 100644 --- a/sound/soc/ep93xx/ep93xx-i2s.c +++ b/sound/soc/ep93xx/ep93xx-i2s.c @@ -70,11 +70,11 @@ struct ep93xx_i2s_info { struct ep93xx_pcm_dma_params ep93xx_i2s_dma_params[] = { [SNDRV_PCM_STREAM_PLAYBACK] = { .name = "i2s-pcm-out", - .dma_port = EP93XX_DMA_M2P_PORT_I2S1, + .dma_port = EP93XX_DMA_I2S1, }, [SNDRV_PCM_STREAM_CAPTURE] = { .name = "i2s-pcm-in", - .dma_port = EP93XX_DMA_M2P_PORT_I2S1, + .dma_port = EP93XX_DMA_I2S1, }, }; diff --git a/sound/soc/ep93xx/ep93xx-pcm.c b/sound/soc/ep93xx/ep93xx-pcm.c index a456e491155f..a07f99c9c375 100644 --- a/sound/soc/ep93xx/ep93xx-pcm.c +++ b/sound/soc/ep93xx/ep93xx-pcm.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -53,43 +54,34 @@ static const struct snd_pcm_hardware ep93xx_pcm_hardware = { struct ep93xx_runtime_data { - struct ep93xx_dma_m2p_client cl; - struct ep93xx_pcm_dma_params *params; int pointer_bytes; - 
struct tasklet_struct period_tasklet; int periods; - struct ep93xx_dma_buffer buf[32]; + int period_bytes; + struct dma_chan *dma_chan; + struct ep93xx_dma_data dma_data; }; -static void ep93xx_pcm_period_elapsed(unsigned long data) +static void ep93xx_pcm_dma_callback(void *data) { - struct snd_pcm_substream *substream = (struct snd_pcm_substream *)data; - snd_pcm_period_elapsed(substream); -} + struct snd_pcm_substream *substream = data; + struct ep93xx_runtime_data *rtd = substream->runtime->private_data; -static void ep93xx_pcm_buffer_started(void *cookie, - struct ep93xx_dma_buffer *buf) -{ + rtd->pointer_bytes += rtd->period_bytes; + rtd->pointer_bytes %= rtd->period_bytes * rtd->periods; + + snd_pcm_period_elapsed(substream); } -static void ep93xx_pcm_buffer_finished(void *cookie, - struct ep93xx_dma_buffer *buf, - int bytes, int error) +static bool ep93xx_pcm_dma_filter(struct dma_chan *chan, void *filter_param) { - struct snd_pcm_substream *substream = cookie; - struct ep93xx_runtime_data *rtd = substream->runtime->private_data; - - if (buf == rtd->buf + rtd->periods - 1) - rtd->pointer_bytes = 0; - else - rtd->pointer_bytes += buf->size; + struct ep93xx_dma_data *data = filter_param; - if (!error) { - ep93xx_dma_m2p_submit_recursive(&rtd->cl, buf); - tasklet_schedule(&rtd->period_tasklet); - } else { - snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + if (data->direction == ep93xx_dma_chan_direction(chan)) { + chan->private = data; + return true; } + + return false; } static int ep93xx_pcm_open(struct snd_pcm_substream *substream) @@ -98,30 +90,38 @@ static int ep93xx_pcm_open(struct snd_pcm_substream *substream) struct snd_soc_dai *cpu_dai = soc_rtd->cpu_dai; struct ep93xx_pcm_dma_params *dma_params; struct ep93xx_runtime_data *rtd; + dma_cap_mask_t mask; int ret; - dma_params = snd_soc_dai_get_dma_data(cpu_dai, substream); + ret = snd_pcm_hw_constraint_integer(substream->runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (ret < 0) + return ret; + 
snd_soc_set_runtime_hwparams(substream, &ep93xx_pcm_hardware); rtd = kmalloc(sizeof(*rtd), GFP_KERNEL); if (!rtd) return -ENOMEM; - memset(&rtd->period_tasklet, 0, sizeof(rtd->period_tasklet)); - rtd->period_tasklet.func = ep93xx_pcm_period_elapsed; - rtd->period_tasklet.data = (unsigned long)substream; - - rtd->cl.name = dma_params->name; - rtd->cl.flags = dma_params->dma_port | EP93XX_DMA_M2P_IGNORE_ERROR | - ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? - EP93XX_DMA_M2P_TX : EP93XX_DMA_M2P_RX); - rtd->cl.cookie = substream; - rtd->cl.buffer_started = ep93xx_pcm_buffer_started; - rtd->cl.buffer_finished = ep93xx_pcm_buffer_finished; - ret = ep93xx_dma_m2p_client_register(&rtd->cl); - if (ret < 0) { + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + dma_cap_set(DMA_CYCLIC, mask); + + dma_params = snd_soc_dai_get_dma_data(cpu_dai, substream); + rtd->dma_data.port = dma_params->dma_port; + rtd->dma_data.name = dma_params->name; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + rtd->dma_data.direction = DMA_TO_DEVICE; + else + rtd->dma_data.direction = DMA_FROM_DEVICE; + + rtd->dma_chan = dma_request_channel(mask, ep93xx_pcm_dma_filter, + &rtd->dma_data); + if (!rtd->dma_chan) { kfree(rtd); - return ret; + return -EINVAL; } substream->runtime->private_data = rtd; @@ -132,31 +132,52 @@ static int ep93xx_pcm_close(struct snd_pcm_substream *substream) { struct ep93xx_runtime_data *rtd = substream->runtime->private_data; - ep93xx_dma_m2p_client_unregister(&rtd->cl); + dma_release_channel(rtd->dma_chan); kfree(rtd); return 0; } +static int ep93xx_pcm_dma_submit(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct ep93xx_runtime_data *rtd = runtime->private_data; + struct dma_chan *chan = rtd->dma_chan; + struct dma_device *dma_dev = chan->device; + struct dma_async_tx_descriptor *desc; + + rtd->pointer_bytes = 0; + desc = dma_dev->device_prep_dma_cyclic(chan, runtime->dma_addr, + rtd->period_bytes * 
rtd->periods, + rtd->period_bytes, + rtd->dma_data.direction); + if (!desc) + return -EINVAL; + + desc->callback = ep93xx_pcm_dma_callback; + desc->callback_param = substream; + + dmaengine_submit(desc); + return 0; +} + +static void ep93xx_pcm_dma_flush(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct ep93xx_runtime_data *rtd = runtime->private_data; + + dmaengine_terminate_all(rtd->dma_chan); +} + static int ep93xx_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_runtime *runtime = substream->runtime; struct ep93xx_runtime_data *rtd = runtime->private_data; - size_t totsize = params_buffer_bytes(params); - size_t period = params_period_bytes(params); - int i; snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer); - runtime->dma_bytes = totsize; - - rtd->periods = (totsize + period - 1) / period; - for (i = 0; i < rtd->periods; i++) { - rtd->buf[i].bus_addr = runtime->dma_addr + (i * period); - rtd->buf[i].size = period; - if ((i + 1) * period > totsize) - rtd->buf[i].size = totsize - (i * period); - } + rtd->periods = params_periods(params); + rtd->period_bytes = params_period_bytes(params); return 0; } @@ -168,24 +189,20 @@ static int ep93xx_pcm_hw_free(struct snd_pcm_substream *substream) static int ep93xx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { - struct ep93xx_runtime_data *rtd = substream->runtime->private_data; int ret; - int i; ret = 0; switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - rtd->pointer_bytes = 0; - for (i = 0; i < rtd->periods; i++) - ep93xx_dma_m2p_submit(&rtd->cl, rtd->buf + i); + ret = ep93xx_pcm_dma_submit(substream); break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ep93xx_dma_m2p_flush(&rtd->cl); + ep93xx_pcm_dma_flush(substream); break; default: From 
8e4a93008db7780e45838fe65840b289f389ef4a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:05 +0300 Subject: [PATCH 009/151] ep93xx: remove the old M2P DMA code Since we have converted all existing users of the old DMA API to use the DMA engine API the old code can be dropped. Signed-off-by: Mika Westerberg Acked-by: Ryan Mallon Acked-by: H Hartley Sweeten Signed-off-by: Vinod Koul --- arch/arm/mach-ep93xx/Makefile | 2 +- arch/arm/mach-ep93xx/dma-m2p.c | 411 ------------------------ arch/arm/mach-ep93xx/include/mach/dma.h | 143 --------- 3 files changed, 1 insertion(+), 555 deletions(-) delete mode 100644 arch/arm/mach-ep93xx/dma-m2p.c diff --git a/arch/arm/mach-ep93xx/Makefile b/arch/arm/mach-ep93xx/Makefile index 4920f7ae8330..21e721ab7378 100644 --- a/arch/arm/mach-ep93xx/Makefile +++ b/arch/arm/mach-ep93xx/Makefile @@ -1,7 +1,7 @@ # # Makefile for the linux kernel. # -obj-y := core.o clock.o dma-m2p.o gpio.o +obj-y := core.o clock.o gpio.o obj-m := obj-n := obj- := diff --git a/arch/arm/mach-ep93xx/dma-m2p.c b/arch/arm/mach-ep93xx/dma-m2p.c deleted file mode 100644 index a696d354b1f8..000000000000 --- a/arch/arm/mach-ep93xx/dma-m2p.c +++ /dev/null @@ -1,411 +0,0 @@ -/* - * arch/arm/mach-ep93xx/dma-m2p.c - * M2P DMA handling for Cirrus EP93xx chips. - * - * Copyright (C) 2006 Lennert Buytenhek - * Copyright (C) 2006 Applied Data Systems - * - * Copyright (C) 2009 Ryan Mallon - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - */ - -/* - * On the EP93xx chip the following peripherals my be allocated to the 10 - * Memory to Internal Peripheral (M2P) channels (5 transmit + 5 receive). 
- * - * I2S contains 3 Tx and 3 Rx DMA Channels - * AAC contains 3 Tx and 3 Rx DMA Channels - * UART1 contains 1 Tx and 1 Rx DMA Channels - * UART2 contains 1 Tx and 1 Rx DMA Channels - * UART3 contains 1 Tx and 1 Rx DMA Channels - * IrDA contains 1 Tx and 1 Rx DMA Channels - * - * SSP and IDE use the Memory to Memory (M2M) channels and are not covered - * with this implementation. - */ - -#define pr_fmt(fmt) "ep93xx " KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include - -#include -#include - -#define M2P_CONTROL 0x00 -#define M2P_CONTROL_STALL_IRQ_EN (1 << 0) -#define M2P_CONTROL_NFB_IRQ_EN (1 << 1) -#define M2P_CONTROL_ERROR_IRQ_EN (1 << 3) -#define M2P_CONTROL_ENABLE (1 << 4) -#define M2P_INTERRUPT 0x04 -#define M2P_INTERRUPT_STALL (1 << 0) -#define M2P_INTERRUPT_NFB (1 << 1) -#define M2P_INTERRUPT_ERROR (1 << 3) -#define M2P_PPALLOC 0x08 -#define M2P_STATUS 0x0c -#define M2P_REMAIN 0x14 -#define M2P_MAXCNT0 0x20 -#define M2P_BASE0 0x24 -#define M2P_MAXCNT1 0x30 -#define M2P_BASE1 0x34 - -#define STATE_IDLE 0 /* Channel is inactive. */ -#define STATE_STALL 1 /* Channel is active, no buffers pending. */ -#define STATE_ON 2 /* Channel is active, one buffer pending. */ -#define STATE_NEXT 3 /* Channel is active, two buffers pending. 
*/ - -struct m2p_channel { - char *name; - void __iomem *base; - int irq; - - struct clk *clk; - spinlock_t lock; - - void *client; - unsigned next_slot:1; - struct ep93xx_dma_buffer *buffer_xfer; - struct ep93xx_dma_buffer *buffer_next; - struct list_head buffers_pending; -}; - -static struct m2p_channel m2p_rx[] = { - {"m2p1", EP93XX_DMA_BASE + 0x0040, IRQ_EP93XX_DMAM2P1}, - {"m2p3", EP93XX_DMA_BASE + 0x00c0, IRQ_EP93XX_DMAM2P3}, - {"m2p5", EP93XX_DMA_BASE + 0x0200, IRQ_EP93XX_DMAM2P5}, - {"m2p7", EP93XX_DMA_BASE + 0x0280, IRQ_EP93XX_DMAM2P7}, - {"m2p9", EP93XX_DMA_BASE + 0x0300, IRQ_EP93XX_DMAM2P9}, - {NULL}, -}; - -static struct m2p_channel m2p_tx[] = { - {"m2p0", EP93XX_DMA_BASE + 0x0000, IRQ_EP93XX_DMAM2P0}, - {"m2p2", EP93XX_DMA_BASE + 0x0080, IRQ_EP93XX_DMAM2P2}, - {"m2p4", EP93XX_DMA_BASE + 0x0240, IRQ_EP93XX_DMAM2P4}, - {"m2p6", EP93XX_DMA_BASE + 0x02c0, IRQ_EP93XX_DMAM2P6}, - {"m2p8", EP93XX_DMA_BASE + 0x0340, IRQ_EP93XX_DMAM2P8}, - {NULL}, -}; - -static void feed_buf(struct m2p_channel *ch, struct ep93xx_dma_buffer *buf) -{ - if (ch->next_slot == 0) { - writel(buf->size, ch->base + M2P_MAXCNT0); - writel(buf->bus_addr, ch->base + M2P_BASE0); - } else { - writel(buf->size, ch->base + M2P_MAXCNT1); - writel(buf->bus_addr, ch->base + M2P_BASE1); - } - ch->next_slot ^= 1; -} - -static void choose_buffer_xfer(struct m2p_channel *ch) -{ - struct ep93xx_dma_buffer *buf; - - ch->buffer_xfer = NULL; - if (!list_empty(&ch->buffers_pending)) { - buf = list_entry(ch->buffers_pending.next, - struct ep93xx_dma_buffer, list); - list_del(&buf->list); - feed_buf(ch, buf); - ch->buffer_xfer = buf; - } -} - -static void choose_buffer_next(struct m2p_channel *ch) -{ - struct ep93xx_dma_buffer *buf; - - ch->buffer_next = NULL; - if (!list_empty(&ch->buffers_pending)) { - buf = list_entry(ch->buffers_pending.next, - struct ep93xx_dma_buffer, list); - list_del(&buf->list); - feed_buf(ch, buf); - ch->buffer_next = buf; - } -} - -static inline void m2p_set_control(struct 
m2p_channel *ch, u32 v) -{ - /* - * The control register must be read immediately after being written so - * that the internal state machine is correctly updated. See the ep93xx - * users' guide for details. - */ - writel(v, ch->base + M2P_CONTROL); - readl(ch->base + M2P_CONTROL); -} - -static inline int m2p_channel_state(struct m2p_channel *ch) -{ - return (readl(ch->base + M2P_STATUS) >> 4) & 0x3; -} - -static irqreturn_t m2p_irq(int irq, void *dev_id) -{ - struct m2p_channel *ch = dev_id; - struct ep93xx_dma_m2p_client *cl; - u32 irq_status, v; - int error = 0; - - cl = ch->client; - - spin_lock(&ch->lock); - irq_status = readl(ch->base + M2P_INTERRUPT); - - if (irq_status & M2P_INTERRUPT_ERROR) { - writel(M2P_INTERRUPT_ERROR, ch->base + M2P_INTERRUPT); - error = 1; - } - - if ((irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) == 0) { - spin_unlock(&ch->lock); - return IRQ_NONE; - } - - switch (m2p_channel_state(ch)) { - case STATE_IDLE: - pr_crit("dma interrupt without a dma buffer\n"); - BUG(); - break; - - case STATE_STALL: - cl->buffer_finished(cl->cookie, ch->buffer_xfer, 0, error); - if (ch->buffer_next != NULL) { - cl->buffer_finished(cl->cookie, ch->buffer_next, - 0, error); - } - choose_buffer_xfer(ch); - choose_buffer_next(ch); - if (ch->buffer_xfer != NULL) - cl->buffer_started(cl->cookie, ch->buffer_xfer); - break; - - case STATE_ON: - cl->buffer_finished(cl->cookie, ch->buffer_xfer, 0, error); - ch->buffer_xfer = ch->buffer_next; - choose_buffer_next(ch); - cl->buffer_started(cl->cookie, ch->buffer_xfer); - break; - - case STATE_NEXT: - pr_crit("dma interrupt while next\n"); - BUG(); - break; - } - - v = readl(ch->base + M2P_CONTROL) & ~(M2P_CONTROL_STALL_IRQ_EN | - M2P_CONTROL_NFB_IRQ_EN); - if (ch->buffer_xfer != NULL) - v |= M2P_CONTROL_STALL_IRQ_EN; - if (ch->buffer_next != NULL) - v |= M2P_CONTROL_NFB_IRQ_EN; - m2p_set_control(ch, v); - - spin_unlock(&ch->lock); - return IRQ_HANDLED; -} - -static struct m2p_channel 
*find_free_channel(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch; - int i; - - if (cl->flags & EP93XX_DMA_M2P_RX) - ch = m2p_rx; - else - ch = m2p_tx; - - for (i = 0; ch[i].base; i++) { - struct ep93xx_dma_m2p_client *client; - - client = ch[i].client; - if (client != NULL) { - int port; - - port = cl->flags & EP93XX_DMA_M2P_PORT_MASK; - if (port == (client->flags & - EP93XX_DMA_M2P_PORT_MASK)) { - pr_warning("DMA channel already used by %s\n", - cl->name ? : "unknown client"); - return ERR_PTR(-EBUSY); - } - } - } - - for (i = 0; ch[i].base; i++) { - if (ch[i].client == NULL) - return ch + i; - } - - pr_warning("No free DMA channel for %s\n", - cl->name ? : "unknown client"); - return ERR_PTR(-ENODEV); -} - -static void channel_enable(struct m2p_channel *ch) -{ - struct ep93xx_dma_m2p_client *cl = ch->client; - u32 v; - - clk_enable(ch->clk); - - v = cl->flags & EP93XX_DMA_M2P_PORT_MASK; - writel(v, ch->base + M2P_PPALLOC); - - v = cl->flags & EP93XX_DMA_M2P_ERROR_MASK; - v |= M2P_CONTROL_ENABLE | M2P_CONTROL_ERROR_IRQ_EN; - m2p_set_control(ch, v); -} - -static void channel_disable(struct m2p_channel *ch) -{ - u32 v; - - v = readl(ch->base + M2P_CONTROL); - v &= ~(M2P_CONTROL_STALL_IRQ_EN | M2P_CONTROL_NFB_IRQ_EN); - m2p_set_control(ch, v); - - while (m2p_channel_state(ch) >= STATE_ON) - cpu_relax(); - - m2p_set_control(ch, 0x0); - - while (m2p_channel_state(ch) == STATE_STALL) - cpu_relax(); - - clk_disable(ch->clk); -} - -int ep93xx_dma_m2p_client_register(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch; - int err; - - ch = find_free_channel(cl); - if (IS_ERR(ch)) - return PTR_ERR(ch); - - err = request_irq(ch->irq, m2p_irq, 0, cl->name ? 
: "dma-m2p", ch); - if (err) - return err; - - ch->client = cl; - ch->next_slot = 0; - ch->buffer_xfer = NULL; - ch->buffer_next = NULL; - INIT_LIST_HEAD(&ch->buffers_pending); - - cl->channel = ch; - - channel_enable(ch); - - return 0; -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_client_register); - -void ep93xx_dma_m2p_client_unregister(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch = cl->channel; - - channel_disable(ch); - free_irq(ch->irq, ch); - ch->client = NULL; -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_client_unregister); - -void ep93xx_dma_m2p_submit(struct ep93xx_dma_m2p_client *cl, - struct ep93xx_dma_buffer *buf) -{ - struct m2p_channel *ch = cl->channel; - unsigned long flags; - u32 v; - - spin_lock_irqsave(&ch->lock, flags); - v = readl(ch->base + M2P_CONTROL); - if (ch->buffer_xfer == NULL) { - ch->buffer_xfer = buf; - feed_buf(ch, buf); - cl->buffer_started(cl->cookie, buf); - - v |= M2P_CONTROL_STALL_IRQ_EN; - m2p_set_control(ch, v); - - } else if (ch->buffer_next == NULL) { - ch->buffer_next = buf; - feed_buf(ch, buf); - - v |= M2P_CONTROL_NFB_IRQ_EN; - m2p_set_control(ch, v); - } else { - list_add_tail(&buf->list, &ch->buffers_pending); - } - spin_unlock_irqrestore(&ch->lock, flags); -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_submit); - -void ep93xx_dma_m2p_submit_recursive(struct ep93xx_dma_m2p_client *cl, - struct ep93xx_dma_buffer *buf) -{ - struct m2p_channel *ch = cl->channel; - - list_add_tail(&buf->list, &ch->buffers_pending); -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_submit_recursive); - -void ep93xx_dma_m2p_flush(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch = cl->channel; - - channel_disable(ch); - ch->next_slot = 0; - ch->buffer_xfer = NULL; - ch->buffer_next = NULL; - INIT_LIST_HEAD(&ch->buffers_pending); - channel_enable(ch); -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_flush); - -static int init_channel(struct m2p_channel *ch) -{ - ch->clk = clk_get(NULL, ch->name); - if (IS_ERR(ch->clk)) - return PTR_ERR(ch->clk); - - 
spin_lock_init(&ch->lock); - ch->client = NULL; - - return 0; -} - -static int __init ep93xx_dma_m2p_init(void) -{ - int i; - int ret; - - for (i = 0; m2p_rx[i].base; i++) { - ret = init_channel(m2p_rx + i); - if (ret) - return ret; - } - - for (i = 0; m2p_tx[i].base; i++) { - ret = init_channel(m2p_tx + i); - if (ret) - return ret; - } - - pr_info("M2P DMA subsystem initialized\n"); - return 0; -} -arch_initcall(ep93xx_dma_m2p_init); diff --git a/arch/arm/mach-ep93xx/include/mach/dma.h b/arch/arm/mach-ep93xx/include/mach/dma.h index 6e7049a796a4..46d4d876e6fb 100644 --- a/arch/arm/mach-ep93xx/include/mach/dma.h +++ b/arch/arm/mach-ep93xx/include/mach/dma.h @@ -1,153 +1,10 @@ -/** - * DOC: EP93xx DMA M2P memory to peripheral and peripheral to memory engine - * - * The EP93xx DMA M2P subsystem handles DMA transfers between memory and - * peripherals. DMA M2P channels are available for audio, UARTs and IrDA. - * See chapter 10 of the EP93xx users guide for full details on the DMA M2P - * engine. - * - * See sound/soc/ep93xx/ep93xx-pcm.c for an example use of the DMA M2P code. - * - */ - #ifndef __ASM_ARCH_DMA_H #define __ASM_ARCH_DMA_H -#include #include #include #include -/** - * struct ep93xx_dma_buffer - Information about a buffer to be transferred - * using the DMA M2P engine - * - * @list: Entry in DMA buffer list - * @bus_addr: Physical address of the buffer - * @size: Size of the buffer in bytes - */ -struct ep93xx_dma_buffer { - struct list_head list; - u32 bus_addr; - u16 size; -}; - -/** - * struct ep93xx_dma_m2p_client - Information about a DMA M2P client - * - * @name: Unique name for this client - * @flags: Client flags - * @cookie: User data to pass to callback functions - * @buffer_started: Non NULL function to call when a transfer is started. - * The arguments are the user data cookie and the DMA - * buffer which is starting. - * @buffer_finished: Non NULL function to call when a transfer is completed. 
- * The arguments are the user data cookie, the DMA buffer - * which has completed, and a boolean flag indicating if - * the transfer had an error. - */ -struct ep93xx_dma_m2p_client { - char *name; - u8 flags; - void *cookie; - void (*buffer_started)(void *cookie, - struct ep93xx_dma_buffer *buf); - void (*buffer_finished)(void *cookie, - struct ep93xx_dma_buffer *buf, - int bytes, int error); - - /* private: Internal use only */ - void *channel; -}; - -/* DMA M2P ports */ -#define EP93XX_DMA_M2P_PORT_I2S1 0x00 -#define EP93XX_DMA_M2P_PORT_I2S2 0x01 -#define EP93XX_DMA_M2P_PORT_AAC1 0x02 -#define EP93XX_DMA_M2P_PORT_AAC2 0x03 -#define EP93XX_DMA_M2P_PORT_AAC3 0x04 -#define EP93XX_DMA_M2P_PORT_I2S3 0x05 -#define EP93XX_DMA_M2P_PORT_UART1 0x06 -#define EP93XX_DMA_M2P_PORT_UART2 0x07 -#define EP93XX_DMA_M2P_PORT_UART3 0x08 -#define EP93XX_DMA_M2P_PORT_IRDA 0x09 -#define EP93XX_DMA_M2P_PORT_MASK 0x0f - -/* DMA M2P client flags */ -#define EP93XX_DMA_M2P_TX 0x00 /* Memory to peripheral */ -#define EP93XX_DMA_M2P_RX 0x10 /* Peripheral to memory */ - -/* - * DMA M2P client error handling flags. See the EP93xx users guide - * documentation on the DMA M2P CONTROL register for more details - */ -#define EP93XX_DMA_M2P_ABORT_ON_ERROR 0x20 /* Abort on peripheral error */ -#define EP93XX_DMA_M2P_IGNORE_ERROR 0x40 /* Ignore peripheral errors */ -#define EP93XX_DMA_M2P_ERROR_MASK 0x60 /* Mask of error bits */ - -/** - * ep93xx_dma_m2p_client_register - Register a client with the DMA M2P - * subsystem - * - * @m2p: Client information to register - * returns 0 on success - * - * The DMA M2P subsystem allocates a channel and an interrupt line for the DMA - * client - */ -int ep93xx_dma_m2p_client_register(struct ep93xx_dma_m2p_client *m2p); - -/** - * ep93xx_dma_m2p_client_unregister - Unregister a client from the DMA M2P - * subsystem - * - * @m2p: Client to unregister - * - * Any transfers currently in progress will be completed in hardware, but - * ignored in software. 
- */ -void ep93xx_dma_m2p_client_unregister(struct ep93xx_dma_m2p_client *m2p); - -/** - * ep93xx_dma_m2p_submit - Submit a DMA M2P transfer - * - * @m2p: DMA Client to submit the transfer on - * @buf: DMA Buffer to submit - * - * If the current or next transfer positions are free on the M2P client then - * the transfer is started immediately. If not, the transfer is added to the - * list of pending transfers. This function must not be called from the - * buffer_finished callback for an M2P channel. - * - */ -void ep93xx_dma_m2p_submit(struct ep93xx_dma_m2p_client *m2p, - struct ep93xx_dma_buffer *buf); - -/** - * ep93xx_dma_m2p_submit_recursive - Put a DMA transfer on the pending list - * for an M2P channel - * - * @m2p: DMA Client to submit the transfer on - * @buf: DMA Buffer to submit - * - * This function must only be called from the buffer_finished callback for an - * M2P channel. It is commonly used to add the next transfer in a chained list - * of DMA transfers. - */ -void ep93xx_dma_m2p_submit_recursive(struct ep93xx_dma_m2p_client *m2p, - struct ep93xx_dma_buffer *buf); - -/** - * ep93xx_dma_m2p_flush - Flush all pending transfers on a DMA M2P client - * - * @m2p: DMA client to flush transfers on - * - * Any transfers currently in progress will be completed in hardware, but - * ignored in software. - * - */ -void ep93xx_dma_m2p_flush(struct ep93xx_dma_m2p_client *m2p); - /* * M2P channels. * From d41071575b0b20b780bb0e8e7e70c62c1b07a883 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:06 +0300 Subject: [PATCH 010/151] spi/ep93xx: add DMA support This patch adds DMA support for the EP93xx SPI driver. By default the DMA is not enabled but it can be enabled by setting ep93xx_spi_info.use_dma to true in board configuration file. Note that the SPI driver still uses PIO for small transfers (<= 8 bytes) for performance reasons. 
Signed-off-by: Mika Westerberg Acked-by: H Hartley Sweeten Cc: Grant Likely Acked-by: Grant Likely Signed-off-by: Vinod Koul --- Documentation/spi/ep93xx_spi | 10 + arch/arm/mach-ep93xx/core.c | 6 +- .../arm/mach-ep93xx/include/mach/ep93xx_spi.h | 2 + drivers/spi/ep93xx_spi.c | 303 +++++++++++++++++- 4 files changed, 308 insertions(+), 13 deletions(-) diff --git a/Documentation/spi/ep93xx_spi b/Documentation/spi/ep93xx_spi index 6325f5b48635..d8eb01c15db1 100644 --- a/Documentation/spi/ep93xx_spi +++ b/Documentation/spi/ep93xx_spi @@ -88,6 +88,16 @@ static void __init ts72xx_init_machine(void) ARRAY_SIZE(ts72xx_spi_devices)); } +The driver can use DMA for the transfers also. In this case ts72xx_spi_info +becomes: + +static struct ep93xx_spi_info ts72xx_spi_info = { + .num_chipselect = ARRAY_SIZE(ts72xx_spi_devices), + .use_dma = true, +}; + +Note that CONFIG_EP93XX_DMA should be enabled as well. + Thanks to ========= Martin Guy, H. Hartley Sweeten and others who helped me during development of diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 82079545adc4..cc9f1d4b104d 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -488,11 +488,15 @@ static struct resource ep93xx_spi_resources[] = { }, }; +static u64 ep93xx_spi_dma_mask = DMA_BIT_MASK(32); + static struct platform_device ep93xx_spi_device = { .name = "ep93xx-spi", .id = 0, .dev = { - .platform_data = &ep93xx_spi_master_data, + .platform_data = &ep93xx_spi_master_data, + .coherent_dma_mask = DMA_BIT_MASK(32), + .dma_mask = &ep93xx_spi_dma_mask, }, .num_resources = ARRAY_SIZE(ep93xx_spi_resources), .resource = ep93xx_spi_resources, diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h b/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h index 0a37961b3453..9bb63ac13f04 100644 --- a/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h +++ b/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h @@ -7,9 +7,11 @@ struct spi_device; * struct ep93xx_spi_info - EP93xx specific 
SPI descriptor * @num_chipselect: number of chip selects on this board, must be * at least one + * @use_dma: use DMA for the transfers */ struct ep93xx_spi_info { int num_chipselect; + bool use_dma; }; /** diff --git a/drivers/spi/ep93xx_spi.c b/drivers/spi/ep93xx_spi.c index d3570071e98f..1cf645479bfe 100644 --- a/drivers/spi/ep93xx_spi.c +++ b/drivers/spi/ep93xx_spi.c @@ -1,7 +1,7 @@ /* * Driver for Cirrus Logic EP93xx SPI controller. * - * Copyright (c) 2010 Mika Westerberg + * Copyright (C) 2010-2011 Mika Westerberg * * Explicit FIFO handling code was inspired by amba-pl022 driver. * @@ -21,13 +21,16 @@ #include #include #include +#include #include #include #include #include #include +#include #include +#include #include #define SSPCR0 0x0000 @@ -71,6 +74,7 @@ * @pdev: pointer to platform device * @clk: clock for the controller * @regs_base: pointer to ioremap()'d registers + * @sspdr_phys: physical address of the SSPDR register * @irq: IRQ number used by the driver * @min_rate: minimum clock rate (in Hz) supported by the controller * @max_rate: maximum clock rate (in Hz) supported by the controller @@ -84,6 +88,14 @@ * @rx: current byte in transfer to receive * @fifo_level: how full is FIFO (%0..%SPI_FIFO_SIZE - %1). Receiving one * frame decreases this level and sending one frame increases it. + * @dma_rx: RX DMA channel + * @dma_tx: TX DMA channel + * @dma_rx_data: RX parameters passed to the DMA engine + * @dma_tx_data: TX parameters passed to the DMA engine + * @rx_sgt: sg table for RX transfers + * @tx_sgt: sg table for TX transfers + * @zeropage: dummy page used as RX buffer when only TX buffer is passed in by + * the client * * This structure holds EP93xx SPI controller specific information. When * @running is %true, driver accepts transfer requests from protocol drivers. 
@@ -100,6 +112,7 @@ struct ep93xx_spi { const struct platform_device *pdev; struct clk *clk; void __iomem *regs_base; + unsigned long sspdr_phys; int irq; unsigned long min_rate; unsigned long max_rate; @@ -112,6 +125,13 @@ struct ep93xx_spi { size_t tx; size_t rx; size_t fifo_level; + struct dma_chan *dma_rx; + struct dma_chan *dma_tx; + struct ep93xx_dma_data dma_rx_data; + struct ep93xx_dma_data dma_tx_data; + struct sg_table rx_sgt; + struct sg_table tx_sgt; + void *zeropage; }; /** @@ -496,14 +516,195 @@ static int ep93xx_spi_read_write(struct ep93xx_spi *espi) espi->fifo_level++; } - if (espi->rx == t->len) { - msg->actual_length += t->len; + if (espi->rx == t->len) return 0; - } return -EINPROGRESS; } +static void ep93xx_spi_pio_transfer(struct ep93xx_spi *espi) +{ + /* + * Now everything is set up for the current transfer. We prime the TX + * FIFO, enable interrupts, and wait for the transfer to complete. + */ + if (ep93xx_spi_read_write(espi)) { + ep93xx_spi_enable_interrupts(espi); + wait_for_completion(&espi->wait); + } +} + +/** + * ep93xx_spi_dma_prepare() - prepares a DMA transfer + * @espi: ep93xx SPI controller struct + * @dir: DMA transfer direction + * + * Function configures the DMA, maps the buffer and prepares the DMA + * descriptor. Returns a valid DMA descriptor in case of success and ERR_PTR + * in case of failure. 
+ */ +static struct dma_async_tx_descriptor * +ep93xx_spi_dma_prepare(struct ep93xx_spi *espi, enum dma_data_direction dir) +{ + struct spi_transfer *t = espi->current_msg->state; + struct dma_async_tx_descriptor *txd; + enum dma_slave_buswidth buswidth; + struct dma_slave_config conf; + struct scatterlist *sg; + struct sg_table *sgt; + struct dma_chan *chan; + const void *buf, *pbuf; + size_t len = t->len; + int i, ret, nents; + + if (bits_per_word(espi) > 8) + buswidth = DMA_SLAVE_BUSWIDTH_2_BYTES; + else + buswidth = DMA_SLAVE_BUSWIDTH_1_BYTE; + + memset(&conf, 0, sizeof(conf)); + conf.direction = dir; + + if (dir == DMA_FROM_DEVICE) { + chan = espi->dma_rx; + buf = t->rx_buf; + sgt = &espi->rx_sgt; + + conf.src_addr = espi->sspdr_phys; + conf.src_addr_width = buswidth; + } else { + chan = espi->dma_tx; + buf = t->tx_buf; + sgt = &espi->tx_sgt; + + conf.dst_addr = espi->sspdr_phys; + conf.dst_addr_width = buswidth; + } + + ret = dmaengine_slave_config(chan, &conf); + if (ret) + return ERR_PTR(ret); + + /* + * We need to split the transfer into PAGE_SIZE'd chunks. This is + * because we are using @espi->zeropage to provide a zero RX buffer + * for the TX transfers and we have only allocated one page for that. + * + * For performance reasons we allocate a new sg_table only when + * needed. Otherwise we will re-use the current one. Eventually the + * last sg_table is released in ep93xx_spi_release_dma(). 
+ */ + + nents = DIV_ROUND_UP(len, PAGE_SIZE); + if (nents != sgt->nents) { + sg_free_table(sgt); + + ret = sg_alloc_table(sgt, nents, GFP_KERNEL); + if (ret) + return ERR_PTR(ret); + } + + pbuf = buf; + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + size_t bytes = min_t(size_t, len, PAGE_SIZE); + + if (buf) { + sg_set_page(sg, virt_to_page(pbuf), bytes, + offset_in_page(pbuf)); + } else { + sg_set_page(sg, virt_to_page(espi->zeropage), + bytes, 0); + } + + pbuf += bytes; + len -= bytes; + } + + if (WARN_ON(len)) { + dev_warn(&espi->pdev->dev, "len = %zu expected 0!\n", len); + return ERR_PTR(-EINVAL); + } + + nents = dma_map_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); + if (!nents) + return ERR_PTR(-ENOMEM); + + txd = chan->device->device_prep_slave_sg(chan, sgt->sgl, nents, + dir, DMA_CTRL_ACK); + if (!txd) { + dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); + return ERR_PTR(-ENOMEM); + } + return txd; +} + +/** + * ep93xx_spi_dma_finish() - finishes with a DMA transfer + * @espi: ep93xx SPI controller struct + * @dir: DMA transfer direction + * + * Function finishes with the DMA transfer. After this, the DMA buffer is + * unmapped. 
+ */ +static void ep93xx_spi_dma_finish(struct ep93xx_spi *espi, + enum dma_data_direction dir) +{ + struct dma_chan *chan; + struct sg_table *sgt; + + if (dir == DMA_FROM_DEVICE) { + chan = espi->dma_rx; + sgt = &espi->rx_sgt; + } else { + chan = espi->dma_tx; + sgt = &espi->tx_sgt; + } + + dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); +} + +static void ep93xx_spi_dma_callback(void *callback_param) +{ + complete(callback_param); +} + +static void ep93xx_spi_dma_transfer(struct ep93xx_spi *espi) +{ + struct spi_message *msg = espi->current_msg; + struct dma_async_tx_descriptor *rxd, *txd; + + rxd = ep93xx_spi_dma_prepare(espi, DMA_FROM_DEVICE); + if (IS_ERR(rxd)) { + dev_err(&espi->pdev->dev, "DMA RX failed: %ld\n", PTR_ERR(rxd)); + msg->status = PTR_ERR(rxd); + return; + } + + txd = ep93xx_spi_dma_prepare(espi, DMA_TO_DEVICE); + if (IS_ERR(txd)) { + ep93xx_spi_dma_finish(espi, DMA_FROM_DEVICE); + dev_err(&espi->pdev->dev, "DMA TX failed: %ld\n", PTR_ERR(txd)); + msg->status = PTR_ERR(txd); + return; + } + + /* We are ready when RX is done */ + rxd->callback = ep93xx_spi_dma_callback; + rxd->callback_param = &espi->wait; + + /* Now submit both descriptors and wait while they finish */ + dmaengine_submit(rxd); + dmaengine_submit(txd); + + dma_async_issue_pending(espi->dma_rx); + dma_async_issue_pending(espi->dma_tx); + + wait_for_completion(&espi->wait); + + ep93xx_spi_dma_finish(espi, DMA_TO_DEVICE); + ep93xx_spi_dma_finish(espi, DMA_FROM_DEVICE); +} + /** * ep93xx_spi_process_transfer() - processes one SPI transfer * @espi: ep93xx SPI controller struct @@ -556,13 +757,14 @@ static void ep93xx_spi_process_transfer(struct ep93xx_spi *espi, espi->tx = 0; /* - * Now everything is set up for the current transfer. We prime the TX - * FIFO, enable interrupts, and wait for the transfer to complete. + * There is no point of setting up DMA for the transfers which will + * fit into the FIFO and can be transferred with a single interrupt. 
+ * So in these cases we will be using PIO and don't bother for DMA. */ - if (ep93xx_spi_read_write(espi)) { - ep93xx_spi_enable_interrupts(espi); - wait_for_completion(&espi->wait); - } + if (espi->dma_rx && t->len > SPI_FIFO_SIZE) + ep93xx_spi_dma_transfer(espi); + else + ep93xx_spi_pio_transfer(espi); /* * In case of error during transmit, we bail out from processing @@ -571,6 +773,8 @@ static void ep93xx_spi_process_transfer(struct ep93xx_spi *espi, if (msg->status) return; + msg->actual_length += t->len; + /* * After this transfer is finished, perform any possible * post-transfer actions requested by the protocol driver. @@ -752,6 +956,75 @@ static irqreturn_t ep93xx_spi_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static bool ep93xx_spi_dma_filter(struct dma_chan *chan, void *filter_param) +{ + if (ep93xx_dma_chan_is_m2p(chan)) + return false; + + chan->private = filter_param; + return true; +} + +static int ep93xx_spi_setup_dma(struct ep93xx_spi *espi) +{ + dma_cap_mask_t mask; + int ret; + + espi->zeropage = (void *)get_zeroed_page(GFP_KERNEL); + if (!espi->zeropage) + return -ENOMEM; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + espi->dma_rx_data.port = EP93XX_DMA_SSP; + espi->dma_rx_data.direction = DMA_FROM_DEVICE; + espi->dma_rx_data.name = "ep93xx-spi-rx"; + + espi->dma_rx = dma_request_channel(mask, ep93xx_spi_dma_filter, + &espi->dma_rx_data); + if (!espi->dma_rx) { + ret = -ENODEV; + goto fail_free_page; + } + + espi->dma_tx_data.port = EP93XX_DMA_SSP; + espi->dma_tx_data.direction = DMA_TO_DEVICE; + espi->dma_tx_data.name = "ep93xx-spi-tx"; + + espi->dma_tx = dma_request_channel(mask, ep93xx_spi_dma_filter, + &espi->dma_tx_data); + if (!espi->dma_tx) { + ret = -ENODEV; + goto fail_release_rx; + } + + return 0; + +fail_release_rx: + dma_release_channel(espi->dma_rx); + espi->dma_rx = NULL; +fail_free_page: + free_page((unsigned long)espi->zeropage); + + return ret; +} + +static void ep93xx_spi_release_dma(struct ep93xx_spi 
*espi) +{ + if (espi->dma_rx) { + dma_release_channel(espi->dma_rx); + sg_free_table(&espi->rx_sgt); + } + if (espi->dma_tx) { + dma_release_channel(espi->dma_tx); + sg_free_table(&espi->tx_sgt); + } + + if (espi->zeropage) + free_page((unsigned long)espi->zeropage); +} + static int __init ep93xx_spi_probe(struct platform_device *pdev) { struct spi_master *master; @@ -818,6 +1091,7 @@ static int __init ep93xx_spi_probe(struct platform_device *pdev) goto fail_put_clock; } + espi->sspdr_phys = res->start + SSPDR; espi->regs_base = ioremap(res->start, resource_size(res)); if (!espi->regs_base) { dev_err(&pdev->dev, "failed to map resources\n"); @@ -832,10 +1106,13 @@ static int __init ep93xx_spi_probe(struct platform_device *pdev) goto fail_unmap_regs; } + if (info->use_dma && ep93xx_spi_setup_dma(espi)) + dev_warn(&pdev->dev, "DMA setup failed. Falling back to PIO\n"); + espi->wq = create_singlethread_workqueue("ep93xx_spid"); if (!espi->wq) { dev_err(&pdev->dev, "unable to create workqueue\n"); - goto fail_free_irq; + goto fail_free_dma; } INIT_WORK(&espi->msg_work, ep93xx_spi_work); INIT_LIST_HEAD(&espi->msg_queue); @@ -857,7 +1134,8 @@ static int __init ep93xx_spi_probe(struct platform_device *pdev) fail_free_queue: destroy_workqueue(espi->wq); -fail_free_irq: +fail_free_dma: + ep93xx_spi_release_dma(espi); free_irq(espi->irq, espi); fail_unmap_regs: iounmap(espi->regs_base); @@ -901,6 +1179,7 @@ static int __exit ep93xx_spi_remove(struct platform_device *pdev) } spin_unlock_irq(&espi->lock); + ep93xx_spi_release_dma(espi); free_irq(espi->irq, espi); iounmap(espi->regs_base); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); From 7dab35c0c01c5d960d7b551a607270adccfadb42 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 1 Jun 2011 15:10:30 -0700 Subject: [PATCH 011/151] dma: ipu_idmac.c: use resource_size in ioremap Signed-off-by: H Hartley Sweeten Cc: Dan Williams Cc: Vinod Koul Cc: Guennadi Liakhovetski Cc: Anatolij Gustschin Signed-off-by: Vinod 
Koul --- drivers/dma/ipu/ipu_idmac.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index c1a125e7d1df..25447a8ca282 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -1705,16 +1705,14 @@ static int __init ipu_probe(struct platform_device *pdev) ipu_data.irq_fn, ipu_data.irq_err, ipu_data.irq_base); /* Remap IPU common registers */ - ipu_data.reg_ipu = ioremap(mem_ipu->start, - mem_ipu->end - mem_ipu->start + 1); + ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu)); if (!ipu_data.reg_ipu) { ret = -ENOMEM; goto err_ioremap_ipu; } /* Remap Image Converter and Image DMA Controller registers */ - ipu_data.reg_ic = ioremap(mem_ic->start, - mem_ic->end - mem_ic->start + 1); + ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic)); if (!ipu_data.reg_ic) { ret = -ENOMEM; goto err_ioremap_ic; From 114df7d66efd5c23561782f38e97c48fb30d4f5d Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 1 Jun 2011 15:16:09 -0700 Subject: [PATCH 012/151] dma: at_hdmac.c: use resource_size Signed-off-by: H Hartley Sweeten Cc: Dan Williams Cc: Vinod Koul Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 36144f88d718..6a483eac7b3f 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1216,7 +1216,7 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.cap_mask = pdata->cap_mask; atdma->all_chan_mask = (1 << pdata->nr_channels) - 1; - size = io->end - io->start + 1; + size = resource_size(io); if (!request_mem_region(io->start, size, pdev->dev.driver->name)) { err = -EBUSY; goto err_kfree; @@ -1362,7 +1362,7 @@ static int __exit at_dma_remove(struct platform_device *pdev) atdma->regs = NULL; io = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(io->start, io->end - 
io->start + 1); + release_mem_region(io->start, resource_size(io)); kfree(atdma); From c08957a2cf3c4a14e68d72c845d3c52cf3d826e1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 7 Jun 2011 23:36:18 +0100 Subject: [PATCH 013/151] regulator: Properly register dummy regulator driver Recent changes in the driver core appear to mean that the data structures for the driver core are not fully initialised unless the driver is bound. Make sure the driver core knows the dummy driver is in use by binding it to a driver. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/dummy.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/dummy.c b/drivers/regulator/dummy.c index c7410bde7b5d..f6ef6694ab98 100644 --- a/drivers/regulator/dummy.c +++ b/drivers/regulator/dummy.c @@ -36,6 +36,29 @@ static struct regulator_desc dummy_desc = { .ops = &dummy_ops, }; +static int __devinit dummy_regulator_probe(struct platform_device *pdev) +{ + int ret; + + dummy_regulator_rdev = regulator_register(&dummy_desc, NULL, + &dummy_initdata, NULL); + if (IS_ERR(dummy_regulator_rdev)) { + ret = PTR_ERR(dummy_regulator_rdev); + pr_err("Failed to register regulator: %d\n", ret); + return ret; + } + + return 0; +} + +static struct platform_driver dummy_regulator_driver = { + .probe = dummy_regulator_probe, + .driver = { + .name = "reg-dummy", + .owner = THIS_MODULE, + }, +}; + static struct platform_device *dummy_pdev; void __init regulator_dummy_init(void) @@ -55,12 +78,9 @@ void __init regulator_dummy_init(void) return; } - dummy_regulator_rdev = regulator_register(&dummy_desc, NULL, - &dummy_initdata, NULL); - if (IS_ERR(dummy_regulator_rdev)) { - ret = PTR_ERR(dummy_regulator_rdev); - pr_err("Failed to register regulator: %d\n", ret); + ret = platform_driver_register(&dummy_regulator_driver); + if (ret != 0) { + pr_err("Failed to register dummy regulator driver: %d\n", ret); 
platform_device_unregister(dummy_pdev); - return; } } From f5726ae33c382366ea1b23240d5620dcf675d81d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 9 Jun 2011 16:22:20 +0100 Subject: [PATCH 014/151] regulator: Increase the limit on sysfs file names With verbose filenames we can easily hit 32 characters. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7b38af90a012..75312bd6aac4 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1033,7 +1033,7 @@ static void unset_regulator_supplies(struct regulator_dev *rdev) } } -#define REG_STR_SIZE 32 +#define REG_STR_SIZE 64 static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, From e0eaedefda8e14ed3f445f382c568c5d69e4223f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 9 Jun 2011 16:22:21 +0100 Subject: [PATCH 015/151] regulator: Include the device name in the microamps_requested_ file We may have multiple devices requesting a supply with the same name so include the device name in the generated filename for microamps_requested to avoid duplicate files. 
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 75312bd6aac4..e3b67ee48b23 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1053,8 +1053,9 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, if (dev) { /* create a 'requested_microamps_name' sysfs entry */ - size = scnprintf(buf, REG_STR_SIZE, "microamps_requested_%s", - supply_name); + size = scnprintf(buf, REG_STR_SIZE, + "microamps_requested_%s-%s", + dev_name(dev), supply_name); if (size >= REG_STR_SIZE) goto overflow_err; From 3801b86aa482d26a8ae460f67fca29e016491a86 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 9 Jun 2011 16:22:22 +0100 Subject: [PATCH 016/151] regulator: Refactor supply implementation to work as regular consumers Currently the regulator supply implementation is somewhat complex and fragile as it doesn't look like standard consumers but is instead a parallel implementation. This causes issues with locking and reference counting. Move the implementation over to using standard consumers to address this. Rather than only notifying the supply on the first enable/disable we do so every time the regulator is enabled or disabled, simplifying locking as we don't need to hold a lock on the consumer we are about to enable. 
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 101 +++++++++++-------------------- include/linux/regulator/driver.h | 4 +- 2 files changed, 37 insertions(+), 68 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index e3b67ee48b23..f0cc3983ffee 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -82,8 +82,7 @@ struct regulator { }; static int _regulator_is_enabled(struct regulator_dev *rdev); -static int _regulator_disable(struct regulator_dev *rdev, - struct regulator_dev **supply_rdev_ptr); +static int _regulator_disable(struct regulator_dev *rdev); static int _regulator_get_voltage(struct regulator_dev *rdev); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); @@ -91,6 +90,9 @@ static void _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data); static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); +static struct regulator *create_regulator(struct regulator_dev *rdev, + struct device *dev, + const char *supply_name); static const char *rdev_get_name(struct regulator_dev *rdev) { @@ -930,21 +932,20 @@ static int set_machine_constraints(struct regulator_dev *rdev, * core if it's child is enabled. 
*/ static int set_supply(struct regulator_dev *rdev, - struct regulator_dev *supply_rdev) + struct regulator_dev *supply_rdev) { int err; - err = sysfs_create_link(&rdev->dev.kobj, &supply_rdev->dev.kobj, - "supply"); - if (err) { - rdev_err(rdev, "could not add device link %s err %d\n", - supply_rdev->dev.kobj.name, err); - goto out; + rdev_info(rdev, "supplied by %s\n", rdev_get_name(supply_rdev)); + + rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY"); + if (IS_ERR(rdev->supply)) { + err = PTR_ERR(rdev->supply); + rdev->supply = NULL; + return err; } - rdev->supply = supply_rdev; - list_add(&rdev->slist, &supply_rdev->supply_list); -out: - return err; + + return 0; } /** @@ -1303,19 +1304,6 @@ static int _regulator_enable(struct regulator_dev *rdev) { int ret, delay; - if (rdev->use_count == 0) { - /* do we need to enable the supply regulator first */ - if (rdev->supply) { - mutex_lock(&rdev->supply->mutex); - ret = _regulator_enable(rdev->supply); - mutex_unlock(&rdev->supply->mutex); - if (ret < 0) { - rdev_err(rdev, "failed to enable: %d\n", ret); - return ret; - } - } - } - /* check voltage and requested load before enabling */ if (rdev->constraints && (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS)) @@ -1390,19 +1378,27 @@ int regulator_enable(struct regulator *regulator) struct regulator_dev *rdev = regulator->rdev; int ret = 0; + if (rdev->supply) { + ret = regulator_enable(rdev->supply); + if (ret != 0) + return ret; + } + mutex_lock(&rdev->mutex); ret = _regulator_enable(rdev); mutex_unlock(&rdev->mutex); + + if (ret != 0) + regulator_disable(rdev->supply); + return ret; } EXPORT_SYMBOL_GPL(regulator_enable); /* locks held by regulator_disable() */ -static int _regulator_disable(struct regulator_dev *rdev, - struct regulator_dev **supply_rdev_ptr) +static int _regulator_disable(struct regulator_dev *rdev) { int ret = 0; - *supply_rdev_ptr = NULL; if (WARN(rdev->use_count <= 0, "unbalanced disables for %s\n", 
rdev_get_name(rdev))) @@ -1429,9 +1425,6 @@ static int _regulator_disable(struct regulator_dev *rdev, NULL); } - /* decrease our supplies ref count and disable if required */ - *supply_rdev_ptr = rdev->supply; - rdev->use_count = 0; } else if (rdev->use_count > 1) { @@ -1442,6 +1435,7 @@ static int _regulator_disable(struct regulator_dev *rdev, rdev->use_count--; } + return ret; } @@ -1460,29 +1454,21 @@ static int _regulator_disable(struct regulator_dev *rdev, int regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; - struct regulator_dev *supply_rdev = NULL; int ret = 0; mutex_lock(&rdev->mutex); - ret = _regulator_disable(rdev, &supply_rdev); + ret = _regulator_disable(rdev); mutex_unlock(&rdev->mutex); - /* decrease our supplies ref count and disable if required */ - while (supply_rdev != NULL) { - rdev = supply_rdev; - - mutex_lock(&rdev->mutex); - _regulator_disable(rdev, &supply_rdev); - mutex_unlock(&rdev->mutex); - } + if (ret == 0 && rdev->supply) + regulator_disable(rdev->supply); return ret; } EXPORT_SYMBOL_GPL(regulator_disable); /* locks held by regulator_force_disable() */ -static int _regulator_force_disable(struct regulator_dev *rdev, - struct regulator_dev **supply_rdev_ptr) +static int _regulator_force_disable(struct regulator_dev *rdev) { int ret = 0; @@ -1499,10 +1485,6 @@ static int _regulator_force_disable(struct regulator_dev *rdev, REGULATOR_EVENT_DISABLE, NULL); } - /* decrease our supplies ref count and disable if required */ - *supply_rdev_ptr = rdev->supply; - - rdev->use_count = 0; return ret; } @@ -1518,16 +1500,16 @@ static int _regulator_force_disable(struct regulator_dev *rdev, int regulator_force_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; - struct regulator_dev *supply_rdev = NULL; int ret; mutex_lock(&rdev->mutex); regulator->uA_load = 0; - ret = _regulator_force_disable(rdev, &supply_rdev); + ret = _regulator_force_disable(regulator->rdev); 
mutex_unlock(&rdev->mutex); - if (supply_rdev) - regulator_disable(get_device_regulator(rdev_get_dev(supply_rdev))); + if (rdev->supply) + while (rdev->open_count--) + regulator_disable(rdev->supply); return ret; } @@ -2138,7 +2120,7 @@ int regulator_set_optimum_mode(struct regulator *regulator, int uA_load) /* get input voltage */ input_uV = 0; if (rdev->supply) - input_uV = _regulator_get_voltage(rdev->supply); + input_uV = regulator_get_voltage(rdev->supply); if (input_uV <= 0) input_uV = rdev->constraints->input_uV; if (input_uV <= 0) { @@ -2208,17 +2190,8 @@ EXPORT_SYMBOL_GPL(regulator_unregister_notifier); static void _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { - struct regulator_dev *_rdev; - /* call rdev chain first */ blocking_notifier_call_chain(&rdev->notifier, event, NULL); - - /* now notify regulator we supply */ - list_for_each_entry(_rdev, &rdev->supply_list, slist) { - mutex_lock(&_rdev->mutex); - _notifier_call_chain(_rdev, event, data); - mutex_unlock(&_rdev->mutex); - } } /** @@ -2610,9 +2583,7 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, rdev->owner = regulator_desc->owner; rdev->desc = regulator_desc; INIT_LIST_HEAD(&rdev->consumer_list); - INIT_LIST_HEAD(&rdev->supply_list); INIT_LIST_HEAD(&rdev->list); - INIT_LIST_HEAD(&rdev->slist); BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier); /* preform any regulator specific init */ @@ -2724,7 +2695,7 @@ void regulator_unregister(struct regulator_dev *rdev) unset_regulator_supplies(rdev); list_del(&rdev->list); if (rdev->supply) - sysfs_remove_link(&rdev->dev.kobj, "supply"); + regulator_put(rdev->supply); device_unregister(&rdev->dev); kfree(rdev->constraints); mutex_unlock(&regulator_list_mutex); diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 6c433b89c80d..1a80bc77517d 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -188,18 +188,16 @@ struct
regulator_dev { /* lists we belong to */ struct list_head list; /* list of all regulators */ - struct list_head slist; /* list of supplied regulators */ /* lists we own */ struct list_head consumer_list; /* consumers we supply */ - struct list_head supply_list; /* regulators we supply */ struct blocking_notifier_head notifier; struct mutex mutex; /* consumer lock */ struct module *owner; struct device dev; struct regulation_constraints *constraints; - struct regulator_dev *supply; /* for tree */ + struct regulator *supply; /* for tree */ void *reg_data; /* regulator_dev data */ From 7d51a0dbe51282f3ed13cadf6e7f13a974374be2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 9 Jun 2011 16:06:37 +0100 Subject: [PATCH 017/151] regulator: Add rdev_crit() macro No actual users but provide the macro so there's less surprise when it's not there. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index f0cc3983ffee..cc3dfd66f395 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -34,6 +34,8 @@ #include "dummy.h" +#define rdev_crit(rdev, fmt, ...) \ + pr_crit("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__) #define rdev_err(rdev, fmt, ...) \ pr_err("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__) #define rdev_warn(rdev, fmt, ...) \ From e2f5e5a71dfe6bf155590de0fdd6d748ac79bf76 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 10 Jun 2011 15:15:05 -0700 Subject: [PATCH 018/151] dma/ep93xx_dma.c: local symbols should be static The symbol 'ep93xx_dma_prep_dma_memcpy' is only used in this driver and should be marked static. 
Signed-off-by: H Hartley Sweeten Cc: Mika Westerberg Cc: Dan Williams Cc: Vinod Koul Acked-by: Mika Westerberg Signed-off-by: Vinod Koul --- drivers/dma/ep93xx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 0766c1e53b1d..5d7a49bd7c26 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -902,7 +902,7 @@ static void ep93xx_dma_free_chan_resources(struct dma_chan *chan) * * Returns a valid DMA descriptor or %NULL in case of failure. */ -struct dma_async_tx_descriptor * +static struct dma_async_tx_descriptor * ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) { From a03a202e95fdaa3ff52ccfc2594ec531e5917816 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 20 Jun 2011 17:02:47 +0200 Subject: [PATCH 019/151] dmaengine: failure to get a specific DMA channel is not critical There exist systems with multiple DMA controllers with different capabilities. For example, on some sh-mobile / rmobile systems there are DMA controllers, whose channels can be configured to be used with SD- and MMC-host controllers, serial ports etc. Besides there are also DMA controllers, that can only be used for one special function, e.g., for USB. In such cases the DMA client filter function can just choose to specify to the DMA driver, which channel it needs. Then the .device_alloc_chan_resources() method of the DMA driver will check, whether it can provide that function. If not, it will fail and the loop in __dma_request_channel() will continue to the next DMA device, until it finds a suitable one. This works fine with just one minor glitch: the kernel logs error messages like dmaengine: failed to get : (-) after each such non-critical failure. This patch lowers priority of this message to the debug level.
Reported-by: Kuninori Morimoto Signed-off-by: Guennadi Liakhovetski Tested-by: Kuninori Morimoto Tested-by: Magnus Damm Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 8bcb15fb959d..f7f21a5de3e1 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -509,8 +509,8 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v dma_chan_name(chan)); list_del_rcu(&device->global_node); } else if (err) - pr_err("dmaengine: failed to get %s: (%d)\n", - dma_chan_name(chan), err); + pr_debug("dmaengine: failed to get %s: (%d)\n", + dma_chan_name(chan), err); else break; if (--device->privatecnt == 0) From d3ad8434aa83ef7c88bc91edcfe012cdcbab9f3e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 27 Jun 2011 12:36:29 -0400 Subject: [PATCH 020/151] jbd2: use WRITE_SYNC in journal checkpoint In journal checkpoint, we write the buffer and wait for its finish. But in cfq, the async queue has a very low priority, and in our test, if there are too many sync queues and every queue is filled up with requests, the write request will be delayed for quite a long time and all the tasks which are waiting for journal space will end with errors like: INFO: task attr_set:3816 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. attr_set D ffff880028393480 0 3816 1 0x00000000 ffff8802073fbae8 0000000000000086 ffff8802140847c8 ffff8800283934e8 ffff8802073fb9d8 ffffffff8103e456 ffff8802140847b8 ffff8801ed728080 ffff8801db4bc080 ffff8801ed728450 ffff880028393480 0000000000000002 Call Trace: [] ? __dequeue_entity+0x33/0x38 [] ? need_resched+0x23/0x2d [] ? thread_return+0xa2/0xbc [] ? jbd2_journal_dirty_metadata+0x116/0x126 [jbd2] [] ? jbd2_journal_dirty_metadata+0x116/0x126 [jbd2] [] __mutex_lock_common+0x14e/0x1a9 [] ? 
brelse+0x13/0x15 [ext4] [] __mutex_lock_slowpath+0x19/0x1b [] mutex_lock+0x1b/0x32 [] __jbd2_journal_insert_checkpoint+0xe3/0x20c [jbd2] [] start_this_handle+0x438/0x527 [jbd2] [] ? autoremove_wake_function+0x0/0x3e [] jbd2_journal_start+0xa1/0xcc [jbd2] [] ext4_journal_start_sb+0x57/0x81 [ext4] [] ext4_xattr_set+0x6c/0xe3 [ext4] [] ext4_xattr_user_set+0x42/0x4b [ext4] [] generic_setxattr+0x6b/0x76 [] __vfs_setxattr_noperm+0x47/0xc0 [] vfs_setxattr+0x7f/0x9a [] setxattr+0xb5/0xe8 [] ? do_filp_open+0x571/0xa6e [] sys_fsetxattr+0x6b/0x91 [] system_call_fastpath+0x16/0x1b So this patch tries to use WRITE_SYNC in __flush_batch so that the request will be moved into sync queue and handled by cfq timely. We also use the new plug, so that all the WRITE_SYNC requests can be given as a whole when we unplug it. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Cc: Jan Kara Reported-by: Robin Dong --- fs/jbd2/checkpoint.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 2c62c5aae82f..16a698bd906d 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -257,9 +257,12 @@ static void __flush_batch(journal_t *journal, int *batch_count) { int i; + struct blk_plug plug; + blk_start_plug(&plug); for (i = 0; i < *batch_count; i++) - write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC); + blk_finish_plug(&plug); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; From ed7a7e16724a4123fce1fc0ff1f5131a0596f189 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Mon, 27 Jun 2011 15:35:53 -0400 Subject: [PATCH 021/151] ext4: fix incorrect error msg in ext4_ext_insert_index In function ext4_ext_insert_index when eh_entries of curp is bigger than eh_max, error messages will be printed out, but the content is about logical and ei_block, that's incorrect.
Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f815cc81e7a2..eb63c7b8dfd2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -808,8 +808,9 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) > le16_to_cpu(curp->p_hdr->eh_max))) { EXT4_ERROR_INODE(inode, - "logical %d == ei_block %d!", - logical, le32_to_cpu(curp->p_idx->ei_block)); + "eh_entries %d > eh_max %d!", + le16_to_cpu(curp->p_hdr->eh_entries), + le16_to_cpu(curp->p_hdr->eh_max)); return -EIO; } if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { From ff9893dc8aa622a4f122293a6861566a284edea5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 27 Jun 2011 16:36:31 -0400 Subject: [PATCH 022/151] ext4: split ext4_ind_truncate from ext4_truncate We are about to move all indirect inode functions to a new file. Before we do that, let's split ext4_ind_truncate() out of ext4_truncate() leaving only generic code in the latter, so we will be able to move ext4_ind_truncate() to the new file. 
Signed-off-by: Amir Goldstein Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 ++ fs/ext4/inode.c | 36 ++++++++++++++++++++---------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1921392cd708..8532dd43d320 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1834,6 +1834,8 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); +extern void ext4_ind_truncate(struct inode *inode); + /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e3126c051006..a8f310b77f56 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4470,6 +4470,26 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) * ext4_truncate() run will find them and release them. 
*/ void ext4_truncate(struct inode *inode) +{ + trace_ext4_truncate_enter(inode); + + if (!ext4_can_truncate(inode)) + return; + + ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); + + if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) + ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); + + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + ext4_ext_truncate(inode); + else + ext4_ind_truncate(inode); + + trace_ext4_truncate_exit(inode); +} + +void ext4_ind_truncate(struct inode *inode) { handle_t *handle; struct ext4_inode_info *ei = EXT4_I(inode); @@ -4484,22 +4504,6 @@ void ext4_truncate(struct inode *inode) ext4_lblk_t last_block, max_block; unsigned blocksize = inode->i_sb->s_blocksize; - trace_ext4_truncate_enter(inode); - - if (!ext4_can_truncate(inode)) - return; - - ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); - - if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) - ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); - - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { - ext4_ext_truncate(inode); - trace_ext4_truncate_exit(inode); - return; - } - handle = start_transaction(inode); if (IS_ERR(handle)) return; /* AKPM: return what? */ From 8bb2b247124ba6093455d4aef26743b1bef27bc5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 27 Jun 2011 17:10:28 -0400 Subject: [PATCH 023/151] ext4: rename ext4_indirect_* funcs to ext4_ind_* We are going to move all ext4_ind_* functions to indirect.c. Before we do that, let's rename 2 functions called ext4_indirect_* to ext4_ind_*, to keep to the naming convention. 
Signed-off-by: Amir Goldstein Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a8f310b77f56..6c1d28e37235 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1075,8 +1075,7 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) * Calculate the number of metadata blocks need to reserve * to allocate a new block at @lblocks for non extent file based file */ -static int ext4_indirect_calc_metadata_amount(struct inode *inode, - sector_t lblock) +static int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) { struct ext4_inode_info *ei = EXT4_I(inode); sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); @@ -1107,7 +1106,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return ext4_ext_calc_metadata_amount(inode, lblock); - return ext4_indirect_calc_metadata_amount(inode, lblock); + return ext4_ind_calc_metadata_amount(inode, lblock); } /* @@ -5456,8 +5455,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } -static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, - int chunk) +static int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) { int indirects; @@ -5483,7 +5481,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - return ext4_indirect_trans_blocks(inode, nrblocks, chunk); + return ext4_ind_trans_blocks(inode, nrblocks, chunk); return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); } From 1f7d1e77419050831a905353683807fa69a26625 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 27 Jun 2011 19:16:02 -0400 Subject: [PATCH 024/151] ext4: move __ext4_check_blockref to block_validity.c In 
preparation for moving the indirect functions to a separate file, move __ext4_check_blockref() to block_validity.c and rename it to ext4_check_blockref() which is exported as globally visible function. Also, rename the cpp macro ext4_check_inode_blockref() to ext4_ind_check_inode(), to make it clear that it is only valid for use with non-extent mapped inodes. Signed-off-by: "Theodore Ts'o" --- fs/ext4/block_validity.c | 20 ++++++++++++++++++++ fs/ext4/ext4.h | 15 +++++++++++++++ fs/ext4/inode.c | 35 +---------------------------------- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index fac90f3fba80..af103be491b0 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -246,3 +246,23 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, return 1; } +int ext4_check_blockref(const char *function, unsigned int line, + struct inode *inode, __le32 *p, unsigned int max) +{ + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + __le32 *bref = p; + unsigned int blk; + + while (bref < p+max) { + blk = le32_to_cpu(*bref++); + if (blk && + unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), + blk, 1))) { + es->s_last_error_block = cpu_to_le64(blk); + ext4_error_inode(inode, function, line, blk, + "invalid block"); + return -EIO; + } + } + return 0; +} diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8532dd43d320..82ba7eb7c4a5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2124,6 +2124,19 @@ static inline void ext4_mark_super_dirty(struct super_block *sb) sb->s_dirt =1; } +/* + * Block validity checking + */ +#define ext4_check_indirect_blockref(inode, bh) \ + ext4_check_blockref(__func__, __LINE__, inode, \ + (__le32 *)(bh)->b_data, \ + EXT4_ADDR_PER_BLOCK((inode)->i_sb)) + +#define ext4_ind_check_inode(inode) \ + ext4_check_blockref(__func__, __LINE__, inode, \ + EXT4_I(inode)->i_data, \ + EXT4_NDIR_BLOCKS) + /* * Inodes and files operations */ @@ 
-2153,6 +2166,8 @@ extern void ext4_exit_system_zone(void); extern int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, unsigned int count); +extern int ext4_check_blockref(const char *, unsigned int, + struct inode *, __le32 *, unsigned int); /* extents.c */ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6c1d28e37235..3dca5264ccff 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -360,39 +360,6 @@ static int ext4_block_to_path(struct inode *inode, return n; } -static int __ext4_check_blockref(const char *function, unsigned int line, - struct inode *inode, - __le32 *p, unsigned int max) -{ - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; - __le32 *bref = p; - unsigned int blk; - - while (bref < p+max) { - blk = le32_to_cpu(*bref++); - if (blk && - unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), - blk, 1))) { - es->s_last_error_block = cpu_to_le64(blk); - ext4_error_inode(inode, function, line, blk, - "invalid block"); - return -EIO; - } - } - return 0; -} - - -#define ext4_check_indirect_blockref(inode, bh) \ - __ext4_check_blockref(__func__, __LINE__, inode, \ - (__le32 *)(bh)->b_data, \ - EXT4_ADDR_PER_BLOCK((inode)->i_sb)) - -#define ext4_check_inode_blockref(inode) \ - __ext4_check_blockref(__func__, __LINE__, inode, \ - EXT4_I(inode)->i_data, \ - EXT4_NDIR_BLOCKS) - /** * ext4_get_branch - read the chain of indirect blocks leading to data * @inode: inode in question @@ -5010,7 +4977,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) (S_ISLNK(inode->i_mode) && !ext4_inode_is_fast_symlink(inode))) { /* Validate block references which are part of inode */ - ret = ext4_check_inode_blockref(inode); + ret = ext4_ind_check_inode(inode); } if (ret) goto bad_inode; From 9f125d641beb898f5bf2fe69583192c18043517a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 27 Jun 2011 19:16:04 -0400 Subject: [PATCH 025/151] ext4: move common 
truncate functions to header file Move two functions that are needed both by inode.c and by the indirect functions that will be moved to indirect.c into truncate.h as inline functions, so that we can avoid having duplicate copies of the functions (which can be a maintenance problem) without having to expose them as global functions. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 48 ++++++---------------------------------------- fs/ext4/truncate.h | 43 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 42 deletions(-) create mode 100644 fs/ext4/truncate.h diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3dca5264ccff..9b82ac7b0f55 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -47,6 +47,7 @@ #include "xattr.h" #include "acl.h" #include "ext4_extents.h" +#include "truncate.h" #include @@ -88,33 +89,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); } -/* - * Work out how many blocks we need to proceed with the next chunk of a - * truncate transaction. - */ -static unsigned long blocks_for_truncate(struct inode *inode) -{ - ext4_lblk_t needed; - - needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); - - /* Give ourselves just enough room to cope with inodes in which - * i_blocks is corrupt: we've seen disk corruptions in the past - * which resulted in random data in an inode which looked enough - * like a regular file for ext4 to try to delete it. Things - * will go a bit crazy if that happens, but at least we should - * try not to panic the whole kernel. */ - if (needed < 2) - needed = 2; - - /* But we need to bound the transaction so we don't overflow the - * journal. */ - if (needed > EXT4_MAX_TRANS_DATA) - needed = EXT4_MAX_TRANS_DATA; - - return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; -} - /* * Truncate transactions can be complex and absolutely huge.
So we need to * be able to restart the transaction at a conventient checkpoint to make @@ -129,7 +103,7 @@ static handle_t *start_transaction(struct inode *inode) { handle_t *result; - result = ext4_journal_start(inode, blocks_for_truncate(inode)); + result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); if (!IS_ERR(result)) return result; @@ -149,7 +123,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) return 0; if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) return 0; - if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) + if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) return 0; return 1; } @@ -204,7 +178,7 @@ void ext4_evict_inode(struct inode *inode) if (is_bad_inode(inode)) goto no_delete; - handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3); + handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); if (IS_ERR(handle)) { ext4_std_error(inode->i_sb, PTR_ERR(handle)); /* @@ -1555,16 +1529,6 @@ static int do_journal_get_write_access(handle_t *handle, return ret; } -/* - * Truncate blocks that were not used by write. We have to truncate the - * pagecache as well so that corresponding buffers get properly unmapped. 
- */ -static void ext4_truncate_failed_write(struct inode *inode) -{ - truncate_inode_pages(inode->i_mapping, inode->i_size); - ext4_truncate(inode); -} - static int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); static int ext4_write_begin(struct file *file, struct address_space *mapping, @@ -4134,7 +4098,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, if (unlikely(err)) goto out_err; err = ext4_truncate_restart_trans(handle, inode, - blocks_for_truncate(inode)); + ext4_blocks_for_truncate(inode)); if (unlikely(err)) goto out_err; if (bh) { @@ -4329,7 +4293,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (try_to_extend_transaction(handle, inode)) { ext4_mark_inode_dirty(handle, inode); ext4_truncate_restart_trans(handle, inode, - blocks_for_truncate(inode)); + ext4_blocks_for_truncate(inode)); } /* diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h new file mode 100644 index 000000000000..011ba6670d99 --- /dev/null +++ b/fs/ext4/truncate.h @@ -0,0 +1,43 @@ +/* + * linux/fs/ext4/truncate.h + * + * Common inline functions needed for truncate support + */ + +/* + * Truncate blocks that were not used by write. We have to truncate the + * pagecache as well so that corresponding buffers get properly unmapped. + */ +static inline void ext4_truncate_failed_write(struct inode *inode) +{ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); +} + +/* + * Work out how many blocks we need to proceed with the next chunk of a + * truncate transaction. 
+ */ +static inline unsigned long ext4_blocks_for_truncate(struct inode *inode) +{ + ext4_lblk_t needed; + + needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); + + /* Give ourselves just enough room to cope with inodes in which + * i_blocks is corrupt: we've seen disk corruptions in the past + * which resulted in random data in an inode which looked enough + * like a regular file for ext4 to try to delete it. Things + * will go a bit crazy if that happens, but at least we should + * try not to panic the whole kernel. */ + if (needed < 2) + needed = 2; + + /* But we need to bound the transaction so we don't overflow the + * journal. */ + if (needed > EXT4_MAX_TRANS_DATA) + needed = EXT4_MAX_TRANS_DATA; + + return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; +} + From dae1e52cb1267bf8f52e5e47a80fab566d7e8aa4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 27 Jun 2011 19:40:50 -0400 Subject: [PATCH 026/151] ext4: move ext4_ind_* functions from inode.c to indirect.c This patch moves functions from inode.c to indirect.c. The moved functions are ext4_ind_* functions and their helpers. Functions called from inode.c are declared extern. 
Signed-off-by: Amir Goldstein Signed-off-by: "Theodore Ts'o" --- fs/ext4/Makefile | 2 +- fs/ext4/block_validity.c | 1 + fs/ext4/ext4.h | 9 + fs/ext4/indirect.c | 1510 ++++++++++++++++++++++++++++++++++++++ fs/ext4/inode.c | 1486 ------------------------------------- 5 files changed, 1521 insertions(+), 1487 deletions(-) create mode 100644 fs/ext4/indirect.c diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 04109460ba9e..56fd8f865930 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ - mmp.o + mmp.o indirect.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index af103be491b0..8efb2f0a3447 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -266,3 +266,4 @@ int ext4_check_blockref(const char *function, unsigned int line, } return 0; } + diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 82ba7eb7c4a5..ddaf5043fb38 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1834,6 +1834,15 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); + +/* indirect.c */ +extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags); +extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs); +extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); +extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk); extern void ext4_ind_truncate(struct 
inode *inode); /* ioctl.c */ diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c new file mode 100644 index 000000000000..c3e85a86e821 --- /dev/null +++ b/fs/ext4/indirect.c @@ -0,0 +1,1510 @@ +/* + * linux/fs/ext4/indirect.c + * + * from + * + * linux/fs/ext4/inode.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Goal-directed block allocation by Stephen Tweedie + * (sct@redhat.com), 1993, 1998 + */ + +#include +#include "ext4_jbd2.h" +#include "truncate.h" + +#include + +typedef struct { + __le32 *p; + __le32 key; + struct buffer_head *bh; +} Indirect; + +static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) +{ + p->key = *(p->p = v); + p->bh = bh; +} + +/** + * ext4_block_to_path - parse the block number into array of offsets + * @inode: inode in question (we are only interested in its superblock) + * @i_block: block number to be parsed + * @offsets: array to store the offsets in + * @boundary: set this non-zero if the referred-to block is likely to be + * followed (on disk) by an indirect block. + * + * To store the locations of file's data ext4 uses a data structure common + * for UNIX filesystems - tree of pointers anchored in the inode, with + * data blocks at leaves and indirect blocks in intermediate nodes. + * This function translates the block number into path in that tree - + * return value is the path length and @offsets[n] is the offset of + * pointer to (n+1)th node in the nth one. If @block is out of range + * (negative or too large) warning is printed and zero returned. + * + * Note: function doesn't find node addresses, so no IO is needed. All + * we need to know is the capacity of indirect blocks (taken from the + * inode->i_sb). 
+ */ + +/* + * Portability note: the last comparison (check that we fit into triple + * indirect block) is spelled differently, because otherwise on an + * architecture with 32-bit longs and 8Kb pages we might get into trouble + * if our filesystem had 8Kb blocks. We might use long long, but that would + * kill us on x86. Oh, well, at least the sign propagation does not matter - + * i_block would have to be negative in the very beginning, so we would not + * get there at all. + */ + +static int ext4_block_to_path(struct inode *inode, + ext4_lblk_t i_block, + ext4_lblk_t offsets[4], int *boundary) +{ + int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT4_NDIR_BLOCKS, + indirect_blocks = ptrs, + double_blocks = (1 << (ptrs_bits * 2)); + int n = 0; + int final = 0; + + if (i_block < direct_blocks) { + offsets[n++] = i_block; + final = direct_blocks; + } else if ((i_block -= direct_blocks) < indirect_blocks) { + offsets[n++] = EXT4_IND_BLOCK; + offsets[n++] = i_block; + final = ptrs; + } else if ((i_block -= indirect_blocks) < double_blocks) { + offsets[n++] = EXT4_DIND_BLOCK; + offsets[n++] = i_block >> ptrs_bits; + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { + offsets[n++] = EXT4_TIND_BLOCK; + offsets[n++] = i_block >> (ptrs_bits * 2); + offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else { + ext4_warning(inode->i_sb, "block %lu > max in inode %lu", + i_block + direct_blocks + + indirect_blocks + double_blocks, inode->i_ino); + } + if (boundary) + *boundary = final - 1 - (i_block & (ptrs - 1)); + return n; +} + +/** + * ext4_get_branch - read the chain of indirect blocks leading to data + * @inode: inode in question + * @depth: depth of the chain (1 - direct pointer, etc.) 
+ * @offsets: offsets of pointers in inode/indirect blocks + * @chain: place to store the result + * @err: here we store the error value + * + * Function fills the array of triples and returns %NULL + * if everything went OK or the pointer to the last filled triple + * (incomplete one) otherwise. Upon the return chain[i].key contains + * the number of (i+1)-th block in the chain (as it is stored in memory, + * i.e. little-endian 32-bit), chain[i].p contains the address of that + * number (it points into struct inode for i==0 and into the bh->b_data + * for i>0) and chain[i].bh points to the buffer_head of i-th indirect + * block for i>0 and NULL for i==0. In other words, it holds the block + * numbers of the chain, addresses they were taken from (and where we can + * verify that chain did not change) and buffer_heads hosting these + * numbers. + * + * Function stops when it stumbles upon zero pointer (absent block) + * (pointer to last triple returned, *@err == 0) + * or when it gets an IO error reading an indirect block + * (ditto, *@err == -EIO) + * or when it reads all @depth-1 indirect blocks successfully and finds + * the whole chain, all way to the data (returns %NULL, *err == 0). 
+ * + * Need to be called with + * down_read(&EXT4_I(inode)->i_data_sem) + */ +static Indirect *ext4_get_branch(struct inode *inode, int depth, + ext4_lblk_t *offsets, + Indirect chain[4], int *err) +{ + struct super_block *sb = inode->i_sb; + Indirect *p = chain; + struct buffer_head *bh; + + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); + if (!p->key) + goto no_block; + while (--depth) { + bh = sb_getblk(sb, le32_to_cpu(p->key)); + if (unlikely(!bh)) + goto failure; + + if (!bh_uptodate_or_lock(bh)) { + if (bh_submit_read(bh) < 0) { + put_bh(bh); + goto failure; + } + /* validate block references */ + if (ext4_check_indirect_blockref(inode, bh)) { + put_bh(bh); + goto failure; + } + } + + add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); + /* Reader: end */ + if (!p->key) + goto no_block; + } + return NULL; + +failure: + *err = -EIO; +no_block: + return p; +} + +/** + * ext4_find_near - find a place for allocation with sufficient locality + * @inode: owner + * @ind: descriptor of indirect block. + * + * This function returns the preferred place for block allocation. + * It is used when heuristic for sequential allocation fails. + * Rules are: + * + if there is a block to the left of our position - allocate near it. + * + if pointer will live in indirect block - allocate near that block. + * + if pointer will live in inode - allocate in the same + * cylinder group. + * + * In the latter case we colour the starting block by the callers PID to + * prevent it from clashing with concurrent allocations for a different inode + * in the same block group. The PID is used here so that functionally related + * files will be close-by on-disk. + * + * Caller must make sure that @ind is valid and will stay that way. + */ +static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + __le32 *start = ind->bh ? 
(__le32 *) ind->bh->b_data : ei->i_data; + __le32 *p; + ext4_fsblk_t bg_start; + ext4_fsblk_t last_block; + ext4_grpblk_t colour; + ext4_group_t block_group; + int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); + + /* Try to find previous block */ + for (p = ind->p - 1; p >= start; p--) { + if (*p) + return le32_to_cpu(*p); + } + + /* No such thing, so let's try location of indirect block */ + if (ind->bh) + return ind->bh->b_blocknr; + + /* + * It is going to be referred to from the inode itself? OK, just put it + * into the same cylinder group then. + */ + block_group = ei->i_block_group; + if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { + block_group &= ~(flex_size-1); + if (S_ISREG(inode->i_mode)) + block_group++; + } + bg_start = ext4_group_first_block_no(inode->i_sb, block_group); + last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + + /* + * If we are doing delayed allocation, we don't need take + * colour into account. + */ + if (test_opt(inode->i_sb, DELALLOC)) + return bg_start; + + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) + colour = (current->pid % 16) * + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + else + colour = (current->pid % 16) * ((last_block - bg_start) / 16); + return bg_start + colour; +} + +/** + * ext4_find_goal - find a preferred place for allocation. + * @inode: owner + * @block: block we want + * @partial: pointer to the last triple within a chain + * + * Normally this function find the preferred place for block allocation, + * returns it. + * Because this is only used for non-extent files, we limit the block nr + * to 32 bits. 
+ */ +static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, + Indirect *partial) +{ + ext4_fsblk_t goal; + + /* + * XXX need to get goal block from mballoc's data structures + */ + + goal = ext4_find_near(inode, partial); + goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; + return goal; +} + +/** + * ext4_blks_to_allocate - Look up the block map and count the number + * of direct blocks that need to be allocated for the given branch. + * + * @branch: chain of indirect blocks + * @k: number of blocks needed for indirect blocks + * @blks: number of data blocks to be mapped. + * @blocks_to_boundary: the offset in the indirect block + * + * return the total number of blocks to be allocated, including the + * direct and indirect blocks. + */ +static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, + int blocks_to_boundary) +{ + unsigned int count = 0; + + /* + * Simple case: the [t,d]Indirect block(s) have not been allocated yet, + * so it's clear blocks on that path have not been allocated either + */ + if (k > 0) { + /* right now we don't handle cross boundary allocation */ + if (blks < blocks_to_boundary + 1) + count += blks; + else + count += blocks_to_boundary + 1; + return count; + } + + count++; + while (count < blks && count <= blocks_to_boundary && + le32_to_cpu(*(branch[0].p + count)) == 0) { + count++; + } + return count; +} + +/** + * ext4_alloc_blocks: multiple allocate blocks needed for a branch + * @handle: handle for this transaction + * @inode: inode which needs allocated blocks + * @iblock: the logical block to start allocating at + * @goal: preferred physical block of allocation + * @indirect_blks: the number of blocks we need to allocate for indirect + * blocks + * @blks: number of desired blocks + * @new_blocks: on return it will store the new block numbers for + * the indirect blocks(if needed) and the first direct block, + * @err: on return it will store the error code + * + * This function will return the number of blocks allocated as + *
requested by the passed-in parameters. + */ +static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, ext4_fsblk_t goal, + int indirect_blks, int blks, + ext4_fsblk_t new_blocks[4], int *err) +{ + struct ext4_allocation_request ar; + int target, i; + unsigned long count = 0, blk_allocated = 0; + int index = 0; + ext4_fsblk_t current_block = 0; + int ret = 0; + + /* + * Here we try to allocate the requested multiple blocks at once, + * on a best-effort basis. + * To build a branch, we should allocate blocks for + * the indirect blocks(if not allocated yet), and at least + * the first direct block of this branch. That's the + * minimum number of blocks need to allocate(required) + */ + /* first we try to allocate the indirect blocks */ + target = indirect_blks; + while (target > 0) { + count = target; + /* allocating blocks for indirect blocks and direct blocks */ + current_block = ext4_new_meta_blocks(handle, inode, goal, + 0, &count, err); + if (*err) + goto failed_out; + + if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + count %lu > %d!", + current_block, count, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } + + target -= count; + /* allocate blocks for indirect blocks */ + while (index < indirect_blks && count) { + new_blocks[index++] = current_block++; + count--; + } + if (count > 0) { + /* + * save the new block number + * for the first direct block + */ + new_blocks[index] = current_block; + printk(KERN_INFO "%s returned more blocks than " + "requested\n", __func__); + WARN_ON(1); + break; + } + } + + target = blks - count ; + blk_allocated = count; + if (!target) + goto allocated; + /* Now allocate data blocks */ + memset(&ar, 0, sizeof(ar)); + ar.inode = inode; + ar.goal = goal; + ar.len = target; + ar.logical = iblock; + if (S_ISREG(inode->i_mode)) + /* enable in-core preallocation only for regular files */ + ar.flags = EXT4_MB_HINT_DATA; 
+ + current_block = ext4_mb_new_blocks(handle, &ar, err); + if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + ar.len %d > %d!", + current_block, ar.len, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } + + if (*err && (target == blks)) { + /* + * if the allocation failed and we didn't allocate + * any blocks before + */ + goto failed_out; + } + if (!*err) { + if (target == blks) { + /* + * save the new block number + * for the first direct block + */ + new_blocks[index] = current_block; + } + blk_allocated += ar.len; + } +allocated: + /* total number of blocks allocated for direct blocks */ + ret = blk_allocated; + *err = 0; + return ret; +failed_out: + for (i = 0; i < index; i++) + ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); + return ret; +} + +/** + * ext4_alloc_branch - allocate and set up a chain of blocks. + * @handle: handle for this transaction + * @inode: owner + * @indirect_blks: number of allocated indirect blocks + * @blks: number of allocated direct blocks + * @goal: preferred place for allocation + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. + * + * This function allocates blocks, zeroes out all but the last one, + * links them into chain and (if we are synchronous) writes them to disk. + * In other words, it prepares a branch that can be spliced onto the + * inode. It stores the information about that chain in the branch[], in + * the same format as ext4_get_branch() would do. We are calling it after + * we had read the existing part of chain and partial points to the last + * triple of that (one with zero ->key). 
Upon the exit we have the same + * picture as after the successful ext4_get_block(), except that in one + * place chain is disconnected - *branch->p is still zero (we did not + * set the last link), but branch->key contains the number that should + * be placed into *branch->p to fill that gap. + * + * If allocation fails we free all blocks we've allocated (and forget + * their buffer_heads) and return the error value from the failed + * ext4_alloc_blocks() (normally -ENOSPC). Otherwise we set the chain + * as described above and return 0. + */ +static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, int indirect_blks, + int *blks, ext4_fsblk_t goal, + ext4_lblk_t *offsets, Indirect *branch) +{ + int blocksize = inode->i_sb->s_blocksize; + int i, n = 0; + int err = 0; + struct buffer_head *bh; + int num; + ext4_fsblk_t new_blocks[4]; + ext4_fsblk_t current_block; + + num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, + *blks, new_blocks, &err); + if (err) + return err; + + branch[0].key = cpu_to_le32(new_blocks[0]); + /* + * metadata blocks and data blocks are allocated. + */ + for (n = 1; n <= indirect_blks; n++) { + /* + * Get buffer_head for parent block, zero it out + * and set the pointer to new one, then send + * parent to disk.
+ */ + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); + if (unlikely(!bh)) { + err = -EIO; + goto failed; + } + + branch[n].bh = bh; + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + err = ext4_journal_get_create_access(handle, bh); + if (err) { + /* Don't brelse(bh) here; it's done in + * ext4_journal_forget() below */ + unlock_buffer(bh); + goto failed; + } + + memset(bh->b_data, 0, blocksize); + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); + *branch[n].p = branch[n].key; + if (n == indirect_blks) { + current_block = new_blocks[n]; + /* + * End of chain, update the last new metablock of + * the chain to point to the new allocated + * data blocks numbers + */ + for (i = 1; i < num; i++) + *(branch[n].p + i) = cpu_to_le32(++current_block); + } + BUFFER_TRACE(bh, "marking uptodate"); + set_buffer_uptodate(bh); + unlock_buffer(bh); + + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); + if (err) + goto failed; + } + *blks = num; + return err; +failed: + /* Allocation failed, free what we already allocated */ + ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); + for (i = 1; i <= n ; i++) { + /* + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. + */ + ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, + EXT4_FREE_BLOCKS_FORGET); + } + for (i = n+1; i < indirect_blks; i++) + ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); + + ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); + + return err; +} + +/** + * ext4_splice_branch - splice the allocated branch onto inode. 
+ * @handle: handle for this transaction + * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext4_alloc_branch) + * @where: location of missing link + * @num: number of indirect blocks we are adding + * @blks: number of direct blocks we are adding + * + * This function fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. + */ +static int ext4_splice_branch(handle_t *handle, struct inode *inode, + ext4_lblk_t block, Indirect *where, int num, + int blks) +{ + int i; + int err = 0; + ext4_fsblk_t current_block; + + /* + * If we're splicing into a [td]indirect block (as opposed to the + * inode) then we need to get write access to the [td]indirect block + * before the splice. + */ + if (where->bh) { + BUFFER_TRACE(where->bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, where->bh); + if (err) + goto err_out; + } + /* That's it */ + + *where->p = where->key; + + /* + * Update the host buffer_head or inode to point to the additional + * just-allocated direct blocks + */ + if (num == 0 && blks > 1) { + current_block = le32_to_cpu(where->key) + 1; + for (i = 1; i < blks; i++) + *(where->p + i) = cpu_to_le32(current_block++); + } + + /* We are done with atomic stuff, now do the rest of housekeeping */ + /* had we spliced it onto indirect block? */ + if (where->bh) { + /* + * If we spliced it onto an indirect block, we haven't + * altered the inode. Note however that if it is being spliced + * onto an indirect block at the very end of the file (the + * file is growing) then we *will* alter the inode to reflect + * the new i_size. But that is not done here - it is done in + * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
+ */ + jbd_debug(5, "splicing indirect only\n"); + BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, where->bh); + if (err) + goto err_out; + } else { + /* + * OK, we spliced it into the inode itself on a direct block. + */ + ext4_mark_inode_dirty(handle, inode); + jbd_debug(5, "splicing direct\n"); + } + return err; + +err_out: + for (i = 1; i <= num; i++) { + /* + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. + */ + ext4_free_blocks(handle, inode, where[i].bh, 0, 1, + EXT4_FREE_BLOCKS_FORGET); + } + ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), + blks, 0); + + return err; +} + +/* + * The ext4_ind_map_blocks() function handles non-extents inodes + * (i.e., using the traditional indirect/double-indirect i_blocks + * scheme) for ext4_map_blocks(). + * + * Allocation strategy is simple: if we have to allocate something, we will + * have to go the whole way to leaf. So let's do it before attaching anything + * to tree, set linkage between the newborn blocks, write them if sync is + * required, recheck the path, free and repeat if check fails, otherwise + * set the last missing link (that will protect us from any truncate-generated + * removals - all blocks on the path are immune now) and possibly force the + * write on the parent block. + * That has a nice additional property: no special recovery from the failed + * allocations is needed - we simply release blocks and do not touch anything + * reachable from inode. + * + * `handle' can be NULL if create == 0. + * + * return > 0, # of blocks mapped or allocated. + * return = 0, if plain lookup failed. + * return < 0, error case. 
+ * + * The ext4_ind_map_blocks() function should be called with + * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem + * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or + * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system + * blocks. + */ +int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + int flags) +{ + int err = -EIO; + ext4_lblk_t offsets[4]; + Indirect chain[4]; + Indirect *partial; + ext4_fsblk_t goal; + int indirect_blks; + int blocks_to_boundary = 0; + int depth; + int count = 0; + ext4_fsblk_t first_block = 0; + + trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); + J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); + J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); + depth = ext4_block_to_path(inode, map->m_lblk, offsets, + &blocks_to_boundary); + + if (depth == 0) + goto out; + + partial = ext4_get_branch(inode, depth, offsets, chain, &err); + + /* Simplest case - block found, no allocation needed */ + if (!partial) { + first_block = le32_to_cpu(chain[depth - 1].key); + count++; + /*map more blocks*/ + while (count < map->m_len && count <= blocks_to_boundary) { + ext4_fsblk_t blk; + + blk = le32_to_cpu(*(chain[depth-1].p + count)); + + if (blk == first_block + count) + count++; + else + break; + } + goto got_it; + } + + /* Next simple case - plain lookup or failed read of indirect block */ + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) + goto cleanup; + + /* + * Okay, we need to do block allocation. + */ + goal = ext4_find_goal(inode, map->m_lblk, partial); + + /* the number of blocks we need to allocate for [d,t]indirect blocks */ + indirect_blks = (chain + depth) - partial - 1; + + /* + * Next look up the indirect map to count the total number of + * direct blocks to allocate for this branch.
+ */ + count = ext4_blks_to_allocate(partial, indirect_blks, + map->m_len, blocks_to_boundary); + /* + * Block out ext4_truncate while we alter the tree + */ + err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, + &count, goal, + offsets + (partial - chain), partial); + + /* + * The ext4_splice_branch call will free and forget any buffers + * on the new chain if there is a failure, but that risks using + * up transaction credits, especially for bitmaps where the + * credits cannot be returned. Can we handle this somehow? We + * may need to return -EAGAIN upwards in the worst case. --sct + */ + if (!err) + err = ext4_splice_branch(handle, inode, map->m_lblk, + partial, indirect_blks, count); + if (err) + goto cleanup; + + map->m_flags |= EXT4_MAP_NEW; + + ext4_update_inode_fsync_trans(handle, inode, 1); +got_it: + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = le32_to_cpu(chain[depth-1].key); + map->m_len = count; + if (count > blocks_to_boundary) + map->m_flags |= EXT4_MAP_BOUNDARY; + err = count; + /* Clean up and exit */ + partial = chain + depth - 1; /* the whole chain */ +cleanup: + while (partial > chain) { + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + partial--; + } +out: + trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, + map->m_pblk, map->m_len, err); + return err; +} + +/* + * O_DIRECT for ext3 (or indirect map) based files + * + * If the O_DIRECT write will extend the file then add this inode to the + * orphan list. So recovery will truncate it back to the original size + * if the machine crashes during the write. + * + * If the O_DIRECT write is intantiating holes inside i_size and the machine + * crashes then stale disk data _may_ be exposed inside the file. But current + * VFS code falls back into buffered path in that case so we are safe. 
+ */ +ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct ext4_inode_info *ei = EXT4_I(inode); + handle_t *handle; + ssize_t ret; + int orphan = 0; + size_t count = iov_length(iov, nr_segs); + int retries = 0; + + if (rw == WRITE) { + loff_t final_size = offset + count; + + if (final_size > inode->i_size) { + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + ret = ext4_orphan_add(handle, inode); + if (ret) { + ext4_journal_stop(handle); + goto out; + } + orphan = 1; + ei->i_disksize = inode->i_size; + ext4_journal_stop(handle); + } + } + +retry: + if (rw == READ && ext4_should_dioread_nolock(inode)) + ret = __blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block, NULL, NULL, 0); + else { + ret = blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block, NULL); + + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + ext4_truncate_failed_write(inode); + } + } + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + + if (orphan) { + int err; + + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + /* This is really bad luck. We've written the data + * but cannot extend i_size. Bail out and pretend + * the write failed... 
*/ + ret = PTR_ERR(handle); + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + + goto out; + } + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + if (ret > 0) { + loff_t end = offset + ret; + if (end > inode->i_size) { + ei->i_disksize = end; + i_size_write(inode, end); + /* + * We're going to return a positive `ret' + * here due to non-zero-length I/O, so there's + * no way of reporting error returns from + * ext4_mark_inode_dirty() to userspace. So + * ignore it. + */ + ext4_mark_inode_dirty(handle, inode); + } + } + err = ext4_journal_stop(handle); + if (ret == 0) + ret = err; + } +out: + return ret; +} + +/* + * Calculate the number of metadata blocks need to reserve + * to allocate a new block at @lblocks for non extent file based file + */ +int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); + int blk_bits; + + if (lblock < EXT4_NDIR_BLOCKS) + return 0; + + lblock -= EXT4_NDIR_BLOCKS; + + if (ei->i_da_metadata_calc_len && + (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { + ei->i_da_metadata_calc_len++; + return 0; + } + ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; + ei->i_da_metadata_calc_len = 1; + blk_bits = order_base_2(lblock); + return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; +} + +int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) +{ + int indirects; + + /* if nrblocks are contiguous */ + if (chunk) { + /* + * With N contiguous data blocks, we need at most + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, + * 2 dindirect blocks, and 1 tindirect block + */ + return DIV_ROUND_UP(nrblocks, + EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; + } + /* + * if nrblocks are not contiguous, worse case, each block touch + * a indirect block, and each indirect block touch a double indirect + * block, plus a triple indirect block + */ + indirects = 
nrblocks * 2 + 1; + return indirects; +} + +/* + * Truncate transactions can be complex and absolutely huge. So we need to + * be able to restart the transaction at a conventient checkpoint to make + * sure we don't overflow the journal. + * + * start_transaction gets us a new handle for a truncate transaction, + * and extend_transaction tries to extend the existing one a bit. If + * extend fails, we need to propagate the failure up and restart the + * transaction in the top-level truncate loop. --sct + */ +static handle_t *start_transaction(struct inode *inode) +{ + handle_t *result; + + result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); + if (!IS_ERR(result)) + return result; + + ext4_std_error(inode->i_sb, PTR_ERR(result)); + return result; +} + +/* + * Try to extend this transaction for the purposes of truncation. + * + * Returns 0 if we managed to create more room. If we can't create more + * room, and the transaction must be restarted we return 1. + */ +static int try_to_extend_transaction(handle_t *handle, struct inode *inode) +{ + if (!ext4_handle_valid(handle)) + return 0; + if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) + return 0; + if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) + return 0; + return 1; +} + +/* + * Probably it should be a library function... search for first non-zero word + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? + */ +static inline int all_zeroes(__le32 *p, __le32 *q) +{ + while (p < q) + if (*p++) + return 0; + return 1; +} + +/** + * ext4_find_shared - find the indirect blocks for partial truncation. + * @inode: inode in question + * @depth: depth of the affected branch + * @offsets: offsets of pointers in that branch (see ext4_block_to_path) + * @chain: place to store the pointers to partial indirect blocks + * @top: place to the (detached) top of branch + * + * This is a helper function used by ext4_truncate(). 
+ * + * When we do truncate() we may have to clean the ends of several + * indirect blocks but leave the blocks themselves alive. Block is + * partially truncated if some data below the new i_size is referred + * from it (and it is on the path to the first completely truncated + * data block, indeed). We have to free the top of that path along + * with everything to the right of the path. Since no allocation + * past the truncation point is possible until ext4_truncate() + * finishes, we may safely do the latter, but top of branch may + * require special attention - pageout below the truncation point + * might try to populate it. + * + * We atomically detach the top of branch from the tree, store the + * block number of its root in *@top, pointers to buffer_heads of + * partially truncated blocks - in @chain[].bh and pointers to + * their last elements that should not be removed - in + * @chain[].p. Return value is the pointer to last filled element + * of @chain. + * + * The work left to caller to do the actual freeing of subtrees: + * a) free the subtree starting from *@top + * b) free the subtrees whose roots are stored in + * (@chain[i].p+1 .. end of @chain[i].bh->b_data) + * c) free the subtrees growing from the inode past the @chain[0]. + * (no partially truncated stuff there). */ + +static Indirect *ext4_find_shared(struct inode *inode, int depth, + ext4_lblk_t offsets[4], Indirect chain[4], + __le32 *top) +{ + Indirect *partial, *p; + int k, err; + + *top = 0; + /* Make k index the deepest non-null offset + 1 */ + for (k = depth; k > 1 && !offsets[k-1]; k--) + ; + partial = ext4_get_branch(inode, k, offsets, chain, &err); + /* Writer: pointers */ + if (!partial) + partial = chain + k-1; + /* + * If the branch acquired continuation since we've looked at it - + * fine, it should all survive and (new) top doesn't belong to us. 
+ */ + if (!partial->key && *partial->p) + /* Writer: end */ + goto no_top; + for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) + ; + /* + * OK, we've found the last block that must survive. The rest of our + * branch should be detached before unlocking. However, if that rest + * of branch is all ours and does not grow immediately from the inode + * it's easier to cheat and just decrement partial->p. + */ + if (p == chain + k - 1 && p > chain) { + p->p--; + } else { + *top = *p->p; + /* Nope, don't do this in ext4. Must leave the tree intact */ +#if 0 + *p->p = 0; +#endif + } + /* Writer: end */ + + while (partial > p) { + brelse(partial->bh); + partial--; + } +no_top: + return partial; +} + +/* + * Zero a number of block pointers in either an inode or an indirect block. + * If we restart the transaction we must again get write access to the + * indirect block for further modification. + * + * We release `count' blocks on disk, but (last - first) may be greater + * than `count' because there can be holes in there. + * + * Return 0 on success, 1 on invalid block range + * and < 0 on fatal error. 
+ */ +static int ext4_clear_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, + ext4_fsblk_t block_to_free, + unsigned long count, __le32 *first, + __le32 *last) +{ + __le32 *p; + int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; + int err; + + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + flags |= EXT4_FREE_BLOCKS_METADATA; + + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, + count)) { + EXT4_ERROR_INODE(inode, "attempt to clear invalid " + "blocks %llu len %lu", + (unsigned long long) block_to_free, count); + return 1; + } + + if (try_to_extend_transaction(handle, inode)) { + if (bh) { + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); + if (unlikely(err)) + goto out_err; + } + err = ext4_mark_inode_dirty(handle, inode); + if (unlikely(err)) + goto out_err; + err = ext4_truncate_restart_trans(handle, inode, + ext4_blocks_for_truncate(inode)); + if (unlikely(err)) + goto out_err; + if (bh) { + BUFFER_TRACE(bh, "retaking write access"); + err = ext4_journal_get_write_access(handle, bh); + if (unlikely(err)) + goto out_err; + } + } + + for (p = first; p < last; p++) + *p = 0; + + ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); + return 0; +out_err: + ext4_std_error(inode->i_sb, err); + return err; +} + +/** + * ext4_free_data - free a list of data blocks + * @handle: handle for this transaction + * @inode: inode we are dealing with + * @this_bh: indirect buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: points immediately past the end of array + * + * We are freeing all blocks referred from that array (numbers are stored as + * little-endian 32-bit) and updating @inode->i_blocks appropriately. + * + * We accumulate contiguous runs of blocks to free. 
Conveniently, if these + * blocks are contiguous then releasing them at one time will only affect one + * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't + * actually use a lot of journal space. + * + * @this_bh will be %NULL if @first and @last point into the inode's direct + * block pointers. + */ +static void ext4_free_data(handle_t *handle, struct inode *inode, + struct buffer_head *this_bh, + __le32 *first, __le32 *last) +{ + ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ + unsigned long count = 0; /* Number of blocks in the run */ + __le32 *block_to_free_p = NULL; /* Pointer into inode/ind + corresponding to + block_to_free */ + ext4_fsblk_t nr; /* Current block # */ + __le32 *p; /* Pointer into inode/ind + for current block */ + int err = 0; + + if (this_bh) { /* For indirect block */ + BUFFER_TRACE(this_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, this_bh); + /* Important: if we can't update the indirect pointers + * to the blocks, we can't free them. */ + if (err) + return; + } + + for (p = first; p < last; p++) { + nr = le32_to_cpu(*p); + if (nr) { + /* accumulate blocks to free if they're contiguous */ + if (count == 0) { + block_to_free = nr; + block_to_free_p = p; + count = 1; + } else if (nr == block_to_free + count) { + count++; + } else { + err = ext4_clear_blocks(handle, inode, this_bh, + block_to_free, count, + block_to_free_p, p); + if (err) + break; + block_to_free = nr; + block_to_free_p = p; + count = 1; + } + } + } + + if (!err && count > 0) + err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, + count, block_to_free_p, p); + if (err < 0) + /* fatal error */ + return; + + if (this_bh) { + BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); + + /* + * The buffer head should have an attached journal head at this + * point. However, if the data is corrupted and an indirect + * block pointed to itself, it would have been detached when + * the block was cleared. 
Check for this instead of OOPSing. + */ + if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) + ext4_handle_dirty_metadata(handle, inode, this_bh); + else + EXT4_ERROR_INODE(inode, + "circular indirect block detected at " + "block %llu", + (unsigned long long) this_bh->b_blocknr); + } +} + +/** + * ext4_free_branches - free an array of branches + * @handle: JBD handle for this transaction + * @inode: inode we are dealing with + * @parent_bh: the buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: pointer immediately past the end of array + * @depth: depth of the branches to free + * + * We are freeing all blocks referred from these branches (numbers are + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +static void ext4_free_branches(handle_t *handle, struct inode *inode, + struct buffer_head *parent_bh, + __le32 *first, __le32 *last, int depth) +{ + ext4_fsblk_t nr; + __le32 *p; + + if (ext4_handle_is_aborted(handle)) + return; + + if (depth--) { + struct buffer_head *bh; + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); + p = last; + while (--p >= first) { + nr = le32_to_cpu(*p); + if (!nr) + continue; /* A hole */ + + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), + nr, 1)) { + EXT4_ERROR_INODE(inode, + "invalid indirect mapped " + "block %lu (level %d)", + (unsigned long) nr, depth); + break; + } + + /* Go read the buffer for the next level down */ + bh = sb_bread(inode->i_sb, nr); + + /* + * A read failure? Report error and clear slot + * (should be rare). + */ + if (!bh) { + EXT4_ERROR_INODE_BLOCK(inode, nr, + "Read failure"); + continue; + } + + /* This zaps the entire block. Bottom up. */ + BUFFER_TRACE(bh, "free child branches"); + ext4_free_branches(handle, inode, bh, + (__le32 *) bh->b_data, + (__le32 *) bh->b_data + addr_per_block, + depth); + brelse(bh); + + /* + * Everything below this this pointer has been + * released. Now let this top-of-subtree go. 
+ * + * We want the freeing of this indirect block to be + * atomic in the journal with the updating of the + * bitmap block which owns it. So make some room in + * the journal. + * + * We zero the parent pointer *after* freeing its + * pointee in the bitmaps, so if extend_transaction() + * for some reason fails to put the bitmap changes and + * the release into the same transaction, recovery + * will merely complain about releasing a free block, + * rather than leaking blocks. + */ + if (ext4_handle_is_aborted(handle)) + return; + if (try_to_extend_transaction(handle, inode)) { + ext4_mark_inode_dirty(handle, inode); + ext4_truncate_restart_trans(handle, inode, + ext4_blocks_for_truncate(inode)); + } + + /* + * The forget flag here is critical because if + * we are journaling (and not doing data + * journaling), we have to make sure a revoke + * record is written to prevent the journal + * replay from overwriting the (former) + * indirect block if it gets reallocated as a + * data block. This must happen in the same + * transaction where the data blocks are + * actually freed. + */ + ext4_free_blocks(handle, inode, NULL, nr, 1, + EXT4_FREE_BLOCKS_METADATA| + EXT4_FREE_BLOCKS_FORGET); + + if (parent_bh) { + /* + * The block which we have just freed is + * pointed to by an indirect block: journal it + */ + BUFFER_TRACE(parent_bh, "get_write_access"); + if (!ext4_journal_get_write_access(handle, + parent_bh)){ + *p = 0; + BUFFER_TRACE(parent_bh, + "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, + inode, + parent_bh); + } + } + } + } else { + /* We have reached the bottom of the tree. 
*/ + BUFFER_TRACE(parent_bh, "free data blocks"); + ext4_free_data(handle, inode, parent_bh, first, last); + } +} + +void ext4_ind_truncate(struct inode *inode) +{ + handle_t *handle; + struct ext4_inode_info *ei = EXT4_I(inode); + __le32 *i_data = ei->i_data; + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + ext4_lblk_t offsets[4]; + Indirect chain[4]; + Indirect *partial; + __le32 nr = 0; + int n = 0; + ext4_lblk_t last_block, max_block; + unsigned blocksize = inode->i_sb->s_blocksize; + + handle = start_transaction(inode); + if (IS_ERR(handle)) + return; /* AKPM: return what? */ + + last_block = (inode->i_size + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + + if (inode->i_size & (blocksize - 1)) + if (ext4_block_truncate_page(handle, mapping, inode->i_size)) + goto out_stop; + + if (last_block != max_block) { + n = ext4_block_to_path(inode, last_block, offsets, NULL); + if (n == 0) + goto out_stop; /* error */ + } + + /* + * OK. This truncate is going to happen. We add the inode to the + * orphan list, so that if this truncate spans multiple transactions, + * and we crash, we will resume the truncate when the filesystem + * recovers. It also marks the inode dirty, to catch the new size. + * + * Implication: the file must always be in a sane, consistent + * truncatable state while each transaction commits. + */ + if (ext4_orphan_add(handle, inode)) + goto out_stop; + + /* + * From here we block out all ext4_get_block() callers who want to + * modify the block allocation tree. + */ + down_write(&ei->i_data_sem); + + ext4_discard_preallocations(inode); + + /* + * The orphan list entry will now protect us from any crash which + * occurs before the truncate completes, so it is now safe to propagate + * the new, shorter inode size (held for now in i_size) into the + * on-disk inode. 
We do this via i_disksize, which is the value which + * ext4 *really* writes onto the disk inode. + */ + ei->i_disksize = inode->i_size; + + if (last_block == max_block) { + /* + * It is unnecessary to free any data blocks if last_block is + * equal to the indirect block limit. + */ + goto out_unlock; + } else if (n == 1) { /* direct blocks */ + ext4_free_data(handle, inode, NULL, i_data+offsets[0], + i_data + EXT4_NDIR_BLOCKS); + goto do_indirects; + } + + partial = ext4_find_shared(inode, n, offsets, chain, &nr); + /* Kill the top of shared branch (not detached) */ + if (nr) { + if (partial == chain) { + /* Shared branch grows from the inode */ + ext4_free_branches(handle, inode, NULL, + &nr, &nr+1, (chain+n-1) - partial); + *partial->p = 0; + /* + * We mark the inode dirty prior to restart, + * and prior to stop. No need for it here. + */ + } else { + /* Shared branch grows from an indirect block */ + BUFFER_TRACE(partial->bh, "get_write_access"); + ext4_free_branches(handle, inode, partial->bh, + partial->p, + partial->p+1, (chain+n-1) - partial); + } + } + /* Clear the ends of indirect blocks on the shared branch */ + while (partial > chain) { + ext4_free_branches(handle, inode, partial->bh, partial->p + 1, + (__le32*)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + partial--; + } +do_indirects: + /* Kill the remaining (whole) subtrees */ + switch (offsets[0]) { + default: + nr = i_data[EXT4_IND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); + i_data[EXT4_IND_BLOCK] = 0; + } + case EXT4_IND_BLOCK: + nr = i_data[EXT4_DIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); + i_data[EXT4_DIND_BLOCK] = 0; + } + case EXT4_DIND_BLOCK: + nr = i_data[EXT4_TIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); + i_data[EXT4_TIND_BLOCK] = 0; + } + case EXT4_TIND_BLOCK: + ; + } + +out_unlock: + 
up_write(&ei->i_data_sem); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); + + /* + * In a multi-transaction truncate, we only make the final transaction + * synchronous + */ + if (IS_SYNC(inode)) + ext4_handle_sync(handle); +out_stop: + /* + * If this was a simple ftruncate(), and the file will remain alive + * then we need to clear up the orphan record which we created above. + * However, if this was a real unlink then we were called by + * ext4_delete_inode(), and we allow that function to clean up the + * orphan info for us. + */ + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + + ext4_journal_stop(handle); + trace_ext4_truncate_exit(inode); +} + diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9b82ac7b0f55..de50b16a8f67 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -12,10 +12,6 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * - * Goal-directed block allocation by Stephen Tweedie - * (sct@redhat.com), 1993, 1998 - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) * @@ -89,45 +85,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); } -/* - * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make - * sure we don't overflow the journal. - * - * start_transaction gets us a new handle for a truncate transaction, - * and extend_transaction tries to extend the existing one a bit. If - * extend fails, we need to propagate the failure up and restart the - * transaction in the top-level truncate loop. 
--sct - */ -static handle_t *start_transaction(struct inode *inode) -{ - handle_t *result; - - result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); - if (!IS_ERR(result)) - return result; - - ext4_std_error(inode->i_sb, PTR_ERR(result)); - return result; -} - -/* - * Try to extend this transaction for the purposes of truncation. - * - * Returns 0 if we managed to create more room. If we can't create more - * room, and the transaction must be restarted we return 1. - */ -static int try_to_extend_transaction(handle_t *handle, struct inode *inode) -{ - if (!ext4_handle_valid(handle)) - return 0; - if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) - return 0; - if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) - return 0; - return 1; -} - /* * Restart the transaction associated with *handle. This does a commit, * so before we call here everything must be consistently dirtied against @@ -251,760 +208,6 @@ void ext4_evict_inode(struct inode *inode) ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ } -typedef struct { - __le32 *p; - __le32 key; - struct buffer_head *bh; -} Indirect; - -static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) -{ - p->key = *(p->p = v); - p->bh = bh; -} - -/** - * ext4_block_to_path - parse the block number into array of offsets - * @inode: inode in question (we are only interested in its superblock) - * @i_block: block number to be parsed - * @offsets: array to store the offsets in - * @boundary: set this non-zero if the referred-to block is likely to be - * followed (on disk) by an indirect block. - * - * To store the locations of file's data ext4 uses a data structure common - * for UNIX filesystems - tree of pointers anchored in the inode, with - * data blocks at leaves and indirect blocks in intermediate nodes. 
- * This function translates the block number into path in that tree - - * return value is the path length and @offsets[n] is the offset of - * pointer to (n+1)th node in the nth one. If @block is out of range - * (negative or too large) warning is printed and zero returned. - * - * Note: function doesn't find node addresses, so no IO is needed. All - * we need to know is the capacity of indirect blocks (taken from the - * inode->i_sb). - */ - -/* - * Portability note: the last comparison (check that we fit into triple - * indirect block) is spelled differently, because otherwise on an - * architecture with 32-bit longs and 8Kb pages we might get into trouble - * if our filesystem had 8Kb blocks. We might use long long, but that would - * kill us on x86. Oh, well, at least the sign propagation does not matter - - * i_block would have to be negative in the very beginning, so we would not - * get there at all. - */ - -static int ext4_block_to_path(struct inode *inode, - ext4_lblk_t i_block, - ext4_lblk_t offsets[4], int *boundary) -{ - int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); - const long direct_blocks = EXT4_NDIR_BLOCKS, - indirect_blocks = ptrs, - double_blocks = (1 << (ptrs_bits * 2)); - int n = 0; - int final = 0; - - if (i_block < direct_blocks) { - offsets[n++] = i_block; - final = direct_blocks; - } else if ((i_block -= direct_blocks) < indirect_blocks) { - offsets[n++] = EXT4_IND_BLOCK; - offsets[n++] = i_block; - final = ptrs; - } else if ((i_block -= indirect_blocks) < double_blocks) { - offsets[n++] = EXT4_DIND_BLOCK; - offsets[n++] = i_block >> ptrs_bits; - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { - offsets[n++] = EXT4_TIND_BLOCK; - offsets[n++] = i_block >> (ptrs_bits * 2); - offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else { - 
ext4_warning(inode->i_sb, "block %lu > max in inode %lu", - i_block + direct_blocks + - indirect_blocks + double_blocks, inode->i_ino); - } - if (boundary) - *boundary = final - 1 - (i_block & (ptrs - 1)); - return n; -} - -/** - * ext4_get_branch - read the chain of indirect blocks leading to data - * @inode: inode in question - * @depth: depth of the chain (1 - direct pointer, etc.) - * @offsets: offsets of pointers in inode/indirect blocks - * @chain: place to store the result - * @err: here we store the error value - * - * Function fills the array of triples and returns %NULL - * if everything went OK or the pointer to the last filled triple - * (incomplete one) otherwise. Upon the return chain[i].key contains - * the number of (i+1)-th block in the chain (as it is stored in memory, - * i.e. little-endian 32-bit), chain[i].p contains the address of that - * number (it points into struct inode for i==0 and into the bh->b_data - * for i>0) and chain[i].bh points to the buffer_head of i-th indirect - * block for i>0 and NULL for i==0. In other words, it holds the block - * numbers of the chain, addresses they were taken from (and where we can - * verify that chain did not change) and buffer_heads hosting these - * numbers. - * - * Function stops when it stumbles upon zero pointer (absent block) - * (pointer to last triple returned, *@err == 0) - * or when it gets an IO error reading an indirect block - * (ditto, *@err == -EIO) - * or when it reads all @depth-1 indirect blocks successfully and finds - * the whole chain, all way to the data (returns %NULL, *err == 0). 
- * - * Need to be called with - * down_read(&EXT4_I(inode)->i_data_sem) - */ -static Indirect *ext4_get_branch(struct inode *inode, int depth, - ext4_lblk_t *offsets, - Indirect chain[4], int *err) -{ - struct super_block *sb = inode->i_sb; - Indirect *p = chain; - struct buffer_head *bh; - - *err = 0; - /* i_data is not going away, no lock needed */ - add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { - bh = sb_getblk(sb, le32_to_cpu(p->key)); - if (unlikely(!bh)) - goto failure; - - if (!bh_uptodate_or_lock(bh)) { - if (bh_submit_read(bh) < 0) { - put_bh(bh); - goto failure; - } - /* validate block references */ - if (ext4_check_indirect_blockref(inode, bh)) { - put_bh(bh); - goto failure; - } - } - - add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); - /* Reader: end */ - if (!p->key) - goto no_block; - } - return NULL; - -failure: - *err = -EIO; -no_block: - return p; -} - -/** - * ext4_find_near - find a place for allocation with sufficient locality - * @inode: owner - * @ind: descriptor of indirect block. - * - * This function returns the preferred place for block allocation. - * It is used when heuristic for sequential allocation fails. - * Rules are: - * + if there is a block to the left of our position - allocate near it. - * + if pointer will live in indirect block - allocate near that block. - * + if pointer will live in inode - allocate in the same - * cylinder group. - * - * In the latter case we colour the starting block by the callers PID to - * prevent it from clashing with concurrent allocations for a different inode - * in the same block group. The PID is used here so that functionally related - * files will be close-by on-disk. - * - * Caller must make sure that @ind is valid and will stay that way. - */ -static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *start = ind->bh ? 
(__le32 *) ind->bh->b_data : ei->i_data; - __le32 *p; - ext4_fsblk_t bg_start; - ext4_fsblk_t last_block; - ext4_grpblk_t colour; - ext4_group_t block_group; - int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); - - /* Try to find previous block */ - for (p = ind->p - 1; p >= start; p--) { - if (*p) - return le32_to_cpu(*p); - } - - /* No such thing, so let's try location of indirect block */ - if (ind->bh) - return ind->bh->b_blocknr; - - /* - * It is going to be referred to from the inode itself? OK, just put it - * into the same cylinder group then. - */ - block_group = ei->i_block_group; - if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { - block_group &= ~(flex_size-1); - if (S_ISREG(inode->i_mode)) - block_group++; - } - bg_start = ext4_group_first_block_no(inode->i_sb, block_group); - last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; - - /* - * If we are doing delayed allocation, we don't need take - * colour into account. - */ - if (test_opt(inode->i_sb, DELALLOC)) - return bg_start; - - if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) - colour = (current->pid % 16) * - (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); - else - colour = (current->pid % 16) * ((last_block - bg_start) / 16); - return bg_start + colour; -} - -/** - * ext4_find_goal - find a preferred place for allocation. - * @inode: owner - * @block: block we want - * @partial: pointer to the last triple within a chain - * - * Normally this function find the preferred place for block allocation, - * returns it. - * Because this is only used for non-extent files, we limit the block nr - * to 32 bits. 
- */ -static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, - Indirect *partial) -{ - ext4_fsblk_t goal; - - /* - * XXX need to get goal block from mballoc's data structures - */ - - goal = ext4_find_near(inode, partial); - goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; - return goal; -} - -/** - * ext4_blks_to_allocate - Look up the block map and count the number - * of direct blocks need to be allocated for the given branch. - * - * @branch: chain of indirect blocks - * @k: number of blocks need for indirect blocks - * @blks: number of data blocks to be mapped. - * @blocks_to_boundary: the offset in the indirect block - * - * return the total number of blocks to be allocate, including the - * direct and indirect blocks. - */ -static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, - int blocks_to_boundary) -{ - unsigned int count = 0; - - /* - * Simple case, [t,d]Indirect block(s) has not allocated yet - * then it's clear blocks on that path have not allocated - */ - if (k > 0) { - /* right now we don't handle cross boundary allocation */ - if (blks < blocks_to_boundary + 1) - count += blks; - else - count += blocks_to_boundary + 1; - return count; - } - - count++; - while (count < blks && count <= blocks_to_boundary && - le32_to_cpu(*(branch[0].p + count)) == 0) { - count++; - } - return count; -} - -/** - * ext4_alloc_blocks: multiple allocate blocks needed for a branch - * @handle: handle for this transaction - * @inode: inode which needs allocated blocks - * @iblock: the logical block to start allocated at - * @goal: preferred physical block of allocation - * @indirect_blks: the number of blocks need to allocate for indirect - * blocks - * @blks: number of desired blocks - * @new_blocks: on return it will store the new block numbers for - * the indirect blocks(if needed) and the first direct block, - * @err: on return it will store the error code - * - * This function will return the number of blocks allocated as - * 
requested by the passed-in parameters. - */ -static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, ext4_fsblk_t goal, - int indirect_blks, int blks, - ext4_fsblk_t new_blocks[4], int *err) -{ - struct ext4_allocation_request ar; - int target, i; - unsigned long count = 0, blk_allocated = 0; - int index = 0; - ext4_fsblk_t current_block = 0; - int ret = 0; - - /* - * Here we try to allocate the requested multiple blocks at once, - * on a best-effort basis. - * To build a branch, we should allocate blocks for - * the indirect blocks(if not allocated yet), and at least - * the first direct block of this branch. That's the - * minimum number of blocks need to allocate(required) - */ - /* first we try to allocate the indirect blocks */ - target = indirect_blks; - while (target > 0) { - count = target; - /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext4_new_meta_blocks(handle, inode, goal, - 0, &count, err); - if (*err) - goto failed_out; - - if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { - EXT4_ERROR_INODE(inode, - "current_block %llu + count %lu > %d!", - current_block, count, - EXT4_MAX_BLOCK_FILE_PHYS); - *err = -EIO; - goto failed_out; - } - - target -= count; - /* allocate blocks for indirect blocks */ - while (index < indirect_blks && count) { - new_blocks[index++] = current_block++; - count--; - } - if (count > 0) { - /* - * save the new block number - * for the first direct block - */ - new_blocks[index] = current_block; - printk(KERN_INFO "%s returned more blocks than " - "requested\n", __func__); - WARN_ON(1); - break; - } - } - - target = blks - count ; - blk_allocated = count; - if (!target) - goto allocated; - /* Now allocate data blocks */ - memset(&ar, 0, sizeof(ar)); - ar.inode = inode; - ar.goal = goal; - ar.len = target; - ar.logical = iblock; - if (S_ISREG(inode->i_mode)) - /* enable in-core preallocation only for regular files */ - ar.flags = EXT4_MB_HINT_DATA; 
- - current_block = ext4_mb_new_blocks(handle, &ar, err); - if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { - EXT4_ERROR_INODE(inode, - "current_block %llu + ar.len %d > %d!", - current_block, ar.len, - EXT4_MAX_BLOCK_FILE_PHYS); - *err = -EIO; - goto failed_out; - } - - if (*err && (target == blks)) { - /* - * if the allocation failed and we didn't allocate - * any blocks before - */ - goto failed_out; - } - if (!*err) { - if (target == blks) { - /* - * save the new block number - * for the first direct block - */ - new_blocks[index] = current_block; - } - blk_allocated += ar.len; - } -allocated: - /* total number of blocks allocated for direct blocks */ - ret = blk_allocated; - *err = 0; - return ret; -failed_out: - for (i = 0; i < index; i++) - ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); - return ret; -} - -/** - * ext4_alloc_branch - allocate and set up a chain of blocks. - * @handle: handle for this transaction - * @inode: owner - * @indirect_blks: number of allocated indirect blocks - * @blks: number of allocated direct blocks - * @goal: preferred place for allocation - * @offsets: offsets (in the blocks) to store the pointers to next. - * @branch: place to store the chain in. - * - * This function allocates blocks, zeroes out all but the last one, - * links them into chain and (if we are synchronous) writes them to disk. - * In other words, it prepares a branch that can be spliced onto the - * inode. It stores the information about that chain in the branch[], in - * the same format as ext4_get_branch() would do. We are calling it after - * we had read the existing part of chain and partial points to the last - * triple of that (one with zero ->key). 
Upon the exit we have the same - * picture as after the successful ext4_get_block(), except that in one - * place chain is disconnected - *branch->p is still zero (we did not - * set the last link), but branch->key contains the number that should - * be placed into *branch->p to fill that gap. - * - * If allocation fails we free all blocks we've allocated (and forget - * their buffer_heads) and return the error value the from failed - * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain - * as described above and return 0. - */ -static int ext4_alloc_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, int indirect_blks, - int *blks, ext4_fsblk_t goal, - ext4_lblk_t *offsets, Indirect *branch) -{ - int blocksize = inode->i_sb->s_blocksize; - int i, n = 0; - int err = 0; - struct buffer_head *bh; - int num; - ext4_fsblk_t new_blocks[4]; - ext4_fsblk_t current_block; - - num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, - *blks, new_blocks, &err); - if (err) - return err; - - branch[0].key = cpu_to_le32(new_blocks[0]); - /* - * metadata blocks and data blocks are allocated. - */ - for (n = 1; n <= indirect_blks; n++) { - /* - * Get buffer_head for parent block, zero it out - * and set the pointer to new one, then send - * parent to disk. 
- */ - bh = sb_getblk(inode->i_sb, new_blocks[n-1]); - if (unlikely(!bh)) { - err = -EIO; - goto failed; - } - - branch[n].bh = bh; - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - err = ext4_journal_get_create_access(handle, bh); - if (err) { - /* Don't brelse(bh) here; it's done in - * ext4_journal_forget() below */ - unlock_buffer(bh); - goto failed; - } - - memset(bh->b_data, 0, blocksize); - branch[n].p = (__le32 *) bh->b_data + offsets[n]; - branch[n].key = cpu_to_le32(new_blocks[n]); - *branch[n].p = branch[n].key; - if (n == indirect_blks) { - current_block = new_blocks[n]; - /* - * End of chain, update the last new metablock of - * the chain to point to the new allocated - * data blocks numbers - */ - for (i = 1; i < num; i++) - *(branch[n].p + i) = cpu_to_le32(++current_block); - } - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); - if (err) - goto failed; - } - *blks = num; - return err; -failed: - /* Allocation failed, free what we already allocated */ - ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); - for (i = 1; i <= n ; i++) { - /* - * branch[i].bh is newly allocated, so there is no - * need to revoke the block, which is why we don't - * need to set EXT4_FREE_BLOCKS_METADATA. - */ - ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, - EXT4_FREE_BLOCKS_FORGET); - } - for (i = n+1; i < indirect_blks; i++) - ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); - - ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); - - return err; -} - -/** - * ext4_splice_branch - splice the allocated branch onto inode. 
- * @handle: handle for this transaction - * @inode: owner - * @block: (logical) number of block we are adding - * @chain: chain of indirect blocks (with a missing link - see - * ext4_alloc_branch) - * @where: location of missing link - * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding - * - * This function fills the missing link and does all housekeeping needed in - * inode (->i_blocks, etc.). In case of success we end up with the full - * chain to new block and return 0. - */ -static int ext4_splice_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t block, Indirect *where, int num, - int blks) -{ - int i; - int err = 0; - ext4_fsblk_t current_block; - - /* - * If we're splicing into a [td]indirect block (as opposed to the - * inode) then we need to get write access to the [td]indirect block - * before the splice. - */ - if (where->bh) { - BUFFER_TRACE(where->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, where->bh); - if (err) - goto err_out; - } - /* That's it */ - - *where->p = where->key; - - /* - * Update the host buffer_head or inode to point to more just allocated - * direct blocks blocks - */ - if (num == 0 && blks > 1) { - current_block = le32_to_cpu(where->key) + 1; - for (i = 1; i < blks; i++) - *(where->p + i) = cpu_to_le32(current_block++); - } - - /* We are done with atomic stuff, now do the rest of housekeeping */ - /* had we spliced it onto indirect block? */ - if (where->bh) { - /* - * If we spliced it onto an indirect block, we haven't - * altered the inode. Note however that if it is being spliced - * onto an indirect block at the very end of the file (the - * file is growing) then we *will* alter the inode to reflect - * the new i_size. But that is not done here - it is done in - * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. 
- */ - jbd_debug(5, "splicing indirect only\n"); - BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, where->bh); - if (err) - goto err_out; - } else { - /* - * OK, we spliced it into the inode itself on a direct block. - */ - ext4_mark_inode_dirty(handle, inode); - jbd_debug(5, "splicing direct\n"); - } - return err; - -err_out: - for (i = 1; i <= num; i++) { - /* - * branch[i].bh is newly allocated, so there is no - * need to revoke the block, which is why we don't - * need to set EXT4_FREE_BLOCKS_METADATA. - */ - ext4_free_blocks(handle, inode, where[i].bh, 0, 1, - EXT4_FREE_BLOCKS_FORGET); - } - ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), - blks, 0); - - return err; -} - -/* - * The ext4_ind_map_blocks() function handles non-extents inodes - * (i.e., using the traditional indirect/double-indirect i_blocks - * scheme) for ext4_map_blocks(). - * - * Allocation strategy is simple: if we have to allocate something, we will - * have to go the whole way to leaf. So let's do it before attaching anything - * to tree, set linkage between the newborn blocks, write them if sync is - * required, recheck the path, free and repeat if check fails, otherwise - * set the last missing link (that will protect us from any truncate-generated - * removals - all blocks on the path are immune now) and possibly force the - * write on the parent block. - * That has a nice additional property: no special recovery from the failed - * allocations is needed - we simply release blocks and do not touch anything - * reachable from inode. - * - * `handle' can be NULL if create == 0. - * - * return > 0, # of blocks mapped or allocated. - * return = 0, if plain lookup failed. - * return < 0, error case. 
- * - * The ext4_ind_get_blocks() function should be called with - * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem - * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or - * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system - * blocks. - */ -static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, - struct ext4_map_blocks *map, - int flags) -{ - int err = -EIO; - ext4_lblk_t offsets[4]; - Indirect chain[4]; - Indirect *partial; - ext4_fsblk_t goal; - int indirect_blks; - int blocks_to_boundary = 0; - int depth; - int count = 0; - ext4_fsblk_t first_block = 0; - - trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); - J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); - J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); - depth = ext4_block_to_path(inode, map->m_lblk, offsets, - &blocks_to_boundary); - - if (depth == 0) - goto out; - - partial = ext4_get_branch(inode, depth, offsets, chain, &err); - - /* Simplest case - block found, no allocation needed */ - if (!partial) { - first_block = le32_to_cpu(chain[depth - 1].key); - count++; - /*map more blocks*/ - while (count < map->m_len && count <= blocks_to_boundary) { - ext4_fsblk_t blk; - - blk = le32_to_cpu(*(chain[depth-1].p + count)); - - if (blk == first_block + count) - count++; - else - break; - } - goto got_it; - } - - /* Next simple case - plain lookup or failed read of indirect block */ - if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) - goto cleanup; - - /* - * Okay, we need to do block allocation. - */ - goal = ext4_find_goal(inode, map->m_lblk, partial); - - /* the number of blocks need to allocate for [d,t]indirect blocks */ - indirect_blks = (chain + depth) - partial - 1; - - /* - * Next look up the indirect map to count the totoal number of - * direct blocks to allocate for this branch. 
- */ - count = ext4_blks_to_allocate(partial, indirect_blks, - map->m_len, blocks_to_boundary); - /* - * Block out ext4_truncate while we alter the tree - */ - err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, - &count, goal, - offsets + (partial - chain), partial); - - /* - * The ext4_splice_branch call will free and forget any buffers - * on the new chain if there is a failure, but that risks using - * up transaction credits, especially for bitmaps where the - * credits cannot be returned. Can we handle this somehow? We - * may need to return -EAGAIN upwards in the worst case. --sct - */ - if (!err) - err = ext4_splice_branch(handle, inode, map->m_lblk, - partial, indirect_blks, count); - if (err) - goto cleanup; - - map->m_flags |= EXT4_MAP_NEW; - - ext4_update_inode_fsync_trans(handle, inode, 1); -got_it: - map->m_flags |= EXT4_MAP_MAPPED; - map->m_pblk = le32_to_cpu(chain[depth-1].key); - map->m_len = count; - if (count > blocks_to_boundary) - map->m_flags |= EXT4_MAP_BOUNDARY; - err = count; - /* Clean up and exit */ - partial = chain + depth - 1; /* the whole chain */ -cleanup: - while (partial > chain) { - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - partial--; - } -out: - trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, - map->m_pblk, map->m_len, err); - return err; -} - #ifdef CONFIG_QUOTA qsize_t *ext4_get_reserved_space(struct inode *inode) { @@ -1012,32 +215,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) } #endif -/* - * Calculate the number of metadata blocks need to reserve - * to allocate a new block at @lblocks for non extent file based file - */ -static int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); - int blk_bits; - - if (lblock < EXT4_NDIR_BLOCKS) - return 0; - - lblock -= EXT4_NDIR_BLOCKS; - - if (ei->i_da_metadata_calc_len && - (lblock & 
dind_mask) == ei->i_da_metadata_calc_last_lblock) { - ei->i_da_metadata_calc_len++; - return 0; - } - ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; - ei->i_da_metadata_calc_len = 1; - blk_bits = order_base_2(lblock); - return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; -} - /* * Calculate the number of metadata blocks need to reserve * to allocate a block located at @lblock @@ -3379,114 +2556,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait) return try_to_free_buffers(page); } -/* - * O_DIRECT for ext3 (or indirect map) based files - * - * If the O_DIRECT write will extend the file then add this inode to the - * orphan list. So recovery will truncate it back to the original size - * if the machine crashes during the write. - * - * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. But current - * VFS code falls back into buffered path in that case so we are safe. 
- */ -static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct ext4_inode_info *ei = EXT4_I(inode); - handle_t *handle; - ssize_t ret; - int orphan = 0; - size_t count = iov_length(iov, nr_segs); - int retries = 0; - - if (rw == WRITE) { - loff_t final_size = offset + count; - - if (final_size > inode->i_size) { - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, 2); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - ret = ext4_orphan_add(handle, inode); - if (ret) { - ext4_journal_stop(handle); - goto out; - } - orphan = 1; - ei->i_disksize = inode->i_size; - ext4_journal_stop(handle); - } - } - -retry: - if (rw == READ && ext4_should_dioread_nolock(inode)) - ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext4_get_block, NULL, NULL, 0); - else { - ret = blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext4_get_block, NULL); - - if (unlikely((rw & WRITE) && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); - - if (end > isize) - ext4_truncate_failed_write(inode); - } - } - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - - if (orphan) { - int err; - - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, 2); - if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... 
*/ - ret = PTR_ERR(handle); - if (inode->i_nlink) - ext4_orphan_del(NULL, inode); - - goto out; - } - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - if (ret > 0) { - loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); - /* - * We're going to return a positive `ret' - * here due to non-zero-length I/O, so there's - * no way of reporting error returns from - * ext4_mark_inode_dirty() to userspace. So - * ignore it. - */ - ext4_mark_inode_dirty(handle, inode); - } - } - err = ext4_journal_stop(handle); - if (ret == 0) - ret = err; - } -out: - return ret; -} - /* * ext4_get_block used when preparing for a DIO write or buffer write. * We allocate an uinitialized extent if blocks haven't been allocated. @@ -3958,383 +3027,6 @@ int ext4_block_zero_page_range(handle_t *handle, return err; } -/* - * Probably it should be a library function... search for first non-zero word - * or memcmp with zero_page, whatever is better for particular architecture. - * Linus? - */ -static inline int all_zeroes(__le32 *p, __le32 *q) -{ - while (p < q) - if (*p++) - return 0; - return 1; -} - -/** - * ext4_find_shared - find the indirect blocks for partial truncation. - * @inode: inode in question - * @depth: depth of the affected branch - * @offsets: offsets of pointers in that branch (see ext4_block_to_path) - * @chain: place to store the pointers to partial indirect blocks - * @top: place to the (detached) top of branch - * - * This is a helper function used by ext4_truncate(). - * - * When we do truncate() we may have to clean the ends of several - * indirect blocks but leave the blocks themselves alive. Block is - * partially truncated if some data below the new i_size is referred - * from it (and it is on the path to the first completely truncated - * data block, indeed). We have to free the top of that path along - * with everything to the right of the path. 
Since no allocation - * past the truncation point is possible until ext4_truncate() - * finishes, we may safely do the latter, but top of branch may - * require special attention - pageout below the truncation point - * might try to populate it. - * - * We atomically detach the top of branch from the tree, store the - * block number of its root in *@top, pointers to buffer_heads of - * partially truncated blocks - in @chain[].bh and pointers to - * their last elements that should not be removed - in - * @chain[].p. Return value is the pointer to last filled element - * of @chain. - * - * The work left to caller to do the actual freeing of subtrees: - * a) free the subtree starting from *@top - * b) free the subtrees whose roots are stored in - * (@chain[i].p+1 .. end of @chain[i].bh->b_data) - * c) free the subtrees growing from the inode past the @chain[0]. - * (no partially truncated stuff there). */ - -static Indirect *ext4_find_shared(struct inode *inode, int depth, - ext4_lblk_t offsets[4], Indirect chain[4], - __le32 *top) -{ - Indirect *partial, *p; - int k, err; - - *top = 0; - /* Make k index the deepest non-null offset + 1 */ - for (k = depth; k > 1 && !offsets[k-1]; k--) - ; - partial = ext4_get_branch(inode, k, offsets, chain, &err); - /* Writer: pointers */ - if (!partial) - partial = chain + k-1; - /* - * If the branch acquired continuation since we've looked at it - - * fine, it should all survive and (new) top doesn't belong to us. - */ - if (!partial->key && *partial->p) - /* Writer: end */ - goto no_top; - for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) - ; - /* - * OK, we've found the last block that must survive. The rest of our - * branch should be detached before unlocking. However, if that rest - * of branch is all ours and does not grow immediately from the inode - * it's easier to cheat and just decrement partial->p. 
- */ - if (p == chain + k - 1 && p > chain) { - p->p--; - } else { - *top = *p->p; - /* Nope, don't do this in ext4. Must leave the tree intact */ -#if 0 - *p->p = 0; -#endif - } - /* Writer: end */ - - while (partial > p) { - brelse(partial->bh); - partial--; - } -no_top: - return partial; -} - -/* - * Zero a number of block pointers in either an inode or an indirect block. - * If we restart the transaction we must again get write access to the - * indirect block for further modification. - * - * We release `count' blocks on disk, but (last - first) may be greater - * than `count' because there can be holes in there. - * - * Return 0 on success, 1 on invalid block range - * and < 0 on fatal error. - */ -static int ext4_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, - ext4_fsblk_t block_to_free, - unsigned long count, __le32 *first, - __le32 *last) -{ - __le32 *p; - int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; - int err; - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA; - - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, - count)) { - EXT4_ERROR_INODE(inode, "attempt to clear invalid " - "blocks %llu len %lu", - (unsigned long long) block_to_free, count); - return 1; - } - - if (try_to_extend_transaction(handle, inode)) { - if (bh) { - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); - if (unlikely(err)) - goto out_err; - } - err = ext4_mark_inode_dirty(handle, inode); - if (unlikely(err)) - goto out_err; - err = ext4_truncate_restart_trans(handle, inode, - ext4_blocks_for_truncate(inode)); - if (unlikely(err)) - goto out_err; - if (bh) { - BUFFER_TRACE(bh, "retaking write access"); - err = ext4_journal_get_write_access(handle, bh); - if (unlikely(err)) - goto out_err; - } - } - - for (p = first; p < last; p++) - *p = 0; - - ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); - 
return 0; -out_err: - ext4_std_error(inode->i_sb, err); - return err; -} - -/** - * ext4_free_data - free a list of data blocks - * @handle: handle for this transaction - * @inode: inode we are dealing with - * @this_bh: indirect buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: points immediately past the end of array - * - * We are freeing all blocks referred from that array (numbers are stored as - * little-endian 32-bit) and updating @inode->i_blocks appropriately. - * - * We accumulate contiguous runs of blocks to free. Conveniently, if these - * blocks are contiguous then releasing them at one time will only affect one - * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't - * actually use a lot of journal space. - * - * @this_bh will be %NULL if @first and @last point into the inode's direct - * block pointers. - */ -static void ext4_free_data(handle_t *handle, struct inode *inode, - struct buffer_head *this_bh, - __le32 *first, __le32 *last) -{ - ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ - unsigned long count = 0; /* Number of blocks in the run */ - __le32 *block_to_free_p = NULL; /* Pointer into inode/ind - corresponding to - block_to_free */ - ext4_fsblk_t nr; /* Current block # */ - __le32 *p; /* Pointer into inode/ind - for current block */ - int err = 0; - - if (this_bh) { /* For indirect block */ - BUFFER_TRACE(this_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, this_bh); - /* Important: if we can't update the indirect pointers - * to the blocks, we can't free them. 
*/ - if (err) - return; - } - - for (p = first; p < last; p++) { - nr = le32_to_cpu(*p); - if (nr) { - /* accumulate blocks to free if they're contiguous */ - if (count == 0) { - block_to_free = nr; - block_to_free_p = p; - count = 1; - } else if (nr == block_to_free + count) { - count++; - } else { - err = ext4_clear_blocks(handle, inode, this_bh, - block_to_free, count, - block_to_free_p, p); - if (err) - break; - block_to_free = nr; - block_to_free_p = p; - count = 1; - } - } - } - - if (!err && count > 0) - err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, - count, block_to_free_p, p); - if (err < 0) - /* fatal error */ - return; - - if (this_bh) { - BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); - - /* - * The buffer head should have an attached journal head at this - * point. However, if the data is corrupted and an indirect - * block pointed to itself, it would have been detached when - * the block was cleared. Check for this instead of OOPSing. - */ - if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) - ext4_handle_dirty_metadata(handle, inode, this_bh); - else - EXT4_ERROR_INODE(inode, - "circular indirect block detected at " - "block %llu", - (unsigned long long) this_bh->b_blocknr); - } -} - -/** - * ext4_free_branches - free an array of branches - * @handle: JBD handle for this transaction - * @inode: inode we are dealing with - * @parent_bh: the buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: pointer immediately past the end of array - * @depth: depth of the branches to free - * - * We are freeing all blocks referred from these branches (numbers are - * stored as little-endian 32-bit) and updating @inode->i_blocks - * appropriately. 
- */ -static void ext4_free_branches(handle_t *handle, struct inode *inode, - struct buffer_head *parent_bh, - __le32 *first, __le32 *last, int depth) -{ - ext4_fsblk_t nr; - __le32 *p; - - if (ext4_handle_is_aborted(handle)) - return; - - if (depth--) { - struct buffer_head *bh; - int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - p = last; - while (--p >= first) { - nr = le32_to_cpu(*p); - if (!nr) - continue; /* A hole */ - - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), - nr, 1)) { - EXT4_ERROR_INODE(inode, - "invalid indirect mapped " - "block %lu (level %d)", - (unsigned long) nr, depth); - break; - } - - /* Go read the buffer for the next level down */ - bh = sb_bread(inode->i_sb, nr); - - /* - * A read failure? Report error and clear slot - * (should be rare). - */ - if (!bh) { - EXT4_ERROR_INODE_BLOCK(inode, nr, - "Read failure"); - continue; - } - - /* This zaps the entire block. Bottom up. */ - BUFFER_TRACE(bh, "free child branches"); - ext4_free_branches(handle, inode, bh, - (__le32 *) bh->b_data, - (__le32 *) bh->b_data + addr_per_block, - depth); - brelse(bh); - - /* - * Everything below this this pointer has been - * released. Now let this top-of-subtree go. - * - * We want the freeing of this indirect block to be - * atomic in the journal with the updating of the - * bitmap block which owns it. So make some room in - * the journal. - * - * We zero the parent pointer *after* freeing its - * pointee in the bitmaps, so if extend_transaction() - * for some reason fails to put the bitmap changes and - * the release into the same transaction, recovery - * will merely complain about releasing a free block, - * rather than leaking blocks. 
- */ - if (ext4_handle_is_aborted(handle)) - return; - if (try_to_extend_transaction(handle, inode)) { - ext4_mark_inode_dirty(handle, inode); - ext4_truncate_restart_trans(handle, inode, - ext4_blocks_for_truncate(inode)); - } - - /* - * The forget flag here is critical because if - * we are journaling (and not doing data - * journaling), we have to make sure a revoke - * record is written to prevent the journal - * replay from overwriting the (former) - * indirect block if it gets reallocated as a - * data block. This must happen in the same - * transaction where the data blocks are - * actually freed. - */ - ext4_free_blocks(handle, inode, NULL, nr, 1, - EXT4_FREE_BLOCKS_METADATA| - EXT4_FREE_BLOCKS_FORGET); - - if (parent_bh) { - /* - * The block which we have just freed is - * pointed to by an indirect block: journal it - */ - BUFFER_TRACE(parent_bh, "get_write_access"); - if (!ext4_journal_get_write_access(handle, - parent_bh)){ - *p = 0; - BUFFER_TRACE(parent_bh, - "call ext4_handle_dirty_metadata"); - ext4_handle_dirty_metadata(handle, - inode, - parent_bh); - } - } - } - } else { - /* We have reached the bottom of the tree. */ - BUFFER_TRACE(parent_bh, "free data blocks"); - ext4_free_data(handle, inode, parent_bh, first, last); - } -} - int ext4_can_truncate(struct inode *inode) { if (S_ISREG(inode->i_mode)) @@ -4419,161 +3111,6 @@ void ext4_truncate(struct inode *inode) trace_ext4_truncate_exit(inode); } -void ext4_ind_truncate(struct inode *inode) -{ - handle_t *handle; - struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *i_data = ei->i_data; - int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - struct address_space *mapping = inode->i_mapping; - ext4_lblk_t offsets[4]; - Indirect chain[4]; - Indirect *partial; - __le32 nr = 0; - int n = 0; - ext4_lblk_t last_block, max_block; - unsigned blocksize = inode->i_sb->s_blocksize; - - handle = start_transaction(inode); - if (IS_ERR(handle)) - return; /* AKPM: return what? 
*/ - - last_block = (inode->i_size + blocksize-1) - >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); - max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) - >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); - - if (inode->i_size & (blocksize - 1)) - if (ext4_block_truncate_page(handle, mapping, inode->i_size)) - goto out_stop; - - if (last_block != max_block) { - n = ext4_block_to_path(inode, last_block, offsets, NULL); - if (n == 0) - goto out_stop; /* error */ - } - - /* - * OK. This truncate is going to happen. We add the inode to the - * orphan list, so that if this truncate spans multiple transactions, - * and we crash, we will resume the truncate when the filesystem - * recovers. It also marks the inode dirty, to catch the new size. - * - * Implication: the file must always be in a sane, consistent - * truncatable state while each transaction commits. - */ - if (ext4_orphan_add(handle, inode)) - goto out_stop; - - /* - * From here we block out all ext4_get_block() callers who want to - * modify the block allocation tree. - */ - down_write(&ei->i_data_sem); - - ext4_discard_preallocations(inode); - - /* - * The orphan list entry will now protect us from any crash which - * occurs before the truncate completes, so it is now safe to propagate - * the new, shorter inode size (held for now in i_size) into the - * on-disk inode. We do this via i_disksize, which is the value which - * ext4 *really* writes onto the disk inode. - */ - ei->i_disksize = inode->i_size; - - if (last_block == max_block) { - /* - * It is unnecessary to free any data blocks if last_block is - * equal to the indirect block limit. 
- */ - goto out_unlock; - } else if (n == 1) { /* direct blocks */ - ext4_free_data(handle, inode, NULL, i_data+offsets[0], - i_data + EXT4_NDIR_BLOCKS); - goto do_indirects; - } - - partial = ext4_find_shared(inode, n, offsets, chain, &nr); - /* Kill the top of shared branch (not detached) */ - if (nr) { - if (partial == chain) { - /* Shared branch grows from the inode */ - ext4_free_branches(handle, inode, NULL, - &nr, &nr+1, (chain+n-1) - partial); - *partial->p = 0; - /* - * We mark the inode dirty prior to restart, - * and prior to stop. No need for it here. - */ - } else { - /* Shared branch grows from an indirect block */ - BUFFER_TRACE(partial->bh, "get_write_access"); - ext4_free_branches(handle, inode, partial->bh, - partial->p, - partial->p+1, (chain+n-1) - partial); - } - } - /* Clear the ends of indirect blocks on the shared branch */ - while (partial > chain) { - ext4_free_branches(handle, inode, partial->bh, partial->p + 1, - (__le32*)partial->bh->b_data+addr_per_block, - (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - partial--; - } -do_indirects: - /* Kill the remaining (whole) subtrees */ - switch (offsets[0]) { - default: - nr = i_data[EXT4_IND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); - i_data[EXT4_IND_BLOCK] = 0; - } - case EXT4_IND_BLOCK: - nr = i_data[EXT4_DIND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); - i_data[EXT4_DIND_BLOCK] = 0; - } - case EXT4_DIND_BLOCK: - nr = i_data[EXT4_TIND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); - i_data[EXT4_TIND_BLOCK] = 0; - } - case EXT4_TIND_BLOCK: - ; - } - -out_unlock: - up_write(&ei->i_data_sem); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); - ext4_mark_inode_dirty(handle, inode); - - /* - * In a multi-transaction truncate, we only make the final transaction - * synchronous - */ - if (IS_SYNC(inode)) - ext4_handle_sync(handle); -out_stop: 
- /* - * If this was a simple ftruncate(), and the file will remain alive - * then we need to clear up the orphan record which we created above. - * However, if this was a real unlink then we were called by - * ext4_delete_inode(), and we allow that function to clean up the - * orphan info for us. - */ - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - - ext4_journal_stop(handle); - trace_ext4_truncate_exit(inode); -} - /* * ext4_get_inode_loc returns with an extra refcount against the inode's * underlying buffer_head on success. If 'in_mem' is true, we have all @@ -5386,29 +3923,6 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } -static int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) -{ - int indirects; - - /* if nrblocks are contiguous */ - if (chunk) { - /* - * With N contiguous data blocks, we need at most - * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, - * 2 dindirect blocks, and 1 tindirect block - */ - return DIV_ROUND_UP(nrblocks, - EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; - } - /* - * if nrblocks are not contiguous, worse case, each block touch - * a indirect block, and each indirect block touch a double indirect - * block, plus a triple indirect block - */ - indirects = nrblocks * 2 + 1; - return indirects; -} - static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) From f86186b44b4164600cce03d0d93ad48ec21fa429 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 28 Jun 2011 10:01:31 -0400 Subject: [PATCH 027/151] ext4: refactor duplicated block placement code I found that ext4_ext_find_goal() and ext4_find_near() share the same code for returning a coloured start block based on i_block_group. We can refactor this into a common function so that they don't diverge in the future. Thanks to adilger for suggesting the new function name. 
Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/ext4.h | 1 + fs/ext4/extents.c | 37 +---------------------------------- fs/ext4/indirect.c | 28 +-------------------------- 4 files changed, 51 insertions(+), 63 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 264f6949511e..f8224adf496e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -620,3 +620,51 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) } +/** + * ext4_inode_to_goal_block - return a hint for block allocation + * @inode: inode for block allocation + * + * Return the ideal location to start allocating blocks for a + * newly created inode. + */ +ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + ext4_group_t block_group; + ext4_grpblk_t colour; + int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); + ext4_fsblk_t bg_start; + ext4_fsblk_t last_block; + + block_group = ei->i_block_group; + if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { + /* + * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME + * block groups per flexgroup, reserve the first block + * group for directories and special files. Regular + * files will start at the second block group. This + * tends to speed up directory access and improves + * fsck times. + */ + block_group &= ~(flex_size-1); + if (S_ISREG(inode->i_mode)) + block_group++; + } + bg_start = ext4_group_first_block_no(inode->i_sb, block_group); + last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + + /* + * If we are doing delayed allocation, we don't need take + * colour into account. 
+ */ + if (test_opt(inode->i_sb, DELALLOC)) + return bg_start; + + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) + colour = (current->pid % 16) * + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + else + colour = (current->pid % 16) * ((last_block - bg_start) / 16); + return bg_start + colour; +} + diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ddaf5043fb38..49d2cea47382 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1743,6 +1743,7 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb, struct ext4_group_desc *desc); #define ext4_free_blocks_after_init(sb, group, desc) \ ext4_init_block_bitmap(sb, NULL, group, desc) +ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); /* dir.c */ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index eb63c7b8dfd2..f331e5010f68 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -114,12 +114,6 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) { - struct ext4_inode_info *ei = EXT4_I(inode); - ext4_fsblk_t bg_start; - ext4_fsblk_t last_block; - ext4_grpblk_t colour; - ext4_group_t block_group; - int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); int depth; if (path) { @@ -161,36 +155,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, } /* OK. use inode's group */ - block_group = ei->i_block_group; - if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { - /* - * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME - * block groups per flexgroup, reserve the first block - * group for directories and special files. Regular - * files will start at the second block group. This - * tends to speed up directory access and improves - * fsck times. 
- */ - block_group &= ~(flex_size-1); - if (S_ISREG(inode->i_mode)) - block_group++; - } - bg_start = ext4_group_first_block_no(inode->i_sb, block_group); - last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; - - /* - * If we are doing delayed allocation, we don't need take - * colour into account. - */ - if (test_opt(inode->i_sb, DELALLOC)) - return bg_start; - - if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) - colour = (current->pid % 16) * - (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); - else - colour = (current->pid % 16) * ((last_block - bg_start) / 16); - return bg_start + colour + block; + return ext4_inode_to_goal_block(inode); } /* diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index c3e85a86e821..6c271115dbb6 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -207,11 +207,6 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) struct ext4_inode_info *ei = EXT4_I(inode); __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; __le32 *p; - ext4_fsblk_t bg_start; - ext4_fsblk_t last_block; - ext4_grpblk_t colour; - ext4_group_t block_group; - int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); /* Try to find previous block */ for (p = ind->p - 1; p >= start; p--) { @@ -227,28 +222,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) * It is going to be referred to from the inode itself? OK, just put it * into the same cylinder group then. */ - block_group = ei->i_block_group; - if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { - block_group &= ~(flex_size-1); - if (S_ISREG(inode->i_mode)) - block_group++; - } - bg_start = ext4_group_first_block_no(inode->i_sb, block_group); - last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; - - /* - * If we are doing delayed allocation, we don't need take - * colour into account. 
- */ - if (test_opt(inode->i_sb, DELALLOC)) - return bg_start; - - if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) - colour = (current->pid % 16) * - (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); - else - colour = (current->pid % 16) * ((last_block - bg_start) / 16); - return bg_start + colour; + return ext4_inode_to_goal_block(inode); } /** From 9331b6261058eb85ae7c57ab8ac279e7fdaa9f04 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 28 Jun 2011 10:19:05 -0400 Subject: [PATCH 028/151] ext4: quiet 'unused variables' compile warnings Unused variables were deleted. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 -- fs/ext4/mballoc.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f331e5010f68..31ae5fbe89e5 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3073,12 +3073,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct ext4_ext_path *path) { struct ext4_extent *ex; - struct ext4_extent_header *eh; int depth; int err = 0; depth = ext_depth(inode); - eh = path[depth].p_hdr; ex = path[depth].p_ext; ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6ed859d56850..389386b41c98 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4666,12 +4666,10 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, struct ext4_buddy e4b; int err = 0, ret, blk_free_count; ext4_grpblk_t blocks_freed; - struct ext4_group_info *grp; ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); ext4_get_group_no_and_offset(sb, block, &block_group, &bit); - grp = ext4_get_group_info(sb, block_group); /* * Check to see if we are freeing blocks across a group * boundary. 
From 275d3ba6b40d0f098693b9089c6fee9bd4e55d74 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 29 Jun 2011 21:44:45 -0400 Subject: [PATCH 029/151] ext4: remove loop around bio_alloc() These days, bio_alloc() is guaranteed to never fail (as long as nvecs is less than BIO_MAX_PAGES), so we don't need the loop around the struct bio allocation. Signed-off-by: "Theodore Ts'o" --- fs/ext4/page-io.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7bb8f76d470a..430c401d0895 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -285,11 +285,7 @@ static int io_submit_init(struct ext4_io_submit *io, io_end = ext4_init_io_end(inode, GFP_NOFS); if (!io_end) return -ENOMEM; - do { - bio = bio_alloc(GFP_NOIO, nvecs); - nvecs >>= 1; - } while (bio == NULL); - + bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_bdev = bh->b_bdev; bio->bi_private = io->io_end = io_end; From 5de705194e9883a39f993e2ff96028d5aab99b37 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 19 Jun 2011 13:33:16 +0100 Subject: [PATCH 030/151] regulator: Add basic per consumer debugfs Report the requested load and voltage for each consumer in debugfs when it is enabled. 
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index cc3dfd66f395..f59821f10fdc 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -81,6 +81,9 @@ struct regulator { char *supply_name; struct device_attribute dev_attr; struct regulator_dev *rdev; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs; +#endif }; static int _regulator_is_enabled(struct regulator_dev *rdev); @@ -1093,7 +1096,28 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, dev->kobj.name, err); goto link_name_err; } + } else { + regulator->supply_name = kstrdup(supply_name, GFP_KERNEL); + if (regulator->supply_name == NULL) + goto attr_err; + } + +#ifdef CONFIG_DEBUG_FS + regulator->debugfs = debugfs_create_dir(regulator->supply_name, + rdev->debugfs); + if (IS_ERR_OR_NULL(regulator->debugfs)) { + rdev_warn(rdev, "Failed to create debugfs directory\n"); + regulator->debugfs = NULL; + } else { + debugfs_create_u32("uA_load", 0444, regulator->debugfs, + &regulator->uA_load); + debugfs_create_u32("min_uV", 0444, regulator->debugfs, + &regulator->min_uV); + debugfs_create_u32("max_uV", 0444, regulator->debugfs, + &regulator->max_uV); } +#endif + mutex_unlock(&rdev->mutex); return regulator; link_name_err: @@ -1272,13 +1296,17 @@ void regulator_put(struct regulator *regulator) mutex_lock(&regulator_list_mutex); rdev = regulator->rdev; +#ifdef CONFIG_DEBUG_FS + debugfs_remove_recursive(regulator->debugfs); +#endif + /* remove any sysfs entries */ if (regulator->dev) { sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name); - kfree(regulator->supply_name); device_remove_file(regulator->dev, &regulator->dev_attr); kfree(regulator->dev_attr.attr.name); } + kfree(regulator->supply_name); list_del(&regulator->list); kfree(regulator); From 909c2f32ca0629678e353343d69089f4e94ea974 Mon Sep 17 00:00:00 
2001 From: Jarkko Nikula Date: Thu, 26 May 2011 11:37:02 +0300 Subject: [PATCH 031/151] ASoC: tlv320aic3x: Add correct hw registers to Line1 cross connect muxes Commit af46800 ("ASoC: Implement mux control sharing") revealed that "Left Line1[L | R] Mux" and "Right Line1[L | R] Mux" widgets were pointing to the same kcontrols and codec registers and thus soc-core falsely detected them as shared controls. This is actually wrong since there are separate registers in hardware that configure Line1L to RADC and Line1R to LADC cross connects so these muxes should not be shared. Signed-off-by: Jarkko Nikula Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- sound/soc/codecs/tlv320aic3x.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index c3d96fc8c267..6e35b5109c19 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -226,11 +226,13 @@ static const char *aic3x_adc_hpf[] = #define RDAC_ENUM 1 #define LHPCOM_ENUM 2 #define RHPCOM_ENUM 3 -#define LINE1L_ENUM 4 -#define LINE1R_ENUM 5 -#define LINE2L_ENUM 6 -#define LINE2R_ENUM 7 -#define ADC_HPF_ENUM 8 +#define LINE1L_2_L_ENUM 4 +#define LINE1L_2_R_ENUM 5 +#define LINE1R_2_L_ENUM 6 +#define LINE1R_2_R_ENUM 7 +#define LINE2L_ENUM 8 +#define LINE2R_ENUM 9 +#define ADC_HPF_ENUM 10 static const struct soc_enum aic3x_enum[] = { SOC_ENUM_SINGLE(DAC_LINE_MUX, 6, 3, aic3x_left_dac_mux), @@ -238,6 +240,8 @@ static const struct soc_enum aic3x_enum[] = { SOC_ENUM_SINGLE(HPLCOM_CFG, 4, 3, aic3x_left_hpcom_mux), SOC_ENUM_SINGLE(HPRCOM_CFG, 3, 5, aic3x_right_hpcom_mux), SOC_ENUM_SINGLE(LINE1L_2_LADC_CTRL, 7, 2, aic3x_linein_mode_mux), + SOC_ENUM_SINGLE(LINE1L_2_RADC_CTRL, 7, 2, aic3x_linein_mode_mux), + SOC_ENUM_SINGLE(LINE1R_2_LADC_CTRL, 7, 2, aic3x_linein_mode_mux), SOC_ENUM_SINGLE(LINE1R_2_RADC_CTRL, 7, 2, aic3x_linein_mode_mux), SOC_ENUM_SINGLE(LINE2L_2_LADC_CTRL, 7, 2, 
aic3x_linein_mode_mux), SOC_ENUM_SINGLE(LINE2R_2_RADC_CTRL, 7, 2, aic3x_linein_mode_mux), @@ -490,12 +494,16 @@ static const struct snd_kcontrol_new aic3x_right_pga_mixer_controls[] = { }; /* Left Line1 Mux */ -static const struct snd_kcontrol_new aic3x_left_line1_mux_controls = -SOC_DAPM_ENUM("Route", aic3x_enum[LINE1L_ENUM]); +static const struct snd_kcontrol_new aic3x_left_line1l_mux_controls = +SOC_DAPM_ENUM("Route", aic3x_enum[LINE1L_2_L_ENUM]); +static const struct snd_kcontrol_new aic3x_right_line1l_mux_controls = +SOC_DAPM_ENUM("Route", aic3x_enum[LINE1L_2_R_ENUM]); /* Right Line1 Mux */ -static const struct snd_kcontrol_new aic3x_right_line1_mux_controls = -SOC_DAPM_ENUM("Route", aic3x_enum[LINE1R_ENUM]); +static const struct snd_kcontrol_new aic3x_right_line1r_mux_controls = +SOC_DAPM_ENUM("Route", aic3x_enum[LINE1R_2_R_ENUM]); +static const struct snd_kcontrol_new aic3x_left_line1r_mux_controls = +SOC_DAPM_ENUM("Route", aic3x_enum[LINE1R_2_L_ENUM]); /* Left Line2 Mux */ static const struct snd_kcontrol_new aic3x_left_line2_mux_controls = @@ -535,9 +543,9 @@ static const struct snd_soc_dapm_widget aic3x_dapm_widgets[] = { &aic3x_left_pga_mixer_controls[0], ARRAY_SIZE(aic3x_left_pga_mixer_controls)), SND_SOC_DAPM_MUX("Left Line1L Mux", SND_SOC_NOPM, 0, 0, - &aic3x_left_line1_mux_controls), + &aic3x_left_line1l_mux_controls), SND_SOC_DAPM_MUX("Left Line1R Mux", SND_SOC_NOPM, 0, 0, - &aic3x_left_line1_mux_controls), + &aic3x_left_line1r_mux_controls), SND_SOC_DAPM_MUX("Left Line2L Mux", SND_SOC_NOPM, 0, 0, &aic3x_left_line2_mux_controls), @@ -548,9 +556,9 @@ static const struct snd_soc_dapm_widget aic3x_dapm_widgets[] = { &aic3x_right_pga_mixer_controls[0], ARRAY_SIZE(aic3x_right_pga_mixer_controls)), SND_SOC_DAPM_MUX("Right Line1L Mux", SND_SOC_NOPM, 0, 0, - &aic3x_right_line1_mux_controls), + &aic3x_right_line1l_mux_controls), SND_SOC_DAPM_MUX("Right Line1R Mux", SND_SOC_NOPM, 0, 0, - &aic3x_right_line1_mux_controls), + &aic3x_right_line1r_mux_controls), 
SND_SOC_DAPM_MUX("Right Line2R Mux", SND_SOC_NOPM, 0, 0, &aic3x_right_line2_mux_controls), From 7132de744ba76930d13033061018ddd7e3e8cd91 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Sun, 10 Jul 2011 19:37:48 -0400 Subject: [PATCH 032/151] ext4: fix i_blocks/quota accounting when extent insertion fails The current implementation of ext4_free_blocks() always calls dquot_free_block(). This looks quite sensible in most cases: blocks to be freed are associated with inode and were accounted in quota and i_blocks some time ago. However, there is a case when blocks to free were not accounted by the time calling ext4_free_blocks() yet: 1. delalloc is on, write_begin pre-allocated some space in quota 2. write-back happens, ext4 allocates some blocks in ext4_ext_map_blocks() 3. then ext4_ext_map_blocks() gets an error (e.g. ENOSPC) from ext4_ext_insert_extent() and calls ext4_free_blocks(). In this scenario, ext4_free_blocks() calls dquot_free_block() which, in turn, decrements i_blocks for blocks which were not accounted yet (due to delalloc). After a clean umount, e2fsck reports something like: > Inode 21, i_blocks is 5080, should be 5128. Fix? because i_blocks was erroneously decremented as explained above. The patch fixes the problem by passing the new flag EXT4_FREE_BLOCKS_NO_QUOT_UPDATE to ext4_free_blocks(), to request that the dquot_free_block() call be skipped. 
Signed-off-by: Maxim Patlasov Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/ext4.h | 1 + fs/ext4/extents.c | 4 +++- fs/ext4/mballoc.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 49d2cea47382..d13f3b509886 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -526,6 +526,7 @@ struct ext4_new_group_data { #define EXT4_FREE_BLOCKS_METADATA 0x0001 #define EXT4_FREE_BLOCKS_FORGET 0x0002 #define EXT4_FREE_BLOCKS_VALIDATED 0x0004 +#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 /* * ioctl commands diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 31ae5fbe89e5..a86213882655 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3565,12 +3565,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err) { + int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? + EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; /* free data blocks we just allocated */ /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), 0); + ext4_ext_get_actual_len(&newex), fb_flags); goto out2; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 389386b41c98..1900ec7a1579 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4637,7 +4637,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, } ext4_mark_super_dirty(sb); error_return: - if (freed) + if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) dquot_free_block(inode, freed); brelse(bitmap_bh); ext4_std_error(sb, err); From 575a1d4bdfa2ea9fc10733013136145b497e1be0 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Sun, 10 Jul 2011 20:07:25 -0400 Subject: [PATCH 033/151] ext4: free allocated and pre-allocated blocks when check_eofblocks_fl fails Upon corrupted inode or disk failures, 
we may fail after we already allocate some blocks from the inode or take some blocks from the inode's preallocation list, but before we successfully insert the corresponding extent to the extent tree. In this case, we should free any allocated blocks and discard the inode's preallocated blocks because the entries in the inode's preallocation list may be in an inconsistent state. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/extents.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a86213882655..c969ae23a535 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3560,10 +3560,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, } err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); - if (err) - goto out2; - - err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + if (!err) + err = ext4_ext_insert_extent(handle, inode, path, + &newex, flags); if (err) { int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; From 4862fd6047ed02e2726667c54d35f538eecc56aa Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 10 Jul 2011 22:05:08 -0400 Subject: [PATCH 034/151] jbd2: remove jbd2_dev_to_name() from jbd2 tracepoints Using function calls in TP_printk causes perf heartburn, so print the MAJOR/MINOR device numbers instead. 
Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 67 ------------------------------------- include/linux/jbd2.h | 6 ---- include/trace/events/jbd2.h | 36 ++++++++++---------- 3 files changed, 19 insertions(+), 90 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0dfa5b598e68..f24df13adc4e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2390,73 +2390,6 @@ static void __exit journal_exit(void) jbd2_journal_destroy_caches(); } -/* - * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 - * tracing infrastructure to map a dev_t to a device name. - * - * The caller should use rcu_read_lock() in order to make sure the - * device name stays valid until its done with it. We use - * rcu_read_lock() as well to make sure we're safe in case the caller - * gets sloppy, and because rcu_read_lock() is cheap and can be safely - * nested. - */ -struct devname_cache { - struct rcu_head rcu; - dev_t device; - char devname[BDEVNAME_SIZE]; -}; -#define CACHE_SIZE_BITS 6 -static struct devname_cache *devcache[1 << CACHE_SIZE_BITS]; -static DEFINE_SPINLOCK(devname_cache_lock); - -static void free_devcache(struct rcu_head *rcu) -{ - kfree(rcu); -} - -const char *jbd2_dev_to_name(dev_t device) -{ - int i = hash_32(device, CACHE_SIZE_BITS); - char *ret; - struct block_device *bd; - static struct devname_cache *new_dev; - - rcu_read_lock(); - if (devcache[i] && devcache[i]->device == device) { - ret = devcache[i]->devname; - rcu_read_unlock(); - return ret; - } - rcu_read_unlock(); - - new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); - if (!new_dev) - return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ - bd = bdget(device); - spin_lock(&devname_cache_lock); - if (devcache[i]) { - if (devcache[i]->device == device) { - kfree(new_dev); - bdput(bd); - ret = devcache[i]->devname; - spin_unlock(&devname_cache_lock); - return ret; - } - call_rcu(&devcache[i]->rcu, free_devcache); - } - devcache[i] = new_dev; - devcache[i]->device = 
device; - if (bd) { - bdevname(bd, devcache[i]->devname); - bdput(bd); - } else - __bdevname(device, devcache[i]->devname); - ret = devcache[i]->devname; - spin_unlock(&devname_cache_lock); - return ret; -} -EXPORT_SYMBOL(jbd2_dev_to_name); - MODULE_LICENSE("GPL"); module_init(journal_init); module_exit(journal_exit); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d087c2e7b2aa..38f307b8c334 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1329,12 +1329,6 @@ extern int jbd_blocks_per_page(struct inode *inode); #define BUFFER_TRACE2(bh, bh2, info) do {} while (0) #define JBUFFER_TRACE(jh, info) do {} while (0) -/* - * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 - * tracing infrastructure to map a dev_t to a device name. - */ -extern const char *jbd2_dev_to_name(dev_t device); - #endif /* __KERNEL__ */ #endif /* _LINUX_JBD2_H */ diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index bf16545cc977..75964412ddbb 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -26,8 +26,8 @@ TRACE_EVENT(jbd2_checkpoint, __entry->result = result; ), - TP_printk("dev %s result %d", - jbd2_dev_to_name(__entry->dev), __entry->result) + TP_printk("dev %d,%d result %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->result) ); DECLARE_EVENT_CLASS(jbd2_commit, @@ -48,9 +48,9 @@ DECLARE_EVENT_CLASS(jbd2_commit, __entry->transaction = commit_transaction->t_tid; ), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_printk("dev %d,%d transaction %d sync %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->transaction, __entry->sync_commit) ); DEFINE_EVENT(jbd2_commit, jbd2_start_commit, @@ -100,9 +100,9 @@ TRACE_EVENT(jbd2_end_commit, __entry->head = journal->j_tail_sequence; ), - TP_printk("dev %s transaction %d sync %d head %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - 
__entry->sync_commit, __entry->head) + TP_printk("dev %d,%d transaction %d sync %d head %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->transaction, __entry->sync_commit, __entry->head) ); TRACE_EVENT(jbd2_submit_inode_data, @@ -120,8 +120,9 @@ TRACE_EVENT(jbd2_submit_inode_data, __entry->ino = inode->i_ino; ), - TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) + TP_printk("dev %d,%d ino %lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino) ); TRACE_EVENT(jbd2_run_stats, @@ -156,9 +157,9 @@ TRACE_EVENT(jbd2_run_stats, __entry->blocks_logged = stats->rs_blocks_logged; ), - TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u " + TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u " "logging %u handle_count %u blocks %u blocks_logged %u", - jbd2_dev_to_name(__entry->dev), __entry->tid, + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, jiffies_to_msecs(__entry->wait), jiffies_to_msecs(__entry->running), jiffies_to_msecs(__entry->locked), @@ -192,9 +193,9 @@ TRACE_EVENT(jbd2_checkpoint_stats, __entry->dropped = stats->cs_dropped; ), - TP_printk("dev %s tid %lu chp_time %u forced_to_close %u " + TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u " "written %u dropped %u", - jbd2_dev_to_name(__entry->dev), __entry->tid, + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, jiffies_to_msecs(__entry->chp_time), __entry->forced_to_close, __entry->written, __entry->dropped) ); @@ -222,9 +223,10 @@ TRACE_EVENT(jbd2_cleanup_journal_tail, __entry->freed = freed; ), - TP_printk("dev %s from %u to %u offset %lu freed %lu", - jbd2_dev_to_name(__entry->dev), __entry->tail_sequence, - __entry->first_tid, __entry->block_nr, __entry->freed) + TP_printk("dev %d,%d from %u to %u offset %lu freed %lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tail_sequence, __entry->first_tid, + __entry->block_nr, __entry->freed) ); #endif /* _TRACE_JBD2_H 
*/ From 12706394bcaa48e3d5e19c97d7b4e5683ebb12fb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 10 Jul 2011 22:37:50 -0400 Subject: [PATCH 035/151] ext4: add tracepoint for ext4_journal_start This will help debug who is responsible for starting a jbd2 transaction. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 1 + include/trace/events/ext4.h | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ea71aa864b3..7910e61809e7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -269,6 +269,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) journal_t *journal; handle_t *handle; + trace_ext4_journal_start(sb, nblocks, _RET_IP_); if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 5ce2b2f5f524..6f27a59fc90d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1520,6 +1520,28 @@ TRACE_EVENT(ext4_load_inode, (unsigned long) __entry->ino) ); +TRACE_EVENT(ext4_journal_start, + TP_PROTO(struct super_block *sb, int nblocks, unsigned long IP), + + TP_ARGS(sb, nblocks, IP), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, nblocks ) + __field(unsigned long, ip ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->nblocks = nblocks; + __entry->ip = IP; + ), + + TP_printk("dev %d,%d nblocks %d caller %pF", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->nblocks, (void *)__entry->ip) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ From 22f10457432387615fa1ae6e0375d9cacc50819b Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sun, 10 Jul 2011 23:52:37 -0400 Subject: [PATCH 036/151] ext4: fix trim length underflow with small trim length In 0f0a25b, we adjust 'len' with s_first_data_block - start, but it could underflow in case blocksize=1K, fstrim_range.len=512 and fstrim_range.start = 0. 
In this case, when we run the code: len -= first_data_blk - start; len will underflow to -1ULL. In the end, although we are safe because the last_group check later will limit the trim to the whole volume, that isn't what the user really wants. So this patch fixes it. It also adds the check for 'start' like ext3 so that we can break immediately if the start is invalid. Cc: Lukas Czerner Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1900ec7a1579..b189cb4ff20f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4902,6 +4902,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) return -EINVAL; + if (start + len <= first_data_blk) + goto out; if (start < first_data_blk) { len -= first_data_blk - start; start = first_data_blk; @@ -4950,5 +4952,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } range->len = trimmed * sb->s_blocksize; +out: return ret; } From 169ddc3ec83b5f732e51d975befb191d50795844 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 00:00:07 -0400 Subject: [PATCH 037/151] ext4: speed up group trim with the right free block count When we trim some free blocks in a group of ext4, we need to calculate the free blocks properly and check whether there are enough freed blocks left for us to trim. The current solution will only count free spaces if they are large enough for a trim, which isn't appropriate. Let us see a small example: a group has 1.5M free, made up of chunks of 300k, 300k, 300k, 300k, 300k. And minblocks is 1M. With the current solution, we have to iterate the whole group since these 300k will never be subtracted from 1.5M. But actually we should exit after we find the first 2 free spaces since the remaining 3 chunks only sum up to 900K once we subtract the first 600K, even though those 600K can't be trimmed. 
Reviewed-by: Andreas Dilger Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b189cb4ff20f..4a25725e9157 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4821,7 +4821,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t minblocks) { void *bitmap; - ext4_grpblk_t next, count = 0; + ext4_grpblk_t next, count = 0, free_count = 0; struct ext4_buddy e4b; int ret; @@ -4848,6 +4848,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, next - start, group, &e4b); count += next - start; } + free_count += next - start; start = next + 1; if (fatal_signal_pending(current)) { @@ -4861,7 +4862,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_lock_group(sb, group); } - if ((e4b.bd_info->bb_free - count) < minblocks) + if ((e4b.bd_info->bb_free - free_count) < minblocks) break; } ext4_unlock_group(sb, group); From b3d4c2b10b68d205d3eb1b5c17dcb4649a502798 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 00:01:52 -0400 Subject: [PATCH 038/151] ext4: Add new ext4 trim tracepoints Add ext4_trim_extent and ext4_trim_all_free. 
Reviewed-by: Lukas Czerner Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 4 +++ include/trace/events/ext4.h | 49 +++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4a25725e9157..7aa4c16caca1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4780,6 +4780,8 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, { struct ext4_free_extent ex; + trace_ext4_trim_extent(sb, group, start, count); + assert_spin_locked(ext4_group_lock_ptr(sb, group)); ex.fe_start = start; @@ -4825,6 +4827,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, struct ext4_buddy e4b; int ret; + trace_ext4_trim_all_free(sb, group, start, max); + ret = ext4_mb_load_buddy(sb, group, &e4b); if (ret) { ext4_error(sb, "Error in loading buddy " diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6f27a59fc90d..51d88139eb8c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1542,6 +1542,55 @@ TRACE_EVENT(ext4_journal_start, __entry->nblocks, (void *)__entry->ip) ); +DECLARE_EVENT_CLASS(ext4__trim, + TP_PROTO(struct super_block *sb, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), + + TP_ARGS(sb, group, start, len), + + TP_STRUCT__entry( + __field( int, dev_major ) + __field( int, dev_minor ) + __field( __u32, group ) + __field( int, start ) + __field( int, len ) + ), + + TP_fast_assign( + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); + __entry->group = group; + __entry->start = start; + __entry->len = len; + ), + + TP_printk("dev %d,%d group %u, start %d, len %d", + __entry->dev_major, __entry->dev_minor, + __entry->group, __entry->start, __entry->len) +); + +DEFINE_EVENT(ext4__trim, ext4_trim_extent, + + TP_PROTO(struct super_block *sb, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), + + TP_ARGS(sb, group, start, len) +); + 
+DEFINE_EVENT(ext4__trim, ext4_trim_all_free, + + TP_PROTO(struct super_block *sb, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), + + TP_ARGS(sb, group, start, len) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ From 3d56b8d2c74cc3f375ce332b3ac3519e009d79ee Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 00:03:38 -0400 Subject: [PATCH 039/151] ext4: Speed up FITRIM by recording flags in ext4_group_info In ext4, when FITRIM is called every time, we iterate all the groups and do trim one by one. It is a bit time wasting if the group has been trimmed and there is no change since the last trim. So this patch adds a new flag in ext4_group_info->bb_state to indicate that the group has been trimmed, and it will be cleared if some blocks is freed(in release_blocks_on_commit). Another trim_minlen is added in ext4_sb_info to record the last minlen we use to trim the volume, so that if the caller provide a small one, we will go on the trim regardless of the bb_state. A simple test with my intel x25m ssd: df -h shows: /dev/sdb1 40G 21G 17G 56% /mnt/ext4 Block size: 4096 run the FITRIM with the following parameter: range.start = 0; range.len = UINT64_MAX; range.minlen = 1048576; without the patch: [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m5.505s user 0m0.000s sys 0m1.224s [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m5.359s user 0m0.000s sys 0m1.178s [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m5.228s user 0m0.000s sys 0m1.151s with the patch: [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m5.625s user 0m0.000s sys 0m1.269s [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m0.002s user 0m0.000s sys 0m0.001s [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m0.002s user 0m0.000s sys 0m0.001s A big improvement for the 2nd and 3rd run. Even after I delete some big image files, it is still much faster than iterating the whole disk. 
[root@boyu-tm test]# time ./ftrim /mnt/ext4/a real 0m1.217s user 0m0.000s sys 0m0.196s Cc: Lukas Czerner Reviewed-by: Andreas Dilger Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 13 ++++++++++++- fs/ext4/mballoc.c | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d13f3b509886..62cee2b6fe79 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1215,6 +1215,9 @@ struct ext4_sb_info { /* Kernel thread for multiple mount protection */ struct task_struct *s_mmp_tsk; + + /* record the last minlen when FITRIM is called. */ + atomic_t s_last_trim_minblks; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -2080,11 +2083,19 @@ struct ext4_group_info { * 5 free 8-block regions. */ }; -#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 +#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 +#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 #define EXT4_MB_GRP_NEED_INIT(grp) \ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) +#define EXT4_MB_GRP_WAS_TRIMMED(grp) \ + (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) +#define EXT4_MB_GRP_SET_TRIMMED(grp) \ + (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) +#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ + (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) + #define EXT4_MAX_CONTENTION 8 #define EXT4_CONTENTION_THRESHOLD 2 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7aa4c16caca1..73c254085a41 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2628,6 +2628,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) rb_erase(&entry->node, &(db->bb_free_root)); mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); + /* + * Clear the trimmed flag for the group so that the next + * ext4_trim_fs can trim it. + * If the volume is mounted with -o discard, online discard + * is supported and the free blocks will be trimmed online. 
+ */ + if (!test_opt(sb, DISCARD)) + EXT4_MB_GRP_CLEAR_TRIMMED(db); + if (!db->bb_free_root.rb_node) { /* No more items in the per group rb tree * balance refcounts from ext4_mb_free_metadata() @@ -4838,6 +4847,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, bitmap = e4b.bd_bitmap; ext4_lock_group(sb, group); + if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && + minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) + goto out; + start = (e4b.bd_info->bb_first_free > start) ? e4b.bd_info->bb_first_free : start; @@ -4869,6 +4882,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, if ((e4b.bd_info->bb_free - free_count) < minblocks) break; } + + if (!ret) + EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); +out: ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); @@ -4957,6 +4974,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } range->len = trimmed * sb->s_blocksize; + if (!ret) + atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); + out: return ret; } From 22612283f7da1ce9849d9b3716010b07a0446fd9 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 00:04:34 -0400 Subject: [PATCH 040/151] ext4: Change the wrong param comment for ext4_trim_all_free at ext4_trim_all_free() comment, there is no longer an @e4b parameter, instead it is @group. Reported-by: Andreas Dilger Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 73c254085a41..04a3d92aafb4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4811,7 +4811,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, /** * ext4_trim_all_free -- function to trim all free space in alloc. 
group * @sb: super block for file system - * @e4b: ext4 buddy + * @group: group to be trimmed * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count From ffb505ff0f7b52318dea46dd139107a8371b4ad7 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Mon, 11 Jul 2011 11:43:59 -0400 Subject: [PATCH 041/151] ext4: remove redundant goto in ext4_ext_insert_extent() If eh->eh_entries is smaller than eh->eh_max, the routine will go to "repeat" and then go to "has_space" directly, since the arguments "depth" and "eh" are not even changed. Therefore, goto "has_space" directly and remove the redundant "repeat" label. Signed-off-by: Robin Dong --- fs/ext4/extents.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c969ae23a535..9cbdcb2110f5 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1723,7 +1723,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, goto merge; } -repeat: depth = ext_depth(inode); eh = path[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) @@ -1745,7 +1744,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, ext_debug("next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); path = npath; - goto repeat; + goto has_space; } ext_debug("next leaf has no free space(%d,%d)\n", le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); From 598dbdf2433cad55bd44d923f67a053871e3eabf Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Mon, 11 Jul 2011 18:24:01 -0400 Subject: [PATCH 042/151] ext4: avoid unneeded ext4_ext_next_leaf_block() while inserting extents Optimize ext4_ext_insert_extent() by avoiding ext4_ext_next_leaf_block() when the result is not used/needed.
Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9cbdcb2110f5..f1c538e5055c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1730,9 +1730,10 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* probably next leaf has space for us? */ fex = EXT_LAST_EXTENT(eh); - next = ext4_ext_next_leaf_block(inode, path); - if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) - && next != EXT_MAX_BLOCKS) { + next = EXT_MAX_BLOCKS; + if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) + next = ext4_ext_next_leaf_block(inode, path); + if (next != EXT_MAX_BLOCKS) { ext_debug("next leaf block - %d\n", next); BUG_ON(npath != NULL); npath = ext4_ext_find_extent(inode, next, NULL); From 823ba01fc07751200c43e45733925a98b73eac3a Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 18:26:01 -0400 Subject: [PATCH 043/151] ext4: fix a race which could leak memory in ext4_groupinfo_create_slab() In ext4_groupinfo_create_slab, we create ext4_groupinfo_caches within ext4_grpinfo_slab_create_mutex, but set it outside the lock, and there does exist some case that we may create it twice and causes a memory leak. So set it before we call mutex_unlock. 
Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 04a3d92aafb4..2b9a71b99b2b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2404,14 +2404,14 @@ static int ext4_groupinfo_create_slab(size_t size) slab_size, 0, SLAB_RECLAIM_ACCOUNT, NULL); + ext4_groupinfo_caches[cache_index] = cachep; + mutex_unlock(&ext4_grpinfo_slab_create_mutex); if (!cachep) { printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); return -ENOMEM; } - ext4_groupinfo_caches[cache_index] = cachep; - return 0; } From caaf7a29d31da21bb8d8200d5e42d1c93d3c6e00 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 18:42:42 -0400 Subject: [PATCH 044/151] ext4: Fix a double free of sbi->s_group_info in ext4_mb_init_backend If we meet with an error in ext4_mb_add_groupinfo, we kfree sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)], but fail to reset it to NULL. So the caller ext4_mb_init_backend will try to kfree it again and causes a double free. So fix it by resetting it to NULL. Some typo in comments of mballoc.c are also changed. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 2b9a71b99b2b..b97a2d2f0fdf 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -75,8 +75,8 @@ * * The inode preallocation space is used looking at the _logical_ start * block. If only the logical file block falls within the range of prealloc - * space we will consume the particular prealloc space. This make sure that - * that the we have contiguous physical blocks representing the file blocks + * space we will consume the particular prealloc space. 
This makes sure that + * we have contiguous physical blocks representing the file blocks * * The important thing to be noted in case of inode prealloc space is that * we don't modify the values associated to inode prealloc space except @@ -84,7 +84,7 @@ * * If we are not able to find blocks in the inode prealloc space and if we * have the group allocation flag set then we look at the locality group - * prealloc space. These are per CPU prealloc list repreasented as + * prealloc space. These are per CPU prealloc list represented as * * ext4_sb_info.s_locality_groups[smp_processor_id()] * @@ -152,7 +152,7 @@ * best extent in the found extents. Searching for the blocks starts with * the group specified as the goal value in allocation context via * ac_g_ex. Each group is first checked based on the criteria whether it - * can used for allocation. ext4_mb_good_group explains how the groups are + * can be used for allocation. ext4_mb_good_group explains how the groups are * checked. * * Both the prealloc space are getting populated as above. So for the first @@ -2279,8 +2279,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ - if (group % EXT4_DESC_PER_BLOCK(sb) == 0) + if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); + sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; + } exit_meta_group_info: return -ENOMEM; } /* ext4_mb_add_groupinfo */ From afb86178cb9b6a7329cf8709aa210fb0a245b606 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 11 Jul 2011 18:47:04 -0400 Subject: [PATCH 045/151] ext4: remove unnecessary comments in ext4_orphan_add() The comment from Al Viro about possible race in the ext4_orphan_add() is not justified. 
There is no race possible as we always have either i_mutex locked, or the inode can not be referenced from outside, hence the J_ASSERT should not be hit for the reason described in the comment. This commit replaces it with a note that we are holding i_mutex, so it should not be possible for i_nlink to be changed while waiting for s_orphan_lock. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b754b7721f51..8dde5ab239cc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1989,18 +1989,11 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) if (!list_empty(&EXT4_I(inode)->i_orphan)) goto out_unlock; - /* Orphan handling is only valid for files with data blocks - * being truncated, or files being unlinked. */ - - /* @@@ FIXME: Observation from aviro: - * I think I can trigger J_ASSERT in ext4_orphan_add(). We block - * here (on s_orphan_lock), so race with ext4_link() which might bump - * ->i_nlink. For, say it, character device. Not a regular file, - * not a directory, not a symlink and ->i_nlink > 0. - * - * tytso, 4/25/2009: I'm not sure how that could happen; - * shouldn't the fs core protect us from these sort of - * unlink()/link() races? + /* + * Orphan handling is only valid for files with data blocks + * being truncated, or files being unlinked. Note that we either + * hold i_mutex, or the inode can not be referenced from outside, + * so i_nlink should not be bumped due to race */ J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); From 4de1ba155bbe9b629b9fb03919c5d905b747e62f Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 6 Jun 2011 13:49:00 -0700 Subject: [PATCH 046/151] dma: mv_xor: use resource_size() Signed-off-by: H Hartley Sweeten Cc: Dan Williams (supporter:ASYNCHRONOUS TRAN...)
Cc: Vinod Koul (supporter:DMA GENERIC OFFLO...) Signed-off-by: Vinod Koul --- drivers/dma/mv_xor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 954e334e01bb..9a353c2216d0 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1305,7 +1305,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev) return -ENODEV; msp->xor_base = devm_ioremap(&pdev->dev, res->start, - res->end - res->start + 1); + resource_size(res)); if (!msp->xor_base) return -EBUSY; @@ -1314,7 +1314,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev) return -ENODEV; msp->xor_high_base = devm_ioremap(&pdev->dev, res->start, - res->end - res->start + 1); + resource_size(res)); if (!msp->xor_high_base) return -EBUSY; From 70f18915846f092e0e1c988f1726a532fa3ab3a1 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 22 Jun 2011 17:05:33 +0200 Subject: [PATCH 047/151] pch_dma: Fix channel locking Fix for the following INFO message ================================= [ INFO: inconsistent lock state ] 2.6.39+ #89 --------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. 
rs232/822 [HC1[1]:SC0[0]:HE0:SE1] takes: (&(&pd_chan->lock)->rlock){?.....}, at: [] pdc_desc_get+0x16/0xab {HARDIRQ-ON-W} state was registered at: [] mark_irqflags+0xbd/0x11a [] __lock_acquire+0x501/0x6bb [] lock_acquire+0x63/0x7b [] _raw_spin_lock_bh+0x43/0x51 [] pd_alloc_chan_resources+0x92/0x11e [] dma_chan_get+0x9b/0x107 [] __dma_request_channel+0x61/0xdc [] pch_request_dma+0x61/0x19e [] pch_uart_startup+0x16a/0x1a2 [] uart_startup+0x87/0x147 [] uart_open+0x117/0x13e [] tty_open+0x23c/0x34c [] chrdev_open+0x140/0x15f [] __dentry_open.clone.14+0x14a/0x22b [] nameidata_to_filp+0x36/0x40 [] do_last+0x513/0x635 [] path_openat+0x9c/0x2aa [] do_filp_open+0x27/0x69 [] do_sys_open+0xfd/0x184 [] sys_open+0x24/0x2a [] sysenter_do_call+0x12/0x32 irq event stamp: 2522 hardirqs last enabled at (2521): [] _raw_spin_unlock_irqrestore+0x36/0x52 hardirqs last disabled at (2522): [] common_interrupt+0x27/0x34 softirqs last enabled at (2354): [] __do_softirq+0x10a/0x11a softirqs last disabled at (2299): [] do_softirq+0x57/0xa4 other info that might help us debug this: 2 locks held by rs232/822: #0: (&tty->atomic_write_lock){+.+.+.}, at: [] tty_write_lock+0x14/0x3c #1: (&port_lock_key){-.....}, at: [] pch_uart_interrupt+0x17/0x1e9 stack backtrace: Pid: 822, comm: rs232 Not tainted 2.6.39+ #89 Call Trace: [] ? printk+0x19/0x1b [] print_usage_bug+0x184/0x18f [] ? print_irq_inversion_bug+0x10e/0x10e [] mark_lock_irq+0xa5/0x1f6 [] mark_lock+0x208/0x2d7 [] mark_irqflags+0x55/0x11a [] __lock_acquire+0x501/0x6bb [] ? dump_trace+0x92/0xb6 [] lock_acquire+0x63/0x7b [] ? pdc_desc_get+0x16/0xab [] _raw_spin_lock+0x3e/0x4c [] ? pdc_desc_get+0x16/0xab [] pdc_desc_get+0x16/0xab [] ? __lock_acquire+0x653/0x6bb [] pd_prep_slave_sg+0x7c/0x1cb [] ? nommu_map_sg+0x6e/0x81 [] dma_handle_tx+0x2cf/0x344 [] ? pch_uart_interrupt+0x17/0x1e9 [] pch_uart_interrupt+0x160/0x1e9 [] handle_irq_event_percpu+0x25/0x127 [] handle_irq_event+0x2c/0x43 [] ? 
handle_fasteoi_irq+0x84/0x84 [] handle_edge_irq+0xac/0xce [] ? do_IRQ+0x38/0x9d [] ? common_interrupt+0x2e/0x34 [] ? __lock_acquire+0x1f6/0x6bb [] ? _raw_spin_unlock_irqrestore+0x38/0x52 [] ? uart_start+0x2d/0x32 [] ? uart_flush_chars+0x8/0xa [] ? n_tty_write+0x12c/0x1c6 [] ? try_to_wake_up+0x251/0x251 [] ? tty_write+0x169/0x1dc [] ? n_tty_ioctl+0xb7/0xb7 [] ? vfs_write+0x91/0x10d [] ? tty_write_lock+0x3c/0x3c [] ? sys_write+0x3e/0x63 [] ? sysenter_do_call+0x12/0x32 Signed-off-by: Alexander Stein Tested-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 65c32f893a57..d9d95a4dd854 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -521,11 +521,11 @@ static int pd_alloc_chan_resources(struct dma_chan *chan) list_add_tail(&desc->desc_node, &tmp_list); } - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); list_splice(&tmp_list, &pd_chan->free_list); pd_chan->descs_allocated = i; pd_chan->completed_cookie = chan->cookie = 1; - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); pdc_enable_irq(chan, 1); @@ -543,10 +543,10 @@ static void pd_free_chan_resources(struct dma_chan *chan) BUG_ON(!list_empty(&pd_chan->active_list)); BUG_ON(!list_empty(&pd_chan->queue)); - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); list_splice_init(&pd_chan->free_list, &tmp_list); pd_chan->descs_allocated = 0; - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); list_for_each_entry_safe(desc, _d, &tmp_list, desc_node) pci_pool_free(pd->pool, desc, desc->txd.phys); @@ -562,10 +562,10 @@ static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t last_completed; int ret; - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); last_completed = pd_chan->completed_cookie; last_used = chan->cookie; - 
spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); ret = dma_async_is_complete(cookie, last_completed, last_used); @@ -680,7 +680,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, if (cmd != DMA_TERMINATE_ALL) return -ENXIO; - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE); @@ -690,7 +690,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, list_for_each_entry_safe(desc, _d, &list, desc_node) pdc_chain_complete(pd_chan, desc); - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); return 0; } From a8f3067bce60b96215f3169d2c71e21f784ef507 Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Sun, 26 Jun 2011 23:29:52 +0200 Subject: [PATCH 048/151] dmaengine/ste_dma40: add a separate queue for pending requests tx_submit will add descriptors to the pending queue. Issue pending will then move the pending descriptors to the transfer queue. Signed-off-by: Per Forlin Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 8f222d4db7de..91d5ed7c79ba 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -199,6 +199,7 @@ struct d40_chan { struct dma_chan chan; struct tasklet_struct tasklet; struct list_head client; + struct list_head pending_queue; struct list_head active; struct list_head queue; struct stedma40_chan_cfg dma_cfg; @@ -644,7 +645,20 @@ static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc) { - list_add_tail(&desc->node, &d40c->queue); + list_add_tail(&desc->node, &d40c->pending_queue); +} + +static struct d40_desc *d40_first_pending(struct d40_chan *d40c) +{ + struct d40_desc *d; + + if (list_empty(&d40c->pending_queue)) + return NULL; + + d = 
list_first_entry(&d40c->pending_queue, + struct d40_desc, + node); + return d; } static struct d40_desc *d40_first_queued(struct d40_chan *d40c) @@ -801,6 +815,11 @@ static void d40_term_all(struct d40_chan *d40c) d40_desc_free(d40c, d40d); } + /* Release pending descriptors */ + while ((d40d = d40_first_pending(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } d40c->pending_tx = 0; d40c->busy = false; @@ -2151,7 +2170,9 @@ static void d40_issue_pending(struct dma_chan *chan) spin_lock_irqsave(&d40c->lock, flags); - /* Busy means that pending jobs are already being processed */ + list_splice_tail_init(&d40c->pending_queue, &d40c->queue); + + /* Busy means that queued jobs are already being processed */ if (!d40c->busy) (void) d40_queue_start(d40c); @@ -2340,6 +2361,7 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, INIT_LIST_HEAD(&d40c->active); INIT_LIST_HEAD(&d40c->queue); + INIT_LIST_HEAD(&d40c->pending_queue); INIT_LIST_HEAD(&d40c->client); tasklet_init(&d40c->tasklet, dma_tasklet, From 78fdaec3416fcbf2c38927cdf8b2de9f402693f1 Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Sun, 26 Jun 2011 23:29:53 +0200 Subject: [PATCH 049/151] dmaengine: remove ste_dma40 from issue_pending TODO ste_dma40 now implements issue_pending according to documentation. Submit adds descriptors to a pending queue, which are flushed down to the DMAC at issue_pending. Signed-off-by: Per Forlin Signed-off-by: Vinod Koul --- drivers/dma/TODO | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/TODO b/drivers/dma/TODO index a4af8589330c..734ed0206cd5 100644 --- a/drivers/dma/TODO +++ b/drivers/dma/TODO @@ -9,6 +9,5 @@ TODO for slave dma - mxs-dma.c - dw_dmac - intel_mid_dma - - ste_dma40 4. Check other subsystems for dma drivers and merge/move to dmaengine 5. Remove dma_slave_config's dma direction.
From ae752bf4cb78520e42f96f904e441c50f2114c7b Mon Sep 17 00:00:00 2001 From: om prakash Date: Mon, 27 Jun 2011 11:33:31 +0200 Subject: [PATCH 050/151] dmaengine/ste_dma40: fix missing kernel-doc Missing documentation creates kernel-doc warnings, so add the documentation. Signed-off-by: Om Prakash Reviewed-by: Rabin Vincent Reviewed-by: Jonas Aberg Reviewed-by: Srinidhi Kasagar Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 91d5ed7c79ba..e1af76c57e4f 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -185,6 +185,8 @@ struct d40_base; * @log_def: Default logical channel settings. * @lcla: Space for one dst src pair for logical channel transfers. * @lcpa: Pointer to dst and src lcpa settings. + * @runtime_addr: runtime configured address. + * @runtime_direction: runtime configured direction. * * This struct can either "be" a logical or a physical channel. */ From 79ca7ec3d1046a79c64f95f0cac0f5fd29829f53 Mon Sep 17 00:00:00 2001 From: Robert Marklund Date: Mon, 27 Jun 2011 11:33:24 +0200 Subject: [PATCH 051/151] dmaengine/ste_dma40: make the cyclic alloc NOWAIT This function may be initiated from IRQ context, so the allocation must allocate NOWAIT memory.
Signed-off-by: Robert Marklund Reviewed-by: Rabin Vincent Reviewed-by: Philippe Langlais Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index e1af76c57e4f..35b078d688d5 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2112,7 +2112,7 @@ dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, struct scatterlist *sg; int i; - sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_KERNEL); + sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT); for (i = 0; i < periods; i++) { sg_dma_address(&sg[i]) = dma_addr; sg_dma_len(&sg[i]) = period_len; From f4b89764c470230bbf9d18c0a3411887c48bb5a2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Jun 2011 11:33:46 +0200 Subject: [PATCH 052/151] dmaengine/ste_dma40: use AMBA PrimeCell helper macros The DMA40 is not a PrimeCell from ARM, but it still use the same ID registers. So let's utilize the existing macros in the PrimeCell header to identify manufacturer and revision of the IP block instead of reinventing the wheel. Cc: Robert Marklund Cc: Per Forlin Cc: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 72 ++++++++++++++------------------------ drivers/dma/ste_dma40_ll.h | 3 -- 2 files changed, 26 insertions(+), 49 deletions(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 35b078d688d5..2797f64e5e48 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -44,9 +45,6 @@ #define D40_ALLOC_PHY (1 << 30) #define D40_ALLOC_LOG_FREE 0 -/* Hardware designer of the block */ -#define D40_HW_DESIGNER 0x8 - /** * enum 40_command - The different commands and/or statuses. 
* @@ -2525,25 +2523,6 @@ static int __init d40_phy_res_init(struct d40_base *base) static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) { - static const struct d40_reg_val dma_id_regs[] = { - /* Peripheral Id */ - { .reg = D40_DREG_PERIPHID0, .val = 0x0040}, - { .reg = D40_DREG_PERIPHID1, .val = 0x0000}, - /* - * D40_DREG_PERIPHID2 Depends on HW revision: - * DB8500ed has 0x0008, - * ? has 0x0018, - * DB8500v1 has 0x0028 - * DB8500v2 has 0x0038 - */ - { .reg = D40_DREG_PERIPHID3, .val = 0x0000}, - - /* PCell Id */ - { .reg = D40_DREG_CELLID0, .val = 0x000d}, - { .reg = D40_DREG_CELLID1, .val = 0x00f0}, - { .reg = D40_DREG_CELLID2, .val = 0x0005}, - { .reg = D40_DREG_CELLID3, .val = 0x00b1} - }; struct stedma40_platform_data *plat_data; struct clk *clk = NULL; void __iomem *virtbase = NULL; @@ -2552,8 +2531,9 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) int num_log_chans = 0; int num_phy_chans; int i; - u32 val; - u32 rev; + u32 pid; + u32 cid; + u8 rev; clk = clk_get(&pdev->dev, NULL); @@ -2577,32 +2557,32 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) if (!virtbase) goto failure; - /* HW version check */ - for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) { - if (dma_id_regs[i].val != - readl(virtbase + dma_id_regs[i].reg)) { - d40_err(&pdev->dev, - "Unknown hardware! 
Expected 0x%x at 0x%x but got 0x%x\n", - dma_id_regs[i].val, - dma_id_regs[i].reg, - readl(virtbase + dma_id_regs[i].reg)); - goto failure; - } - } - - /* Get silicon revision and designer */ - val = readl(virtbase + D40_DREG_PERIPHID2); + /* This is just a regular AMBA PrimeCell ID actually */ + for (pid = 0, i = 0; i < 4; i++) + pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i) + & 255) << (i * 8); + for (cid = 0, i = 0; i < 4; i++) + cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i) + & 255) << (i * 8); - if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) != - D40_HW_DESIGNER) { + if (cid != AMBA_CID) { + d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n"); + goto failure; + } + if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) { d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n", - val & D40_DREG_PERIPHID2_DESIGNER_MASK, - D40_HW_DESIGNER); + AMBA_MANF_BITS(pid), + AMBA_VENDOR_ST); goto failure; } - - rev = (val & D40_DREG_PERIPHID2_REV_MASK) >> - D40_DREG_PERIPHID2_REV_POS; + /* + * HW revision: + * DB8500ed has revision 0 + * ? 
has revision 1 + * DB8500v1 has revision 2 + * DB8500v2 has revision 3 + */ + rev = AMBA_REV_BITS(pid); /* The number of physical channels on this HW */ num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 195ee65ee7f3..b44c455158de 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -184,9 +184,6 @@ #define D40_DREG_PERIPHID0 0xFE0 #define D40_DREG_PERIPHID1 0xFE4 #define D40_DREG_PERIPHID2 0xFE8 -#define D40_DREG_PERIPHID2_REV_POS 4 -#define D40_DREG_PERIPHID2_REV_MASK (0xf << D40_DREG_PERIPHID2_REV_POS) -#define D40_DREG_PERIPHID2_DESIGNER_MASK 0xf #define D40_DREG_PERIPHID3 0xFEC #define D40_DREG_CELLID0 0xFF0 #define D40_DREG_CELLID1 0xFF4 From 98ca528916c47ad17f78a07b45e49de3940fba77 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 27 Jun 2011 11:33:38 +0200 Subject: [PATCH 053/151] dmaengine/ste_dma40: allow memory buswidth/burst to be configured Currently the runtime config implementation forces the memory side parameters to be the same as the peripheral side. Allow these to be different, and check for misconfiguration. 
Signed-off-by: Rabin Vincent Reviewed-by: Ulf HANSSON Tested-by: Stefan Nilsson Reviewed-by: Per Forlin Reviewed-by: Srinidhi Kasagar Cc: Robert Marklund Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 168 ++++++++++++++++++++++++---------------- 1 file changed, 102 insertions(+), 66 deletions(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 2797f64e5e48..75ba5865d7a4 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2179,17 +2179,78 @@ static void d40_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&d40c->lock, flags); } +static int +dma40_config_to_halfchannel(struct d40_chan *d40c, + struct stedma40_half_channel_info *info, + enum dma_slave_buswidth width, + u32 maxburst) +{ + enum stedma40_periph_data_width addr_width; + int psize; + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + addr_width = STEDMA40_BYTE_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + addr_width = STEDMA40_HALFWORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + addr_width = STEDMA40_WORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_8_BYTES: + addr_width = STEDMA40_DOUBLEWORD_WIDTH; + break; + default: + dev_err(d40c->base->dev, + "illegal peripheral address width " + "requested (%d)\n", + width); + return -EINVAL; + } + + if (chan_is_logical(d40c)) { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_LOG_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_LOG_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_LOG_4; + else + psize = STEDMA40_PSIZE_LOG_1; + } else { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_PHY_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_PHY_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_PHY_4; + else + psize = STEDMA40_PSIZE_PHY_1; + } + + info->data_width = addr_width; + info->psize = psize; + info->flow_ctrl = STEDMA40_NO_FLOW_CTRL; + + return 0; +} + /* Runtime reconfiguration extension */ -static void d40_set_runtime_config(struct dma_chan 
*chan, - struct dma_slave_config *config) +static int d40_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) { struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); struct stedma40_chan_cfg *cfg = &d40c->dma_cfg; - enum dma_slave_buswidth config_addr_width; + enum dma_slave_buswidth src_addr_width, dst_addr_width; dma_addr_t config_addr; - u32 config_maxburst; - enum stedma40_periph_data_width addr_width; - int psize; + u32 src_maxburst, dst_maxburst; + int ret; + + src_addr_width = config->src_addr_width; + src_maxburst = config->src_maxburst; + dst_addr_width = config->dst_addr_width; + dst_maxburst = config->dst_maxburst; if (config->direction == DMA_FROM_DEVICE) { dma_addr_t dev_addr_rx = @@ -2208,8 +2269,11 @@ static void d40_set_runtime_config(struct dma_chan *chan, cfg->dir); cfg->dir = STEDMA40_PERIPH_TO_MEM; - config_addr_width = config->src_addr_width; - config_maxburst = config->src_maxburst; + /* Configure the memory side */ + if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + dst_addr_width = src_addr_width; + if (dst_maxburst == 0) + dst_maxburst = src_maxburst; } else if (config->direction == DMA_TO_DEVICE) { dma_addr_t dev_addr_tx = @@ -2228,68 +2292,39 @@ static void d40_set_runtime_config(struct dma_chan *chan, cfg->dir); cfg->dir = STEDMA40_MEM_TO_PERIPH; - config_addr_width = config->dst_addr_width; - config_maxburst = config->dst_maxburst; - + /* Configure the memory side */ + if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + src_addr_width = dst_addr_width; + if (src_maxburst == 0) + src_maxburst = dst_maxburst; } else { dev_err(d40c->base->dev, "unrecognized channel direction %d\n", config->direction); - return; + return -EINVAL; } - switch (config_addr_width) { - case DMA_SLAVE_BUSWIDTH_1_BYTE: - addr_width = STEDMA40_BYTE_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_2_BYTES: - addr_width = STEDMA40_HALFWORD_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - addr_width = STEDMA40_WORD_WIDTH; - 
break; - case DMA_SLAVE_BUSWIDTH_8_BYTES: - addr_width = STEDMA40_DOUBLEWORD_WIDTH; - break; - default: + if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) { dev_err(d40c->base->dev, - "illegal peripheral address width " - "requested (%d)\n", - config->src_addr_width); - return; + "src/dst width/maxburst mismatch: %d*%d != %d*%d\n", + src_maxburst, + src_addr_width, + dst_maxburst, + dst_addr_width); + return -EINVAL; } - if (chan_is_logical(d40c)) { - if (config_maxburst >= 16) - psize = STEDMA40_PSIZE_LOG_16; - else if (config_maxburst >= 8) - psize = STEDMA40_PSIZE_LOG_8; - else if (config_maxburst >= 4) - psize = STEDMA40_PSIZE_LOG_4; - else - psize = STEDMA40_PSIZE_LOG_1; - } else { - if (config_maxburst >= 16) - psize = STEDMA40_PSIZE_PHY_16; - else if (config_maxburst >= 8) - psize = STEDMA40_PSIZE_PHY_8; - else if (config_maxburst >= 4) - psize = STEDMA40_PSIZE_PHY_4; - else if (config_maxburst >= 2) - psize = STEDMA40_PSIZE_PHY_2; - else - psize = STEDMA40_PSIZE_PHY_1; - } + ret = dma40_config_to_halfchannel(d40c, &cfg->src_info, + src_addr_width, + src_maxburst); + if (ret) + return ret; - /* Set up all the endpoint configs */ - cfg->src_info.data_width = addr_width; - cfg->src_info.psize = psize; - cfg->src_info.big_endian = false; - cfg->src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; - cfg->dst_info.data_width = addr_width; - cfg->dst_info.psize = psize; - cfg->dst_info.big_endian = false; - cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; + ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info, + dst_addr_width, + dst_maxburst); + if (ret) + return ret; /* Fill in register values */ if (chan_is_logical(d40c)) @@ -2302,12 +2337,14 @@ static void d40_set_runtime_config(struct dma_chan *chan, d40c->runtime_addr = config_addr; d40c->runtime_direction = config->direction; dev_dbg(d40c->base->dev, - "configured channel %s for %s, data width %d, " - "maxburst %d bytes, LE, no flow control\n", + "configured channel %s for %s, data width %d/%d, 
" + "maxburst %d/%d elements, LE, no flow control\n", dma_chan_name(chan), (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX", - config_addr_width, - config_maxburst); + src_addr_width, dst_addr_width, + src_maxburst, dst_maxburst); + + return 0; } static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, @@ -2328,9 +2365,8 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, case DMA_RESUME: return d40_resume(d40c); case DMA_SLAVE_CONFIG: - d40_set_runtime_config(chan, + return d40_set_runtime_config(chan, (struct dma_slave_config *) arg); - return 0; default: break; } From b89243dd0e6a1c96a4a346cb3e1ba2c637cdfe98 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 1 Jul 2011 16:47:28 +0200 Subject: [PATCH 054/151] dmaengine/coh901318: fix slave submission semantics While testing Per Forlins MMC speed improvements I noticed a semantic bug in the COH901318 driver: it will write to channel registers in the prep_slave_sg() function, instead of deferring it to later, breaking the assumption from the drivers to be able to queue up new jobs while another job is running. Fix this by storing up the initial register writes in the job descriptors and write them to hardware when we process the descriptor instead. Now the stress tests work. 
Acked-by: Per Forlin Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/coh901318.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index af8c0b5ed70f..a7fca1653933 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -40,6 +40,8 @@ struct coh901318_desc { struct coh901318_lli *lli; enum dma_data_direction dir; unsigned long flags; + u32 head_config; + u32 head_ctrl; }; struct coh901318_base { @@ -660,6 +662,9 @@ static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc) coh901318_desc_submit(cohc, cohd); + /* Program the transaction head */ + coh901318_set_conf(cohc, cohd->head_config); + coh901318_set_ctrl(cohc, cohd->head_ctrl); coh901318_prep_linked_list(cohc, cohd->lli); /* start dma job on this channel */ @@ -1090,8 +1095,6 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } else goto err_direction; - coh901318_set_conf(cohc, config); - /* The dma only supports transmitting packages up to * MAX_DMA_PACKET_SIZE. Calculate to total number of * dma elemts required to send the entire sg list @@ -1128,16 +1131,18 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (ret) goto err_lli_fill; - /* - * Set the default ctrl for the channel to the one from the lli, - * things may have changed due to odd buffer alignment etc. - */ - coh901318_set_ctrl(cohc, lli->control); COH_DBG(coh901318_list_print(cohc, lli)); /* Pick a descriptor to handle this transfer */ cohd = coh901318_desc_get(cohc); + cohd->head_config = config; + /* + * Set the default head ctrl for the channel to the one from the + * lli, things may have changed due to odd buffer alignment + * etc. 
+ */ + cohd->head_ctrl = lli->control; cohd->dir = direction; cohd->flags = flags; cohd->desc.tx_submit = coh901318_tx_submit; From 95bfea1675c02d83cf1923272e62f91db11cbb8f Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 30 Jun 2011 16:06:33 +0800 Subject: [PATCH 055/151] dmaengine: mxs-dma: skip request_irq for NO_IRQ In general, the mxs-dma users get separate irq for each channel, but gpmi is special one which has only one irq shared by all gpmi channels. It causes mxs_dma channel allocation function fail for all other gpmi channels except the first one calling into the function. The patch gets request_irq call skipped for NO_IRQ case, and leaves this gpmi specific quirk to gpmi driver to sort out. It will fix above problem if gpmi driver sets chan_irq as gpmi irq for only one channel and NO_IRQ for all the rest channels. Signed-off-by: Shawn Guo Cc: Vinod Koul Signed-off-by: Vinod Koul --- drivers/dma/mxs-dma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 88aad4f54002..2870d919f112 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -327,10 +327,12 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan) memset(mxs_chan->ccw, 0, PAGE_SIZE); - ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, - 0, "mxs-dma", mxs_dma); - if (ret) - goto err_irq; + if (mxs_chan->chan_irq != NO_IRQ) { + ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, + 0, "mxs-dma", mxs_dma); + if (ret) + goto err_irq; + } ret = clk_enable(mxs_dma->clk); if (ret) From 265c6a0f9290c8f470b839257dc6af3c46b24da1 Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Sat, 16 Jul 2011 19:41:23 -0400 Subject: [PATCH 056/151] ext4: fix compilation with -DDX_DEBUG Compilation of ext4/namei.c brought up an error and warning messages when compiled with -DDX_DEBUG Signed-off-by: Bernd Schubert Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8dde5ab239cc..aaf313107c6c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -289,7 +289,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent while (len--) printk("%c", *name++); ext4fs_dirhash(de->name, de->name_len, &h); printk(":%x.%u ", h.hash, - ((char *) de - base)); + (unsigned) ((char *) de - base)); } space += EXT4_DIR_REC_LEN(de->name_len); names++; @@ -1013,7 +1013,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q *err = -ENOENT; errout: - dxtrace(printk(KERN_DEBUG "%s not found\n", name)); + dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); dx_release (frames); return NULL; } From d7a1fee135771e6e5185642bdc17df19bbdbcc48 Mon Sep 17 00:00:00 2001 From: Dan Ehrenberg Date: Sun, 17 Jul 2011 21:11:30 -0400 Subject: [PATCH 057/151] ext4: make the preallocation size be a multiple of stripe size Previously, if a stripe width was provided, then it would be used as the preallocation granularity, with no sanity checking and no way to override this. Now, mb_prealloc_size defaults to the smallest multiple of stripe size that is greater than or equal to the old default mb_prealloc_size, and this can be overridden with the sysfs interface. Signed-off-by: Dan Ehrenberg Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b97a2d2f0fdf..037f680b76f9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -128,12 +128,13 @@ * we are doing a group prealloc we try to normalize the request to * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is * 512 blocks. This can be tuned via - * /sys/fs/ext4//mb_group_prealloc. The value is represented in * terms of number of blocks. 
If we have mounted the file system with -O * stripe= option the group prealloc request is normalized to the - * stripe value (sbi->s_stripe) + * the smallest multiple of the stripe value (sbi->s_stripe) which is + * greater than the default mb_group_prealloc. * - * The regular allocator(using the buddy cache) supports few tunables. + * The regular allocator (using the buddy cache) supports a few tunables. * * /sys/fs/ext4//mb_min_to_scan * /sys/fs/ext4//mb_max_to_scan @@ -2474,6 +2475,18 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; + /* + * If there is a s_stripe > 1, then we set the s_mb_group_prealloc + * to the lowest multiple of s_stripe which is bigger than + * the s_mb_group_prealloc as determined above. We want + * the preallocation size to be an exact multiple of the + * RAID stripe size so that preallocations don't fragment + * the stripes. + */ + if (sbi->s_stripe > 1) { + sbi->s_mb_group_prealloc = roundup( + sbi->s_mb_group_prealloc, sbi->s_stripe); + } sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); if (sbi->s_locality_groups == NULL) { @@ -2841,8 +2854,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, /* * here we normalize request for locality group - * Group request are normalized to s_strip size if we set the same via mount - * option. If not we set it to s_mb_group_prealloc which can be configured via + * Group request are normalized to s_mb_group_prealloc, which goes to + * s_strip if we set the same via mount option. + * s_mb_group_prealloc can be configured via * /sys/fs/ext4//mb_group_prealloc * * XXX: should we try to preallocate more than the group has now? 
@@ -2853,10 +2867,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) struct ext4_locality_group *lg = ac->ac_lg; BUG_ON(lg == NULL); - if (EXT4_SB(sb)->s_stripe) - ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; - else - ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; + ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; mb_debug(1, "#%u: goal %u blocks for locality group\n", current->pid, ac->ac_g_ex.fe_len); } From 3eb08658431abd65c0fe6855d1860859c2d416f7 Mon Sep 17 00:00:00 2001 From: Dan Ehrenberg Date: Sun, 17 Jul 2011 21:18:51 -0400 Subject: [PATCH 058/151] ext4: ignore a stripe width of 1 If the stripe width was set to 1, then this patch will ignore that stripe width and ext4 will act as if the stripe width were 0 with respect to optimizing allocations. Signed-off-by: Dan Ehrenberg Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7910e61809e7..143d763729b4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2384,17 +2384,25 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); unsigned long stripe_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width); + int ret; if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) - return sbi->s_stripe; - - if (stripe_width <= sbi->s_blocks_per_group) - return stripe_width; + ret = sbi->s_stripe; + else if (stripe_width <= sbi->s_blocks_per_group) + ret = stripe_width; + else if (stride <= sbi->s_blocks_per_group) + ret = stride; + else + ret = 0; - if (stride <= sbi->s_blocks_per_group) - return stride; + /* + * If the stripe width is 1, this makes no sense and + * we set it to 0 to turn off stripe handling code. 
+ */ + if (ret <= 1) + ret = 0; - return 0; + return ret; } /* sysfs supprt */ From f7d0d3797fac6cad24ad9f86dd9baf65c586b434 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Sun, 17 Jul 2011 23:17:02 -0400 Subject: [PATCH 059/151] ext4: punch hole optimizations: skip un-needed extent lookup This patch optimizes the punch hole operation by skipping the tree walking code that is used by truncate. Since punch hole is done through map blocks, the path to the extent is already known in this function, so we do not need to look it up again. Signed-off-by: Allison Henderson Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f1c538e5055c..06b30b61205f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3461,8 +3461,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext4_ext_mark_uninitialized(ex); - err = ext4_ext_remove_space(inode, map->m_lblk, - map->m_lblk + punched_out); + ext4_ext_invalidate_cache(inode); + + err = ext4_ext_rm_leaf(handle, inode, path, + map->m_lblk, map->m_lblk + punched_out); + + if (!err && path->p_hdr->eh_entries == 0) { + /* + * Punch hole freed all of this sub tree, + * so we need to correct eh_depth + */ + err = ext4_ext_get_access(handle, inode, path); + if (err == 0) { + ext_inode_hdr(inode)->eh_depth = 0; + ext_inode_hdr(inode)->eh_max = + cpu_to_le16(ext4_ext_space_root( + inode, 0)); + + err = ext4_ext_dirty( + handle, inode, path); + } + } goto out2; } From c6a0371cbefade85376bbc326d18451860632dce Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Sun, 17 Jul 2011 23:21:03 -0400 Subject: [PATCH 060/151] ext4: remove unneeded parameter to ext4_ext_remove_space() This patch removes the extra parameter in ext4_ext_remove_space() which is no longer needed. 
Signed-off-by: Allison Henderson Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 06b30b61205f..3d8c5f50ba33 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2500,8 +2500,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, - ext4_lblk_t end) +static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); @@ -2541,7 +2540,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, if (i == depth) { /* this is leaf block */ err = ext4_ext_rm_leaf(handle, inode, path, - start, end); + start, EXT_MAX_BLOCKS - 1); /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); path[i].p_bh = NULL; @@ -3683,7 +3682,7 @@ void ext4_ext_truncate(struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); - err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + err = ext4_ext_remove_space(inode, last_block); /* In a multi-transaction truncate, we only make the final * transaction synchronous. From 015861badd0db43d025bbb538f8fc62dfaf3f18d Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sun, 17 Jul 2011 23:27:43 -0400 Subject: [PATCH 061/151] ext4: avoid wasted extent cache lookup if !PUNCH_OUT_EXT This patch avoids an extraneous lookup of the extent cache in ext4_ext_map_blocks() when the flag EXT4_GET_BLOCKS_PUNCH_OUT_EXT is absent. The existing logic was performing the lookup but not making use of the result. The patch simply reverses the order of evaluation in the condition. Since ext4_ext_in_cache() does not initialize newex on misses, bypassing its invocation does not introduce any new issue in this regard. 
Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" Reviewed-by: Lukas Czerner Reviewed-by: Eric Gouriou --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3d8c5f50ba33..b8acfab00224 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3320,8 +3320,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* check in cache */ - if (ext4_ext_in_cache(inode, map->m_lblk, &newex) && - ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) { + if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && + ext4_ext_in_cache(inode, map->m_lblk, &newex)) { if (!newex.ee_start_lo && !newex.ee_start_hi) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* From d46203159ed376fdbe2b05aa57e58207bf27a8f9 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sun, 17 Jul 2011 23:43:42 -0400 Subject: [PATCH 062/151] ext4: avoid eh_entries overflow before insert extent_idx If eh_entries is equal to (or greater than) eh_max, the operation of inserting a new extent_idx will make the number of entries overflow. So check eh_entries before inserting the new extent_idx. Although there is no bug case according to the code (function ext4_ext_insert_index is called by ext4_ext_split and ext4_ext_split is called only if the index block has free space), the right logic should be "lookup the capacity before insertion". 
Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b8acfab00224..9bec432e2d26 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -741,6 +741,16 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, logical, le32_to_cpu(curp->p_idx->ei_block)); return -EIO; } + + if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) + >= le16_to_cpu(curp->p_hdr->eh_max))) { + EXT4_ERROR_INODE(inode, + "eh_entries %d >= eh_max %d!", + le16_to_cpu(curp->p_hdr->eh_entries), + le16_to_cpu(curp->p_hdr->eh_max)); + return -EIO; + } + len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; if (logical > le32_to_cpu(curp->p_idx->ei_block)) { /* insert after */ @@ -770,14 +780,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, ext4_idx_store_pblock(ix, ptr); le16_add_cpu(&curp->p_hdr->eh_entries, 1); - if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) - > le16_to_cpu(curp->p_hdr->eh_max))) { - EXT4_ERROR_INODE(inode, - "eh_entries %d > eh_max %d!", - le16_to_cpu(curp->p_hdr->eh_entries), - le16_to_cpu(curp->p_hdr->eh_max)); - return -EIO; - } if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); return -EIO; From af8b244f733383656c8b4c0c6e94e210e7bbc596 Mon Sep 17 00:00:00 2001 From: Ambresh K Date: Sat, 9 Jul 2011 19:02:21 -0700 Subject: [PATCH 063/151] regulator: TWL: Remove entry of RES_ID for 6030 macros RES_ID is only used in 4030, to send PBM singular message to control the state of dedicated resources. In 6030, we don't have concept of PBM, hence removing the definition of RES_ID (num) from macros. 
Signed-off-by: Ambresh K Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/twl-regulator.c | 64 +++++++++++++++---------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 503c2bc64c84..ee8747f4fa08 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -835,8 +835,8 @@ static struct regulator_ops twlsmps_ops = { remap_conf) \ TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \ remap_conf, TWL4030, twl4030fixed_ops) -#define TWL6030_FIXED_LDO(label, offset, mVolts, num, turnon_delay) \ - TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \ +#define TWL6030_FIXED_LDO(label, offset, mVolts, turnon_delay) \ + TWL_FIXED_LDO(label, offset, mVolts, 0x0, turnon_delay, \ 0x0, TWL6030, twl6030fixed_ops) #define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) { \ @@ -856,9 +856,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \ +#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \ .base = offset, \ - .id = num, \ .min_mV = min_mVolts, \ .max_mV = max_mVolts, \ .desc = { \ @@ -871,9 +870,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \ +#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \ .base = offset, \ - .id = num, \ .min_mV = min_mVolts, \ .max_mV = max_mVolts, \ .desc = { \ @@ -903,9 +901,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6030_FIXED_RESOURCE(label, offset, num, turnon_delay) { \ +#define TWL6030_FIXED_RESOURCE(label, offset, turnon_delay) { \ .base = offset, \ - .id = num, \ .delay = turnon_delay, \ .desc = { \ .name = #label, \ @@ -916,9 +913,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6025_ADJUSTABLE_SMPS(label, offset, 
num) { \ +#define TWL6025_ADJUSTABLE_SMPS(label, offset) { \ .base = offset, \ - .id = num, \ .min_mV = 600, \ .max_mV = 2100, \ .desc = { \ @@ -961,32 +957,32 @@ static struct twlreg_info twl_regs[] = { /* 6030 REG with base as PMC Slave Misc : 0x0030 */ /* Turnon-delay and remap configuration values for 6030 are not verified since the specification is not public */ - TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300, 1), - TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300, 2), - TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300, 3), - TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300, 4), - TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300, 5), - TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300, 7), - TWL6030_FIXED_LDO(VANA, 0x50, 2100, 15, 0), - TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 16, 0), - TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 17, 0), - TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 18, 0), - TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 48, 0), + TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300), + TWL6030_FIXED_LDO(VANA, 0x50, 2100, 0), + TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 0), + TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 0), + TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 0), + TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 0), /* 6025 are renamed compared to 6030 versions */ - TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 1000, 3300, 1), - TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300, 2), - TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300, 3), - TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300, 4), - TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300, 5), - TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300, 7), - TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300, 16), - TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300, 17), - TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300, 
18), - - TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34, 1), - TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10, 2), - TWL6025_ADJUSTABLE_SMPS(VIO, 0x16, 3), + TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300), + + TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34), + TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10), + TWL6025_ADJUSTABLE_SMPS(VIO, 0x16), }; static u8 twl_get_smps_offset(void) From a3ee13ee77feea001597415f3a231a8bd4d3c6bf Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 10 Jul 2011 18:52:07 +0800 Subject: [PATCH 064/151] regulator: tps65910: Fix a memory leak in tps65910_probe error path Fix a memory leak if chip id is not matched. Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/tps65910-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index 55dd4e6650db..b07a66471fb7 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -903,6 +903,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev) info = tps65911_regs; default: pr_err("Invalid tps chip version\n"); + kfree(pmic); return -ENODEV; } From d04156bca629740a661fd0738cd69ba1f08b2b20 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 10 Jul 2011 21:44:09 +0800 Subject: [PATCH 065/151] regulator: tps65910: Add missing breaks in switch/case Also add a default case in tps65910_list_voltage_dcdc to silence 'volt' may be used uninitialized in this function warning. 
Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/tps65910-regulator.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index b07a66471fb7..8e0edab74786 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -759,8 +759,13 @@ static int tps65910_list_voltage_dcdc(struct regulator_dev *dev, mult = (selector / VDD1_2_NUM_VOLTS) + 1; volt = VDD1_2_MIN_VOLT + (selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET; + break; case TPS65911_REG_VDDCTRL: volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET); + break; + default: + BUG(); + return -EINVAL; } return volt * 100 * mult; @@ -898,9 +903,11 @@ static __devinit int tps65910_probe(struct platform_device *pdev) case TPS65910: pmic->get_ctrl_reg = &tps65910_get_ctrl_register; info = tps65910_regs; + break; case TPS65911: pmic->get_ctrl_reg = &tps65911_get_ctrl_register; info = tps65911_regs; + break; default: pr_err("Invalid tps chip version\n"); kfree(pmic); From 39aa9b6e3cb1b2a564d3422eedb7f725179162d3 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 Jul 2011 09:57:43 +0800 Subject: [PATCH 066/151] regulator: tps65910: Fix array access out of bounds bug For tps65910, the number of regulators is 13. ( ARRAY_SIZE(tps65910_regs) is 13) For tps65911, the number of regulators is 12. ( ARRAY_SIZE(tps65911_regs) is 12) If we are using this driver for tps65911, we hit an array access out of bounds bug in tps65910_probe() because the current implementation always assumes the number of regulators is 13 and thus it will access tps65911_regs[12]. Fix it by setting the correct num_regulators for both chips in tps65910_probe(), and allocating the necessary memory accordingly. 
Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/tps65910-regulator.c | 55 ++++++++++++++++++++------ 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index 8e0edab74786..66d2d60b436a 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -49,7 +49,6 @@ #define TPS65911_REG_LDO7 11 #define TPS65911_REG_LDO8 12 -#define TPS65910_NUM_REGULATOR 13 #define TPS65910_SUPPLY_STATE_ENABLED 0x1 /* supported VIO voltages in milivolts */ @@ -264,11 +263,12 @@ static struct tps_info tps65911_regs[] = { }; struct tps65910_reg { - struct regulator_desc desc[TPS65910_NUM_REGULATOR]; + struct regulator_desc *desc; struct tps65910 *mfd; - struct regulator_dev *rdev[TPS65910_NUM_REGULATOR]; - struct tps_info *info[TPS65910_NUM_REGULATOR]; + struct regulator_dev **rdev; + struct tps_info **info; struct mutex mutex; + int num_regulators; int mode; int (*get_ctrl_reg)(int); }; @@ -902,10 +902,12 @@ static __devinit int tps65910_probe(struct platform_device *pdev) switch(tps65910_chip_id(tps65910)) { case TPS65910: pmic->get_ctrl_reg = &tps65910_get_ctrl_register; + pmic->num_regulators = ARRAY_SIZE(tps65910_regs); info = tps65910_regs; break; case TPS65911: pmic->get_ctrl_reg = &tps65911_get_ctrl_register; + pmic->num_regulators = ARRAY_SIZE(tps65911_regs); info = tps65911_regs; break; default: @@ -914,7 +916,28 @@ static __devinit int tps65910_probe(struct platform_device *pdev) return -ENODEV; } - for (i = 0; i < TPS65910_NUM_REGULATOR; i++, info++, reg_data++) { + pmic->desc = kcalloc(pmic->num_regulators, + sizeof(struct regulator_desc), GFP_KERNEL); + if (!pmic->desc) { + err = -ENOMEM; + goto err_free_pmic; + } + + pmic->info = kcalloc(pmic->num_regulators, + sizeof(struct tps_info *), GFP_KERNEL); + if (!pmic->info) { + err = -ENOMEM; + goto err_free_desc; + } + + pmic->rdev = 
kcalloc(pmic->num_regulators, + sizeof(struct regulator_dev *), GFP_KERNEL); + if (!pmic->rdev) { + err = -ENOMEM; + goto err_free_info; + } + + for (i = 0; i < pmic->num_regulators; i++, info++, reg_data++) { /* Register the regulators */ pmic->info[i] = info; @@ -946,7 +969,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev) "failed to register %s regulator\n", pdev->name); err = PTR_ERR(rdev); - goto err; + goto err_unregister_regulator; } /* Save regulator for cleanup */ @@ -954,23 +977,31 @@ static __devinit int tps65910_probe(struct platform_device *pdev) } return 0; -err: +err_unregister_regulator: while (--i >= 0) regulator_unregister(pmic->rdev[i]); - + kfree(pmic->rdev); +err_free_info: + kfree(pmic->info); +err_free_desc: + kfree(pmic->desc); +err_free_pmic: kfree(pmic); return err; } static int __devexit tps65910_remove(struct platform_device *pdev) { - struct tps65910_reg *tps65910_reg = platform_get_drvdata(pdev); + struct tps65910_reg *pmic = platform_get_drvdata(pdev); int i; - for (i = 0; i < TPS65910_NUM_REGULATOR; i++) - regulator_unregister(tps65910_reg->rdev[i]); + for (i = 0; i < pmic->num_regulators; i++) + regulator_unregister(pmic->rdev[i]); - kfree(tps65910_reg); + kfree(pmic->rdev); + kfree(pmic->info); + kfree(pmic->desc); + kfree(pmic); return 0; } From 89f425ed5bf3d4fd97e840296dccd75b8e0fe4c9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 12 Jul 2011 11:20:37 +0900 Subject: [PATCH 067/151] regulator: Make core more chatty about some errors Prevent some head scratching by making the core log about some rare but possible errors with invalid voltage ranges and modes being set. 
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index f59821f10fdc..3700d0953d73 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -151,8 +151,11 @@ static int regulator_check_voltage(struct regulator_dev *rdev, if (*min_uV < rdev->constraints->min_uV) *min_uV = rdev->constraints->min_uV; - if (*min_uV > *max_uV) + if (*min_uV > *max_uV) { + rdev_err(rdev, "unsupportable voltage range: %d-%duV\n", + min_uV, max_uV); return -EINVAL; + } return 0; } @@ -205,8 +208,11 @@ static int regulator_check_current_limit(struct regulator_dev *rdev, if (*min_uA < rdev->constraints->min_uA) *min_uA = rdev->constraints->min_uA; - if (*min_uA > *max_uA) + if (*min_uA > *max_uA) { + rdev_err(rdev, "unsupportable current range: %d-%duA\n", + min_uA, max_uA); return -EINVAL; + } return 0; } @@ -221,6 +227,7 @@ static int regulator_mode_constrain(struct regulator_dev *rdev, int *mode) case REGULATOR_MODE_STANDBY: break; default: + rdev_err(rdev, "invalid mode %x specified\n", *mode); return -EINVAL; } From 1a6958e79f9e191c89fe0c13f7452b0bd8097050 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 15 Jul 2011 10:50:43 +0800 Subject: [PATCH 068/151] regulator: Fix memory leak in set_machine_constraints() error paths Properly kfree rdev->constraints in all set_machine_constraints() error paths. Also properly kfree rdev->constraints in regulator_register() error paths. 
Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 3700d0953d73..a01954456752 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -794,7 +794,6 @@ static int machine_constraints_voltage(struct regulator_dev *rdev, if (ret < 0) { rdev_err(rdev, "failed to apply %duV constraint\n", rdev->constraints->min_uV); - rdev->constraints = NULL; return ret; } } @@ -897,7 +896,6 @@ static int set_machine_constraints(struct regulator_dev *rdev, ret = suspend_prepare(rdev, rdev->constraints->initial_state); if (ret < 0) { rdev_err(rdev, "failed to set suspend state\n"); - rdev->constraints = NULL; goto out; } } @@ -924,13 +922,15 @@ static int set_machine_constraints(struct regulator_dev *rdev, ret = ops->enable(rdev); if (ret < 0) { rdev_err(rdev, "failed to enable\n"); - rdev->constraints = NULL; goto out; } } print_constraints(rdev); + return 0; out: + kfree(rdev->constraints); + rdev->constraints = NULL; return ret; } @@ -2701,6 +2701,7 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, unset_regulator_supplies(rdev); scrub: + kfree(rdev->constraints); device_unregister(&rdev->dev); /* device core frees rdev */ rdev = ERR_PTR(ret); From 54abd335fda86d305845f9e62b4bc0997386eb66 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Jul 2011 15:07:37 +0100 Subject: [PATCH 069/151] regulator: Fix argument format type errors in error prints We need to dereference the pointers to print their values. 
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a01954456752..d8e6a429e8ba 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -153,7 +153,7 @@ static int regulator_check_voltage(struct regulator_dev *rdev, if (*min_uV > *max_uV) { rdev_err(rdev, "unsupportable voltage range: %d-%duV\n", - min_uV, max_uV); + *min_uV, *max_uV); return -EINVAL; } @@ -210,7 +210,7 @@ static int regulator_check_current_limit(struct regulator_dev *rdev, if (*min_uA > *max_uA) { rdev_err(rdev, "unsupportable current range: %d-%duA\n", - min_uA, max_uA); + *min_uA, *max_uA); return -EINVAL; } From 638c1fd3033c76778e6d9975ad8a4a9cdd5b96d9 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:52 -0400 Subject: [PATCH 070/151] pstore: Extend API for more flexibility in new backends Some pstore implementations may not have a static context, so extend the API to pass the pstore_info struct to all calls and allow for a context pointer. 
Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 18 +++++++++++++----- fs/pstore/inode.c | 10 +++++----- fs/pstore/internal.h | 2 +- fs/pstore/platform.c | 13 +++++++------ include/linux/pstore.h | 8 +++++--- 5 files changed, 31 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index e6cef8e1b534..de3ae92adaa5 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -932,8 +932,10 @@ static int erst_check_table(struct acpi_table_erst *erst_tab) static int erst_open_pstore(struct pstore_info *psi); static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, - struct timespec *time); -static u64 erst_writer(enum pstore_type_id type, size_t size); + struct timespec *time, struct pstore_info *psi); +static u64 erst_writer(enum pstore_type_id type, size_t size, + struct pstore_info *psi); +static int erst_clearer(u64 id, struct pstore_info *psi); static struct pstore_info erst_info = { .owner = THIS_MODULE, @@ -942,7 +944,7 @@ static struct pstore_info erst_info = { .close = erst_close_pstore, .read = erst_reader, .write = erst_writer, - .erase = erst_clear + .erase = erst_clearer }; #define CPER_CREATOR_PSTORE \ @@ -983,7 +985,7 @@ static int erst_close_pstore(struct pstore_info *psi) } static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, - struct timespec *time) + struct timespec *time, struct pstore_info *psi) { int rc; ssize_t len = 0; @@ -1037,7 +1039,8 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, return (rc < 0) ? 
rc : (len - sizeof(*rcd)); } -static u64 erst_writer(enum pstore_type_id type, size_t size) +static u64 erst_writer(enum pstore_type_id type, size_t size, + struct pstore_info *psi) { struct cper_pstore_record *rcd = (struct cper_pstore_record *) (erst_info.buf - sizeof(*rcd)); @@ -1080,6 +1083,11 @@ static u64 erst_writer(enum pstore_type_id type, size_t size) return rcd->hdr.record_id; } +static int erst_clearer(u64 id, struct pstore_info *psi) +{ + return erst_clear(id); +} + static int __init erst_init(void) { int rc = 0; diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 977ed2723845..b19884a1ba77 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -40,7 +40,7 @@ struct pstore_private { u64 id; - int (*erase)(u64); + struct pstore_info *psi; ssize_t size; char data[]; }; @@ -73,7 +73,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = dentry->d_inode->i_private; - p->erase(p->id); + p->psi->erase(p->id, p->psi); return simple_unlink(dir, dentry); } @@ -175,8 +175,8 @@ int pstore_is_mounted(void) * Set the mtime & ctime to the date that this record was originally stored. 
*/ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, - char *data, size_t size, - struct timespec time, int (*erase)(u64)) + char *data, size_t size, struct timespec time, + struct pstore_info *psi) { struct dentry *root = pstore_sb->s_root; struct dentry *dentry; @@ -193,7 +193,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, if (!private) goto fail_alloc; private->id = id; - private->erase = erase; + private->psi = psi; switch (type) { case PSTORE_TYPE_DMESG: diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 8c9f23eb1645..611c1b3c46fa 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -2,5 +2,5 @@ extern void pstore_set_kmsg_bytes(int); extern void pstore_get_records(void); extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, char *data, size_t size, - struct timespec time, int (*erase)(u64)); + struct timespec time, struct pstore_info *psi); extern int pstore_is_mounted(void); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index f2c3ff20ea68..221c04e5e333 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -94,11 +94,12 @@ static void pstore_dump(struct kmsg_dumper *dumper, memcpy(dst, s1 + s1_start, l1_cpy); memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); - id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); + id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy, + psinfo); if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, hsize + l1_cpy + l2_cpy, - CURRENT_TIME, psinfo->erase); + CURRENT_TIME, psinfo); l1 -= l1_cpy; l2 -= l2_cpy; total += l1_cpy + l2_cpy; @@ -166,9 +167,9 @@ void pstore_get_records(void) if (rc) goto out; - while ((size = psi->read(&id, &type, &time)) > 0) { + while ((size = psi->read(&id, &type, &time, psi)) > 0) { if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, - time, psi->erase)) + time, psi)) failed++; } psi->close(psi); @@ -196,10 +197,10 
@@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) mutex_lock(&psinfo->buf_mutex); memcpy(psinfo->buf, buf, size); - id = psinfo->write(type, size); + id = psinfo->write(type, size, psinfo); if (pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, - size, CURRENT_TIME, psinfo->erase); + size, CURRENT_TIME, psinfo); mutex_unlock(&psinfo->buf_mutex); return 0; diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 2455ef2683f0..b2f1d97f6909 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -38,9 +38,11 @@ struct pstore_info { int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, - struct timespec *time); - u64 (*write)(enum pstore_type_id type, size_t size); - int (*erase)(u64 id); + struct timespec *time, struct pstore_info *psi); + u64 (*write)(enum pstore_type_id type, size_t size, + struct pstore_info *psi); + int (*erase)(u64 id, struct pstore_info *psi); + void *data; }; #ifdef CONFIG_PSTORE From 56280682ceeef74b692b3e21d1872049eea7c887 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:53 -0400 Subject: [PATCH 071/151] pstore: Add extra context for writes and erases EFI only provides small amounts of individual storage, and conventionally puts metadata in the storage variable name. Rather than add a metadata header to the (already limited) variable storage, it's easier for us to modify pstore to pass all the information we need to construct a unique variable name to the appropriate functions. 
Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 10 ++++++---- fs/pstore/inode.c | 6 ++++-- fs/pstore/platform.c | 9 +++++---- include/linux/pstore.h | 5 +++-- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index de3ae92adaa5..d842ac4f8cfe 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -933,9 +933,10 @@ static int erst_open_pstore(struct pstore_info *psi); static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); -static u64 erst_writer(enum pstore_type_id type, size_t size, +static u64 erst_writer(enum pstore_type_id type, int part, size_t size, struct pstore_info *psi); -static int erst_clearer(u64 id, struct pstore_info *psi); +static int erst_clearer(enum pstore_type_id type, u64 id, + struct pstore_info *psi); static struct pstore_info erst_info = { .owner = THIS_MODULE, @@ -1039,7 +1040,7 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, return (rc < 0) ? 
rc : (len - sizeof(*rcd)); } -static u64 erst_writer(enum pstore_type_id type, size_t size, +static u64 erst_writer(enum pstore_type_id type, int part, size_t size, struct pstore_info *psi) { struct cper_pstore_record *rcd = (struct cper_pstore_record *) @@ -1083,7 +1084,8 @@ static u64 erst_writer(enum pstore_type_id type, size_t size, return rcd->hdr.record_id; } -static int erst_clearer(u64 id, struct pstore_info *psi) +static int erst_clearer(enum pstore_type_id type, u64 id, + struct pstore_info *psi) { return erst_clear(id); } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index b19884a1ba77..893b961dcfd8 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -39,8 +39,9 @@ #define PSTORE_NAMELEN 64 struct pstore_private { - u64 id; struct pstore_info *psi; + enum pstore_type_id type; + u64 id; ssize_t size; char data[]; }; @@ -73,7 +74,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = dentry->d_inode->i_private; - p->psi->erase(p->id, p->psi); + p->psi->erase(p->type, p->id, p->psi); return simple_unlink(dir, dentry); } @@ -192,6 +193,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, private = kmalloc(sizeof *private + size, GFP_KERNEL); if (!private) goto fail_alloc; + private->type = type; private->id = id; private->psi = psi; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 221c04e5e333..163bb40511e7 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -78,7 +78,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, oopscount++; while (total < kmsg_bytes) { dst = psinfo->buf; - hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++); + hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part); size = psinfo->bufsize - hsize; dst += hsize; @@ -94,8 +94,8 @@ static void pstore_dump(struct kmsg_dumper *dumper, memcpy(dst, s1 + s1_start, l1_cpy); memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); - id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy 
+ l2_cpy, - psinfo); + id = psinfo->write(PSTORE_TYPE_DMESG, part, + hsize + l1_cpy + l2_cpy, psinfo); if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, hsize + l1_cpy + l2_cpy, @@ -103,6 +103,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, l1 -= l1_cpy; l2 -= l2_cpy; total += l1_cpy + l2_cpy; + part++; } mutex_unlock(&psinfo->buf_mutex); } @@ -197,7 +198,7 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) mutex_lock(&psinfo->buf_mutex); memcpy(psinfo->buf, buf, size); - id = psinfo->write(type, size, psinfo); + id = psinfo->write(type, 0, size, psinfo); if (pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, size, CURRENT_TIME, psinfo); diff --git a/include/linux/pstore.h b/include/linux/pstore.h index b2f1d97f6909..12be8f193d09 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -39,9 +39,10 @@ struct pstore_info { int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); - u64 (*write)(enum pstore_type_id type, size_t size, + u64 (*write)(enum pstore_type_id type, int part, + size_t size, struct pstore_info *psi); + int (*erase)(enum pstore_type_id type, u64 id, struct pstore_info *psi); - int (*erase)(u64 id, struct pstore_info *psi); void *data; }; From b94fdd077eef5e6cab56836bf62695b497946716 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:54 -0400 Subject: [PATCH 072/151] pstore: Make "part" unsigned We'll never have a negative part, so just make this an unsigned int. 
Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 8 ++++---- fs/pstore/platform.c | 3 ++- include/linux/pstore.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index d842ac4f8cfe..6053f4780df9 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -933,8 +933,8 @@ static int erst_open_pstore(struct pstore_info *psi); static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); -static u64 erst_writer(enum pstore_type_id type, int part, size_t size, - struct pstore_info *psi); +static u64 erst_writer(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi); static int erst_clearer(enum pstore_type_id type, u64 id, struct pstore_info *psi); @@ -1040,8 +1040,8 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, return (rc < 0) ? 
rc : (len - sizeof(*rcd)); } -static u64 erst_writer(enum pstore_type_id type, int part, size_t size, - struct pstore_info *psi) +static u64 erst_writer(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi) { struct cper_pstore_record *rcd = (struct cper_pstore_record *) (erst_info.buf - sizeof(*rcd)); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 163bb40511e7..49ff1de2178a 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -67,7 +67,8 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned long size, total = 0; char *dst, *why; u64 id; - int hsize, part = 1; + int hsize; + unsigned int part = 1; if (reason < ARRAY_SIZE(reason_str)) why = reason_str[reason]; diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 12be8f193d09..cc03bbf5c4b8 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -39,7 +39,7 @@ struct pstore_info { int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); - u64 (*write)(enum pstore_type_id type, int part, + u64 (*write)(enum pstore_type_id type, unsigned int part, size_t size, struct pstore_info *psi); int (*erase)(enum pstore_type_id type, u64 id, struct pstore_info *psi); From dee28e72b619b48ec80a9e5509db458dbe66f71f Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:55 -0400 Subject: [PATCH 073/151] pstore: Allow the user to explicitly choose a backend pstore only allows one backend to be registered at present, but the system may provide several. Add a parameter to allow the user to choose which backend will be used rather than just relying on load order. 
Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- Documentation/ABI/testing/pstore | 6 ++++++ Documentation/kernel-parameters.txt | 2 ++ fs/pstore/platform.c | 11 +++++++++++ 3 files changed, 19 insertions(+) diff --git a/Documentation/ABI/testing/pstore b/Documentation/ABI/testing/pstore index ddf451ee2a08..ff1df4e3b059 100644 --- a/Documentation/ABI/testing/pstore +++ b/Documentation/ABI/testing/pstore @@ -39,3 +39,9 @@ Description: Generic interface to platform dependent persistent storage. multiple) files based on the record size of the underlying persistent storage until at least this amount is reached. Default is 10 Kbytes. + + Pstore only supports one backend at a time. If multiple + backends are available, the preferred backend may be + set by passing the pstore.backend= argument to the kernel at + boot time. + diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aa47be71df4c..8789d0c9291a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2151,6 +2151,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. [HW,MOUSE] Controls Logitech smartscroll autorepeat. 0 = disabled, 1 = enabled (default). + pstore.backend= Specify the name of the pstore backend to use + pt. [PARIDE] See Documentation/blockdev/paride.txt. 
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 49ff1de2178a..c5300ec31696 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -37,6 +37,8 @@ static DEFINE_SPINLOCK(pstore_lock); static struct pstore_info *psinfo; +static char *backend; + /* How much of the console log to snapshot */ static unsigned long kmsg_bytes = 10240; @@ -131,6 +133,12 @@ int pstore_register(struct pstore_info *psi) spin_unlock(&pstore_lock); return -EBUSY; } + + if (backend && strcmp(backend, psi->name)) { + spin_unlock(&pstore_lock); + return -EINVAL; + } + psinfo = psi; spin_unlock(&pstore_lock); @@ -208,3 +216,6 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) return 0; } EXPORT_SYMBOL_GPL(pstore_write); + +module_param(backend, charp, 0444); +MODULE_PARM_DESC(backend, "Pstore backend to use"); From 5ee9c198a4208d7760275d48e4c4f6c89dcd2ef0 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:56 -0400 Subject: [PATCH 074/151] efi: Add support for using efivars as a pstore backend EFI provides an area of nonvolatile storage managed by the firmware. We can use this as a pstore backend to maintain copies of oopses, aiding diagnosis. 
Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 191 ++++++++++++++++++++++++++++++++++++- include/linux/efi.h | 6 ++ 2 files changed, 195 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 5f29aafd4462..2bbb22670d2d 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -78,6 +78,7 @@ #include #include #include +#include #include @@ -89,6 +90,8 @@ MODULE_DESCRIPTION("sysfs interface to EFI Variables"); MODULE_LICENSE("GPL"); MODULE_VERSION(EFIVARS_VERSION); +#define DUMP_NAME_LEN 52 + /* * The maximum size of VariableName + Data = 1024 * Therefore, it's reasonable to save that much @@ -161,18 +164,28 @@ utf8_strsize(efi_char16_t *data, unsigned long maxlength) } static efi_status_t -get_var_data(struct efivars *efivars, struct efi_variable *var) +get_var_data_locked(struct efivars *efivars, struct efi_variable *var) { efi_status_t status; - spin_lock(&efivars->lock); var->DataSize = 1024; status = efivars->ops->get_variable(var->VariableName, &var->VendorGuid, &var->Attributes, &var->DataSize, var->Data); + return status; +} + +static efi_status_t +get_var_data(struct efivars *efivars, struct efi_variable *var) +{ + efi_status_t status; + + spin_lock(&efivars->lock); + status = get_var_data_locked(efivars, var); spin_unlock(&efivars->lock); + if (status != EFI_SUCCESS) { printk(KERN_WARNING "efivars: get_variable() failed 0x%lx!\n", status); @@ -387,12 +400,176 @@ static struct kobj_type efivar_ktype = { .default_attrs = def_attrs, }; +static struct pstore_info efi_pstore_info; + static inline void efivar_unregister(struct efivar_entry *var) { kobject_put(&var->kobj); } +#ifdef CONFIG_PSTORE + +static int efi_pstore_open(struct pstore_info *psi) +{ + struct efivars *efivars = psi->data; + + spin_lock(&efivars->lock); + efivars->walk_entry = list_first_entry(&efivars->list, + struct efivar_entry, list); + return 0; +} + +static int 
efi_pstore_close(struct pstore_info *psi) +{ + struct efivars *efivars = psi->data; + + spin_unlock(&efivars->lock); + return 0; +} + +static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, + struct timespec *timespec, struct pstore_info *psi) +{ + efi_guid_t vendor = LINUX_EFI_CRASH_GUID; + struct efivars *efivars = psi->data; + char name[DUMP_NAME_LEN]; + int i; + unsigned int part, size; + unsigned long time; + + while (&efivars->walk_entry->list != &efivars->list) { + if (!efi_guidcmp(efivars->walk_entry->var.VendorGuid, + vendor)) { + for (i = 0; i < DUMP_NAME_LEN; i++) { + name[i] = efivars->walk_entry->var.VariableName[i]; + } + if (sscanf(name, "dump-type%u-%u-%lu", type, &part, &time) == 3) { + *id = part; + timespec->tv_sec = time; + timespec->tv_nsec = 0; + get_var_data_locked(efivars, &efivars->walk_entry->var); + size = efivars->walk_entry->var.DataSize; + memcpy(psi->buf, efivars->walk_entry->var.Data, size); + efivars->walk_entry = list_entry(efivars->walk_entry->list.next, + struct efivar_entry, list); + return size; + } + } + efivars->walk_entry = list_entry(efivars->walk_entry->list.next, + struct efivar_entry, list); + } + return 0; +} + +static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi) +{ + char name[DUMP_NAME_LEN]; + char stub_name[DUMP_NAME_LEN]; + efi_char16_t efi_name[DUMP_NAME_LEN]; + efi_guid_t vendor = LINUX_EFI_CRASH_GUID; + struct efivars *efivars = psi->data; + struct efivar_entry *entry, *found = NULL; + int i; + + sprintf(stub_name, "dump-type%u-%u-", type, part); + sprintf(name, "%s%lu", stub_name, get_seconds()); + + spin_lock(&efivars->lock); + + for (i = 0; i < DUMP_NAME_LEN; i++) + efi_name[i] = stub_name[i]; + + /* + * Clean up any entries with the same name + */ + + list_for_each_entry(entry, &efivars->list, list) { + get_var_data_locked(efivars, &entry->var); + + for (i = 0; i < DUMP_NAME_LEN; i++) { + if (efi_name[i] == 0) { + found = entry; + 
efivars->ops->set_variable(entry->var.VariableName, + &entry->var.VendorGuid, + EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + 0, NULL); + break; + } else if (efi_name[i] != entry->var.VariableName[i]) { + break; + } + } + } + + if (found) + list_del(&found->list); + + for (i = 0; i < DUMP_NAME_LEN; i++) + efi_name[i] = name[i]; + + efivars->ops->set_variable(efi_name, &vendor, + EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + size, psi->buf); + + spin_unlock(&efivars->lock); + + if (found) + efivar_unregister(found); + + if (size) + efivar_create_sysfs_entry(efivars, utf8_strsize(efi_name, DUMP_NAME_LEN * 2), + efi_name, &vendor); + + return part; +}; + +static int efi_pstore_erase(enum pstore_type_id type, u64 id, + struct pstore_info *psi) +{ + efi_pstore_write(type, id, 0, psi); + + return 0; +} +#else +static int efi_pstore_open(struct pstore_info *psi) +{ + return 0; +} + +static int efi_pstore_close(struct pstore_info *psi) +{ + return 0; +} + +static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, + struct timespec *time, struct pstore_info *psi) +{ + return -1; +} + +static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size, + struct pstore_info *psi) +{ + return 0; +} + +static int efi_pstore_erase(enum pstore_type_id type, u64 id, + struct pstore_info *psi) +{ + return 0; +} +#endif + +static struct pstore_info efi_pstore_info = { + .owner = THIS_MODULE, + .name = "efi", + .open = efi_pstore_open, + .close = efi_pstore_close, + .read = efi_pstore_read, + .write = efi_pstore_write, + .erase = efi_pstore_erase, +}; static ssize_t efivar_create(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -763,6 +940,16 @@ int register_efivars(struct efivars *efivars, if (error) unregister_efivars(efivars); + efivars->efi_pstore_info = efi_pstore_info; + + efivars->efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL); + if 
(efivars->efi_pstore_info.buf) { + efivars->efi_pstore_info.bufsize = 1024; + efivars->efi_pstore_info.data = efivars; + mutex_init(&efivars->efi_pstore_info.buf_mutex); + pstore_register(&efivars->efi_pstore_info); + } + out: kfree(variable_name); diff --git a/include/linux/efi.h b/include/linux/efi.h index e376270cd26e..c1f5107338c6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -211,6 +212,9 @@ typedef efi_status_t efi_set_virtual_address_map_t (unsigned long memory_map_siz #define UV_SYSTEM_TABLE_GUID \ EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 ) +#define LINUX_EFI_CRASH_GUID \ + EFI_GUID( 0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 ) + typedef struct { efi_guid_t guid; unsigned long table; @@ -426,6 +430,8 @@ struct efivars { struct kset *kset; struct bin_attribute *new_var, *del_var; const struct efivar_operations *ops; + struct efivar_entry *walk_entry; + struct pstore_info efi_pstore_info; }; int register_efivars(struct efivars *efivars, From a2940908391f3cee72e38769b30e829b22742b5b Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Thu, 21 Jul 2011 16:57:57 -0400 Subject: [PATCH 075/151] efivars: String functions Fix the string functions in the efivars driver to be called utf16_* instead of utf8_* as the encoding is utf16, not utf8. As well, rename utf16_strlen to utf16_strnlen as it takes a maxlength argument and the name should be consistent with the standard C function names. utf16_strlen is still provided for convenience in a subsequent patch. 
Signed-off-by: Mike Waychison Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 2bbb22670d2d..4202a3170467 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -144,23 +144,29 @@ efivar_create_sysfs_entry(struct efivars *efivars, /* Return the number of unicode characters in data */ static unsigned long -utf8_strlen(efi_char16_t *data, unsigned long maxlength) +utf16_strnlen(efi_char16_t *s, size_t maxlength) { unsigned long length = 0; - while (*data++ != 0 && length < maxlength) + while (*s++ != 0 && length < maxlength) length++; return length; } +static unsigned long +utf16_strlen(efi_char16_t *s) +{ + return utf16_strnlen(s, ~0UL); +} + /* * Return the number of bytes is the length of this string * Note: this is NOT the same as the number of unicode characters */ static inline unsigned long -utf8_strsize(efi_char16_t *data, unsigned long maxlength) +utf16_strsize(efi_char16_t *data, unsigned long maxlength) { - return utf8_strlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); + return utf16_strnlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); } static efi_status_t @@ -518,7 +524,9 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, efivar_unregister(found); if (size) - efivar_create_sysfs_entry(efivars, utf8_strsize(efi_name, DUMP_NAME_LEN * 2), + efivar_create_sysfs_entry(efivars, + utf16_strsize(efi_name, + DUMP_NAME_LEN * 2), efi_name, &vendor); return part; @@ -591,8 +599,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, * Does this variable already exist? 
*/ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf8_strsize(new_var->VariableName, 1024); + strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); + strsize2 = utf16_strsize(new_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), new_var->VariableName, strsize1) && @@ -624,8 +632,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, /* Create the entry in sysfs. Locking is not required here */ status = efivar_create_sysfs_entry(efivars, - utf8_strsize(new_var->VariableName, - 1024), + utf16_strsize(new_var->VariableName, + 1024), new_var->VariableName, &new_var->VendorGuid); if (status) { @@ -654,8 +662,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, * Does this variable already exist? */ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf8_strsize(del_var->VariableName, 1024); + strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); + strsize2 = utf16_strsize(del_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), del_var->VariableName, strsize1) && From 828aa1f00ec3508a4d813bd60d210de82929ac97 Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Thu, 21 Jul 2011 16:57:58 -0400 Subject: [PATCH 076/151] efivars: introduce utf16_strncmp Introduce utf16_strncmp which is used in the next patch. Semantics should be the same as the strncmp C function. 
Signed-off-by: Mike Waychison Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 4202a3170467..15b9a01b6c68 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -169,6 +169,24 @@ utf16_strsize(efi_char16_t *data, unsigned long maxlength) return utf16_strnlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); } +static inline int +utf16_strncmp(const efi_char16_t *a, const efi_char16_t *b, size_t len) +{ + while (1) { + if (len == 0) + return 0; + if (*a < *b) + return -1; + if (*a > *b) + return 1; + if (*a == 0) /* implies *b == 0 */ + return 0; + a++; + b++; + len--; + } +} + static efi_status_t get_var_data_locked(struct efivars *efivars, struct efi_variable *var) { From c475594d838c5c872e734f693a700df8c01b39d4 Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Thu, 21 Jul 2011 16:57:59 -0400 Subject: [PATCH 077/151] efivars: Use string functions in pstore_write Instead of open-coding the string operations for comparing the prefix of the variable names, use the provided utf16_* string functions. This patch also changes the calls to efi.set_variable to efivars->ops->set_variable so that the right function gets called in the case of gsmi (which doesn't have a valid efi structure). As well, make sure that we only consider variables with the right vendor string. 
Signed-off-by: Mike Waychison Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 15b9a01b6c68..563492e4d5cf 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -512,18 +512,20 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, list_for_each_entry(entry, &efivars->list, list) { get_var_data_locked(efivars, &entry->var); - for (i = 0; i < DUMP_NAME_LEN; i++) { - if (efi_name[i] == 0) { - found = entry; - efivars->ops->set_variable(entry->var.VariableName, - &entry->var.VendorGuid, - EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, - 0, NULL); - break; - } else if (efi_name[i] != entry->var.VariableName[i]) { - break; - } - } + if (efi_guidcmp(entry->var.VendorGuid, vendor)) + continue; + if (utf16_strncmp(entry->var.VariableName, efi_name, + utf16_strlen(efi_name))) + continue; + /* Needs to be a prefix */ + if (entry->var.VariableName[utf16_strlen(efi_name)] == 0) + continue; + + /* found */ + found = entry; + efivars->ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, + EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + 0, NULL); } if (found) From 7644c16c7e7431fa398e834109dbb76dc1b51617 Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Thu, 21 Jul 2011 16:58:00 -0400 Subject: [PATCH 078/151] efivars: Introduce PSTORE_EFI_ATTRIBUTES Consolidate the attributes listed for pstore operations in one place, PSTORE_EFI_ATTRIBUTES. 
Signed-off-by: Mike Waychison Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 563492e4d5cf..eacb05e6cfb3 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -122,6 +122,10 @@ struct efivar_attribute { ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count); }; +#define PSTORE_EFI_ATTRIBUTES \ + (EFI_VARIABLE_NON_VOLATILE | \ + EFI_VARIABLE_BOOTSERVICE_ACCESS | \ + EFI_VARIABLE_RUNTIME_ACCESS) #define EFIVAR_ATTR(_name, _mode, _show, _store) \ struct efivar_attribute efivar_attr_##_name = { \ @@ -523,8 +527,9 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, /* found */ found = entry; - efivars->ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, - EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + efivars->ops->set_variable(entry->var.VariableName, + &entry->var.VendorGuid, + PSTORE_EFI_ATTRIBUTES, 0, NULL); } @@ -534,8 +539,7 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, for (i = 0; i < DUMP_NAME_LEN; i++) efi_name[i] = name[i]; - efivars->ops->set_variable(efi_name, &vendor, - EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + efivars->ops->set_variable(efi_name, &vendor, PSTORE_EFI_ATTRIBUTES, size, psi->buf); spin_unlock(&efivars->lock); From 529da704ad1ead755d9e40721f29446cb278e099 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 23 Jul 2011 16:07:26 -0400 Subject: [PATCH 079/151] ext4: remove unnecessary ext4_get_group_info in ext4_mb_load_buddy ext4_mb_load_buddy() calls ext4_get_group_info() for setting both "grp" and "e4b->bd_info", but it could do "e4b->bd_info = grp". 
Reported-by: Andreas Dilger Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 037f680b76f9..447c0f3384ab 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1126,7 +1126,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, grp = ext4_get_group_info(sb, group); e4b->bd_blkbits = sb->s_blocksize_bits; - e4b->bd_info = ext4_get_group_info(sb, group); + e4b->bd_info = grp; e4b->bd_sb = sb; e4b->bd_group = group; e4b->bd_buddy_page = NULL; From ced156e464e49b6b4153ede9aaa04d9a4ad24e0c Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 23 Jul 2011 16:18:05 -0400 Subject: [PATCH 080/151] ext4: don't increment s_mb_buddies_generated in ext4_mb_release In ext4_mb_release, we use s_mb_buddies_generated++. Although the output is OK, I don't think we need this extra ++. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 447c0f3384ab..e16583032b6b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2581,7 +2581,7 @@ int ext4_mb_release(struct super_block *sb) atomic_read(&sbi->s_mb_lost_chunks)); printk(KERN_INFO "EXT4-fs: mballoc: %lu generated and it took %Lu\n", - sbi->s_mb_buddies_generated++, + sbi->s_mb_buddies_generated, sbi->s_mb_generation_time); printk(KERN_INFO "EXT4-fs: mballoc: %u preallocated, %u discarded\n", From 6a0fe49308dcac10ab510b3c45e00eb8d5ef440e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 23 Jul 2011 16:18:55 -0400 Subject: [PATCH 081/151] ext4: remove ac_repeats from ext4_allocation_context ac_repeats isn't referenced in the mballoc code. So remove it.
Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 20b5e7bfebd1..9d4a636b546c 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -187,7 +187,6 @@ struct ext4_allocation_context { __u16 ac_flags; /* allocation hints */ __u8 ac_status; __u8 ac_criteria; - __u8 ac_repeats; __u8 ac_2order; /* if request is to allocate 2^N blocks and * N > 0, the field stores N, otherwise 0 */ __u8 ac_op; /* operation, for history only */ From 5718789da5b94bd4148cb7ea0f457089c26bc1c3 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sat, 23 Jul 2011 21:49:07 -0400 Subject: [PATCH 082/151] ext4: remove unused argument in ext4_ext_next_leaf_block The argument "inode" in function ext4_ext_next_leaf_block is unused, so remove it. Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9bec432e2d26..33bbe8467bd6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1414,8 +1414,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) * ext4_ext_next_leaf_block: * returns first allocated block from next leaf or EXT_MAX_BLOCKS */ -static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, - struct ext4_ext_path *path) +static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path) { int depth; @@ -1734,7 +1733,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, fex = EXT_LAST_EXTENT(eh); next = EXT_MAX_BLOCKS; if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) - next = ext4_ext_next_leaf_block(inode, path); + next = ext4_ext_next_leaf_block(path); if (next != EXT_MAX_BLOCKS) { ext_debug("next leaf block - %d\n", next); BUG_ON(npath != NULL); From 0737964bc98202776f4d10bc8e108f45b3115037 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sat, 23 Jul 2011 21:51:07 -0400 Subject:
[PATCH 083/151] ext4: correct the debug message in ext4_ext_insert_extent The debug message in ext4_ext_insert_extent before moving extent is incorrect (the "from xx to xx"). Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 33bbe8467bd6..a637d837a895 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1806,7 +1806,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, ext4_ext_pblock(newext), ext4_ext_is_uninitialized(newext), ext4_ext_get_actual_len(newext), - nearex, len, nearex + 1, nearex + 2); + nearex, len, nearex, nearex + 1); memmove(nearex + 1, nearex, len); path[depth].p_ext = nearex; } From b7ca1e8ec53259359db5313f923a0a20fa04bdb6 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sat, 23 Jul 2011 21:53:25 -0400 Subject: [PATCH 084/151] ext4: correct comment for ext4_ext_check_cache The comment for ext4_ext_check_cache has a little mistake. Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a637d837a895..4d73e11ae883 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2019,7 +2019,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, } /* - * ext4_ext_in_cache() + * ext4_ext_check_cache() * Checks to see if the given block is in the cache. * If it is, the cached extent is stored in the given * cache extent pointer. If the cached extent is a hole, From 0b052f4a088ddc47a5da23dd733522241314cfb4 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Thu, 14 Jul 2011 09:52:38 +0900 Subject: [PATCH 085/151] pch_dma: Fix CTL register access issue Currently, the Mode-Control register is accessed by read-modify-write. According to the DMA hardware specification datasheet, this method is prohibited.
Because this register resets to 0 by DMA HW after DMA transfer completes. Thus, current read-modify-write processing can cause unexpected behavior. The datasheet says in case of writing Mode-Control register, set the value for only target channel, the others must set '11b'. e.g. Set DMA0=01b DMA11=10b CTL0=33333331h CTL2=00002333h NOTE: CTL0 includes DMA0~7 Mode-Control register. CTL2 includes DMA8~11 Mode-Control register. This patch modifies the issue. Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index d9d95a4dd854..1ac8d4b580b7 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -62,6 +62,9 @@ #define MAX_CHAN_NR 8 +#define DMA_MASK_CTL0_MODE 0x33333333 +#define DMA_MASK_CTL2_MODE 0x00003333 + static unsigned int init_nr_desc_per_channel = 64; module_param(init_nr_desc_per_channel, uint, 0644); MODULE_PARM_DESC(init_nr_desc_per_channel, @@ -210,10 +213,17 @@ static void pdc_set_dir(struct dma_chan *chan) struct pch_dma_chan *pd_chan = to_pd_chan(chan); struct pch_dma *pd = to_pd(chan->device); u32 val; + u32 mask_mode; + u32 mask_ctl; if (chan->chan_id < 8) { val = dma_readl(pd, CTL0); + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id); + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val &= mask_mode; if (pd_chan->dir == DMA_TO_DEVICE) val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + DMA_CTL0_DIR_SHIFT_BITS); @@ -221,18 +231,24 @@ static void pdc_set_dir(struct dma_chan *chan) val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + DMA_CTL0_DIR_SHIFT_BITS)); + val |= mask_ctl; dma_writel(pd, CTL0, val); } else { int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... 
ch11->3 */ val = dma_readl(pd, CTL3); + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch); + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + val &= mask_mode; if (pd_chan->dir == DMA_TO_DEVICE) val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch + DMA_CTL0_DIR_SHIFT_BITS); else val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch + DMA_CTL0_DIR_SHIFT_BITS)); - + val |= mask_ctl; dma_writel(pd, CTL3, val); } @@ -244,26 +260,30 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode) { struct pch_dma *pd = to_pd(chan->device); u32 val; + u32 mask_ctl; + u32 mask_dir; if (chan->chan_id < 8) { + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\ + DMA_CTL0_DIR_SHIFT_BITS); val = dma_readl(pd, CTL0); - - val &= ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val &= mask_dir; val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); - + val |= mask_ctl; dma_writel(pd, CTL0, val); } else { int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... 
ch11->3 */ - + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\ + DMA_CTL0_DIR_SHIFT_BITS); val = dma_readl(pd, CTL3); - - val &= ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * ch)); + val &= mask_dir; val |= mode << (DMA_CTL0_BITS_PER_CH * ch); - + val |= mask_ctl; dma_writel(pd, CTL3, val); - } dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n", From 1c1d9547536480626c1be1fb062b81663fb2b88e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 12 Jul 2011 21:00:13 +0800 Subject: [PATCH 086/151] dmaengine: imx-sdma: return proper error if kzalloc fails Signed-off-by: Axel Lin Acked-by: Sascha Hauer Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index b6d1455fa936..ec53980f8fcf 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1281,8 +1281,10 @@ static int __init sdma_probe(struct platform_device *pdev) goto err_request_irq; sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL); - if (!sdma->script_addrs) + if (!sdma->script_addrs) { + ret = -ENOMEM; goto err_alloc; + } sdma->version = pdata->sdma_version; From 4e0e6109a1cc18cc5e4143f828c36b6a3e8be6ad Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 25 Jul 2011 16:05:04 -0500 Subject: [PATCH 087/151] dmaengine: pl330: make platform data optional The pl330 needs platform data for describing peripheral connections, but some platforms may only support memory to memory dma channels. In this case, we can probe for how many channels there are and don't need the platform data. As memcpy requests don't need channel private data to hold peripheral info, allow private data to be NULL in this case. 
Signed-off-by: Rob Herring Cc: Jassi Brar Cc: Vinod Koul Cc: Dan Williams Acked-by: Jassi Brar Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 64 +++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 6abe1ec1f2ce..00eee59e8b33 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -82,7 +82,7 @@ struct dma_pl330_dmac { spinlock_t pool_lock; /* Peripheral channels connected to this DMAC */ - struct dma_pl330_chan peripherals[0]; /* keep at end */ + struct dma_pl330_chan *peripherals; /* keep at end */ }; struct dma_pl330_desc { @@ -451,8 +451,13 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) desc->txd.cookie = 0; async_tx_ack(&desc->txd); - desc->req.rqtype = peri->rqtype; - desc->req.peri = peri->peri_id; + if (peri) { + desc->req.rqtype = peri->rqtype; + desc->req.peri = peri->peri_id; + } else { + desc->req.rqtype = MEMTOMEM; + desc->req.peri = 0; + } dma_async_tx_descriptor_init(&desc->txd, &pch->chan); @@ -529,10 +534,10 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, struct pl330_info *pi; int burst; - if (unlikely(!pch || !len || !peri)) + if (unlikely(!pch || !len)) return NULL; - if (peri->rqtype != MEMTOMEM) + if (peri && peri->rqtype != MEMTOMEM) return NULL; pi = &pch->dmac->pif; @@ -577,7 +582,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, int i, burst_size; dma_addr_t addr; - if (unlikely(!pch || !sgl || !sg_len)) + if (unlikely(!pch || !sgl || !sg_len || !peri)) return NULL; /* Make sure the direction is consistent */ @@ -666,17 +671,12 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) struct dma_device *pd; struct resource *res; int i, ret, irq; + int num_chan; pdat = adev->dev.platform_data; - if (!pdat || !pdat->nr_valid_peri) { - dev_err(&adev->dev, "platform data missing\n"); - return -ENODEV; - } - /* Allocate a new DMAC and its Channels */ - 
pdmac = kzalloc(pdat->nr_valid_peri * sizeof(*pch) - + sizeof(*pdmac), GFP_KERNEL); + pdmac = kzalloc(sizeof(*pdmac), GFP_KERNEL); if (!pdmac) { dev_err(&adev->dev, "unable to allocate mem\n"); return -ENOMEM; @@ -685,7 +685,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pi = &pdmac->pif; pi->dev = &adev->dev; pi->pl330_data = NULL; - pi->mcbufsz = pdat->mcbuf_sz; + pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0; res = &adev->res; request_mem_region(res->start, resource_size(res), "dma-pl330"); @@ -717,27 +717,35 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) INIT_LIST_HEAD(&pd->channels); /* Initialize channel parameters */ - for (i = 0; i < pdat->nr_valid_peri; i++) { - struct dma_pl330_peri *peri = &pdat->peri[i]; - pch = &pdmac->peripherals[i]; + num_chan = max(pdat ? pdat->nr_valid_peri : 0, (u8)pi->pcfg.num_chan); + pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL); - switch (peri->rqtype) { - case MEMTOMEM: + for (i = 0; i < num_chan; i++) { + pch = &pdmac->peripherals[i]; + if (pdat) { + struct dma_pl330_peri *peri = &pdat->peri[i]; + + switch (peri->rqtype) { + case MEMTOMEM: + dma_cap_set(DMA_MEMCPY, pd->cap_mask); + break; + case MEMTODEV: + case DEVTOMEM: + dma_cap_set(DMA_SLAVE, pd->cap_mask); + break; + default: + dev_err(&adev->dev, "DEVTODEV Not Supported\n"); + continue; + } + pch->chan.private = peri; + } else { dma_cap_set(DMA_MEMCPY, pd->cap_mask); - break; - case MEMTODEV: - case DEVTOMEM: - dma_cap_set(DMA_SLAVE, pd->cap_mask); - break; - default: - dev_err(&adev->dev, "DEVTODEV Not Supported\n"); - continue; + pch->chan.private = NULL; } INIT_LIST_HEAD(&pch->work_list); spin_lock_init(&pch->lock); pch->pl330_chid = NULL; - pch->chan.private = peri; pch->chan.device = pd; pch->chan.chan_id = i; pch->dmac = pdmac; From 2a9778ed83b142e88cb38acc496a573a3472d27f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 12 Jul 2011 18:53:52 +0800 Subject: [PATCH 088/151] dma: mxs-dma: fix unterminated 
platform_device_id table Signed-off-by: Axel Lin Signed-off-by: Vinod Koul --- drivers/dma/mxs-dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 2870d919f112..f22a237b7dd6 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -709,6 +709,8 @@ static struct platform_device_id mxs_dma_type[] = { }, { .name = "mxs-dma-apbx", .driver_data = MXS_DMA_APBX, + }, { + /* end of list */ } }; From add56ba711627c223e9e356d9398642abf7fa32d Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 19 Jul 2011 14:48:17 +0800 Subject: [PATCH 089/151] dma: intel_mid_dma: remove redundant pci_set_drvdata calls Call pci_set_drvdata() once in intel_mid_dma_probe() is enough. Remove redundant pci_set_drvdata() calls in dma_suspend() and dma_resume(). Signed-off-by: Axel Lin Signed-off-by: Vinod Koul --- drivers/dma/intel_mid_dma.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index f653517ef744..8a3fdd87db97 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -1351,7 +1351,6 @@ int dma_suspend(struct pci_dev *pci, pm_message_t state) return -EAGAIN; } device->state = SUSPENDED; - pci_set_drvdata(pci, device); pci_save_state(pci); pci_disable_device(pci); pci_set_power_state(pci, PCI_D3hot); @@ -1380,7 +1379,6 @@ int dma_resume(struct pci_dev *pci) } device->state = RUNNING; iowrite32(REG_BIT0, device->dma_base + DMA_CFG); - pci_set_drvdata(pci, device); return 0; } From a62bae98a93e6c4d53b1e6c20715e94b4a5aca3c Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Tue, 19 Jul 2011 12:09:56 +0800 Subject: [PATCH 090/151] ARM: mxs-dma: reset after disable channel We met some channels in abnormal state after disable. Reset it to get a clean state. 
Signed-off-by: Dong Aisheng Cc: Vinod Koul Cc: Shawn Guo Signed-off-by: Vinod Koul --- drivers/dma/mxs-dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index f22a237b7dd6..be641cbd36fc 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -537,6 +537,7 @@ static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, switch (cmd) { case DMA_TERMINATE_ALL: mxs_dma_disable_chan(mxs_chan); + mxs_dma_reset_chan(mxs_chan); break; case DMA_PAUSE: mxs_dma_pause_chan(mxs_chan); From f44bd191404841e44a914b2760a16ad328f406a8 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:11:26 +0100 Subject: [PATCH 091/151] DMA: PL08x: remove unused constants PL08X_WQ_PERIODMIN and PL08X_MAX_ALLOCS are not used, remove them. Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index e6d7228b1479..90db51f2d001 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -156,14 +156,10 @@ struct pl08x_driver_data { #define PL08X_BOUNDARY_SHIFT (10) /* 1KB 0x400 */ #define PL08X_BOUNDARY_SIZE (1 << PL08X_BOUNDARY_SHIFT) -/* Minimum period between work queue runs */ -#define PL08X_WQ_PERIODMIN 20 - /* Size (bytes) of each LLI buffer allocated for one transfer */ # define PL08X_LLI_TSFR_SIZE 0x2000 /* Maximum times we call dma_pool_alloc on this pool without freeing */ -#define PL08X_MAX_ALLOCS 0x40 #define MAX_NUM_TSFR_LLIS (PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli)) #define PL08X_ALIGN 8 From 25c94f7fcf70d94e12401b9c957ddf1d303061a3 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:11:46 +0100 Subject: [PATCH 092/151] DMA: PL08x: select LLI bus only once per LLI setup Avoid re-selecting the LLI bus each time we create an LLI. Move it out of the LLI setup loops. 
Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 90db51f2d001..6808f7dc52c7 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -491,10 +491,10 @@ static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth, struct pl08x_lli_build_data { struct pl08x_txd *txd; - struct pl08x_driver_data *pl08x; struct pl08x_bus_data srcbus; struct pl08x_bus_data dstbus; size_t remainder; + u32 lli_bus; }; /* @@ -547,8 +547,7 @@ static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd, llis_va[num_llis].src = bd->srcbus.addr; llis_va[num_llis].dst = bd->dstbus.addr; llis_va[num_llis].lli = llis_bus + (num_llis + 1) * sizeof(struct pl08x_lli); - if (bd->pl08x->lli_buses & PL08X_AHB2) - llis_va[num_llis].lli |= PL080_LLI_LM_AHB2; + llis_va[num_llis].lli |= bd->lli_bus; if (cctl & PL080_CONTROL_SRC_INCR) bd->srcbus.addr += len; @@ -601,9 +600,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, cctl = txd->cctl; bd.txd = txd; - bd.pl08x = pl08x; bd.srcbus.addr = txd->src_addr; bd.dstbus.addr = txd->dst_addr; + bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0; /* Find maximum width of the source bus */ bd.srcbus.maxwidth = From fc74eb791590e624ca6915ae76a04808e03bffb0 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:12:06 +0100 Subject: [PATCH 093/151] DMA: PL08x: clean up LLI debugging Clean up debugging when setting up the LLI list. This reduces the amount of output while preserving the information, and makes it easier to read. 
Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 6808f7dc52c7..1c641bfd1826 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -617,25 +617,15 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, /* Set up the bus widths to the maximum */ bd.srcbus.buswidth = bd.srcbus.maxwidth; bd.dstbus.buswidth = bd.dstbus.maxwidth; - dev_vdbg(&pl08x->adev->dev, - "%s source bus is %d bytes wide, dest bus is %d bytes wide\n", - __func__, bd.srcbus.buswidth, bd.dstbus.buswidth); - /* * Bytes transferred == tsize * MIN(buswidths), not max(buswidths) */ max_bytes_per_lli = min(bd.srcbus.buswidth, bd.dstbus.buswidth) * PL080_CONTROL_TRANSFER_SIZE_MASK; - dev_vdbg(&pl08x->adev->dev, - "%s max bytes per lli = %zu\n", - __func__, max_bytes_per_lli); /* We need to count this down to zero */ bd.remainder = txd->len; - dev_vdbg(&pl08x->adev->dev, - "%s remainder = %zu\n", - __func__, bd.remainder); /* * Choose bus to align to @@ -644,6 +634,16 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, */ pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl); + dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu llimax=%zu\n", + bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "", + bd.srcbus.buswidth, + bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "", + bd.dstbus.buswidth, + bd.remainder, max_bytes_per_lli); + dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n", + mbus == &bd.srcbus ? "src" : "dst", + sbus == &bd.srcbus ? 
"src" : "dst"); + if (txd->len < mbus->buswidth) { /* Less than a bus width available - send as single bytes */ while (bd.remainder) { @@ -835,15 +835,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, { int i; + dev_vdbg(&pl08x->adev->dev, + "%-3s %-9s %-10s %-10s %-10s %s\n", + "lli", "", "csrc", "cdst", "clli", "cctl"); for (i = 0; i < num_llis; i++) { dev_vdbg(&pl08x->adev->dev, - "lli %d @%p: csrc=0x%08x, cdst=0x%08x, cctl=0x%08x, clli=0x%08x\n", - i, - &llis_va[i], - llis_va[i].src, - llis_va[i].dst, - llis_va[i].cctl, - llis_va[i].lli + "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, &llis_va[i], llis_va[i].src, + llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl ); } } From b207b4d02beb06059478339bbe4672ba715605d6 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:12:27 +0100 Subject: [PATCH 094/151] DMA: PL08x: separately store source/destination slave address Store the source/destination slave address separately into the channel structure. This moves us towards being able to avoid a configuration call each time we use the channel. 
Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 21 +++++++++------------ include/linux/amba/pl08x.h | 3 ++- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 1c641bfd1826..077ddeefb864 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1102,7 +1102,6 @@ static int dma_set_runtime_config(struct dma_chan *chan, struct pl08x_driver_data *pl08x = plchan->host; struct pl08x_channel_data *cd = plchan->cd; enum dma_slave_buswidth addr_width; - dma_addr_t addr; u32 maxburst; u32 cctl = 0; int i; @@ -1113,11 +1112,9 @@ static int dma_set_runtime_config(struct dma_chan *chan, /* Transfer direction */ plchan->runtime_direction = config->direction; if (config->direction == DMA_TO_DEVICE) { - addr = config->dst_addr; addr_width = config->dst_addr_width; maxburst = config->dst_maxburst; } else if (config->direction == DMA_FROM_DEVICE) { - addr = config->src_addr; addr_width = config->src_addr_width; maxburst = config->src_maxburst; } else { @@ -1161,7 +1158,11 @@ static int dma_set_runtime_config(struct dma_chan *chan, cctl |= burst_sizes[i].reg; } - plchan->runtime_addr = addr; + if (plchan->runtime_direction == DMA_FROM_DEVICE) { + plchan->src_addr = config->src_addr; + } else { + plchan->dst_addr = config->dst_addr; + } /* Modify the default channel data to fit PrimeCell request */ cd->cctl = cctl; @@ -1396,19 +1397,13 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; txd->cctl |= PL080_CONTROL_SRC_INCR; txd->src_addr = sgl->dma_address; - if (plchan->runtime_addr) - txd->dst_addr = plchan->runtime_addr; - else - txd->dst_addr = plchan->cd->addr; + txd->dst_addr = plchan->dst_addr; src_buses = pl08x->mem_buses; dst_buses = plchan->cd->periph_buses; } else if (direction == DMA_FROM_DEVICE) { txd->ccfg |= PL080_FLOW_PER2MEM << 
PL080_CONFIG_FLOW_CONTROL_SHIFT; txd->cctl |= PL080_CONTROL_DST_INCR; - if (plchan->runtime_addr) - txd->src_addr = plchan->runtime_addr; - else - txd->src_addr = plchan->cd->addr; + txd->src_addr = plchan->src_addr; txd->dst_addr = sgl->dma_address; src_buses = plchan->cd->periph_buses; dst_buses = pl08x->mem_buses; @@ -1704,6 +1699,8 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, chan->slave = true; chan->name = pl08x->pd->slave_channels[i].bus_id; chan->cd = &pl08x->pd->slave_channels[i]; + chan->src_addr = chan->cd->addr; + chan->dst_addr = chan->cd->addr; } else { chan->cd = &pl08x->pd->memcpy_channel; chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 3111385b8ca7..072ab28bbb57 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -173,7 +173,8 @@ struct pl08x_dma_chan { struct tasklet_struct tasklet; char *name; struct pl08x_channel_data *cd; - dma_addr_t runtime_addr; + dma_addr_t src_addr; + dma_addr_t dst_addr; enum dma_data_direction runtime_direction; dma_cookie_t lc; struct list_head pend_list; From f14c426c723634d223344ad820997d92a3e355b6 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:12:47 +0100 Subject: [PATCH 095/151] DMA: PL08x: separately store source/destination cctl Store the source/destination cctl values into the channel structure. This moves us towards being able to avoid a configuration call each time we use the channel. 
Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 30 ++++++++++++++++-------------- include/linux/amba/pl08x.h | 2 ++ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 077ddeefb864..ba617e3f23f8 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1095,12 +1095,21 @@ static const struct burst_table burst_sizes[] = { }, }; +static u32 pl08x_cctl(u32 cctl) +{ + cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | + PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | + PL080_CONTROL_PROT_MASK); + + /* Access the cell in privileged mode, non-bufferable, non-cacheable */ + return cctl | PL080_CONTROL_PROT_SYS; +} + static int dma_set_runtime_config(struct dma_chan *chan, struct dma_slave_config *config) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; - struct pl08x_channel_data *cd = plchan->cd; enum dma_slave_buswidth addr_width; u32 maxburst; u32 cctl = 0; @@ -1160,13 +1169,12 @@ static int dma_set_runtime_config(struct dma_chan *chan, if (plchan->runtime_direction == DMA_FROM_DEVICE) { plchan->src_addr = config->src_addr; + plchan->src_cctl = pl08x_cctl(cctl); } else { plchan->dst_addr = config->dst_addr; + plchan->dst_cctl = pl08x_cctl(cctl); } - /* Modify the default channel data to fit PrimeCell request */ - cd->cctl = cctl; - dev_dbg(&pl08x->adev->dev, "configured channel %s (%s) for %s, data width %d, " "maxburst %d words, LE, CCTL=0x%08x\n", @@ -1385,24 +1393,16 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( txd->direction = direction; txd->len = sgl->length; - txd->cctl = plchan->cd->cctl & - ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | - PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | - PL080_CONTROL_PROT_MASK); - - /* Access the cell in privileged mode, non-bufferable, non-cacheable */ - txd->cctl |= PL080_CONTROL_PROT_SYS; - 
if (direction == DMA_TO_DEVICE) { txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl |= PL080_CONTROL_SRC_INCR; + txd->cctl = plchan->dst_cctl | PL080_CONTROL_SRC_INCR; txd->src_addr = sgl->dma_address; txd->dst_addr = plchan->dst_addr; src_buses = pl08x->mem_buses; dst_buses = plchan->cd->periph_buses; } else if (direction == DMA_FROM_DEVICE) { txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl |= PL080_CONTROL_DST_INCR; + txd->cctl = plchan->src_cctl | PL080_CONTROL_DST_INCR; txd->src_addr = plchan->src_addr; txd->dst_addr = sgl->dma_address; src_buses = plchan->cd->periph_buses; @@ -1701,6 +1701,8 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, chan->cd = &pl08x->pd->slave_channels[i]; chan->src_addr = chan->cd->addr; chan->dst_addr = chan->cd->addr; + chan->src_cctl = pl08x_cctl(chan->cd->cctl); + chan->dst_cctl = pl08x_cctl(chan->cd->cctl); } else { chan->cd = &pl08x->pd->memcpy_channel; chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 072ab28bbb57..47cfe31e5c35 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -175,6 +175,8 @@ struct pl08x_dma_chan { struct pl08x_channel_data *cd; dma_addr_t src_addr; dma_addr_t dst_addr; + u32 src_cctl; + u32 dst_cctl; enum dma_data_direction runtime_direction; dma_cookie_t lc; struct list_head pend_list; From fa020e7d046436cb6642b23dc95012a3064d77e2 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:13:07 +0100 Subject: [PATCH 096/151] DMA: PL08x: constify plchan->cd and plat->slave_channels We no longer write to the channel data structure, so we can make it const throughout the driver. 
Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- include/linux/amba/pl08x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 47cfe31e5c35..e6e28f37d8ec 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -172,7 +172,7 @@ struct pl08x_dma_chan { int phychan_hold; struct tasklet_struct tasklet; char *name; - struct pl08x_channel_data *cd; + const struct pl08x_channel_data *cd; dma_addr_t src_addr; dma_addr_t dst_addr; u32 src_cctl; @@ -205,7 +205,7 @@ struct pl08x_dma_chan { * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 */ struct pl08x_platform_data { - struct pl08x_channel_data *slave_channels; + const struct pl08x_channel_data *slave_channels; unsigned int num_slave_channels; struct pl08x_channel_data memcpy_channel; int (*get_signal)(struct pl08x_dma_chan *); From aa88cdaa149e1c1cfc935ff73e50f3f9f3b2e3a1 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:13:28 +0100 Subject: [PATCH 097/151] DMA: PL08x: cleanup selection of buswidth Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index ba617e3f23f8..2dd37ff753ca 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1105,13 +1105,26 @@ static u32 pl08x_cctl(u32 cctl) return cctl | PL080_CONTROL_PROT_SYS; } +static u32 pl08x_width(enum dma_slave_buswidth width) +{ + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + return PL080_WIDTH_8BIT; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return PL080_WIDTH_16BIT; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return PL080_WIDTH_32BIT; + } + return ~0; +} + static int dma_set_runtime_config(struct dma_chan *chan, struct dma_slave_config *config) { struct 
pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; enum dma_slave_buswidth addr_width; - u32 maxburst; + u32 width, maxburst; u32 cctl = 0; int i; @@ -1132,25 +1145,16 @@ static int dma_set_runtime_config(struct dma_chan *chan, return -EINVAL; } - switch (addr_width) { - case DMA_SLAVE_BUSWIDTH_1_BYTE: - cctl |= (PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT) | - (PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT); - break; - case DMA_SLAVE_BUSWIDTH_2_BYTES: - cctl |= (PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT) | - (PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT); - break; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - cctl |= (PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT) | - (PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT); - break; - default: + width = pl08x_width(addr_width); + if (width == ~0) { dev_err(&pl08x->adev->dev, "bad runtime_config: alien address width\n"); return -EINVAL; } + cctl |= width << PL080_CONTROL_SWIDTH_SHIFT; + cctl |= width << PL080_CONTROL_DWIDTH_SHIFT; + /* * Now decide on a maxburst: * If this channel will only request single transfers, set this From 121c8476a3c39a483326c33526e72a07661df1fc Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:13:48 +0100 Subject: [PATCH 098/151] DMA: PL08x: avoid recalculating cctl at each prepare Now that we have separate cctl values for M>P and P>M transfers, we can avoid calculating the cctl value each time we prepare a transaction. Move the bus selection and increment setting to the slave configuration and initialization functions. 
Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 78 +++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 2dd37ff753ca..a84db8b39ba1 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1095,6 +1095,23 @@ static const struct burst_table burst_sizes[] = { }, }; +/* + * Given the source and destination available bus masks, select which + * will be routed to each port. We try to have source and destination + * on separate ports, but always respect the allowable settings. + */ +static u32 pl08x_select_bus(u8 src, u8 dst) +{ + u32 cctl = 0; + + if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) + cctl |= PL080_CONTROL_DST_AHB2; + if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) + cctl |= PL080_CONTROL_SRC_AHB2; + + return cctl; +} + static u32 pl08x_cctl(u32 cctl) { cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | @@ -1173,10 +1190,14 @@ static int dma_set_runtime_config(struct dma_chan *chan, if (plchan->runtime_direction == DMA_FROM_DEVICE) { plchan->src_addr = config->src_addr; - plchan->src_cctl = pl08x_cctl(cctl); + plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR | + pl08x_select_bus(plchan->cd->periph_buses, + pl08x->mem_buses); } else { plchan->dst_addr = config->dst_addr; - plchan->dst_cctl = pl08x_cctl(cctl); + plchan->dst_cctl = pl08x_cctl(cctl) | PL080_CONTROL_SRC_INCR | + pl08x_select_bus(pl08x->mem_buses, + plchan->cd->periph_buses); } dev_dbg(&pl08x->adev->dev, @@ -1277,23 +1298,6 @@ static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan, return 0; } -/* - * Given the source and destination available bus masks, select which - * will be routed to each port. We try to have source and destination - * on separate ports, but always respect the allowable settings. 
- */ -static u32 pl08x_select_bus(struct pl08x_driver_data *pl08x, u8 src, u8 dst) -{ - u32 cctl = 0; - - if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) - cctl |= PL080_CONTROL_DST_AHB2; - if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) - cctl |= PL080_CONTROL_SRC_AHB2; - - return cctl; -} - static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan, unsigned long flags) { @@ -1345,8 +1349,8 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; if (pl08x->vd->dualmaster) - txd->cctl |= pl08x_select_bus(pl08x, - pl08x->mem_buses, pl08x->mem_buses); + txd->cctl |= pl08x_select_bus(pl08x->mem_buses, + pl08x->mem_buses); ret = pl08x_prep_channel_resources(plchan, txd); if (ret) @@ -1363,7 +1367,6 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; struct pl08x_txd *txd; - u8 src_buses, dst_buses; int ret; /* @@ -1399,26 +1402,20 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( if (direction == DMA_TO_DEVICE) { txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl = plchan->dst_cctl | PL080_CONTROL_SRC_INCR; + txd->cctl = plchan->dst_cctl; txd->src_addr = sgl->dma_address; txd->dst_addr = plchan->dst_addr; - src_buses = pl08x->mem_buses; - dst_buses = plchan->cd->periph_buses; } else if (direction == DMA_FROM_DEVICE) { txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl = plchan->src_cctl | PL080_CONTROL_DST_INCR; + txd->cctl = plchan->src_cctl; txd->src_addr = plchan->src_addr; txd->dst_addr = sgl->dma_address; - src_buses = plchan->cd->periph_buses; - dst_buses = pl08x->mem_buses; } else { dev_err(&pl08x->adev->dev, "%s direction unsupported\n", __func__); return NULL; } - txd->cctl |= pl08x_select_bus(pl08x, src_buses, dst_buses); - ret = 
pl08x_prep_channel_resources(plchan, txd); if (ret) return NULL; @@ -1669,6 +1666,20 @@ static irqreturn_t pl08x_irq(int irq, void *dev) return mask ? IRQ_HANDLED : IRQ_NONE; } +static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan) +{ + u32 cctl = pl08x_cctl(chan->cd->cctl); + + chan->slave = true; + chan->name = chan->cd->bus_id; + chan->src_addr = chan->cd->addr; + chan->dst_addr = chan->cd->addr; + chan->src_cctl = cctl | PL080_CONTROL_DST_INCR | + pl08x_select_bus(chan->cd->periph_buses, chan->host->mem_buses); + chan->dst_cctl = cctl | PL080_CONTROL_SRC_INCR | + pl08x_select_bus(chan->host->mem_buses, chan->cd->periph_buses); +} + /* * Initialise the DMAC memcpy/slave channels. * Make a local wrapper to hold required data @@ -1700,13 +1711,8 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, chan->state = PL08X_CHAN_IDLE; if (slave) { - chan->slave = true; - chan->name = pl08x->pd->slave_channels[i].bus_id; chan->cd = &pl08x->pd->slave_channels[i]; - chan->src_addr = chan->cd->addr; - chan->dst_addr = chan->cd->addr; - chan->src_cctl = pl08x_cctl(chan->cd->cctl); - chan->dst_cctl = pl08x_cctl(chan->cd->cctl); + pl08x_dma_slave_init(chan); } else { chan->cd = &pl08x->pd->memcpy_channel; chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); From 760596c6b986e6345a28392cf40ee344bfd209a6 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:14:08 +0100 Subject: [PATCH 099/151] DMA: PL08x: cleanup selection of burst size Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 58 +++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index a84db8b39ba1..9aa2bd4452d3 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1048,50 +1048,42 @@ pl08x_dma_tx_status(struct dma_chan *chan, /* PrimeCell DMA extension */ struct burst_table { - int 
burstwords; + u32 burstwords; u32 reg; }; static const struct burst_table burst_sizes[] = { { .burstwords = 256, - .reg = (PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_256, }, { .burstwords = 128, - .reg = (PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_128, }, { .burstwords = 64, - .reg = (PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_64, }, { .burstwords = 32, - .reg = (PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_32, }, { .burstwords = 16, - .reg = (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_16, }, { .burstwords = 8, - .reg = (PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_8, }, { .burstwords = 4, - .reg = (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_4, }, { - .burstwords = 1, - .reg = (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT), + .burstwords = 0, + .reg = PL080_BSIZE_1, }, }; @@ -1135,15 +1127,25 @@ static u32 pl08x_width(enum dma_slave_buswidth width) return ~0; } +static u32 pl08x_burst(u32 maxburst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) + if (burst_sizes[i].burstwords <= maxburst) + break; + + return burst_sizes[i].reg; +} + static int dma_set_runtime_config(struct dma_chan *chan, struct dma_slave_config *config) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; enum dma_slave_buswidth addr_width; - u32 width, maxburst; + u32 width, burst, maxburst; u32 cctl = 0; - int i; if (!plchan->slave) return -EINVAL; 
@@ -1173,20 +1175,16 @@ static int dma_set_runtime_config(struct dma_chan *chan, cctl |= width << PL080_CONTROL_DWIDTH_SHIFT; /* - * Now decide on a maxburst: * If this channel will only request single transfers, set this * down to ONE element. Also select one element if no maxburst * is specified. */ - if (plchan->cd->single || maxburst == 0) { - cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT); - } else { - for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) - if (burst_sizes[i].burstwords <= maxburst) - break; - cctl |= burst_sizes[i].reg; - } + if (plchan->cd->single) + maxburst = 1; + + burst = pl08x_burst(maxburst); + cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT; + cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT; if (plchan->runtime_direction == DMA_FROM_DEVICE) { plchan->src_addr = config->src_addr; From f32807f1ff7fbfd2d4ec708b1ac8cb75cb92bfef Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 25 Jul 2011 19:22:01 +0530 Subject: [PATCH 100/151] dmaengine: pl08x: handle the rest of enums in pl08x_width pl08x_width function does not handle rest of enums for DMA_SLAVE_BUSWIDTH_xxxx which causes gcc to emit below warining drivers/dma/amba-pl08x.c: In function 'pl08x_width': drivers/dma/amba-pl08x.c:1119: warning: enumeration value 'DMA_SLAVE_BUSWIDTH_UNDEFINED' not handled in switch drivers/dma/amba-pl08x.c:1119: warning: enumeration value 'DMA_SLAVE_BUSWIDTH_8_BYTES' not handled in switch this patch adds a default case which returns error Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 9aa2bd4452d3..196a7378d332 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1123,8 +1123,9 @@ static u32 pl08x_width(enum dma_slave_buswidth width) return PL080_WIDTH_16BIT; case DMA_SLAVE_BUSWIDTH_4_BYTES: return PL080_WIDTH_32BIT; + default: + return ~0; } - return ~0; } 
static u32 pl08x_burst(u32 maxburst) From 2d859db3e4a82a365572592d57624a5f996ed0ec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 26 Jul 2011 09:07:11 -0400 Subject: [PATCH 101/151] ext4: fix data corruption in inodes with journalled data When journalling data for an inode (either because it is a symlink or because the filesystem is mounted in data=journal mode), ext4_evict_inode() can discard unwritten data by calling truncate_inode_pages(). This is because we don't mark the buffer / page dirty when journalling data but only add the buffer to the running transaction and thus mm does not know there are still unwritten data. Fix the problem by carefully tracking transaction containing inode's data, committing this transaction, and writing uncheckpointed buffers when inode should be reaped. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index de50b16a8f67..43e4abd67be7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -121,6 +121,33 @@ void ext4_evict_inode(struct inode *inode) trace_ext4_evict_inode(inode); if (inode->i_nlink) { + /* + * When journalling data dirty buffers are tracked only in the + * journal. So although mm thinks everything is clean and + * ready for reaping the inode might still have some pages to + * write in the running transaction or waiting to be + * checkpointed. Thus calling jbd2_journal_invalidatepage() + * (via truncate_inode_pages()) to discard these buffers can + * cause data loss. Also even if we did not discard these + * buffers, we would have no way to find them after the inode + * is reaped and thus user could see stale data if he tries to + * read them before the transaction is checkpointed. So be + * careful and force everything to disk here... We use + * ei->i_datasync_tid to store the newest transaction + * containing inode's data. 
+ * + * Note that directories do not have this problem because they + * don't use page cache. + */ + if (ext4_should_journal_data(inode) && + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; + + jbd2_log_start_commit(journal, commit_tid); + jbd2_log_wait_commit(journal, commit_tid); + filemap_write_and_wait(&inode->i_data); + } truncate_inode_pages(&inode->i_data, 0); goto no_delete; } @@ -970,6 +997,7 @@ static int ext4_journalled_write_end(struct file *file, if (new_i_size > inode->i_size) i_size_write(inode, pos+copied); ext4_set_inode_state(inode, EXT4_STATE_JDATA); + EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; if (new_i_size > EXT4_I(inode)->i_disksize) { ext4_update_i_disksize(inode, new_i_size); ret2 = ext4_mark_inode_dirty(handle, inode); @@ -1678,6 +1706,7 @@ static int __ext4_journalled_writepage(struct page *page, write_end_fn); if (ret == 0) ret = err; + EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; err = ext4_journal_stop(handle); if (!ret) ret = err; From 8f82f840ec6ab873f520364d443ff6fa1b3f8e22 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:35:44 -0400 Subject: [PATCH 102/151] ext4: prevent parallel resizers by atomic bit ops Before this patch, parallel resizers were allowed and protected by a mutex lock. Actually, there is no need to support parallel resizers, so this patch prevents parallel resizers by atomic bit ops, like lock_page() and unlock_page() do. To do this, the patch removes the mutex lock s_resize_lock from struct ext4_sb_info and adds an unsigned long field named s_resize_flags which indicates if there is a resizer.
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 7 +++++- fs/ext4/ioctl.c | 12 +++++++---- fs/ext4/resize.c | 55 ++++++++++++++++++------------------------------ fs/ext4/super.c | 2 +- 4 files changed, 36 insertions(+), 40 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 62cee2b6fe79..bb0f7760c7c8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1127,7 +1127,8 @@ struct ext4_sb_info { struct journal_s *s_journal; struct list_head s_orphan; struct mutex s_orphan_lock; - struct mutex s_resize_lock; + unsigned long s_resize_flags; /* Flags indicating if there + is a resizer */ unsigned long s_commit_interval; u32 s_max_batch_time; u32 s_min_batch_time; @@ -2269,6 +2270,10 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; +#define EXT4_RESIZING 0 +extern int ext4_resize_begin(struct super_block *sb); +extern void ext4_resize_end(struct super_block *sb); + #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 808c554e773f..f18bfe37aff8 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -202,8 +202,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct super_block *sb = inode->i_sb; int err, err2=0; - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; + err = ext4_resize_begin(sb); + if (err) + return err; if (get_user(n_blocks_count, (__u32 __user *)arg)) return -EFAULT; @@ -221,6 +222,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err == 0) err = err2; mnt_drop_write(filp->f_path.mnt); + ext4_resize_end(sb); return err; } @@ -271,8 +273,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct super_block *sb = inode->i_sb; int err, err2=0; - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; + err = ext4_resize_begin(sb); + if (err) + return err; if 
(copy_from_user(&input, (struct ext4_new_group_input __user *)arg, sizeof(input))) @@ -291,6 +294,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err == 0) err = err2; mnt_drop_write(filp->f_path.mnt); + ext4_resize_end(sb); return err; } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 80bbc9c60c24..0213f631271f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -16,6 +16,25 @@ #include "ext4_jbd2.h" +int ext4_resize_begin(struct super_block *sb) +{ + int ret = 0; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) + ret = -EBUSY; + + return ret; +} + +void ext4_resize_end(struct super_block *sb) +{ + clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); + smp_mb__after_clear_bit(); +} + #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -181,11 +200,7 @@ static int setup_new_group_blocks(struct super_block *sb, if (IS_ERR(handle)) return PTR_ERR(handle); - mutex_lock(&sbi->s_resize_lock); - if (input->group != sbi->s_groups_count) { - err = -EBUSY; - goto exit_journal; - } + BUG_ON(input->group != sbi->s_groups_count); if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { err = PTR_ERR(bh); @@ -285,7 +300,6 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(bh); exit_journal: - mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; @@ -799,13 +813,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) goto exit_put; } - mutex_lock(&sbi->s_resize_lock); - if (input->group != sbi->s_groups_count) { - ext4_warning(sb, "multiple resizers run on filesystem!"); - err = -EBUSY; - goto exit_journal; - } - if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) goto exit_journal; @@ -829,7 +836,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data 
*input) /* * OK, now we've set up the new group. Time to make it active. * - * We do not lock all allocations via s_resize_lock * so we have to be safe wrt. concurrent accesses the group * data. So we need to be careful to set all of the relevant * group descriptor data etc. *before* we enable the group. @@ -886,13 +892,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * * The precise rules we use are: * - * * Writers of s_groups_count *must* hold s_resize_lock - * AND * * Writers must perform a smp_wmb() after updating all dependent * data and before modifying the groups count * - * * Readers must hold s_resize_lock over the access - * OR * * Readers must perform an smp_rmb() after reading the groups count * and before reading any dependent data. * @@ -937,7 +939,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ext4_handle_dirty_super(handle, sb); exit_journal: - mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; if (!err) { @@ -972,9 +973,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, int err; ext4_group_t group; - /* We don't need to worry about locking wrt other resizers just - * yet: we're going to revalidate es->s_blocks_count after - * taking the s_resize_lock below. */ o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) @@ -995,7 +993,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if (n_blocks_count < o_blocks_count) { ext4_warning(sb, "can't shrink FS - resize aborted"); - return -EBUSY; + return -EINVAL; } /* Handle the remaining blocks in the last group only. 
*/ @@ -1038,24 +1036,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, goto exit_put; } - mutex_lock(&EXT4_SB(sb)->s_resize_lock); - if (o_blocks_count != ext4_blocks_count(es)) { - ext4_warning(sb, "multiple resizers run on filesystem!"); - mutex_unlock(&EXT4_SB(sb)->s_resize_lock); - ext4_journal_stop(handle); - err = -EBUSY; - goto exit_put; - } - if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) { ext4_warning(sb, "error %d on journal write access", err); - mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); goto exit_put; } ext4_blocks_count_set(es, o_blocks_count + add); - mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 143d763729b4..cfe9f39c4ba2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3500,7 +3500,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); - mutex_init(&sbi->s_resize_lock); + sbi->s_resize_flags = 0; sb->s_root = NULL; From ce723c31b58f54fb865036805475ee7a8c5dc173 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:39:09 -0400 Subject: [PATCH 103/151] ext4: prevent a fs with errors from being resized A filesystem with errors must not be resized; otherwise, it is easy to destroy the filesystem.
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0213f631271f..53d979541b31 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -23,6 +23,16 @@ int ext4_resize_begin(struct super_block *sb) if (!capable(CAP_SYS_RESOURCE)) return -EPERM; + /* + * We are not allowed to do online-resizing on a filesystem mounted + * with error, because it can destroy the filesystem easily. + */ + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { + ext4_warning(sb, "There are errors in the filesystem, " + "so online resizing is not allowed\n"); + return -EPERM; + } + if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) ret = -EBUSY; From 0529155e8a4bcb77dfc9ceaea19c6501487e452b Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:43:56 -0400 Subject: [PATCH 104/151] ext4: rename ext4_add_groupblocks() to ext4_group_add_blocks() Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 +- fs/ext4/mballoc.c | 4 ++-- fs/ext4/resize.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bb0f7760c7c8..bbe81db76c71 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1799,7 +1799,7 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode, unsigned long count, int flags); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); -extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, +extern void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e16583032b6b..93035ea70c0b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4667,7 +4667,7 @@ void ext4_free_blocks(handle_t 
*handle, struct inode *inode, } /** - * ext4_add_groupblocks() -- Add given blocks to an existing group + * ext4_group_add_blocks() -- Add given blocks to an existing group * @handle: handle to this transaction * @sb: super block * @block: start physcial block to add to the block group @@ -4675,7 +4675,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * * This marks the blocks as free in the bitmap and buddy. */ -void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, +void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 53d979541b31..d241ecbb0d53 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1056,7 +1056,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ - ext4_add_groupblocks(handle, sb, o_blocks_count, add); + ext4_group_add_blocks(handle, sb, o_blocks_count, add); ext4_handle_dirty_super(handle, sb); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); From cc7365dfe48cb2191f1572bf69e30d3e58716313 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:46:07 -0400 Subject: [PATCH 105/151] ext4: let ext4_group_add_blocks() return an error code This patch lets ext4_group_add_blocks() return an error code if it fails, so that upper functions can handle error correctly. 
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 +- fs/ext4/mballoc.c | 20 +++++++++++++++----- fs/ext4/resize.c | 10 +++++++--- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bbe81db76c71..da7ab48948f2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1799,7 +1799,7 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode, unsigned long count, int flags); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); -extern void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, +extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 93035ea70c0b..dbe429567eb3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4675,7 +4675,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * * This marks the blocks as free in the bitmap and buddy. */ -void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, +int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; @@ -4696,15 +4696,24 @@ void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, * Check to see if we are freeing blocks across a group * boundary. 
*/ - if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) + if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { + ext4_warning(sb, "too much blocks added to group %u\n", + block_group); + err = -EINVAL; goto error_return; + } bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) + if (!bitmap_bh) { + err = -EIO; goto error_return; + } + desc = ext4_get_group_desc(sb, block_group, &gd_bh); - if (!desc) + if (!desc) { + err = -EIO; goto error_return; + } if (in_range(ext4_block_bitmap(sb, desc), block, count) || in_range(ext4_inode_bitmap(sb, desc), block, count) || @@ -4714,6 +4723,7 @@ void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count); + err = -EINVAL; goto error_return; } @@ -4782,7 +4792,7 @@ void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, error_return: brelse(bitmap_bh); ext4_std_error(sb, err); - return; + return err; } /** diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index d241ecbb0d53..4c041e37f61f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -980,7 +980,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_grpblk_t add; struct buffer_head *bh; handle_t *handle; - int err; + int err, err2; ext4_group_t group; o_blocks_count = ext4_blocks_count(es); @@ -1056,11 +1056,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ - ext4_group_add_blocks(handle, sb, o_blocks_count, add); + err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); ext4_handle_dirty_super(handle, sb); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); - if ((err = ext4_journal_stop(handle))) + err2 = ext4_journal_stop(handle); + if (!err && err2) + err = err2; + + if (err) goto exit_put; if 
(test_opt(sb, DEBUG)) From 4740b830ed5720ade6c780dbf3fdfe9089b3552d Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:51:08 -0400 Subject: [PATCH 106/151] ext4: let ext4_group_add_blocks() handle 0 blocks quickly If ext4_group_add_blocks() is called with 0 block, make it return 0 without doing any extra work. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dbe429567eb3..b6ef4da39ce3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4691,6 +4691,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); + if (count == 0) + return 0; + ext4_get_group_no_and_offset(sb, block, &block_group, &bit); /* * Check to see if we are freeing blocks across a group From 2b79b09d13e35577151bd13ba08809911baccd1c Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:53:35 -0400 Subject: [PATCH 107/151] ext4: fix a typo in ext4_group_extend() Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 4c041e37f61f..5f0aefdc8599 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", + printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) From c3e94d1df9bdd9e2c4ba7e8f534f7925f1756f97 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 22:05:53 -0400 Subject: [PATCH 108/151] ext4: let setup_new_group_blocks() set multiple bits at a 
time Rename mb_set_bits() to ext4_set_bits() and make it a global function so that setup_new_group_blocks() can use it. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 ++ fs/ext4/mballoc.c | 15 ++++++++------- fs/ext4/resize.c | 18 +++++++----------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index da7ab48948f2..ba2009b49a55 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -940,6 +940,8 @@ struct ext4_inode_info { #define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_bit find_next_bit_le +extern void ext4_set_bits(void *bm, int cur, int len); + /* * Maximal mount counts between two filesystem checks */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b6ef4da39ce3..fa716c9b2455 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1282,7 +1282,7 @@ static void mb_clear_bits(void *bm, int cur, int len) } } -static void mb_set_bits(void *bm, int cur, int len) +void ext4_set_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1511,7 +1511,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) } mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); - mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); + ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); mb_check_buddy(e4b); return ret; @@ -2795,8 +2795,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, * We leak some of the blocks here. 
*/ ext4_lock_group(sb, ac->ac_b_ex.fe_group); - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, - ac->ac_b_ex.fe_len); + ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) @@ -2814,7 +2814,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } } #endif - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len); + ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_blks_set(sb, gdp, @@ -3284,7 +3285,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, while (n) { entry = rb_entry(n, struct ext4_free_data, node); - mb_set_bits(bitmap, entry->start_blk, entry->count); + ext4_set_bits(bitmap, entry->start_blk, entry->count); n = rb_next(n); } return; @@ -3326,7 +3327,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, if (unlikely(len == 0)) continue; BUG_ON(groupnr != group); - mb_set_bits(bitmap, start, len); + ext4_set_bits(bitmap, start, len); preallocated += len; count++; } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5f0aefdc8599..178fb2f11c3f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -217,11 +217,6 @@ static int setup_new_group_blocks(struct super_block *sb, goto exit_journal; } - if (ext4_bg_has_super(sb, input->group)) { - ext4_debug("mark backup superblock %#04llx (+0)\n", start); - ext4_set_bit(0, bh->b_data); - } - /* Copy all of the GDT blocks into the backup in this group */ for (i = 0, bit = 1, block = start + 1; i < gdblocks; i++, block++, bit++) { @@ -250,7 +245,6 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(gdb); goto exit_bh; } - ext4_set_bit(bit, bh->b_data); brelse(gdb); } @@ -261,8 +255,11 @@ static int 
setup_new_group_blocks(struct super_block *sb, GFP_NOFS); if (err) goto exit_bh; - for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++) - ext4_set_bit(bit, bh->b_data); + + if (ext4_bg_has_super(sb, input->group)) { + ext4_debug("mark backup group tables %#04llx (+0)\n", start); + ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); + } ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, input->block_bitmap - start); @@ -278,9 +275,8 @@ static int setup_new_group_blocks(struct super_block *sb, err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); if (err) goto exit_bh; - for (i = 0, bit = input->inode_table - start; - i < sbi->s_itb_per_group; i++, bit++) - ext4_set_bit(bit, bh->b_data); + ext4_set_bits(bh->b_data, input->inode_table - start, + sbi->s_itb_per_group); if ((err = extend_or_restart_transaction(handle, 2, bh))) goto exit_bh; From 6d40bc5a7e8fc71795d131e835f38f161ed7e1b1 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 22:24:41 -0400 Subject: [PATCH 109/151] ext4: simplify journal handling in setup_new_group_blocks() This patch simplifies journal handling in setup_new_group_blocks(). In previous code, block bitmap is modified everywhere in setup_new_group_blocks(), ext4_get_write_access() in extend_or_restart_transaction() is used to guarantee that the block bitmap stays in the new handle, this makes things complicated. The previous commit changed things so that the modifications on the block bitmap are batched and done by ext4_set_bits() at the end of the for loop. This allows us to simplify things. 
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 178fb2f11c3f..5b423f89efda 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -161,8 +161,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, * If that fails, restart the transaction & regain write access for the * buffer head which is used for block_bitmap modifications. */ -static int extend_or_restart_transaction(handle_t *handle, int thresh, - struct buffer_head *bh) +static int extend_or_restart_transaction(handle_t *handle, int thresh) { int err; @@ -173,9 +172,8 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh, if (err < 0) return err; if (err) { - if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) - return err; - if ((err = ext4_journal_get_write_access(handle, bh))) + err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA); + if (err) return err; } @@ -212,29 +210,24 @@ static int setup_new_group_blocks(struct super_block *sb, BUG_ON(input->group != sbi->s_groups_count); - if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { - err = PTR_ERR(bh); - goto exit_journal; - } - /* Copy all of the GDT blocks into the backup in this group */ for (i = 0, bit = 1, block = start + 1; i < gdblocks; i++, block++, bit++) { struct buffer_head *gdb; ext4_debug("update backup group %#04llx (+%d)\n", block, bit); - - if ((err = extend_or_restart_transaction(handle, 1, bh))) - goto exit_bh; + err = extend_or_restart_transaction(handle, 1); + if (err) + goto exit_journal; gdb = sb_getblk(sb, block); if (!gdb) { err = -EIO; - goto exit_bh; + goto exit_journal; } if ((err = ext4_journal_get_write_access(handle, gdb))) { brelse(gdb); - goto exit_bh; + goto exit_journal; } lock_buffer(gdb); memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); @@ -243,7 +236,7 @@ 
static int setup_new_group_blocks(struct super_block *sb, err = ext4_handle_dirty_metadata(handle, NULL, gdb); if (unlikely(err)) { brelse(gdb); - goto exit_bh; + goto exit_journal; } brelse(gdb); } @@ -254,7 +247,17 @@ static int setup_new_group_blocks(struct super_block *sb, err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, GFP_NOFS); if (err) - goto exit_bh; + goto exit_journal; + + err = extend_or_restart_transaction(handle, 2); + if (err) + goto exit_journal; + + bh = bclean(handle, sb, input->block_bitmap); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + goto exit_journal; + } if (ext4_bg_has_super(sb, input->group)) { ext4_debug("mark backup group tables %#04llx (+0)\n", start); @@ -278,8 +281,6 @@ static int setup_new_group_blocks(struct super_block *sb, ext4_set_bits(bh->b_data, input->inode_table - start, sbi->s_itb_per_group); - if ((err = extend_or_restart_transaction(handle, 2, bh))) - goto exit_bh; ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); From 5a42fb93e6a33224774786691027ef2d9795c245 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Tue, 26 Jul 2011 14:25:10 +0100 Subject: [PATCH 110/151] Improve slave/cyclic DMA engine documentation Improve the documentation for the slave and cyclic DMA engine support reformatting it for easier reading, adding further APIs, splitting it into five steps, and including references to the documentation in dmaengine.h. 
Signed-off-by: Russell King [Fixed the index title to reflect new changes] Signed-off-by: Vinod Koul --- Documentation/dmaengine.txt | 234 +++++++++++++++++++++++++----------- 1 file changed, 164 insertions(+), 70 deletions(-) diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt index 5a0cb1ef6164..94b7e0f96b38 100644 --- a/Documentation/dmaengine.txt +++ b/Documentation/dmaengine.txt @@ -10,87 +10,181 @@ NOTE: For DMA Engine usage in async_tx please see: Below is a guide to device driver writers on how to use the Slave-DMA API of the DMA Engine. This is applicable only for slave DMA usage only. -The slave DMA usage consists of following steps +The slave DMA usage consists of following steps: 1. Allocate a DMA slave channel 2. Set slave and controller specific parameters 3. Get a descriptor for transaction -4. Submit the transaction and wait for callback notification +4. Submit the transaction +5. Issue pending requests and wait for callback notification 1. Allocate a DMA slave channel -Channel allocation is slightly different in the slave DMA context, client -drivers typically need a channel from a particular DMA controller only and even -in some cases a specific channel is desired. To request a channel -dma_request_channel() API is used. - -Interface: -struct dma_chan *dma_request_channel(dma_cap_mask_t mask, - dma_filter_fn filter_fn, - void *filter_param); -where dma_filter_fn is defined as: -typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); - -When the optional 'filter_fn' parameter is set to NULL dma_request_channel -simply returns the first channel that satisfies the capability mask. Otherwise, -when the mask parameter is insufficient for specifying the necessary channel, -the filter_fn routine can be used to disposition the available channels in the -system. The filter_fn routine is called once for each free channel in the -system. 
Upon seeing a suitable channel filter_fn returns DMA_ACK which flags -that channel to be the return value from dma_request_channel. A channel -allocated via this interface is exclusive to the caller, until -dma_release_channel() is called. + + Channel allocation is slightly different in the slave DMA context, + client drivers typically need a channel from a particular DMA + controller only and even in some cases a specific channel is desired. + To request a channel dma_request_channel() API is used. + + Interface: + struct dma_chan *dma_request_channel(dma_cap_mask_t mask, + dma_filter_fn filter_fn, + void *filter_param); + where dma_filter_fn is defined as: + typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + + The 'filter_fn' parameter is optional, but highly recommended for + slave and cyclic channels as they typically need to obtain a specific + DMA channel. + + When the optional 'filter_fn' parameter is NULL, dma_request_channel() + simply returns the first channel that satisfies the capability mask. + + Otherwise, the 'filter_fn' routine will be called once for each free + channel which has a capability in 'mask'. 'filter_fn' is expected to + return 'true' when the desired DMA channel is found. + + A channel allocated via this interface is exclusive to the caller, + until dma_release_channel() is called. 2. Set slave and controller specific parameters -Next step is always to pass some specific information to the DMA driver. Most of -the generic information which a slave DMA can use is in struct dma_slave_config. -It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA -burst lengths etc. If some DMA controllers have more parameters to be sent then -they should try to embed struct dma_slave_config in their controller specific -structure. That gives flexibility to client to pass more parameters, if -required. 
- -Interface: -int dmaengine_slave_config(struct dma_chan *chan, - struct dma_slave_config *config) + + Next step is always to pass some specific information to the DMA + driver. Most of the generic information which a slave DMA can use + is in struct dma_slave_config. This allows the clients to specify + DMA direction, DMA addresses, bus widths, DMA burst lengths etc + for the peripheral. + + If some DMA controllers have more parameters to be sent then they + should try to embed struct dma_slave_config in their controller + specific structure. That gives flexibility to client to pass more + parameters, if required. + + Interface: + int dmaengine_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) + + Please see the dma_slave_config structure definition in dmaengine.h + for a detailed explanation of the struct members. Please note + that the 'direction' member will be going away as it duplicates the + direction given in the prepare call. 3. Get a descriptor for transaction -For slave usage the various modes of slave transfers supported by the -DMA-engine are: -slave_sg - DMA a list of scatter gather buffers from/to a peripheral -dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the + + For slave usage the various modes of slave transfers supported by the + DMA-engine are: + + slave_sg - DMA a list of scatter gather buffers from/to a peripheral + dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the operation is explicitly stopped. -The non NULL return of this transfer API represents a "descriptor" for the given -transaction. - -Interface: -struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)( - struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, + + A non-NULL return of this transfer API represents a "descriptor" for + the given transaction.
+ + Interface: + struct dma_async_tx_descriptor *(*chan->device->device_prep_slave_sg)( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, unsigned long flags); -struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( + + struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_data_direction direction); -4. Submit the transaction and wait for callback notification -To schedule the transaction to be scheduled by dma device, the "descriptor" -returned in above (3) needs to be submitted. -To tell the dma driver that a transaction is ready to be serviced, the -descriptor->submit() callback needs to be invoked. This chains the descriptor to -the pending queue. -The transactions in the pending queue can be activated by calling the -issue_pending API. If channel is idle then the first transaction in queue is -started and subsequent ones queued up. -On completion of the DMA operation the next in queue is submitted and a tasklet -triggered. The tasklet would then call the client driver completion callback -routine for notification, if set. -Interface: -void dma_async_issue_pending(struct dma_chan *chan); - -============================================================================== - -Additional usage notes for dma driver writers -1/ Although DMA engine specifies that completion callback routines cannot submit -any new operations, but typically for slave DMA subsequent transaction may not -be available for submit prior to callback routine being called. This requirement -is not a requirement for DMA-slave devices. 
But they should take care to drop -the spin-lock they might be holding before calling the callback routine + The peripheral driver is expected to have mapped the scatterlist for + the DMA operation prior to calling device_prep_slave_sg, and must + keep the scatterlist mapped until the DMA operation has completed. + The scatterlist must be mapped using the DMA struct device. So, + normal setup should look like this: + + nr_sg = dma_map_sg(chan->device->dev, sgl, sg_len); + if (nr_sg == 0) + /* error */ + + desc = chan->device->device_prep_slave_sg(chan, sgl, nr_sg, + direction, flags); + + Once a descriptor has been obtained, the callback information can be + added and the descriptor must then be submitted. Some DMA engine + drivers may hold a spinlock between a successful preparation and + submission so it is important that these two operations are closely + paired. + + Note: + Although the async_tx API specifies that completion callback + routines cannot submit any new operations, this is not the + case for slave/cyclic DMA. + + For slave DMA, the subsequent transaction may not be available + for submission prior to callback function being invoked, so + slave DMA callbacks are permitted to prepare and submit a new + transaction. + + For cyclic DMA, a callback function may wish to terminate the + DMA via dmaengine_terminate_all(). + + Therefore, it is important that DMA engine drivers drop any + locks before calling the callback function which may cause a + deadlock. + + Note that callbacks will always be invoked from the DMA + engine's tasklet, never from interrupt context. + +4. Submit the transaction + + Once the descriptor has been prepared and the callback information + added, it must be placed on the DMA engine driver's pending queue. + + Interface: + dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc) + + This returns a cookie that can be used to check the progress of DMA engine + activity via other DMA engine calls not covered in this document.
+ + dmaengine_submit() will not start the DMA operation, it merely adds + it to the pending queue. For this, see step 5, dma_async_issue_pending. + +5. Issue pending DMA requests and wait for callback notification + + The transactions in the pending queue can be activated by calling the + issue_pending API. If channel is idle then the first transaction in + queue is started and subsequent ones queued up. + + On completion of each DMA operation, the next in queue is started and + a tasklet triggered. The tasklet will then call the client driver + completion callback routine for notification, if set. + + Interface: + void dma_async_issue_pending(struct dma_chan *chan); + +Further APIs: + +1. int dmaengine_terminate_all(struct dma_chan *chan) + + This causes all activity for the DMA channel to be stopped, and may + discard data in the DMA FIFO which hasn't been fully transferred. + No callback functions will be called for any incomplete transfers. + +2. int dmaengine_pause(struct dma_chan *chan) + + This pauses activity on the DMA channel without data loss. + +3. int dmaengine_resume(struct dma_chan *chan) + + Resume a previously paused DMA channel. It is invalid to resume a + channel which is not currently paused. + +4. enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) + + This can be used to check the status of the channel. Please see + the documentation in include/linux/dmaengine.h for a more complete + description of this API. + + This can be used in conjunction with dma_async_is_complete() and + the cookie returned from 'descriptor->submit()' to check for + completion of a specific DMA transaction. + + Note: + Not all DMA engine drivers can return reliable information for + a running DMA channel. It is recommended that DMA engine users + pause or stop (via dmaengine_terminate_all) the channel before + using this API. 
From e6075e984d100c12bb79267639c3f661d9788a67 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 27 Jul 2011 20:40:18 -0400 Subject: [PATCH 111/151] ext4: remove lock_buffer in bclean() and setup_new_group_blocks() There is no need to lock the buffers since no one else should be touching these buffers besides the file system. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5b423f89efda..65e5cb6c094d 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -147,10 +147,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, brelse(bh); bh = ERR_PTR(err); } else { - lock_buffer(bh); memset(bh->b_data, 0, sb->s_blocksize); set_buffer_uptodate(bh); - unlock_buffer(bh); } return bh; @@ -229,10 +227,8 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(gdb); goto exit_journal; } - lock_buffer(gdb); memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); set_buffer_uptodate(gdb); - unlock_buffer(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); if (unlikely(err)) { brelse(gdb); From 2f919710143cb2025157c3c193ee22de86f3ed73 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 27 Jul 2011 21:16:33 -0400 Subject: [PATCH 112/151] ext4: simplify parameters of add_new_gdb() add_new_gdb() only needs the block group number; there is no need to pass a pointer to struct ext4_new_group_data to add_new_gdb(). Instead of filling in a pointer to the struct buffer_head in add_new_gdb(), it's simpler to have the caller fetch it from the s_group_desc[] array. [Fixed error path to handle the case where struct buffer_head *primary hasn't been set yet.
-- Ted] Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 65e5cb6c094d..9e453552f10e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -394,15 +394,15 @@ static int verify_reserved_gdb(struct super_block *sb, * fail once we start modifying the data on disk, because JBD has no rollback. */ static int add_new_gdb(handle_t *handle, struct inode *inode, - struct ext4_new_group_data *input, - struct buffer_head **primary) + ext4_group_t group) { struct super_block *sb = inode->i_sb; struct ext4_super_block *es = EXT4_SB(sb)->s_es; - unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); + unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; struct buffer_head **o_group_desc, **n_group_desc; struct buffer_head *dind; + struct buffer_head *gdb_bh; int gdbackups; struct ext4_iloc iloc; __le32 *data; @@ -425,11 +425,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return -EPERM; } - *primary = sb_bread(sb, gdblock); - if (!*primary) + gdb_bh = sb_bread(sb, gdblock); + if (!gdb_bh) return -EIO; - if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { + gdbackups = verify_reserved_gdb(sb, gdb_bh); + if (gdbackups < 0) { err = gdbackups; goto exit_bh; } @@ -444,7 +445,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { ext4_warning(sb, "new group %u GDT block %llu not reserved", - input->group, gdblock); + group, gdblock); err = -EINVAL; goto exit_dind; } @@ -453,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, if (unlikely(err)) goto exit_dind; - err = ext4_journal_get_write_access(handle, *primary); + err = ext4_journal_get_write_access(handle, 
gdb_bh); if (unlikely(err)) goto exit_sbh; @@ -492,8 +493,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; ext4_mark_iloc_dirty(handle, inode, &iloc); - memset((*primary)->b_data, 0, sb->s_blocksize); - err = ext4_handle_dirty_metadata(handle, NULL, *primary); + memset(gdb_bh->b_data, 0, sb->s_blocksize); + err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); goto exit_inode; @@ -503,7 +504,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, o_group_desc = EXT4_SB(sb)->s_group_desc; memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); - n_group_desc[gdb_num] = *primary; + n_group_desc[gdb_num] = gdb_bh; EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; kfree(o_group_desc); @@ -525,7 +526,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, exit_dind: brelse(dind); exit_bh: - brelse(*primary); + brelse(gdb_bh); ext4_debug("leaving with error %d\n", err); return err; @@ -833,8 +834,16 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && (err = reserve_backup_gdb(handle, inode, input))) goto exit_journal; - } else if ((err = add_new_gdb(handle, inode, input, &primary))) - goto exit_journal; + } else { + /* + * Note that we can access new group descriptor block safely + * only if add_new_gdb() succeeds. + */ + err = add_new_gdb(handle, inode, input->group); + if (err) + goto exit_journal; + primary = sbi->s_group_desc[gdb_num]; + } /* * OK, now we've set up the new group. Time to make it active. 
@@ -944,7 +953,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) exit_journal: if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; - if (!err) { + if (!err && primary) { update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block)); update_backups(sb, primary->b_blocknr, primary->b_data, From 668f4dc5593327fadc95b33189c375f7178ef88e Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 27 Jul 2011 21:23:13 -0400 Subject: [PATCH 113/151] ext4: simplify parameters of reserve_backup_gdb() The reserve_backup_gdb() function only needs the block group number; there's no need to pass a pointer to struct ext4_new_group_data to it. Signed-off-by: Yongqiang Yang --- fs/ext4/resize.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9e453552f10e..6e3327d6ff1f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -546,7 +546,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, * backup GDT blocks are stored in their reserved primary GDT block. */ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, - struct ext4_new_group_data *input) + ext4_group_t group) { struct super_block *sb = inode->i_sb; int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); @@ -617,7 +617,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, * Finally we can add each of the reserved backup GDT blocks from * the new group to its reserved primary GDT block. 
*/ - blk = input->group * EXT4_BLOCKS_PER_GROUP(sb); + blk = group * EXT4_BLOCKS_PER_GROUP(sb); for (i = 0; i < reserved_gdb; i++) { int err2; data = (__le32 *)primary[i]->b_data; @@ -831,9 +831,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if ((err = ext4_journal_get_write_access(handle, primary))) goto exit_journal; - if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && - (err = reserve_backup_gdb(handle, inode, input))) - goto exit_journal; + if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { + err = reserve_backup_gdb(handle, inode, input->group); + if (err) + goto exit_journal; + } } else { /* * Note that we can access new group descriptor block safely From 0e1147b001793593624e80b3c0a1790822b6baca Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Wed, 27 Jul 2011 21:29:33 -0400 Subject: [PATCH 114/151] ext4: add action of moving index in ext4_ext_rm_idx for Punch Hole The old function ext4_ext_rm_idx is used only for the truncate case because it just removes the last index in the extent-index-block. When punching a hole, it is usually necessary to remove a "middle" index, therefore we must move the indexes which come after it forward. (I created a file with a 1 depth extent tree and punched a hole in the middle of it; the last index in the index-block was strangely gone, which is how I found this bug) Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4d73e11ae883..a25bbdc7d493 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2101,8 +2101,6 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, /* * ext4_ext_rm_idx: * removes index from the index block. - * It's used in truncate case only, thus all requests are for - * last index in the block only.
*/ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) @@ -2120,6 +2118,13 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path); if (err) return err; + + if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) { + int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx; + len *= sizeof(struct ext4_extent_idx); + memmove(path->p_idx, path->p_idx + 1, len); + } + le16_add_cpu(&path->p_hdr->eh_entries, -1); err = ext4_ext_dirty(handle, inode, path); if (err) From 29ae07b702cb77dbc24b0843f15ee8cf8a642311 Mon Sep 17 00:00:00 2001 From: Utako Kusaka Date: Wed, 27 Jul 2011 22:11:20 -0400 Subject: [PATCH 115/151] ext4: Fix overflow caused by missing cast in ext4_fallocate() The logical block number in map.m_lblk is a __u32, and so before we shift it left, by the block size, we need to cast it to a 64-bit size. Otherwise i_size can be corrupted on an ENOSPC. # df -T /mnt/mp1 Filesystem Type 1K-blocks Used Available Use% Mounted on /dev/sda6 ext4 9843276 153056 9190200 2% /mnt/mp1 # fallocate -o 0 -l 2199023251456 /mnt/mp1/testfile fallocate: /mnt/mp1/testfile: fallocate failed: No space left on device # stat /mnt/mp1/testfile File: `/mnt/mp1/testfile' Size: 4293656576 Blocks: 19380440 IO Block: 4096 regular file Device: 806h/2054d Inode: 12 Links: 1 Access: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/ root) Access: 2011-07-25 13:01:31.414490496 +0900 Modify: 2011-07-25 13:01:31.414490496 +0900 Change: 2011-07-25 13:01:31.454490495 +0900 Signed-off-by: Utako Kusaka Signed-off-by: "Theodore Ts'o" -- fs/ext4/extents.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a25bbdc7d493..57cf568a98ab 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3824,7 +3824,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) blkbits)
>> blkbits)) new_size = offset + len; else - new_size = (map.m_lblk + ret) << blkbits; + new_size = ((loff_t) map.m_lblk + ret) << blkbits; ext4_falloc_update_inode(inode, mode, new_size, (map.m_flags & EXT4_MAP_NEW)); From d6b722aa383a467a43d09ee38e866981abba08ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jul 2011 22:21:58 -0400 Subject: [PATCH 116/151] hppfs: missing include Signed-off-by: Al Viro --- fs/hppfs/hppfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 8635be5ffd97..970ea987b3f6 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "os.h" From 9d108d25487bf958f8093409a4c0bee6169edba6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jul 2011 22:27:33 -0400 Subject: [PATCH 117/151] devtmpfs: missing initialization in never-hit case create_path() on something without a single / in it will return err without initializing it. It actually can't happen (we call that thing only if create on the same path returns -ENOENT, which won't happen for a single-component path), but in this case initializing err to 0 is more than making compiler to STFU - would be the right thing to return on such paths; the function creates a parent directory of given pathname and in that case it has no work to do...
Signed-off-by: Al Viro --- drivers/base/devtmpfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index b89fffc1d777..33e1bed68fdd 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -166,7 +166,7 @@ static int create_path(const char *nodepath) { char *path; char *s; - int err; + int err = 0; /* parent directories do not exist, create them */ path = kstrdup(nodepath, GFP_KERNEL); From 137a6354305455d585fe99fe5e9949acd895b045 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:29 +0100 Subject: [PATCH 118/151] regulator: Fix WM831x regulator ID lookups for multiple WM831xs With multiple wm831x devices the device IDs used for the regulators will not always be contiguous so simply taking the modulus is not sufficient to look up the ID, we need to reverse the way the ID is generated. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 16 ++++++++++++++-- drivers/regulator/wm831x-ldo.c | 25 ++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index a0982e809851..0c7a9d5047f4 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -504,11 +504,17 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->dcdc); + int id; struct wm831x_dcdc *dcdc; struct resource *res; int ret, irq; + if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1); if (pdata == NULL || pdata->dcdc[id] == NULL) @@ -709,11 +715,17 @@ static __devinit int wm831x_buckp_probe(struct platform_device *pdev) { struct wm831x *wm831x = 
dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->dcdc); + int id; struct wm831x_dcdc *dcdc; struct resource *res; int ret, irq; + if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1); if (pdata == NULL || pdata->dcdc[id] == NULL) diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c index 2220cf8defb1..6709710a059e 100644 --- a/drivers/regulator/wm831x-ldo.c +++ b/drivers/regulator/wm831x-ldo.c @@ -310,11 +310,17 @@ static __devinit int wm831x_gp_ldo_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->ldo); + int id; struct wm831x_ldo *ldo; struct resource *res; int ret, irq; + if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); if (pdata == NULL || pdata->ldo[id] == NULL) @@ -574,11 +580,17 @@ static __devinit int wm831x_aldo_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->ldo); + int id; struct wm831x_ldo *ldo; struct resource *res; int ret, irq; + if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); if (pdata == NULL || pdata->ldo[id] == NULL) @@ -764,11 +776,18 @@ static __devinit int wm831x_alive_ldo_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->ldo); + int id; struct wm831x_ldo *ldo; struct resource *res; int ret; + 
if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + + dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); if (pdata == NULL || pdata->ldo[id] == NULL) From a1b81dd3ff2c622d0f4e3954bf9b5dd47a0f13a0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:30 +0100 Subject: [PATCH 119/151] regulator: Fix WM831x DCDC DVS VSEL bootstrapping Read our initial VSEL from the correct register. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 0c7a9d5047f4..8a4fdc3a45d9 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -551,7 +551,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev) } dcdc->on_vsel = ret & WM831X_DC1_ON_VSEL_MASK; - ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG); + ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL); if (ret < 0) { dev_err(wm831x->dev, "Failed to read DVS VSEL: %d\n", ret); goto err; From 24b4315051ef2b9155d23ccbad528daab3b65eb6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:31 +0100 Subject: [PATCH 120/151] regulator: Add EPEs to the MODULE_ALIAS() for wm831x-dcdc Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 8a4fdc3a45d9..2ee482306784 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -1058,3 +1058,4 @@ MODULE_DESCRIPTION("WM831x DC-DC convertor driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:wm831x-buckv"); MODULE_ALIAS("platform:wm831x-buckp"); +MODULE_ALIAS("platform:wm831x-epe"); From b47ba9fdd336b318a6a6431e6a4556df99272277 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: 
Mon, 25 Jul 2011 22:20:32 +0100 Subject: [PATCH 121/151] regulator: Set up GPIO for WM831x VSEL before enabling VSEL mode If the VSEL is not in use prior to us starting up then we need to make sure we initialise the GPIO before we push the DVS control to being done by the GPIO. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 41 ++++++++++++++++----------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 2ee482306784..95249f7a0e39 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -456,27 +456,6 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc, if (!pdata || !pdata->dvs_gpio) return; - switch (pdata->dvs_control_src) { - case 1: - ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT; - break; - case 2: - ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT; - break; - default: - dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n", - pdata->dvs_control_src, dcdc->name); - return; - } - - ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL, - WM831X_DC1_DVS_SRC_MASK, ctrl); - if (ret < 0) { - dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n", - dcdc->name, ret); - return; - } - ret = gpio_request(pdata->dvs_gpio, "DCDC DVS"); if (ret < 0) { dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n", @@ -498,6 +477,26 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc, } dcdc->dvs_gpio = pdata->dvs_gpio; + + switch (pdata->dvs_control_src) { + case 1: + ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT; + break; + case 2: + ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT; + break; + default: + dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n", + pdata->dvs_control_src, dcdc->name); + return; + } + + ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL, + WM831X_DC1_DVS_SRC_MASK, ctrl); + if (ret < 0) { + dev_err(wm831x->dev, "Failed to set %s DVS source: 
%d\n", + dcdc->name, ret); + } } static __devinit int wm831x_buckv_probe(struct platform_device *pdev) From c439b8f46ee79147139e124621dbc9e1e7804655 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:33 +0100 Subject: [PATCH 122/151] regulator: Bootstrap wm831x DVS VSEL value from ON VSEL if not already set If we don't have a DVS VSEL value already set when we start up then start it off with the value currently being used for ON. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 95249f7a0e39..2c5d54b026c9 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -491,6 +491,20 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc, return; } + /* If DVS_VSEL is set to the minimum value then raise it to ON_VSEL + * to make bootstrapping a bit smoother. + */ + if (!dcdc->dvs_vsel) { + ret = wm831x_set_bits(wm831x, + dcdc->base + WM831X_DCDC_DVS_CONTROL, + WM831X_DC1_DVS_VSEL_MASK, dcdc->on_vsel); + if (ret == 0) + dcdc->dvs_vsel = dcdc->on_vsel; + else + dev_warn(wm831x->dev, "Failed to set DVS_VSEL: %d\n", + ret); + } + ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL, WM831X_DC1_DVS_SRC_MASK, ctrl); if (ret < 0) { From 88cda60e512373ca18a663ee66dc2550800223eb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:34 +0100 Subject: [PATCH 123/151] regulator: Improve WM831x DVS VSEL selection algorithm Rather than using the maximum voltage we get passed to select the DVS voltage to use, remember the highest voltage we've ever seen. This improves how the driver works when the consumer permits higher voltages than it will ever select in order to support the widest possible voltage range.
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 52 ++++++++++----------------------- 1 file changed, 15 insertions(+), 37 deletions(-) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 2c5d54b026c9..bd3531d8b2ac 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -267,23 +267,6 @@ static int wm831x_buckv_select_min_voltage(struct regulator_dev *rdev, return vsel; } -static int wm831x_buckv_select_max_voltage(struct regulator_dev *rdev, - int min_uV, int max_uV) -{ - u16 vsel; - - if (max_uV < 600000 || max_uV > 1800000) - return -EINVAL; - - vsel = ((max_uV - 600000) / 12500) + 8; - - if (wm831x_buckv_list_voltage(rdev, vsel) < min_uV || - wm831x_buckv_list_voltage(rdev, vsel) < max_uV) - return -EINVAL; - - return vsel; -} - static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state) { struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); @@ -338,28 +321,23 @@ static int wm831x_buckv_set_voltage(struct regulator_dev *rdev, if (ret < 0) return ret; - /* Set the high voltage as the DVS voltage. This is optimised - * for CPUfreq usage, most processors will keep the maximum - * voltage constant and lower the minimum with the frequency. */ - vsel = wm831x_buckv_select_max_voltage(rdev, min_uV, max_uV); - if (vsel < 0) { - /* This should never happen - at worst the same vsel - * should be chosen */ - WARN_ON(vsel < 0); - return 0; + /* + * If this VSEL is higher than the last one we've seen then + * remember it as the DVS VSEL. This is optimised for CPUfreq + * usage where we want to get to the highest voltage very + * quickly. 
+ */ + if (vsel > dcdc->dvs_vsel) { + ret = wm831x_set_bits(wm831x, dvs_reg, + WM831X_DC1_DVS_VSEL_MASK, + dcdc->dvs_vsel); + if (ret == 0) + dcdc->dvs_vsel = vsel; + else + dev_warn(wm831x->dev, + "Failed to set DCDC DVS VSEL: %d\n", ret); } - /* Don't bother if it's the same VSEL we're already using */ - if (vsel == dcdc->on_vsel) - return 0; - - ret = wm831x_set_bits(wm831x, dvs_reg, WM831X_DC1_DVS_VSEL_MASK, vsel); - if (ret == 0) - dcdc->dvs_vsel = vsel; - else - dev_warn(wm831x->dev, "Failed to set DCDC DVS VSEL: %d\n", - ret); - return 0; } From d59729f4e794f814b25ccd2aebfbe606242c4544 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 30 Jul 2011 12:34:19 -0400 Subject: [PATCH 124/151] ext4: fix races in ext4_sync_parent() Fix problems if fsync() races against a rename of a parent directory as pointed out by Al Viro in his own inimitable way: >While we are at it, could somebody please explain what the hell is ext4 >doing in >static int ext4_sync_parent(struct inode *inode) >{ > struct writeback_control wbc; > struct dentry *dentry = NULL; > int ret = 0; > > while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { > ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); > dentry = list_entry(inode->i_dentry.next, > struct dentry, d_alias); > if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) > break; > inode = dentry->d_parent->d_inode; > ret = sync_mapping_buffers(inode->i_mapping); > ... >Note that dentry obviously can't be NULL there. dentry->d_parent is never >NULL. And dentry->d_parent would better not be negative, for crying out >loud! What's worse, there's no guarantees that dentry->d_parent will >remain our parent over that sync_mapping_buffers() *and* that inode won't >just be freed under us (after rename() and memory pressure leading to >eviction of what used to be our dentry->d_parent)...... 
Reported-by: Al Viro Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index ce66d2fe826c..f9dbe33cde5e 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -129,15 +129,30 @@ static int ext4_sync_parent(struct inode *inode) { struct writeback_control wbc; struct dentry *dentry = NULL; + struct inode *next; int ret = 0; - while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { + if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) + return 0; + inode = igrab(inode); + while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); - dentry = list_entry(inode->i_dentry.next, - struct dentry, d_alias); - if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) + dentry = NULL; + spin_lock(&inode->i_lock); + if (!list_empty(&inode->i_dentry)) { + dentry = list_first_entry(&inode->i_dentry, + struct dentry, d_alias); + dget(dentry); + } + spin_unlock(&inode->i_lock); + if (!dentry) + break; + next = igrab(dentry->d_parent->d_inode); + dput(dentry); + if (!next) break; - inode = dentry->d_parent->d_inode; + iput(inode); + inode = next; ret = sync_mapping_buffers(inode->i_mapping); if (ret) break; @@ -148,6 +163,7 @@ static int ext4_sync_parent(struct inode *inode) if (ret) break; } + iput(inode); return ret; } From 59be8e7280c10fd8f078ba6dc2bcdc2b1453b6ab Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 30 Jul 2011 12:38:46 -0400 Subject: [PATCH 125/151] ext4: change umode_t in tracepoint headers to be an explicit __u16 As requested by Al Viro, since umode_t may be changing to a u32 for some architectures. 
Signed-off-by: "Theodore Ts'o" Cc: Al Viro --- include/trace/events/ext4.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 51d88139eb8c..bf5518d50423 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -23,7 +23,7 @@ TRACE_EVENT(ext4_free_inode, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u64, blocks ) @@ -52,7 +52,7 @@ TRACE_EVENT(ext4_request_inode, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, dir ) - __field( umode_t, mode ) + __field( __u16, mode ) ), TP_fast_assign( @@ -75,7 +75,7 @@ TRACE_EVENT(ext4_allocate_inode, __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, dir ) - __field( umode_t, mode ) + __field( __u16, mode ) ), TP_fast_assign( @@ -727,7 +727,7 @@ TRACE_EVENT(ext4_free_blocks, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( __u64, block ) __field( unsigned long, count ) __field( int, flags ) @@ -1014,7 +1014,7 @@ TRACE_EVENT(ext4_forget, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( int, is_metadata ) __field( __u64, block ) ), @@ -1041,7 +1041,7 @@ TRACE_EVENT(ext4_da_update_reserve_space, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( __u64, i_blocks ) __field( int, used_blocks ) __field( int, reserved_data_blocks ) @@ -1078,7 +1078,7 @@ TRACE_EVENT(ext4_da_reserve_space, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( __u64, i_blocks ) __field( int, md_needed ) __field( int, reserved_data_blocks ) @@ -1112,7 +1112,7 @@ TRACE_EVENT(ext4_da_release_space, TP_STRUCT__entry( __field( 
dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( __u64, i_blocks ) __field( int, freed_blocks ) __field( int, reserved_data_blocks ) From c49bafa3842751b8955a962859f42d307673d75d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Jul 2011 12:58:41 -0400 Subject: [PATCH 126/151] ext4: add missing kfree() on error return path in add_new_gdb() We added some more error handling in b40971426a "ext4: add error checking to calls to ext4_handle_dirty_metadata()". But we need to call kfree() as well to avoid a memory leak. Signed-off-by: Dan Carpenter Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6e3327d6ff1f..71085df97bbd 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -517,6 +517,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return err; exit_inode: + kfree(n_group_desc); /* ext4_handle_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); exit_dindj: From b4aff1f874f679320c03e3d97b60fc7babfd4623 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 28 Jun 2011 16:18:59 -0400 Subject: [PATCH 127/151] Btrfs: load the key from the dir item in readdir into a fake dentry In btrfs we have 2 indexes for inodes. One is for readdir, it's in this nice sequential order and works out brilliantly for readdir. However if you use ls, it usually stat's each file it gets from readdir. This is where the second index comes in, which is based on a hash of the name of the file. So then the lookup has to lookup this index, and then lookup the inode. The index lookup is going to be in random order (since its based on the name hash), which gives us less than stellar performance. Since we know the inode location from the readdir index, I create a dummy dentry and copy the location key into dentry->d_fsdata. 
Then on lookup if we have d_fsdata we use that location to lookup the inode, avoiding looking up the other directory index. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Al Viro --- fs/btrfs/inode.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index caa26ab5ed68..540e3b43c688 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4016,12 +4016,19 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct btrfs_root *sub_root = root; struct btrfs_key location; int index; - int ret; + int ret = 0; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ret = btrfs_inode_by_name(dir, dentry, &location); + if (unlikely(d_need_lookup(dentry))) { + memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); + kfree(dentry->d_fsdata); + dentry->d_fsdata = NULL; + d_clear_need_lookup(dentry); + } else { + ret = btrfs_inode_by_name(dir, dentry, &location); + } if (ret < 0) return ERR_PTR(ret); @@ -4076,6 +4083,12 @@ static int btrfs_dentry_delete(const struct dentry *dentry) return 0; } +static void btrfs_dentry_release(struct dentry *dentry) +{ + if (dentry->d_fsdata) + kfree(dentry->d_fsdata); +} + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { @@ -4098,6 +4111,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_path *path; struct list_head ins_list; struct list_head del_list; + struct qstr q; int ret; struct extent_buffer *leaf; int slot; @@ -4187,6 +4201,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, while (di_cur < di_total) { struct btrfs_key location; + struct dentry *tmp; if (verify_dir_item(root, leaf, di)) break; @@ -4207,6 +4222,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); + 
q.name = name_ptr; + q.len = name_len; + q.hash = full_name_hash(q.name, q.len); + tmp = d_lookup(filp->f_dentry, &q); + if (!tmp) { + struct btrfs_key *newkey; + + newkey = kzalloc(sizeof(struct btrfs_key), + GFP_NOFS); + if (!newkey) + goto no_dentry; + tmp = d_alloc(filp->f_dentry, &q); + if (!tmp) { + kfree(newkey); + dput(tmp); + goto no_dentry; + } + memcpy(newkey, &location, + sizeof(struct btrfs_key)); + tmp->d_fsdata = newkey; + tmp->d_flags |= DCACHE_NEED_LOOKUP; + d_rehash(tmp); + dput(tmp); + } else { + dput(tmp); + } +no_dentry: /* is this a reference to our own snapshot? If so * skip it */ @@ -7452,4 +7494,5 @@ static const struct inode_operations btrfs_symlink_inode_operations = { const struct dentry_operations btrfs_dentry_operations = { .d_delete = btrfs_dentry_delete, + .d_release = btrfs_dentry_release, }; From 5a30d8a2b8ddd5102c440c7e5a7c8e1fd729c818 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 11 Jul 2011 14:20:57 +0100 Subject: [PATCH 128/151] VFS: Fix automount for negative autofs dentries Autofs may set the DCACHE_NEED_AUTOMOUNT flag on negative dentries. These need attention from the automounter daemon regardless of the LOOKUP_FOLLOW flag. Signed-off-by: David Howells Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/namei.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f8c69d373793..445fd5da11fa 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -716,19 +716,25 @@ static int follow_automount(struct path *path, unsigned flags, if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) return -EISDIR; /* we actually want to stop here */ - /* We want to mount if someone is trying to open/create a file of any - * type under the mountpoint, wants to traverse through the mountpoint - * or wants to open the mounted directory. 
- * + /* * We don't want to mount if someone's just doing a stat and they've * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and * appended a '/' to the name. */ - if (!(flags & LOOKUP_FOLLOW) && - !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE))) - return -EISDIR; - + if (!(flags & LOOKUP_FOLLOW)) { + /* We do, however, want to mount if someone wants to open or + * create a file of any type under the mountpoint, wants to + * traverse through the mountpoint or wants to open the mounted + * directory. + * Also, autofs may mark negative dentries as being automount + * points. These will need the attentions of the daemon to + * instantiate them before they can be used. + */ + if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | + LOOKUP_OPEN | LOOKUP_CREATE)) && + path->dentry->d_inode) + return -EISDIR; + } current->total_link_count++; if (current->total_link_count >= 40) return -ELOOP; From b12362bdb61a230a67daa77bcd2a11e59b2802e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Jul 2011 06:11:47 +0200 Subject: [PATCH 129/151] vfs: conditionally call inode_wb_list_del() Some inodes (pipes, sockets, ...) are not in bdi writeback list. evict() can avoid calling inode_wb_list_del() and its expensive spinlock by checking inode i_wb_list being empty or not. 
At this point, no other cpu/user can concurrently manipulate this inode i_wb_list Signed-off-by: Eric Dumazet Signed-off-by: Al Viro --- fs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index d0c72ff6b30e..9dab13ae6ef7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -454,7 +454,9 @@ static void evict(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!list_empty(&inode->i_lru)); - inode_wb_list_del(inode); + if (!list_empty(&inode->i_wb_list)) + inode_wb_list_del(inode); + inode_sb_list_del(inode); if (op->evict_inode) { From f2ee7abf4c40c8e6bffced923a7c01ea2d1f6c97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Jul 2011 06:41:09 +0200 Subject: [PATCH 130/151] vfs: avoid taking inode_hash_lock on pipes and sockets Some inodes (pipes, sockets, ...) are not hashed, no need to take contended inode_hash_lock at dismantle time. nice speedup on SMP machines on socket intensive workloads. Signed-off-by: Eric Dumazet Signed-off-by: Al Viro --- fs/inode.c | 6 +++--- include/linux/fs.h | 9 ++++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 9dab13ae6ef7..e445be2a18f9 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -399,12 +399,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) EXPORT_SYMBOL(__insert_inode_hash); /** - * remove_inode_hash - remove an inode from the hash + * __remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * * Remove an inode from the superblock. 
*/ -void remove_inode_hash(struct inode *inode) +void __remove_inode_hash(struct inode *inode) { spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); @@ -412,7 +412,7 @@ void remove_inode_hash(struct inode *inode) spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } -EXPORT_SYMBOL(remove_inode_hash); +EXPORT_SYMBOL(__remove_inode_hash); void end_writeback(struct inode *inode) { diff --git a/include/linux/fs.h b/include/linux/fs.h index f23bcb77260c..786b3b1113cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2317,11 +2317,18 @@ extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); -extern void remove_inode_hash(struct inode *); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } + +extern void __remove_inode_hash(struct inode *); +static inline void remove_inode_hash(struct inode *inode) +{ + if (!inode_unhashed(inode)) + __remove_inode_hash(inode); +} + extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK From c4ae0c65455c1bb30d1b71c6dd9a1a62aadde8ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Jul 2011 06:55:13 +0200 Subject: [PATCH 131/151] vfs: avoid call to inode_lru_list_del() if possible inode_lru_list_del() is expensive because of the per-superblock lru locking, while some inodes are not in the lru list. Adding a check in iput_final() can speed up pipe/socket workloads on SMP. 
Signed-off-by: Eric Dumazet Signed-off-by: Al Viro --- fs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index e445be2a18f9..5aab80dc008c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1330,7 +1330,8 @@ static void iput_final(struct inode *inode) } inode->i_state |= I_FREEING; - inode_lru_list_del(inode); + if (!list_empty(&inode->i_lru)) + inode_lru_list_del(inode); spin_unlock(&inode->i_lock); evict(inode); From 782b94cdf577b4df1feb376f372dccc28e66a771 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Jun 2011 11:01:45 +1000 Subject: [PATCH 132/151] block: initialise bd_super in bdget() bd_super is currently reset to NULL in kill_block_super() so we rely on previous users of the block_device object to initialise this value for the next user. This quirk was exposed on RHEL5 when a third party filesystem did not always use kill_block_super() and therefore bd_super wasn't being reset when a block_device object was recycled within the cache. This may not be a problem upstream but makes sense to be defensive. 
Signed-off-by: Lachlan McIlroy Reviewed-by: Eric Sandeen Signed-off-by: Al Viro --- fs/block_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index f55aad4d1611..f28680553288 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -552,6 +552,7 @@ struct block_device *bdget(dev_t dev) if (inode->i_state & I_NEW) { bdev->bd_contains = NULL; + bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_block_size = (1 << inode->i_blkbits); bdev->bd_part_count = 0; From d3fb612076eebec6f67257db0c7a9666ac7e5892 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 18:37:50 -0400 Subject: [PATCH 133/151] switch posix_acl_create() to umode_t * so we can pass &inode->i_mode to it Signed-off-by: Al Viro --- fs/9p/acl.c | 4 ++-- fs/9p/acl.h | 4 ++-- fs/9p/vfs_inode_dotl.c | 6 +++--- fs/btrfs/acl.c | 5 +---- fs/ext2/acl.c | 4 +--- fs/ext3/acl.c | 5 +---- fs/ext4/acl.c | 5 +---- fs/generic_acl.c | 7 +++---- fs/gfs2/acl.c | 4 ++-- fs/jffs2/acl.c | 2 +- fs/jffs2/acl.h | 2 +- fs/jffs2/fs.c | 2 +- fs/jffs2/os-linux.h | 2 +- fs/jfs/acl.c | 4 +--- fs/nfs/nfs3acl.c | 2 +- fs/nfs/nfs3proc.c | 6 +++--- fs/ocfs2/acl.c | 2 +- fs/posix_acl.c | 6 +++--- fs/reiserfs/xattr_acl.c | 6 +----- fs/xfs/linux-2.6/xfs_acl.c | 4 ++-- include/linux/nfs_fs.h | 4 ++-- include/linux/posix_acl.h | 2 +- 22 files changed, 35 insertions(+), 53 deletions(-) diff --git a/fs/9p/acl.c b/fs/9p/acl.c index e9cb57f07546..ad734e3220cf 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -182,11 +182,11 @@ int v9fs_set_create_acl(struct dentry *dentry, return 0; } -int v9fs_acl_mode(struct inode *dir, mode_t *modep, +int v9fs_acl_mode(struct inode *dir, umode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl) { int retval = 0; - mode_t mode = *modep; + umode_t mode = *modep; struct posix_acl *acl = NULL; if (!S_ISLNK(mode)) { diff --git a/fs/9p/acl.h b/fs/9p/acl.h index ddb7ae19d971..559556411965 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -20,7 +20,7 @@ extern struct 
posix_acl *v9fs_iop_get_acl(struct inode *inode, int type); extern int v9fs_acl_chmod(struct dentry *); extern int v9fs_set_create_acl(struct dentry *, struct posix_acl **, struct posix_acl **); -extern int v9fs_acl_mode(struct inode *dir, mode_t *modep, +extern int v9fs_acl_mode(struct inode *dir, umode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl); #else #define v9fs_iop_get_acl NULL @@ -38,7 +38,7 @@ static inline int v9fs_set_create_acl(struct dentry *dentry, { return 0; } -static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep, +static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl) { diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 9a26dce5a99f..b6c8ed205192 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -206,7 +206,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, int err = 0; gid_t gid; int flags; - mode_t mode; + umode_t mode; char *name = NULL; struct file *filp; struct p9_qid qid; @@ -348,7 +348,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct p9_fid *fid = NULL, *dfid = NULL; gid_t gid; char *name; - mode_t mode; + umode_t mode; struct inode *inode; struct p9_qid qid; struct dentry *dir_dentry; @@ -751,7 +751,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, int err; gid_t gid; char *name; - mode_t mode; + umode_t mode; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; struct inode *inode; diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 65a735d8f6e4..59086142c14d 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -222,19 +222,16 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, } if (IS_POSIXACL(dir) && acl) { - mode_t mode = inode->i_mode; - if (S_ISDIR(inode->i_mode)) { ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_DEFAULT); if (ret) goto failed; } - ret = posix_acl_create(&acl, GFP_NOFS, &mode); + ret = 
posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); if (ret < 0) return ret; - inode->i_mode = mode; if (ret > 0) { /* we need an acl */ ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 52c053763942..0ce740489ab1 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -253,16 +253,14 @@ ext2_init_acl(struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - error = posix_acl_create(&acl, GFP_KERNEL, &mode); + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); if (error < 0) return error; - inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 6c29bf0df04a..74a3c6486f8f 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -261,19 +261,16 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - mode_t mode = inode->i_mode; - if (S_ISDIR(inode->i_mode)) { error = ext3_set_acl(handle, inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - error = posix_acl_create(&acl, GFP_NOFS, &mode); + error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); if (error < 0) return error; - inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index dca2d1ded931..74e469ccdf50 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -259,19 +259,16 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - mode_t mode = inode->i_mode; - if (S_ISDIR(inode->i_mode)) { error = ext4_set_acl(handle, 
inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - error = posix_acl_create(&acl, GFP_NOFS, &mode); + error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); if (error < 0) return error; - inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); diff --git a/fs/generic_acl.c b/fs/generic_acl.c index d5e33a077a67..2dd434d6ff29 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -125,21 +125,20 @@ int generic_acl_init(struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; - mode_t mode = inode->i_mode; int error; - inode->i_mode = mode & ~current_umask(); if (!S_ISLNK(inode->i_mode)) acl = get_cached_acl(dir, ACL_TYPE_DEFAULT); if (acl) { if (S_ISDIR(inode->i_mode)) set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); - error = posix_acl_create(&acl, GFP_KERNEL, &mode); + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); if (error < 0) return error; - inode->i_mode = mode; if (error > 0) set_cached_acl(inode, ACL_TYPE_ACCESS, acl); + } else { + inode->i_mode &= ~current_umask(); } error = 0; diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 884c9af0542f..0ac3c53f928f 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -72,7 +72,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) return gfs2_acl_get(GFS2_I(inode), type); } -static int gfs2_set_mode(struct inode *inode, mode_t mode) +static int gfs2_set_mode(struct inode *inode, umode_t mode) { int error = 0; @@ -117,7 +117,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct posix_acl *acl; - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; int error = 0; if (!sdp->sd_args.ar_posix_acl) diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 27c511a1cf05..6372a84728d7 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -259,7 +259,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return rc; 
} -int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, mode_t *i_mode) +int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, umode_t *i_mode) { struct posix_acl *acl; int rc; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index b3421c78d9f8..9b477246f2a6 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -28,7 +28,7 @@ struct jffs2_acl_header { struct posix_acl *jffs2_get_acl(struct inode *inode, int type); extern int jffs2_acl_chmod(struct inode *); -extern int jffs2_init_acl_pre(struct inode *, struct inode *, mode_t *); +extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *); extern int jffs2_init_acl_post(struct inode *); extern const struct xattr_handler jffs2_acl_access_xattr_handler; diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index b81b35ddf4e4..bbcb9755dd2b 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -406,7 +406,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, fill in the raw_inode while you're at it. 
*/ -struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, struct jffs2_raw_inode *ri) +struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_raw_inode *ri) { struct inode *inode; struct super_block *sb = dir_i->i_sb; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 526979c607b6..6c1755c59c0f 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -173,7 +173,7 @@ int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_evict_inode (struct inode *); void jffs2_dirty_inode(struct inode *inode, int flags); -struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, +struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); int jffs2_remount_fs (struct super_block *, int *, char *); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index b3a32caf2b45..45559dc3ea2f 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -127,16 +127,14 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) return PTR_ERR(acl); if (acl) { - mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); if (rc) goto cleanup; } - rc = posix_acl_create(&acl, GFP_KERNEL, &mode); + rc = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); if (rc < 0) goto cleanup; /* posix_acl_release(NULL) is no-op */ - inode->i_mode = mode; if (rc > 0) rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); cleanup: diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index e49e73107e62..7ef23979896d 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -415,7 +415,7 @@ int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) } int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - mode_t mode) + umode_t mode) { struct posix_acl *dfacl, *acl; int error = 0; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 
38053d823eb0..85f1690ca08c 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -316,7 +316,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nfs_open_context *ctx) { struct nfs3_createdata *data; - mode_t mode = sattr->ia_mode; + umode_t mode = sattr->ia_mode; int status = -ENOMEM; dprintk("NFS call create %s\n", dentry->d_name.name); @@ -562,7 +562,7 @@ static int nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { struct nfs3_createdata *data; - int mode = sattr->ia_mode; + umode_t mode = sattr->ia_mode; int status = -ENOMEM; dprintk("NFS call mkdir %s\n", dentry->d_name.name); @@ -681,7 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { struct nfs3_createdata *data; - mode_t mode = sattr->ia_mode; + umode_t mode = sattr->ia_mode; int status = -ENOMEM; dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 783c58d9daf1..fbafc6e36e25 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -351,7 +351,7 @@ int ocfs2_init_acl(handle_t *handle, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl = NULL; int ret = 0, ret2; - mode_t mode; + umode_t mode; if (!S_ISLNK(inode->i_mode)) { if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { diff --git a/fs/posix_acl.c b/fs/posix_acl.c index d43729a760e2..f0a017edee1e 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -279,11 +279,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want) * system calls. All permissions that are not granted by the acl are removed. * The permissions in the acl are changed to reflect the mode_p parameter. 
*/ -static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p) +static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) { struct posix_acl_entry *pa, *pe; struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; - mode_t mode = *mode_p; + umode_t mode = *mode_p; int not_equiv = 0; /* assert(atomic_read(acl->a_refcount) == 1); */ @@ -382,7 +382,7 @@ static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) } int -posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p) +posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 7362cf4c946a..89ebc77e0e9a 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -354,8 +354,6 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, return PTR_ERR(acl); if (acl) { - mode_t mode = inode->i_mode; - /* Copy the default ACL to the default ACL of a new directory */ if (S_ISDIR(inode->i_mode)) { err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, @@ -366,12 +364,10 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, /* Now we reconcile the new ACL and the mode, potentially modifying both */ - err = posix_acl_create(&acl, GFP_NOFS, &mode); + err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); if (err < 0) return err; - inode->i_mode = mode; - /* If we need an ACL.. 
*/ if (err > 0) err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl); diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index 44ce51656804..bb85500e0b88 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -221,7 +221,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } static int -xfs_set_mode(struct inode *inode, mode_t mode) +xfs_set_mode(struct inode *inode, umode_t mode) { int error = 0; @@ -267,7 +267,7 @@ posix_acl_default_exists(struct inode *inode) int xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; int error = 0, inherit = 0; if (S_ISDIR(inode->i_mode)) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8b579beb6358..dda2ac8a5f7b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -568,12 +568,12 @@ extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type); extern int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl); extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - mode_t mode); + umode_t mode); extern void nfs3_forget_cached_acls(struct inode *inode); #else static inline int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - mode_t mode) + umode_t mode) { return 0; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 9a53b99818e2..bd8d0050d725 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -77,7 +77,7 @@ extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); -extern int posix_acl_create(struct posix_acl **, gfp_t, mode_t *); +extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int posix_acl_chmod(struct posix_acl **, gfp_t, 
mode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); From d6952123b53cc8b334df69bba2cd0063b0d88f68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 18:56:36 -0400 Subject: [PATCH 134/151] switch posix_acl_equiv_mode() to umode_t * ... so that &inode->i_mode could be passed to it Signed-off-by: Al Viro --- fs/9p/acl.c | 2 +- fs/btrfs/acl.c | 5 +---- fs/ext2/acl.c | 4 +--- fs/ext3/acl.c | 4 +--- fs/ext4/acl.c | 4 +--- fs/generic_acl.c | 6 +----- fs/gfs2/acl.c | 2 +- fs/jffs2/acl.c | 2 +- fs/jfs/xattr.c | 4 +--- fs/ocfs2/acl.c | 2 +- fs/posix_acl.c | 4 ++-- fs/reiserfs/xattr_acl.c | 4 +--- fs/xfs/linux-2.6/xfs_acl.c | 2 +- include/linux/posix_acl.h | 2 +- 14 files changed, 15 insertions(+), 32 deletions(-) diff --git a/fs/9p/acl.c b/fs/9p/acl.c index ad734e3220cf..9a1d42630751 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -319,7 +319,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; retval = posix_acl_equiv_mode(acl, &mode); if (retval < 0) goto err_out; diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 59086142c14d..4cc5c0164ed6 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -111,7 +111,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, int ret, size = 0; const char *name; char *value = NULL; - mode_t mode; if (acl) { ret = posix_acl_valid(acl); @@ -122,13 +121,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, switch (type) { case ACL_TYPE_ACCESS: - mode = inode->i_mode; name = POSIX_ACL_XATTR_ACCESS; if (acl) { - ret = posix_acl_equiv_mode(acl, &mode); + ret = posix_acl_equiv_mode(acl, &inode->i_mode); if (ret < 0) return ret; - inode->i_mode = mode; } ret = 0; break; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 0ce740489ab1..35d6a3cfd9ff 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -194,12 +194,10 @@ ext2_set_acl(struct inode *inode, int type, 
struct posix_acl *acl) case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); if (error == 0) diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 74a3c6486f8f..3091f62e55b6 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -199,12 +199,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; inode->i_ctime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); if (error == 0) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 74e469ccdf50..a5c29bb3b835 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -198,12 +198,10 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); if (error == 0) diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 2dd434d6ff29..d0dddaceac59 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -82,18 +82,14 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, return PTR_ERR(acl); } if (acl) { - mode_t mode; - error = posix_acl_valid(acl); if (error) goto failed; switch (type) { case ACL_TYPE_ACCESS: - mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, 
&inode->i_mode); if (error < 0) goto failed; - inode->i_mode = mode; inode->i_ctime = CURRENT_TIME; if (error == 0) { posix_acl_release(acl); diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 0ac3c53f928f..34501b64bc47 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -276,7 +276,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, goto out_release; if (type == ACL_TYPE_ACCESS) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); if (error <= 0) { diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 6372a84728d7..926d02068a14 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -227,7 +227,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) case ACL_TYPE_ACCESS: xprefix = JFFS2_XPREFIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; rc = posix_acl_equiv_mode(acl, &mode); if (rc < 0) return rc; diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 24838f1eeee5..e87fedef23db 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -693,8 +693,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, return rc; } if (acl) { - mode_t mode = inode->i_mode; - rc = posix_acl_equiv_mode(acl, &mode); + rc = posix_acl_equiv_mode(acl, &inode->i_mode); posix_acl_release(acl); if (rc < 0) { printk(KERN_ERR @@ -702,7 +701,6 @@ static int can_set_system_xattr(struct inode *inode, const char *name, rc); return rc; } - inode->i_mode = mode; mark_inode_dirty(inode); } /* diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index fbafc6e36e25..a7219075b4de 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -247,7 +247,7 @@ static int ocfs2_set_acl(handle_t *handle, case ACL_TYPE_ACCESS: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; ret = posix_acl_equiv_mode(acl, &mode); if (ret < 0) return ret; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 
f0a017edee1e..3d943be6761c 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -149,10 +149,10 @@ posix_acl_valid(const struct posix_acl *acl) * file mode permission bits, or else 1. Returns -E... on error. */ int -posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) +posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) { const struct posix_acl_entry *pa, *pe; - mode_t mode = 0; + umode_t mode = 0; int not_equiv = 0; FOREACH_ACL_ENTRY(pa, acl, pe) { diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 89ebc77e0e9a..6da0396e5052 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -272,12 +272,10 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; if (error == 0) acl = NULL; } diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index bb85500e0b88..b6c4b3795c4a 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -381,7 +381,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, goto out_release; if (type == ACL_TYPE_ACCESS) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); if (error <= 0) { diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index bd8d0050d725..529c32ad58c0 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -76,7 +76,7 @@ extern struct posix_acl *posix_acl_alloc(int, gfp_t); extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); -extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); +extern int posix_acl_equiv_mode(const 
struct posix_acl *, umode_t *); extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t); From 3a5fba19b080b365d67866db38e32e6a4a2089e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 19:01:48 -0400 Subject: [PATCH 135/151] switch posix_acl_from_mode() to umode_t ... seeing that this is what all callers pass to it anyway. Signed-off-by: Al Viro --- fs/posix_acl.c | 2 +- include/linux/posix_acl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 3d943be6761c..4e16e8001982 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -188,7 +188,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) * Create an ACL representing the file mode permission bits of an inode. */ struct posix_acl * -posix_acl_from_mode(mode_t mode, gfp_t flags) +posix_acl_from_mode(umode_t mode, gfp_t flags) { struct posix_acl *acl = posix_acl_alloc(3, flags); if (!acl) diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 529c32ad58c0..16ecfb485d28 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -75,7 +75,7 @@ extern void posix_acl_init(struct posix_acl *, int); extern struct posix_acl *posix_acl_alloc(int, gfp_t); extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); -extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); +extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t); From 86bc704db0ab7e69230f79bc7d124e063259abc6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 19:03:11 -0400 Subject: [PATCH 136/151] switch posix_acl_chmod() to umode_t again, that's what all callers pass to it 
Signed-off-by: Al Viro --- fs/posix_acl.c | 4 ++-- include/linux/posix_acl.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 4e16e8001982..10027b42b7e2 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -336,7 +336,7 @@ static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) /* * Modify the ACL for the chmod syscall. */ -static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) +static int posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode) { struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; struct posix_acl_entry *pa, *pe; @@ -400,7 +400,7 @@ posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) EXPORT_SYMBOL(posix_acl_create); int -posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, mode_t mode) +posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 16ecfb485d28..951bba82d50d 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -78,7 +78,7 @@ extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); -extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t); +extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct inode *, int, struct posix_acl *); From 35f40ef00204c456f5c181c0e7f54e25bb93cd49 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Jun 2011 14:09:10 +0100 Subject: [PATCH 137/151] VFS: Remove detached-dentry counter from shrink_dcache_for_umount_subtree() Remove the detached-dentry counter from shrink_dcache_for_umount_subtree() as 
the value it computes is no longer used as of commit 312d3ca856d369bb04d0443846b85b4cdde6fa8a which made the nr_dentry counters summed per-CPU rather than global atomic. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/dcache.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index b05aac3a8cfc..75590572ff7a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -828,7 +828,6 @@ EXPORT_SYMBOL(shrink_dcache_sb); static void shrink_dcache_for_umount_subtree(struct dentry *dentry) { struct dentry *parent; - unsigned detached = 0; BUG_ON(!IS_ROOT(dentry)); @@ -892,8 +891,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_unlock(&parent->d_lock); } - detached++; - inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; From c6627c60c07c43b51ef88e352627fa786d1e1592 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Jun 2011 14:09:20 +0100 Subject: [PATCH 138/151] VFS: Remove dentry->d_lock locking from shrink_dcache_for_umount_subtree() Locks of the dcache_lock were replaced by locks of dentry->d_lock in commits such as: 2304450783dfde7b0b94ae234edd0dbffa865073 2fd6b7f50797f2e993eea59e0a0b8c6399c811dc as part of the RCU-based pathwalk changes, despite the fact that the caller (shrink_dcache_for_umount()) notes in the banner comment the reasons that d_lock is not necessary in these functions: /* * destroy the dentries attached to a superblock on unmounting * - we don't need to use dentry->d_lock because: * - the superblock is detached from all mountings and open files, so the * dentry trees will not be rearranged by the VFS * - s_umount is write-locked, so the memory pressure shrinker will ignore * any dentries belonging to this superblock that it comes across * - the filesystem itself is no longer permitted to rearrange the dentries * in this superblock */ So remove these locks. If the locks are actually necessary, then this banner comment should be altered instead. 
The hash table chains are protected by 1-bit locks in the hash table heads, so those shouldn't be a problem. Note that to make this work, __d_drop() has to be split so that the RCUwalk barrier can be avoided. This causes problems otherwise as it has an assertion that dentry->d_lock is locked - but there is no need for that as no one else can be trying to access this dentry, except to step over it (and that should be handled by d_free(), I think). Signed-off-by: David Howells Cc: Nick Piggin Signed-off-by: Al Viro --- fs/dcache.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 75590572ff7a..9df8b861e18e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -301,6 +301,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) return parent; } +/* + * Unhash a dentry without inserting an RCU walk barrier or checking that + * dentry->d_lock is locked. The caller must take care of that, if + * appropriate. 
+ */ +static void __d_shrink(struct dentry *dentry) +{ + if (!d_unhashed(dentry)) { + struct hlist_bl_head *b; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + b = &dentry->d_sb->s_anon; + else + b = d_hash(dentry->d_parent, dentry->d_name.hash); + + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + hlist_bl_unlock(b); + } +} + /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -319,17 +340,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) - b = &dentry->d_sb->s_anon; - else - b = d_hash(dentry->d_parent, dentry->d_name.hash); - - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; - hlist_bl_unlock(b); - + __d_shrink(dentry); dentry_rcuwalk_barrier(dentry); } } @@ -832,10 +843,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); /* detach this root from the system */ - spin_lock(&dentry->d_lock); dentry_lru_del(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); + __d_shrink(dentry); for (;;) { /* descend to the first leaf in the current subtree */ @@ -844,16 +853,11 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ - spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { - spin_lock_nested(&loop->d_lock, - DENTRY_D_LOCK_NESTED); dentry_lru_del(loop); - __d_drop(loop); - spin_unlock(&loop->d_lock); + __d_shrink(loop); } - spin_unlock(&dentry->d_lock); /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, @@ -885,10 +889,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) list_del(&dentry->d_u.d_child); } else { parent = dentry->d_parent; - spin_lock(&parent->d_lock); 
parent->d_count--; list_del(&dentry->d_u.d_child); - spin_unlock(&parent->d_lock); } inode = dentry->d_inode; @@ -935,9 +937,7 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - spin_lock(&dentry->d_lock); dentry->d_count--; - spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); while (!hlist_bl_empty(&sb->s_anon)) { From 43c1c9cd244098012441b90c32304f11f1258d43 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Jun 2011 14:09:30 +0100 Subject: [PATCH 139/151] VFS: Reorganise shrink_dcache_for_umount_subtree() after demise of dcache_lock Reorganise shrink_dcache_for_umount_subtree() in light of the demise of dcache_lock. Without that dcache_lock, there is no need for the batching of removal of dentries from the system under it (we wanted to make intensive use of the locked data whilst we held it, but didn't want to hold it for long at a time). This works, provided the preceding patch is correct in its removal of locking on dentry->d_lock on the basis that no one should be locking these dentries any more as the whole superblock is defunct. With this patch, the calls to dentry_lru_del() and __d_shrink() are placed at the point where each dentry is detached and handled. It is possible that, as an alternative, the batching should still be done - but only for dentry_lru_del() of all a dentry's children in one go. In such a case, the batching would be done under dcache_lru_lock.
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/dcache.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 9df8b861e18e..2347cdb15abb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -842,33 +842,21 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); - /* detach this root from the system */ - dentry_lru_del(dentry); - __d_shrink(dentry); - for (;;) { /* descend to the first leaf in the current subtree */ - while (!list_empty(&dentry->d_subdirs)) { - struct dentry *loop; - - /* this is a branch with children - detach all of them - * from the system in one go */ - list_for_each_entry(loop, &dentry->d_subdirs, - d_u.d_child) { - dentry_lru_del(loop); - __d_shrink(loop); - } - - /* move to the first child */ + while (!list_empty(&dentry->d_subdirs)) dentry = list_entry(dentry->d_subdirs.next, struct dentry, d_u.d_child); - } /* consume the dentries from this leaf up through its parents * until we find one with children or run out altogether */ do { struct inode *inode; + /* detach from the system */ + dentry_lru_del(dentry); + __d_shrink(dentry); + if (dentry->d_count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" From 206d440f64030b6425841bf7cb38e26a5ea0c382 Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Tue, 26 Jul 2011 11:15:20 +0200 Subject: [PATCH 140/151] xfs: Fix build breakage in xfs_iops.c when CONFIG_FS_POSIX_ACL is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4e34e719e45, that takes the ACL checks to common code, accidentally broke the build when CONFIG_FS_POSIX_ACL is not set: CC fs/xfs/linux-2.6/xfs_iops.o fs/xfs/linux-2.6/xfs_iops.c:1025:14: error: ‘xfs_get_acl’ undeclared here (not in a function) Fix this by declaring xfs_get_acl a static inline function.
Signed-off-by: Markus Trippelsdorf Signed-off-by: Al Viro --- fs/xfs/xfs_acl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 2c656ef49473..39632d941354 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -51,7 +51,10 @@ extern int posix_acl_default_exists(struct inode *inode); extern const struct xattr_handler xfs_xattr_acl_access_handler; extern const struct xattr_handler xfs_xattr_acl_default_handler; #else -# define xfs_get_acl(inode, type) NULL +static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) +{ + return NULL; +} # define xfs_inherit_acl(inode, default_acl) 0 # define xfs_acl_chmod(inode) 0 # define posix_acl_access_exists(inode) 0 From 33853a0dde359ded0534204eb6857ad5166d515b Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Mon, 1 Aug 2011 06:32:19 -0400 Subject: [PATCH 141/151] ext4: use the correct error exit path in ext4_init_inode_table() This patch lets ext4_init_inode_table() handle errors right. ext4_init_inode_table() should down_write() alloc_sem which has been up_write()ed and stop the started journal handle. 
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 21bb2f61e502..9c63f273b550 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1287,7 +1287,7 @@ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, group, used_blks, ext4_itable_unused_count(sb, gdp)); ret = 1; - goto out; + goto err_out; } blk = ext4_inode_table(sb, gdp) + used_blks; From 9933fc0ac1ac14b795819cd63d05ea92112f690a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Aug 2011 08:45:02 -0400 Subject: [PATCH 142/151] ext4: introduce ext4_kvmalloc(), ext4_kvzalloc(), and ext4_kvfree() Introduce new helper functions which try kmalloc, and then fall back to vmalloc if necessary, and use them for allocating and deallocating s_flex_groups. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ fs/ext4/super.c | 54 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ba2009b49a55..db9feadf53a0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1874,6 +1874,9 @@ extern int ext4_group_extend(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ +extern void *ext4_kvmalloc(size_t size, gfp_t flags); +extern void *ext4_kvzalloc(size_t size, gfp_t flags); +extern void ext4_kvfree(void *ptr); extern void __ext4_error(struct super_block *, const char *, unsigned int, const char *, ...)
__attribute__ ((format (printf, 4, 5))); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cfe9f39c4ba2..658f5864e9cf 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -110,6 +110,35 @@ static struct file_system_type ext3_fs_type = { #define IS_EXT3_SB(sb) (0) #endif +void *ext4_kvmalloc(size_t size, gfp_t flags) +{ + void *ret; + + ret = kmalloc(size, flags); + if (!ret) + ret = __vmalloc(size, flags, PAGE_KERNEL); + return ret; +} + +void *ext4_kvzalloc(size_t size, gfp_t flags) +{ + void *ret; + + ret = kzalloc(size, flags); + if (!ret) + ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); + return ret; +} + +void ext4_kvfree(void *ptr) +{ + if (is_vmalloc_addr(ptr)) + vfree(ptr); + else + kfree(ptr); + +} + ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) { @@ -791,10 +820,7 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); - if (is_vmalloc_addr(sbi->s_flex_groups)) - vfree(sbi->s_flex_groups); - else - kfree(sbi->s_flex_groups); + ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -1977,15 +2003,11 @@ static int ext4_fill_flex_info(struct super_block *sb) ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; size = flex_group_count * sizeof(struct flex_groups); - sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); + sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); if (sbi->s_flex_groups == NULL) { - sbi->s_flex_groups = vzalloc(size); - if (sbi->s_flex_groups == NULL) { - ext4_msg(sb, KERN_ERR, - "not enough memory for %u flex groups", - flex_group_count); - goto failed; - } + ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", + flex_group_count); + goto failed; } for (i = 0; i < sbi->s_groups_count; i++) { @@ -3750,12
+3772,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } failed_mount3: del_timer(&sbi->s_err_report); - if (sbi->s_flex_groups) { - if (is_vmalloc_addr(sbi->s_flex_groups)) - vfree(sbi->s_flex_groups); - else - kfree(sbi->s_flex_groups); - } + if (sbi->s_flex_groups) + ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); From f18a5f21c25707b4fe64b326e2b4d150565e7300 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Aug 2011 08:45:38 -0400 Subject: [PATCH 143/151] ext4: use ext4_kvzalloc()/ext4_kvmalloc() for s_group_desc and s_group_info Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 6 +++--- fs/ext4/resize.c | 13 +++++++------ fs/ext4/super.c | 9 +++++---- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fa716c9b2455..d5021e82f8cc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2331,7 +2331,7 @@ static int ext4_mb_init_backend(struct super_block *sb) /* An 8TB filesystem with 64-bit pointers requires a 4096 byte * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. * So a two level scheme suffices for now. 
*/ - sbi->s_group_info = kzalloc(array_size, GFP_KERNEL); + sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); if (sbi->s_group_info == NULL) { printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); return -ENOMEM; @@ -2365,7 +2365,7 @@ static int ext4_mb_init_backend(struct super_block *sb) kfree(sbi->s_group_info[i]); iput(sbi->s_buddy_cache); err_freesgi: - kfree(sbi->s_group_info); + ext4_kvfree(sbi->s_group_info); return -ENOMEM; } @@ -2559,7 +2559,7 @@ int ext4_mb_release(struct super_block *sb) EXT4_DESC_PER_BLOCK_BITS(sb); for (i = 0; i < num_meta_group_infos; i++) kfree(sbi->s_group_info[i]); - kfree(sbi->s_group_info); + ext4_kvfree(sbi->s_group_info); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 71085df97bbd..707d3f16f7ce 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -467,12 +467,13 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, if (unlikely(err)) goto exit_dindj; - n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), - GFP_NOFS); + n_group_desc = ext4_kvmalloc((gdb_num + 1) * + sizeof(struct buffer_head *), + GFP_NOFS); if (!n_group_desc) { err = -ENOMEM; - ext4_warning(sb, - "not enough memory for %lu groups", gdb_num + 1); + ext4_warning(sb, "not enough memory for %lu groups", + gdb_num + 1); goto exit_inode; } @@ -507,7 +508,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, n_group_desc[gdb_num] = gdb_bh; EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; - kfree(o_group_desc); + ext4_kvfree(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); @@ -517,7 +518,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return err; exit_inode: - kfree(n_group_desc); + ext4_kvfree(n_group_desc); /* ext4_handle_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); exit_dindj: diff --git a/fs/ext4/super.c 
b/fs/ext4/super.c index 658f5864e9cf..e2d88baf91d3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -819,7 +819,7 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); + ext4_kvfree(sbi->s_group_desc); ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); @@ -3439,8 +3439,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); - sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), - GFP_KERNEL); + sbi->s_group_desc = ext4_kvmalloc(db_count * + sizeof(struct buffer_head *), + GFP_KERNEL); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); goto failed_mount; @@ -3783,7 +3784,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); + ext4_kvfree(sbi->s_group_desc); failed_mount: if (sbi->s_proc) { remove_proc_entry(sb->s_id, ext4_proc_root); From 3bdb65ec95e6cccffc40102d7c003047c45da90c Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 30 Jun 2011 14:12:00 -0500 Subject: [PATCH 144/151] kdb: cleanup unused variables missed in the original kdb merge The BTARGS and BTSYMARG variables do not have any function in the mainline version of kdb. 
Reported-by: Tim Bird Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_bt.c | 5 ++--- kernel/debug/kdb/kdb_cmds | 4 ---- kernel/debug/kdb/kdb_main.c | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 2f62fe85f16a..7179eac7b41c 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -112,9 +112,8 @@ kdb_bt(int argc, const char **argv) unsigned long addr; long offset; - kdbgetintenv("BTARGS", &argcount); /* Arguments to print */ - kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each - * proc in bta */ + /* Prompt after each proc in bta */ + kdbgetintenv("BTAPROMPT", &btaprompt); if (strcmp(argv[0], "bta") == 0) { struct task_struct *g, *p; diff --git a/kernel/debug/kdb/kdb_cmds b/kernel/debug/kdb/kdb_cmds index 56c88e4db309..9834ad303ab6 100644 --- a/kernel/debug/kdb/kdb_cmds +++ b/kernel/debug/kdb/kdb_cmds @@ -18,16 +18,12 @@ defcmd dumpcommon "" "Common kdb debugging" endefcmd defcmd dumpall "" "First line debugging" - set BTSYMARG 1 - set BTARGS 9 pid R -dumpcommon -bta endefcmd defcmd dumpcpu "" "Same as dumpall but only tasks on cpus" - set BTSYMARG 1 - set BTARGS 9 pid R -dumpcommon -btc diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index be14779bcef6..b33116ec9e6d 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -145,7 +145,6 @@ static char *__env[] = { #endif "RADIX=16", "MDCOUNT=8", /* lines of md output */ - "BTARGS=9", /* 9 possible args in bt */ KDB_PLATFORM_ENV, "DTABCOUNT=30", "NOSECT=1", @@ -172,6 +171,7 @@ static char *__env[] = { (char *)0, (char *)0, (char *)0, + (char *)0, }; static const int __nenv = (sizeof(__env) / sizeof(char *)); From f679c4985bb2e7de9d39a5d40b6031361c4ad861 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 23 May 2011 13:17:41 -0500 Subject: [PATCH 145/151] kdb,kgdb: Implement switch and pass buffer from kdb -> gdb When switching from kdb mode to kgdb mode 
packets were getting lost depending on the size of the fifo queue of the serial chip. When gdb initially connects if it is in kdb mode it should entirely send any character buffer over to the gdbstub when switching connections. Previously kdb was zero'ing out the character buffer and this could lead to gdb failing to connect at all, or a lengthy pause could occur on the initial connect. Signed-off-by: Jason Wessel --- kernel/debug/gdbstub.c | 22 +++++++++++++++------- kernel/debug/kdb/kdb_debugger.c | 17 +++++++---------- kernel/debug/kdb/kdb_io.c | 10 ++++++---- kernel/debug/kdb/kdb_private.h | 1 + 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index a11db956dd62..34872482315e 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -42,6 +42,8 @@ /* Our I/O buffers. */ static char remcom_in_buffer[BUFMAX]; static char remcom_out_buffer[BUFMAX]; +static int gdbstub_use_prev_in_buf; +static int gdbstub_prev_in_buf_pos; /* Storage for the registers, in GDB format. 
*/ static unsigned long gdb_regs[(NUMREGBYTES + @@ -58,6 +60,13 @@ static int gdbstub_read_wait(void) int ret = -1; int i; + if (unlikely(gdbstub_use_prev_in_buf)) { + if (gdbstub_prev_in_buf_pos < gdbstub_use_prev_in_buf) + return remcom_in_buffer[gdbstub_prev_in_buf_pos++]; + else + gdbstub_use_prev_in_buf = 0; + } + /* poll any additional I/O interfaces that are defined */ while (ret < 0) for (i = 0; kdb_poll_funcs[i] != NULL; i++) { @@ -109,7 +118,6 @@ static void get_packet(char *buffer) buffer[count] = ch; count = count + 1; } - buffer[count] = 0; if (ch == '#') { xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4; @@ -124,6 +132,7 @@ static void get_packet(char *buffer) if (dbg_io_ops->flush) dbg_io_ops->flush(); } + buffer[count] = 0; } while (checksum != xmitcsum); } @@ -1082,12 +1091,11 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd) case 'c': strcpy(remcom_in_buffer, cmd); return 0; - case '?': - gdb_cmd_status(ks); - break; - case '\0': - strcpy(remcom_out_buffer, ""); - break; + case '$': + strcpy(remcom_in_buffer, cmd); + gdbstub_use_prev_in_buf = strlen(remcom_in_buffer); + gdbstub_prev_in_buf_pos = 0; + return 0; } dbg_io_ops->write_char('+'); put_packet(remcom_out_buffer); diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index dd0b1b7dd02c..fe422d275782 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -30,6 +30,8 @@ EXPORT_SYMBOL_GPL(kdb_poll_funcs); int kdb_poll_idx = 1; EXPORT_SYMBOL_GPL(kdb_poll_idx); +static struct kgdb_state *kdb_ks; + int kdb_stub(struct kgdb_state *ks) { int error = 0; @@ -39,6 +41,7 @@ int kdb_stub(struct kgdb_state *ks) kdb_dbtrap_t db_result = KDB_DB_NOBPT; int i; + kdb_ks = ks; if (KDB_STATE(REENTRY)) { reason = KDB_REASON_SWITCH; KDB_STATE_CLEAR(REENTRY); @@ -124,16 +127,6 @@ int kdb_stub(struct kgdb_state *ks) kdbnearsym_cleanup(); if (error == KDB_CMD_KGDB) { if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) { - /* - * This inteface glue which 
allows kdb to transition in into - * the gdb stub. In order to do this the '?' or '' gdb serial - * packet response is processed here. And then control is - * passed to the gdbstub. - */ - if (KDB_STATE(DOING_KGDB)) - gdbstub_state(ks, "?"); - else - gdbstub_state(ks, ""); KDB_STATE_CLEAR(DOING_KGDB); KDB_STATE_CLEAR(DOING_KGDB2); } @@ -166,3 +159,7 @@ int kdb_stub(struct kgdb_state *ks) return kgdb_info[ks->cpu].ret_state; } +void kdb_gdb_state_pass(char *buf) +{ + gdbstub_state(kdb_ks, buf); +} diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 96fdaac46a80..bd233264b29f 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -35,8 +35,8 @@ static void kgdb_transition_check(char *buffer) { int slen = strlen(buffer); if (strncmp(buffer, "$?#3f", slen) != 0 && - strncmp(buffer, "$qSupported#37", slen) != 0 && - strncmp(buffer, "+$qSupported#37", slen) != 0) { + strncmp(buffer, "$qSupported", slen) != 0 && + strncmp(buffer, "+$qSupported", slen) != 0) { KDB_STATE_SET(KGDB_TRANS); kdb_printf("%s", buffer); } @@ -390,12 +390,14 @@ static char *kdb_read(char *buffer, size_t bufsize) /* Special escape to kgdb */ if (lastchar - buffer >= 5 && strcmp(lastchar - 5, "$?#3f") == 0) { + kdb_gdb_state_pass(lastchar - 5); strcpy(buffer, "kgdb"); KDB_STATE_SET(DOING_KGDB); return buffer; } - if (lastchar - buffer >= 14 && - strcmp(lastchar - 14, "$qSupported#37") == 0) { + if (lastchar - buffer >= 11 && + strcmp(lastchar - 11, "$qSupported") == 0) { + kdb_gdb_state_pass(lastchar - 11); strcpy(buffer, "kgdb"); KDB_STATE_SET(DOING_KGDB2); return buffer; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 35d69ed1dfb5..03d332e63442 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -218,6 +218,7 @@ extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); extern void kdb_meminfo_proc_show(void); 
extern char *kdb_getstr(char *, size_t, char *); +extern void kdb_gdb_state_pass(char *buf); /* Defines for kdb_symbol_print */ #define KDB_SP_SPACEB 0x0001 /* Space before string */ From d613d828e8987a1f794378022f900b454fa95403 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 23 May 2011 13:22:54 -0500 Subject: [PATCH 146/151] kdb: Remove all references to DOING_KGDB2 The DOING_KGDB2 was originally a state variable for one of the two ways to automatically transition from kdb to kgdb. Purge all these variables and just use one single state for the transition. Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_debugger.c | 4 +--- kernel/debug/kdb/kdb_io.c | 2 +- kernel/debug/kdb/kdb_main.c | 2 +- kernel/debug/kdb/kdb_private.h | 2 -- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index fe422d275782..d9ca9aa481ec 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -126,10 +126,8 @@ int kdb_stub(struct kgdb_state *ks) KDB_STATE_CLEAR(PAGER); kdbnearsym_cleanup(); if (error == KDB_CMD_KGDB) { - if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) { + if (KDB_STATE(DOING_KGDB)) KDB_STATE_CLEAR(DOING_KGDB); - KDB_STATE_CLEAR(DOING_KGDB2); - } return DBG_PASS_EVENT; } kdb_bp_install(ks->linux_regs); diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index bd233264b29f..0dbcdfbb6fd0 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -399,7 +399,7 @@ static char *kdb_read(char *buffer, size_t bufsize) strcmp(lastchar - 11, "$qSupported") == 0) { kdb_gdb_state_pass(lastchar - 11); strcpy(buffer, "kgdb"); - KDB_STATE_SET(DOING_KGDB2); + KDB_STATE_SET(DOING_KGDB); return buffer; } } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index b33116ec9e6d..63786e71a3cd 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1386,7 +1386,7 @@ int kdb_main_loop(kdb_reason_t reason, 
kdb_reason_t reason2, int error, } if (result == KDB_CMD_KGDB) { - if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2))) + if (!KDB_STATE(DOING_KGDB)) kdb_printf("Entering please attach debugger " "or use $D#44+ or $3#33\n"); break; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 03d332e63442..e381d105b40b 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -21,7 +21,6 @@ #define KDB_CMD_SS (-1003) #define KDB_CMD_SSB (-1004) #define KDB_CMD_KGDB (-1005) -#define KDB_CMD_KGDB2 (-1006) /* Internal debug flags */ #define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */ @@ -146,7 +145,6 @@ extern int kdb_state; * keyboard on this cpu */ #define KDB_STATE_KEXEC 0x00040000 /* kexec issued */ #define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */ -#define KDB_STATE_DOING_KGDB2 0x00100000 /* kgdb enter now issued */ #define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */ #define KDB_STATE_ARCH 0xff000000 /* Reserved for arch * specific use */ From 37f86b469d73fc2f2a925536fb99b8f513f641b7 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Tue, 24 May 2011 10:43:06 -0500 Subject: [PATCH 147/151] kdb,kgdb: Allow arbitrary kgdb magic knock sequences The first packet that gdb sends when the kernel is in kdb mode seems to change with every release of gdb. Instead of continuing to add many different gdb packets, change kdb to automatically look for anything that looks like a gdb packet. Example 1 cold start test: echo g > /proc/sysrq-trigger $D#44+ Example 2 cold start test: echo g > /proc/sysrq-trigger $3#33 The second one should re-enter kdb's shell right away and is purely a test.
Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_io.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 0dbcdfbb6fd0..4802eb5840e1 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -31,15 +31,21 @@ char kdb_prompt_str[CMD_BUFLEN]; int kdb_trap_printk; -static void kgdb_transition_check(char *buffer) +static int kgdb_transition_check(char *buffer) { - int slen = strlen(buffer); - if (strncmp(buffer, "$?#3f", slen) != 0 && - strncmp(buffer, "$qSupported", slen) != 0 && - strncmp(buffer, "+$qSupported", slen) != 0) { + if (buffer[0] != '+' && buffer[0] != '$') { KDB_STATE_SET(KGDB_TRANS); kdb_printf("%s", buffer); + } else { + int slen = strlen(buffer); + if (slen > 3 && buffer[slen - 3] == '#') { + kdb_gdb_state_pass(buffer); + strcpy(buffer, "kgdb"); + KDB_STATE_SET(DOING_KGDB); + return 1; + } } + return 0; } static int kdb_read_get_key(char *buffer, size_t bufsize) @@ -251,6 +257,10 @@ static char *kdb_read(char *buffer, size_t bufsize) case 13: /* enter */ *lastchar++ = '\n'; *lastchar++ = '\0'; + if (!KDB_STATE(KGDB_TRANS)) { + KDB_STATE_SET(KGDB_TRANS); + kdb_printf("%s", buffer); + } kdb_printf("\n"); return buffer; case 4: /* Del */ @@ -382,10 +392,12 @@ static char *kdb_read(char *buffer, size_t bufsize) * printed characters if we think that * kgdb is connecting, until the check * fails */ - if (!KDB_STATE(KGDB_TRANS)) - kgdb_transition_check(buffer); - else + if (!KDB_STATE(KGDB_TRANS)) { + if (kgdb_transition_check(buffer)) + return buffer; + } else { kdb_printf("%c", key); + } } /* Special escape to kgdb */ if (lastchar - buffer >= 5 && From 9d8b9ec44234b2f6e0225300632d250210c04f11 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Aug 2011 17:41:35 -0400 Subject: [PATCH 148/151] ext4: use ext4_msg() instead of printk in mballoc Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 79 
+++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 37 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d5021e82f8cc..70d1b3e64284 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -493,10 +493,11 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) b2 = (unsigned char *) bitmap; for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { if (b1[i] != b2[i]) { - printk(KERN_ERR "corruption in group %u " - "at byte %u(%u): %x in copy != %x " - "on disk/prealloc\n", - e4b->bd_group, i, i * 8, b1[i], b2[i]); + ext4_msg(e4b->bd_sb, KERN_ERR, + "corruption in group %u " + "at byte %u(%u): %x in copy != %x " + "on disk/prealloc", + e4b->bd_group, i, i * 8, b1[i], b2[i]); BUG(); } } @@ -2224,8 +2225,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, EXT4_DESC_PER_BLOCK_BITS(sb); meta_group_info = kmalloc(metalen, GFP_KERNEL); if (meta_group_info == NULL) { - printk(KERN_ERR "EXT4-fs: can't allocate mem for a " - "buddy group\n"); + ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " + "for a buddy group"); goto exit_meta_group_info; } sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = @@ -2238,7 +2239,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); if (meta_group_info[i] == NULL) { - printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); + ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); goto exit_group_info; } memset(meta_group_info[i], 0, kmem_cache_size(cachep)); @@ -2333,12 +2334,12 @@ static int ext4_mb_init_backend(struct super_block *sb) * So a two level scheme suffices for now. 
*/ sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); if (sbi->s_group_info == NULL) { - printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); + ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } sbi->s_buddy_cache = new_inode(sb); if (sbi->s_buddy_cache == NULL) { - printk(KERN_ERR "EXT4-fs: can't get new inode\n"); + ext4_msg(sb, KERN_ERR, "can't get new inode"); goto err_freesgi; } sbi->s_buddy_cache->i_ino = get_next_ino(); @@ -2346,8 +2347,7 @@ static int ext4_mb_init_backend(struct super_block *sb) for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { - printk(KERN_ERR - "EXT4-fs: can't read descriptor %u\n", i); + ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i); goto err_freebuddy; } if (ext4_mb_add_groupinfo(sb, i, desc) != 0) @@ -2411,7 +2411,8 @@ static int ext4_groupinfo_create_slab(size_t size) mutex_unlock(&ext4_grpinfo_slab_create_mutex); if (!cachep) { - printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); + printk(KERN_EMERG + "EXT4-fs: no memory for groupinfo slab cache\n"); return -ENOMEM; } @@ -2566,25 +2567,25 @@ int ext4_mb_release(struct super_block *sb) if (sbi->s_buddy_cache) iput(sbi->s_buddy_cache); if (sbi->s_mb_stats) { - printk(KERN_INFO - "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n", + ext4_msg(sb, KERN_INFO, + "mballoc: %u blocks %u reqs (%u success)", atomic_read(&sbi->s_bal_allocated), atomic_read(&sbi->s_bal_reqs), atomic_read(&sbi->s_bal_success)); - printk(KERN_INFO - "EXT4-fs: mballoc: %u extents scanned, %u goal hits, " - "%u 2^N hits, %u breaks, %u lost\n", + ext4_msg(sb, KERN_INFO, + "mballoc: %u extents scanned, %u goal hits, " + "%u 2^N hits, %u breaks, %u lost", atomic_read(&sbi->s_bal_ex_scanned), atomic_read(&sbi->s_bal_goals), atomic_read(&sbi->s_bal_2orders), atomic_read(&sbi->s_bal_breaks), atomic_read(&sbi->s_mb_lost_chunks)); - printk(KERN_INFO - "EXT4-fs: mballoc: %lu generated and it took %Lu\n", + 
ext4_msg(sb, KERN_INFO, + "mballoc: %lu generated and it took %Lu", sbi->s_mb_buddies_generated, sbi->s_mb_generation_time); - printk(KERN_INFO - "EXT4-fs: mballoc: %u preallocated, %u discarded\n", + ext4_msg(sb, KERN_INFO, + "mballoc: %u preallocated, %u discarded", atomic_read(&sbi->s_mb_preallocated), atomic_read(&sbi->s_mb_discarded)); } @@ -3024,9 +3025,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical) { - printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n", - (unsigned long) start, (unsigned long) size, - (unsigned long) ac->ac_o_ex.fe_logical); + ext4_msg(ac->ac_sb, KERN_ERR, + "start %lu, size %lu, fe_logical %lu", + (unsigned long) start, (unsigned long) size, + (unsigned long) ac->ac_o_ex.fe_logical); } BUG_ON(start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical); @@ -3607,10 +3609,11 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, bit = next + 1; } if (free != pa->pa_free) { - printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n", - pa, (unsigned long) pa->pa_lstart, - (unsigned long) pa->pa_pstart, - (unsigned long) pa->pa_len); + ext4_msg(e4b->bd_sb, KERN_CRIT, + "pa %p: logic %lu, phys. %lu, len %lu", + pa, (unsigned long) pa->pa_lstart, + (unsigned long) pa->pa_pstart, + (unsigned long) pa->pa_len); ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u", free, pa->pa_free); /* @@ -3798,7 +3801,8 @@ void ext4_discard_preallocations(struct inode *inode) * use preallocation while we're discarding it */ spin_unlock(&pa->pa_lock); spin_unlock(&ei->i_prealloc_lock); - printk(KERN_ERR "uh-oh! used pa while discarding\n"); + ext4_msg(sb, KERN_ERR, + "uh-oh! 
used pa while discarding"); WARN_ON(1); schedule_timeout_uninterruptible(HZ); goto repeat; @@ -3875,12 +3879,13 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) return; - printk(KERN_ERR "EXT4-fs: Can't allocate:" - " Allocation context details:\n"); - printk(KERN_ERR "EXT4-fs: status %d flags %d\n", + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" + " Allocation context details:"); + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", ac->ac_status, ac->ac_flags); - printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, " - "best %lu/%lu/%lu@%lu cr %d\n", + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " + "goal %lu/%lu/%lu@%lu, " + "best %lu/%lu/%lu@%lu cr %d", (unsigned long)ac->ac_o_ex.fe_group, (unsigned long)ac->ac_o_ex.fe_start, (unsigned long)ac->ac_o_ex.fe_len, @@ -3894,9 +3899,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) (unsigned long)ac->ac_b_ex.fe_len, (unsigned long)ac->ac_b_ex.fe_logical, (int)ac->ac_criteria); - printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, - ac->ac_found); - printk(KERN_ERR "EXT4-fs: groups: \n"); + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", + ac->ac_ex_scanned, ac->ac_found); + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); ngroups = ext4_get_groups_count(sb); for (i = 0; i < ngroups; i++) { struct ext4_group_info *grp = ext4_get_group_info(sb, i); From 48e6061bf4bb25eec151b91f22fd90a5b9a4920a Mon Sep 17 00:00:00 2001 From: Yu Jian Date: Mon, 1 Aug 2011 17:41:39 -0400 Subject: [PATCH 149/151] ext4: use EXT4_BAD_INO for buddy cache to avoid colliding with valid inode # Signed-off-by: Yu Jian Signed-off-by: Andreas Dilger Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 70d1b3e64284..e41620b56e53 100644 --- 
a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2342,7 +2342,11 @@ static int ext4_mb_init_backend(struct super_block *sb) ext4_msg(sb, KERN_ERR, "can't get new inode"); goto err_freesgi; } - sbi->s_buddy_cache->i_ino = get_next_ino(); + /* To avoid potentially colliding with a valid on-disk inode number, + * use EXT4_BAD_INO for the buddy cache inode number. This inode is + * not in the inode hash, so it should never be found by iget(), but + * this will avoid confusion if it ever shows up during debugging. */ + sbi->s_buddy_cache->i_ino = EXT4_BAD_INO; EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); From 79a77c5ac34cc27ccbfbdf7113b41cdd93534eab Mon Sep 17 00:00:00 2001 From: Yu Jian Date: Mon, 1 Aug 2011 17:41:46 -0400 Subject: [PATCH 150/151] ext4: prevent memory leaks from ext4_mb_init_backend() on error path In ext4_mb_init(), if the s_locality_group allocation fails it will currently cause the allocations made in ext4_mb_init_backend() to be leaked. Moving the ext4_mb_init_backend() allocation after the s_locality_group allocation avoids that problem. 
Signed-off-by: Yu Jian Signed-off-by: Andreas Dilger Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e41620b56e53..17a5a57c415a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2465,12 +2465,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) i++; } while (i <= sb->s_blocksize_bits + 1); - /* init file for buddy data */ - ret = ext4_mb_init_backend(sb); - if (ret != 0) { - goto out; - } - spin_lock_init(&sbi->s_md_lock); spin_lock_init(&sbi->s_bal_lock); @@ -2507,6 +2501,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) spin_lock_init(&lg->lg_prealloc_lock); } + /* init file for buddy data */ + ret = ext4_mb_init_backend(sb); + if (ret != 0) { + goto out; + } + if (sbi->s_proc) proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_fops, sb); From c027a474a68065391c8773f6e83ed5412657e369 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 Jul 2011 16:35:02 +0200 Subject: [PATCH 151/151] oom: task->mm == NULL doesn't mean the memory was freed exit_mm() sets ->mm = NULL and then calls mmput()->exit_mmap(), which frees the memory. However select_bad_process() checks ->mm != NULL before TIF_MEMDIE, so it continues to kill other tasks even while the oom-killed task is still freeing its memory. Change select_bad_process() to check ->mm after TIF_MEMDIE, but skip the tasks which have already passed exit_notify() to ensure a zombie with TIF_MEMDIE set can't block the oom-killer. Alternatively we could probably clear TIF_MEMDIE after exit_mmap(). 
Signed-off-by: Oleg Nesterov Reviewed-by: KOSAKI Motohiro Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index eafff89b3dd6..626303b52f3c 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -303,7 +303,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, do_each_thread(g, p) { unsigned int points; - if (!p->mm) + if (p->exit_state) continue; if (oom_unkillable_task(p, mem, nodemask)) continue; @@ -319,6 +319,8 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, */ if (test_tsk_thread_flag(p, TIF_MEMDIE)) return ERR_PTR(-1UL); + if (!p->mm) + continue; if (p->flags & PF_EXITING) { /*