From 4c5087b99b228ddd0ffe6c127ee89ce5224a6ced Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 21 Sep 2009 19:28:49 +0000 Subject: [PATCH] --- yaml --- r: 166372 b: refs/heads/master c: b24715027aab5e586c4ab1d035f3e543307dea69 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/arch/arm/kernel/entry-header.S | 2 - trunk/arch/sparc/include/asm/vio.h | 2 + trunk/drivers/net/can/at91_can.c | 1186 ----------------------- trunk/drivers/net/davinci_emac.c | 9 +- trunk/drivers/net/virtio_net.c | 229 +++-- trunk/drivers/pci/hotplug/acpiphp_ibm.c | 1 - trunk/fs/buffer.c | 10 +- trunk/fs/cifs/Kconfig | 1 - trunk/fs/cifs/cifsfs.c | 93 +- trunk/fs/cifs/cifsglob.h | 21 +- trunk/fs/cifs/cifsproto.h | 11 +- trunk/fs/cifs/cifssmb.c | 1 - trunk/fs/cifs/connect.c | 1 + trunk/fs/cifs/dir.c | 64 +- trunk/fs/cifs/file.c | 137 +-- trunk/fs/cifs/misc.c | 34 +- trunk/fs/cifs/readdir.c | 4 +- trunk/fs/cifs/transport.c | 50 + trunk/fs/fs-writeback.c | 165 +--- trunk/include/linux/backing-dev.h | 3 +- trunk/mm/page-writeback.c | 30 +- trunk/mm/shmem.c | 5 +- trunk/mm/vmscan.c | 8 +- trunk/net/ax25/af_ax25.c | 27 +- 25 files changed, 487 insertions(+), 1609 deletions(-) delete mode 100644 trunk/drivers/net/can/at91_can.c diff --git a/[refs] b/[refs] index 12bf0abf2053..85623aea2a84 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: bfebb1406329667f2cccb50fad1de87f573b2c1a +refs/heads/master: b24715027aab5e586c4ab1d035f3e543307dea69 diff --git a/trunk/arch/arm/kernel/entry-header.S b/trunk/arch/arm/kernel/entry-header.S index ac34c0d9384b..e17e3c30d957 100644 --- a/trunk/arch/arm/kernel/entry-header.S +++ b/trunk/arch/arm/kernel/entry-header.S @@ -83,8 +83,6 @@ ldr r0, [sp] strex r1, r2, [sp] @ clear the exclusive monitor ldmib sp, {r1 - pc}^ @ load r1 - pc, cpsr -#else - ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr #endif .endm diff --git a/trunk/arch/sparc/include/asm/vio.h b/trunk/arch/sparc/include/asm/vio.h index 9d83d3bcb494..6cdbf7e7351d 100644 --- a/trunk/arch/sparc/include/asm/vio.h +++ b/trunk/arch/sparc/include/asm/vio.h @@ -258,6 +258,8 @@ static inline void *vio_dring_entry(struct vio_dring_state *dr, static inline u32 vio_dring_avail(struct vio_dring_state *dr, unsigned int ring_size) { + MAYBE_BUILD_BUG_ON(!is_power_of_2(ring_size)); + return (dr->pending - ((dr->prod - dr->cons) & (ring_size - 1))); } diff --git a/trunk/drivers/net/can/at91_can.c b/trunk/drivers/net/can/at91_can.c deleted file mode 100644 index f67ae285a35a..000000000000 --- a/trunk/drivers/net/can/at91_can.c +++ /dev/null @@ -1,1186 +0,0 @@ -/* - * at91_can.c - CAN network driver for AT91 SoC CAN controller - * - * (C) 2007 by Hans J. Koch - * (C) 2008, 2009 by Marc Kleine-Budde - * - * This software may be distributed under the terms of the GNU General - * Public License ("GPL") version 2 as distributed in the 'COPYING' - * file from the main directory of the linux kernel source. 
- * - * Send feedback to - * - * - * Your platform definition file should specify something like: - * - * static struct at91_can_data ek_can_data = { - * transceiver_switch = sam9263ek_transceiver_switch, - * }; - * - * at91_add_device_can(&ek_can_data); - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#define DRV_NAME "at91_can" -#define AT91_NAPI_WEIGHT 12 - -/* - * RX/TX Mailbox split - * don't dare to touch - */ -#define AT91_MB_RX_NUM 12 -#define AT91_MB_TX_SHIFT 2 - -#define AT91_MB_RX_FIRST 0 -#define AT91_MB_RX_LAST (AT91_MB_RX_FIRST + AT91_MB_RX_NUM - 1) - -#define AT91_MB_RX_MASK(i) ((1 << (i)) - 1) -#define AT91_MB_RX_SPLIT 8 -#define AT91_MB_RX_LOW_LAST (AT91_MB_RX_SPLIT - 1) -#define AT91_MB_RX_LOW_MASK (AT91_MB_RX_MASK(AT91_MB_RX_SPLIT)) - -#define AT91_MB_TX_NUM (1 << AT91_MB_TX_SHIFT) -#define AT91_MB_TX_FIRST (AT91_MB_RX_LAST + 1) -#define AT91_MB_TX_LAST (AT91_MB_TX_FIRST + AT91_MB_TX_NUM - 1) - -#define AT91_NEXT_PRIO_SHIFT (AT91_MB_TX_SHIFT) -#define AT91_NEXT_PRIO_MASK (0xf << AT91_MB_TX_SHIFT) -#define AT91_NEXT_MB_MASK (AT91_MB_TX_NUM - 1) -#define AT91_NEXT_MASK ((AT91_MB_TX_NUM - 1) | AT91_NEXT_PRIO_MASK) - -/* Common registers */ -enum at91_reg { - AT91_MR = 0x000, - AT91_IER = 0x004, - AT91_IDR = 0x008, - AT91_IMR = 0x00C, - AT91_SR = 0x010, - AT91_BR = 0x014, - AT91_TIM = 0x018, - AT91_TIMESTP = 0x01C, - AT91_ECR = 0x020, - AT91_TCR = 0x024, - AT91_ACR = 0x028, -}; - -/* Mailbox registers (0 <= i <= 15) */ -#define AT91_MMR(i) (enum at91_reg)(0x200 + ((i) * 0x20)) -#define AT91_MAM(i) (enum at91_reg)(0x204 + ((i) * 0x20)) -#define AT91_MID(i) (enum at91_reg)(0x208 + ((i) * 0x20)) -#define AT91_MFID(i) (enum at91_reg)(0x20C + ((i) * 0x20)) -#define AT91_MSR(i) (enum at91_reg)(0x210 + ((i) * 0x20)) -#define AT91_MDL(i) (enum at91_reg)(0x214 + ((i) * 0x20)) -#define AT91_MDH(i) (enum at91_reg)(0x218 + ((i) * 0x20)) -#define AT91_MCR(i) (enum at91_reg)(0x21C + ((i) * 0x20)) - -/* Register bits */ -#define AT91_MR_CANEN BIT(0) -#define AT91_MR_LPM BIT(1) -#define AT91_MR_ABM BIT(2) -#define AT91_MR_OVL BIT(3) -#define AT91_MR_TEOF BIT(4) -#define AT91_MR_TTM BIT(5) -#define AT91_MR_TIMFRZ BIT(6) -#define AT91_MR_DRPT BIT(7) - -#define AT91_SR_RBSY BIT(29) - -#define AT91_MMR_PRIO_SHIFT (16) - -#define AT91_MID_MIDE BIT(29) - -#define AT91_MSR_MRTR BIT(20) -#define AT91_MSR_MABT BIT(22) -#define AT91_MSR_MRDY BIT(23) -#define AT91_MSR_MMI BIT(24) - -#define AT91_MCR_MRTR BIT(20) -#define AT91_MCR_MTCR BIT(23) - -/* Mailbox Modes */ -enum at91_mb_mode { - AT91_MB_MODE_DISABLED = 0, - AT91_MB_MODE_RX = 1, - AT91_MB_MODE_RX_OVRWR = 2, - AT91_MB_MODE_TX = 3, - AT91_MB_MODE_CONSUMER = 4, - AT91_MB_MODE_PRODUCER = 5, -}; - -/* Interrupt mask bits */ -#define AT91_IRQ_MB_RX ((1 << (AT91_MB_RX_LAST + 1)) \ - - (1 << AT91_MB_RX_FIRST)) -#define AT91_IRQ_MB_TX ((1 << (AT91_MB_TX_LAST + 1)) \ - - (1 << AT91_MB_TX_FIRST)) -#define AT91_IRQ_MB_ALL (AT91_IRQ_MB_RX | AT91_IRQ_MB_TX) - -#define AT91_IRQ_ERRA (1 << 16) -#define AT91_IRQ_WARN (1 << 17) -#define AT91_IRQ_ERRP (1 << 18) -#define AT91_IRQ_BOFF (1 << 19) -#define AT91_IRQ_SLEEP (1 << 20) -#define AT91_IRQ_WAKEUP (1 << 21) -#define AT91_IRQ_TOVF (1 << 22) -#define AT91_IRQ_TSTP (1 << 23) -#define AT91_IRQ_CERR (1 << 24) -#define AT91_IRQ_SERR (1 << 25) -#define AT91_IRQ_AERR (1 << 26) -#define AT91_IRQ_FERR (1 << 27) -#define AT91_IRQ_BERR (1 << 28) - -#define AT91_IRQ_ERR_ALL 
(0x1fff0000) -#define AT91_IRQ_ERR_FRAME (AT91_IRQ_CERR | AT91_IRQ_SERR | \ - AT91_IRQ_AERR | AT91_IRQ_FERR | AT91_IRQ_BERR) -#define AT91_IRQ_ERR_LINE (AT91_IRQ_ERRA | AT91_IRQ_WARN | \ - AT91_IRQ_ERRP | AT91_IRQ_BOFF) - -#define AT91_IRQ_ALL (0x1fffffff) - -struct at91_priv { - struct can_priv can; /* must be the first member! */ - struct net_device *dev; - struct napi_struct napi; - - void __iomem *reg_base; - - u32 reg_sr; - unsigned int tx_next; - unsigned int tx_echo; - unsigned int rx_next; - - struct clk *clk; - struct at91_can_data *pdata; -}; - -static struct can_bittiming_const at91_bittiming_const = { - .tseg1_min = 4, - .tseg1_max = 16, - .tseg2_min = 2, - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 2, - .brp_max = 128, - .brp_inc = 1, -}; - -static inline int get_tx_next_mb(const struct at91_priv *priv) -{ - return (priv->tx_next & AT91_NEXT_MB_MASK) + AT91_MB_TX_FIRST; -} - -static inline int get_tx_next_prio(const struct at91_priv *priv) -{ - return (priv->tx_next >> AT91_NEXT_PRIO_SHIFT) & 0xf; -} - -static inline int get_tx_echo_mb(const struct at91_priv *priv) -{ - return (priv->tx_echo & AT91_NEXT_MB_MASK) + AT91_MB_TX_FIRST; -} - -static inline u32 at91_read(const struct at91_priv *priv, enum at91_reg reg) -{ - return readl(priv->reg_base + reg); -} - -static inline void at91_write(const struct at91_priv *priv, enum at91_reg reg, - u32 value) -{ - writel(value, priv->reg_base + reg); -} - -static inline void set_mb_mode_prio(const struct at91_priv *priv, - unsigned int mb, enum at91_mb_mode mode, int prio) -{ - at91_write(priv, AT91_MMR(mb), (mode << 24) | (prio << 16)); -} - -static inline void set_mb_mode(const struct at91_priv *priv, unsigned int mb, - enum at91_mb_mode mode) -{ - set_mb_mode_prio(priv, mb, mode, 0); -} - -static struct sk_buff *alloc_can_skb(struct net_device *dev, - struct can_frame **cf) -{ - struct sk_buff *skb; - - skb = netdev_alloc_skb(dev, sizeof(struct can_frame)); - if (unlikely(!skb)) - return NULL; - - skb->protocol = htons(ETH_P_CAN); - skb->ip_summed = CHECKSUM_UNNECESSARY; - *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); - - return skb; -} - -static struct sk_buff *alloc_can_err_skb(struct net_device *dev, - struct can_frame **cf) -{ - struct sk_buff *skb; - - skb = alloc_can_skb(dev, cf); - if (unlikely(!skb)) - return NULL; - - memset(*cf, 0, sizeof(struct can_frame)); - (*cf)->can_id = CAN_ERR_FLAG; - (*cf)->can_dlc = CAN_ERR_DLC; - - return skb; -} - -/* - * Swtich transceiver on or off - */ -static void at91_transceiver_switch(const struct at91_priv *priv, int on) -{ - if (priv->pdata && priv->pdata->transceiver_switch) - priv->pdata->transceiver_switch(on); -} - -static void at91_setup_mailboxes(struct net_device *dev) -{ - struct at91_priv *priv = netdev_priv(dev); - unsigned int i; - - /* - * The first 12 mailboxes are used as a reception FIFO. The - * last mailbox is configured with overwrite option. The - * overwrite flag indicates a FIFO overflow. - */ - for (i = AT91_MB_RX_FIRST; i < AT91_MB_RX_LAST; i++) - set_mb_mode(priv, i, AT91_MB_MODE_RX); - set_mb_mode(priv, AT91_MB_RX_LAST, AT91_MB_MODE_RX_OVRWR); - - /* The last 4 mailboxes are used for transmitting. 
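- * With the mailbox split defined above these are mailboxes
- * AT91_MB_TX_FIRST..AT91_MB_TX_LAST, i.e. 12..15.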
 */
- for (i = AT91_MB_TX_FIRST; i <= AT91_MB_TX_LAST; i++)
- set_mb_mode_prio(priv, i, AT91_MB_MODE_TX, 0);
-
- /* Reset tx and rx helper pointers */
- priv->tx_next = priv->tx_echo = priv->rx_next = 0;
-}
-
-static int at91_set_bittiming(struct net_device *dev)
-{
- const struct at91_priv *priv = netdev_priv(dev);
- const struct can_bittiming *bt = &priv->can.bittiming;
- u32 reg_br;
-
- reg_br = ((priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) << 24) |
- ((bt->brp - 1) << 16) | ((bt->sjw - 1) << 12) |
- ((bt->prop_seg - 1) << 8) | ((bt->phase_seg1 - 1) << 4) |
- ((bt->phase_seg2 - 1) << 0);
-
- dev_info(dev->dev.parent, "writing AT91_BR: 0x%08x\n", reg_br);
-
- at91_write(priv, AT91_BR, reg_br);
-
- return 0;
-}
-
-static void at91_chip_start(struct net_device *dev)
-{
- struct at91_priv *priv = netdev_priv(dev);
- u32 reg_mr, reg_ier;
-
- /* disable interrupts */
- at91_write(priv, AT91_IDR, AT91_IRQ_ALL);
-
- /* disable chip */
- reg_mr = at91_read(priv, AT91_MR);
- at91_write(priv, AT91_MR, reg_mr & ~AT91_MR_CANEN);
-
- at91_setup_mailboxes(dev);
- at91_transceiver_switch(priv, 1);
-
- /* enable chip */
- at91_write(priv, AT91_MR, AT91_MR_CANEN);
-
- priv->can.state = CAN_STATE_ERROR_ACTIVE;
-
- /* Enable interrupts */
- reg_ier = AT91_IRQ_MB_RX | AT91_IRQ_ERRP | AT91_IRQ_ERR_FRAME;
- at91_write(priv, AT91_IDR, AT91_IRQ_ALL);
- at91_write(priv, AT91_IER, reg_ier);
-}
-
-static void at91_chip_stop(struct net_device *dev, enum can_state state)
-{
- struct at91_priv *priv = netdev_priv(dev);
- u32 reg_mr;
-
- /* disable interrupts */
- at91_write(priv, AT91_IDR, AT91_IRQ_ALL);
-
- reg_mr = at91_read(priv, AT91_MR);
- at91_write(priv, AT91_MR, reg_mr & ~AT91_MR_CANEN);
-
- at91_transceiver_switch(priv, 0);
- priv->can.state = state;
-}
-
-/*
- * theory of operation:
- *
- * According to the datasheet priority 0 is the highest priority, 15
- * is the lowest. If two mailboxes have the same priority level the
- * message of the mailbox with the lowest number is sent first.
- *
- * We use the first TX mailbox (AT91_MB_TX_FIRST) with prio 0, then
- * the next mailbox with prio 0, and so on, until all mailboxes are
- * used. Then we start again from mailbox AT91_MB_TX_FIRST, but with
- * prio 1, then mailbox AT91_MB_TX_FIRST + 1 with prio 1, and so on.
- * When we reach the last mailbox with prio 15, we have to stop
- * sending and wait for all messages to be delivered, then start
- * again with mailbox AT91_MB_TX_FIRST and prio 0.
- *
- * We use priv->tx_next as a counter for the next transmission
- * mailbox, but without the offset AT91_MB_TX_FIRST. The lower bits
- * encode the mailbox number, the upper 4 bits the mailbox priority:
- *
- * priv->tx_next = (prio << AT91_NEXT_PRIO_SHIFT) |
- * (mb - AT91_MB_TX_FIRST);
- *
- */
-static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct at91_priv *priv = netdev_priv(dev);
- struct net_device_stats *stats = &dev->stats;
- struct can_frame *cf = (struct can_frame *)skb->data;
- unsigned int mb, prio;
- u32 reg_mid, reg_mcr;
-
- mb = get_tx_next_mb(priv);
- prio = get_tx_next_prio(priv);
-
- if (unlikely(!(at91_read(priv, AT91_MSR(mb)) & AT91_MSR_MRDY))) {
- netif_stop_queue(dev);
-
- dev_err(dev->dev.parent,
- "BUG! TX buffer full when queue awake!\n");
- return NETDEV_TX_BUSY;
- }
-
- if (cf->can_id & CAN_EFF_FLAG)
- reg_mid = (cf->can_id & CAN_EFF_MASK) | AT91_MID_MIDE;
- else
- reg_mid = (cf->can_id & CAN_SFF_MASK) << 18;
-
- reg_mcr = ((cf->can_id & CAN_RTR_FLAG) ?
AT91_MCR_MRTR : 0) |
- (cf->can_dlc << 16) | AT91_MCR_MTCR;
-
- /* disable MB while writing ID (see datasheet) */
- set_mb_mode(priv, mb, AT91_MB_MODE_DISABLED);
- at91_write(priv, AT91_MID(mb), reg_mid);
- set_mb_mode_prio(priv, mb, AT91_MB_MODE_TX, prio);
-
- at91_write(priv, AT91_MDL(mb), *(u32 *)(cf->data + 0));
- at91_write(priv, AT91_MDH(mb), *(u32 *)(cf->data + 4));
-
- /* This triggers transmission */
- at91_write(priv, AT91_MCR(mb), reg_mcr);
-
- stats->tx_bytes += cf->can_dlc;
- dev->trans_start = jiffies;
-
- /* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */
- can_put_echo_skb(skb, dev, mb - AT91_MB_TX_FIRST);
-
- /*
- * we have to stop the queue and deliver all messages in case
- * of a prio+mb counter wrap around. This is the case when the
- * tx_next buffer prio and mailbox both equal 0.
- *
- * also stop the queue if the next buffer is still in use
- * (== not ready)
- */
- priv->tx_next++;
- if (!(at91_read(priv, AT91_MSR(get_tx_next_mb(priv))) &
- AT91_MSR_MRDY) ||
- (priv->tx_next & AT91_NEXT_MASK) == 0)
- netif_stop_queue(dev);
-
- /* Enable interrupt for this mailbox */
- at91_write(priv, AT91_IER, 1 << mb);
-
- return NETDEV_TX_OK;
-}
-
-/**
- * at91_activate_rx_low - activate lower rx mailboxes
- * @priv: at91 context
- *
- * Reenables the lower mailboxes for reception of new CAN messages
- */
-static inline void at91_activate_rx_low(const struct at91_priv *priv)
-{
- u32 mask = AT91_MB_RX_LOW_MASK;
- at91_write(priv, AT91_TCR, mask);
-}
-
-/**
- * at91_activate_rx_mb - reactivate a single rx mailbox
- * @priv: at91 context
- * @mb: mailbox to reactivate
- *
- * Reenables the given mailbox for reception of new CAN messages
- */
-static inline void at91_activate_rx_mb(const struct at91_priv *priv,
- unsigned int mb)
-{
- u32 mask = 1 << mb;
- at91_write(priv, AT91_TCR, mask);
-}
-
-/**
- * at91_rx_overflow_err - send error frame due to rx overflow
- * @dev: net device
- */
-static void at91_rx_overflow_err(struct net_device *dev)
-{
- struct net_device_stats *stats = &dev->stats;
- struct sk_buff *skb;
- struct can_frame *cf;
-
- dev_dbg(dev->dev.parent, "RX buffer overflow\n");
- stats->rx_over_errors++;
- stats->rx_errors++;
-
- skb = alloc_can_err_skb(dev, &cf);
- if (unlikely(!skb))
- return;
-
- cf->can_id |= CAN_ERR_CRTL;
- cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
- netif_receive_skb(skb);
-
- stats->rx_packets++;
- stats->rx_bytes += cf->can_dlc;
-}
-
-/**
- * at91_read_mb - read CAN msg from mailbox (lowlevel impl)
- * @dev: net device
- * @mb: mailbox number to read from
- * @cf: can frame where to store the message
- *
- * Reads a CAN message from the given mailbox and stores data into
- * the given can frame. "mb" and "cf" must be valid.
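- *
- * For example, a standard frame with ID 0x123 arrives as
- * reg_mid == (0x123 << 18); an extended frame has AT91_MID_MIDE set
- * and carries its 29 bit identifier in bits 28..0.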
- */
-static void at91_read_mb(struct net_device *dev, unsigned int mb,
- struct can_frame *cf)
-{
- const struct at91_priv *priv = netdev_priv(dev);
- u32 reg_msr, reg_mid;
-
- reg_mid = at91_read(priv, AT91_MID(mb));
- if (reg_mid & AT91_MID_MIDE)
- cf->can_id = ((reg_mid >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG;
- else
- cf->can_id = (reg_mid >> 18) & CAN_SFF_MASK;
-
- reg_msr = at91_read(priv, AT91_MSR(mb));
- if (reg_msr & AT91_MSR_MRTR)
- cf->can_id |= CAN_RTR_FLAG;
- cf->can_dlc = min_t(__u8, (reg_msr >> 16) & 0xf, 8);
-
- *(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb));
- *(u32 *)(cf->data + 4) = at91_read(priv, AT91_MDH(mb));
-
- if (unlikely(mb == AT91_MB_RX_LAST && reg_msr & AT91_MSR_MMI))
- at91_rx_overflow_err(dev);
-}
-
-/**
- * at91_read_msg - read CAN message from mailbox
- * @dev: net device
- * @mb: mailbox to read from
- *
- * Reads a CAN message from the given mailbox, puts it into the linux
- * network RX queue, and does all the housekeeping chores (stats, ...)
- */
-static void at91_read_msg(struct net_device *dev, unsigned int mb)
-{
- struct net_device_stats *stats = &dev->stats;
- struct can_frame *cf;
- struct sk_buff *skb;
-
- skb = alloc_can_skb(dev, &cf);
- if (unlikely(!skb)) {
- stats->rx_dropped++;
- return;
- }
-
- at91_read_mb(dev, mb, cf);
- netif_receive_skb(skb);
-
- stats->rx_packets++;
- stats->rx_bytes += cf->can_dlc;
-}
-
-/**
- * at91_poll_rx - read multiple CAN messages from mailboxes
- * @dev: net device
- * @quota: max number of packets we're allowed to receive
- *
- * Theory of Operation:
- *
- * 12 of the 16 mailboxes on the chip are reserved for RX. We split
- * them into 2 groups. The lower group holds 8 and the upper 4 mailboxes.
- *
- * Like it or not, the chip always saves a received CAN message
- * into the first free mailbox it finds (starting with the
- * lowest). This makes it very difficult to read the messages in the
- * right order from the chip. This is how we work around that problem:
- *
- * The first message goes into mb nr. 0 and issues an interrupt. All
- * rx ints are disabled in the interrupt handler and a napi poll is
- * scheduled. We read the mailbox, but do _not_ reenable the mb (to
- * receive another message).
- *
- * lower mbxs upper
- * ______^______ __^__
- * / \ / \
- * +-+-+-+-+-+-+-+-++-+-+-+-+
- * |x|x|x|x|x|x|x|x|| | | | |
- * +-+-+-+-+-+-+-+-++-+-+-+-+
- * 0 0 0 0 0 0 0 0 0 0 1 1 \ mail
- * 0 1 2 3 4 5 6 7 8 9 0 1 / box
- *
- * The variable priv->rx_next points to the next mailbox to read a
- * message from. As long as we're in the lower mailboxes we just read
- * the mailbox but do not reenable it.
- *
- * With completion of the last of the lower mailboxes, we reenable the
- * whole first group, but continue to look for filled mailboxes in the
- * upper mailboxes. Think of the second group as overflow mailboxes
- * that take CAN messages when the lower group is full. While in the
- * upper group we reenable each mailbox right after reading it, giving
- * the chip more room to store messages.
- *
- * After finishing we look again in the lower group if we still
- * have quota.
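- *
- * Example: rx_next == 5 and mailboxes 5, 6, 7 and 9 are full: we
- * read 5..7, reenable the whole lower group after 7, then read 9
- * and reenable just that one mailbox.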
- *
- */
-static int at91_poll_rx(struct net_device *dev, int quota)
-{
- struct at91_priv *priv = netdev_priv(dev);
- u32 reg_sr = at91_read(priv, AT91_SR);
- const unsigned long *addr = (unsigned long *)&reg_sr;
- unsigned int mb;
- int received = 0;
-
- if (priv->rx_next > AT91_MB_RX_LOW_LAST &&
- reg_sr & AT91_MB_RX_LOW_MASK)
- dev_info(dev->dev.parent,
- "order of incoming frames cannot be guaranteed\n");
-
- again:
- for (mb = find_next_bit(addr, AT91_MB_RX_NUM, priv->rx_next);
- mb < AT91_MB_RX_NUM && quota > 0;
- reg_sr = at91_read(priv, AT91_SR),
- mb = find_next_bit(addr, AT91_MB_RX_NUM, ++priv->rx_next)) {
- at91_read_msg(dev, mb);
-
- /* reactivate mailboxes */
- if (mb == AT91_MB_RX_LOW_LAST)
- /* all lower mailboxes, we just finished the last one */
- at91_activate_rx_low(priv);
- else if (mb > AT91_MB_RX_LOW_LAST)
- /* only the mailbox we read */
- at91_activate_rx_mb(priv, mb);
-
- received++;
- quota--;
- }
-
- /* upper group completed, look again in lower */
- if (priv->rx_next > AT91_MB_RX_LOW_LAST &&
- quota > 0 && mb >= AT91_MB_RX_NUM) {
- priv->rx_next = 0;
- goto again;
- }
-
- return received;
-}
-
-static void at91_poll_err_frame(struct net_device *dev,
- struct can_frame *cf, u32 reg_sr)
-{
- struct at91_priv *priv = netdev_priv(dev);
-
- /* CRC error */
- if (reg_sr & AT91_IRQ_CERR) {
- dev_dbg(dev->dev.parent, "CERR irq\n");
- dev->stats.rx_errors++;
- priv->can.can_stats.bus_error++;
- cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
- }
-
- /* Stuffing Error */
- if (reg_sr & AT91_IRQ_SERR) {
- dev_dbg(dev->dev.parent, "SERR irq\n");
- dev->stats.rx_errors++;
- priv->can.can_stats.bus_error++;
- cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
- cf->data[2] |= CAN_ERR_PROT_STUFF;
- }
-
- /* Acknowledgement Error */
- if (reg_sr & AT91_IRQ_AERR) {
- dev_dbg(dev->dev.parent, "AERR irq\n");
- dev->stats.tx_errors++;
- cf->can_id |= CAN_ERR_ACK;
- }
-
- /* Form error */
- if (reg_sr & AT91_IRQ_FERR) {
- dev_dbg(dev->dev.parent, "FERR irq\n");
- dev->stats.rx_errors++;
- priv->can.can_stats.bus_error++;
- cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
- cf->data[2] |= CAN_ERR_PROT_FORM;
- }
-
- /* Bit Error */
- if (reg_sr & AT91_IRQ_BERR) {
- dev_dbg(dev->dev.parent, "BERR irq\n");
- dev->stats.tx_errors++;
- priv->can.can_stats.bus_error++;
- cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
- cf->data[2] |= CAN_ERR_PROT_BIT;
- }
-}
-
-static int at91_poll_err(struct net_device *dev, int quota, u32 reg_sr)
-{
- struct sk_buff *skb;
- struct can_frame *cf;
-
- if (quota == 0)
- return 0;
-
- skb = alloc_can_err_skb(dev, &cf);
- if (unlikely(!skb))
- return 0;
-
- at91_poll_err_frame(dev, cf, reg_sr);
- netif_receive_skb(skb);
-
- dev->last_rx = jiffies;
- dev->stats.rx_packets++;
- dev->stats.rx_bytes += cf->can_dlc;
-
- return 1;
-}
-
-static int at91_poll(struct napi_struct *napi, int quota)
-{
- struct net_device *dev = napi->dev;
- const struct at91_priv *priv = netdev_priv(dev);
- u32 reg_sr = at91_read(priv, AT91_SR);
- int work_done = 0;
-
- if (reg_sr & AT91_IRQ_MB_RX)
- work_done += at91_poll_rx(dev, quota - work_done);
-
- /*
- * The error bits are clear on read,
- * so use saved value from irq handler.
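- * ("reg_sr |= priv->reg_sr" below merges the saved bits back in, so
- * error events seen only by the hard irq handler are not lost.)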
- */ - reg_sr |= priv->reg_sr; - if (reg_sr & AT91_IRQ_ERR_FRAME) - work_done += at91_poll_err(dev, quota - work_done, reg_sr); - - if (work_done < quota) { - /* enable IRQs for frame errors and all mailboxes >= rx_next */ - u32 reg_ier = AT91_IRQ_ERR_FRAME; - reg_ier |= AT91_IRQ_MB_RX & ~AT91_MB_RX_MASK(priv->rx_next); - - napi_complete(napi); - at91_write(priv, AT91_IER, reg_ier); - } - - return work_done; -} - -/* - * theory of operation: - * - * priv->tx_echo holds the number of the oldest can_frame put for - * transmission into the hardware, but not yet ACKed by the CAN tx - * complete IRQ. - * - * We iterate from priv->tx_echo to priv->tx_next and check if the - * packet has been transmitted, echo it back to the CAN framework. If - * we discover a not yet transmitted package, stop looking for more. - * - */ -static void at91_irq_tx(struct net_device *dev, u32 reg_sr) -{ - struct at91_priv *priv = netdev_priv(dev); - u32 reg_msr; - unsigned int mb; - - /* masking of reg_sr not needed, already done by at91_irq */ - - for (/* nix */; (priv->tx_next - priv->tx_echo) > 0; priv->tx_echo++) { - mb = get_tx_echo_mb(priv); - - /* no event in mailbox? */ - if (!(reg_sr & (1 << mb))) - break; - - /* Disable irq for this TX mailbox */ - at91_write(priv, AT91_IDR, 1 << mb); - - /* - * only echo if mailbox signals us a transfer - * complete (MSR_MRDY). Otherwise it's a tansfer - * abort. "can_bus_off()" takes care about the skbs - * parked in the echo queue. - */ - reg_msr = at91_read(priv, AT91_MSR(mb)); - if (likely(reg_msr & AT91_MSR_MRDY && - ~reg_msr & AT91_MSR_MABT)) { - /* _NOTE_: substract AT91_MB_TX_FIRST offset from mb! */ - can_get_echo_skb(dev, mb - AT91_MB_TX_FIRST); - dev->stats.tx_packets++; - } - } - - /* - * restart queue if we don't have a wrap around but restart if - * we get a TX int for the last can frame directly before a - * wrap around. - */ - if ((priv->tx_next & AT91_NEXT_MASK) != 0 || - (priv->tx_echo & AT91_NEXT_MASK) == 0) - netif_wake_queue(dev); -} - -static void at91_irq_err_state(struct net_device *dev, - struct can_frame *cf, enum can_state new_state) -{ - struct at91_priv *priv = netdev_priv(dev); - u32 reg_idr, reg_ier, reg_ecr; - u8 tec, rec; - - reg_ecr = at91_read(priv, AT91_ECR); - rec = reg_ecr & 0xff; - tec = reg_ecr >> 16; - - switch (priv->can.state) { - case CAN_STATE_ERROR_ACTIVE: - /* - * from: ERROR_ACTIVE - * to : ERROR_WARNING, ERROR_PASSIVE, BUS_OFF - * => : there was a warning int - */ - if (new_state >= CAN_STATE_ERROR_WARNING && - new_state <= CAN_STATE_BUS_OFF) { - dev_dbg(dev->dev.parent, "Error Warning IRQ\n"); - priv->can.can_stats.error_warning++; - - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (tec > rec) ? - CAN_ERR_CRTL_TX_WARNING : - CAN_ERR_CRTL_RX_WARNING; - } - case CAN_STATE_ERROR_WARNING: /* fallthrough */ - /* - * from: ERROR_ACTIVE, ERROR_WARNING - * to : ERROR_PASSIVE, BUS_OFF - * => : error passive int - */ - if (new_state >= CAN_STATE_ERROR_PASSIVE && - new_state <= CAN_STATE_BUS_OFF) { - dev_dbg(dev->dev.parent, "Error Passive IRQ\n"); - priv->can.can_stats.error_passive++; - - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (tec > rec) ? 
- CAN_ERR_CRTL_TX_PASSIVE : - CAN_ERR_CRTL_RX_PASSIVE; - } - break; - case CAN_STATE_BUS_OFF: - /* - * from: BUS_OFF - * to : ERROR_ACTIVE, ERROR_WARNING, ERROR_PASSIVE - */ - if (new_state <= CAN_STATE_ERROR_PASSIVE) { - cf->can_id |= CAN_ERR_RESTARTED; - - dev_dbg(dev->dev.parent, "restarted\n"); - priv->can.can_stats.restarts++; - - netif_carrier_on(dev); - netif_wake_queue(dev); - } - break; - default: - break; - } - - - /* process state changes depending on the new state */ - switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: - /* - * actually we want to enable AT91_IRQ_WARN here, but - * it screws up the system under certain - * circumstances. so just enable AT91_IRQ_ERRP, thus - * the "fallthrough" - */ - dev_dbg(dev->dev.parent, "Error Active\n"); - cf->can_id |= CAN_ERR_PROT; - cf->data[2] = CAN_ERR_PROT_ACTIVE; - case CAN_STATE_ERROR_WARNING: /* fallthrough */ - reg_idr = AT91_IRQ_ERRA | AT91_IRQ_WARN | AT91_IRQ_BOFF; - reg_ier = AT91_IRQ_ERRP; - break; - case CAN_STATE_ERROR_PASSIVE: - reg_idr = AT91_IRQ_ERRA | AT91_IRQ_WARN | AT91_IRQ_ERRP; - reg_ier = AT91_IRQ_BOFF; - break; - case CAN_STATE_BUS_OFF: - reg_idr = AT91_IRQ_ERRA | AT91_IRQ_ERRP | - AT91_IRQ_WARN | AT91_IRQ_BOFF; - reg_ier = 0; - - cf->can_id |= CAN_ERR_BUSOFF; - - dev_dbg(dev->dev.parent, "bus-off\n"); - netif_carrier_off(dev); - priv->can.can_stats.bus_off++; - - /* turn off chip, if restart is disabled */ - if (!priv->can.restart_ms) { - at91_chip_stop(dev, CAN_STATE_BUS_OFF); - return; - } - break; - default: - break; - } - - at91_write(priv, AT91_IDR, reg_idr); - at91_write(priv, AT91_IER, reg_ier); -} - -static void at91_irq_err(struct net_device *dev) -{ - struct at91_priv *priv = netdev_priv(dev); - struct sk_buff *skb; - struct can_frame *cf; - enum can_state new_state; - u32 reg_sr; - - reg_sr = at91_read(priv, AT91_SR); - - /* we need to look at the unmasked reg_sr */ - if (unlikely(reg_sr & AT91_IRQ_BOFF)) - new_state = CAN_STATE_BUS_OFF; - else if (unlikely(reg_sr & AT91_IRQ_ERRP)) - new_state = CAN_STATE_ERROR_PASSIVE; - else if (unlikely(reg_sr & AT91_IRQ_WARN)) - new_state = CAN_STATE_ERROR_WARNING; - else if (likely(reg_sr & AT91_IRQ_ERRA)) - new_state = CAN_STATE_ERROR_ACTIVE; - else { - dev_err(dev->dev.parent, "BUG! hardware in undefined state\n"); - return; - } - - /* state hasn't changed */ - if (likely(new_state == priv->can.state)) - return; - - skb = alloc_can_err_skb(dev, &cf); - if (unlikely(!skb)) - return; - - at91_irq_err_state(dev, cf, new_state); - netif_rx(skb); - - dev->last_rx = jiffies; - dev->stats.rx_packets++; - dev->stats.rx_bytes += cf->can_dlc; - - priv->can.state = new_state; -} - -/* - * interrupt handler - */ -static irqreturn_t at91_irq(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct at91_priv *priv = netdev_priv(dev); - irqreturn_t handled = IRQ_NONE; - u32 reg_sr, reg_imr; - - reg_sr = at91_read(priv, AT91_SR); - reg_imr = at91_read(priv, AT91_IMR); - - /* Ignore masked interrupts */ - reg_sr &= reg_imr; - if (!reg_sr) - goto exit; - - handled = IRQ_HANDLED; - - /* Receive or error interrupt? -> napi */ - if (reg_sr & (AT91_IRQ_MB_RX | AT91_IRQ_ERR_FRAME)) { - /* - * The error bits are clear on read, - * save for later use. 
- */ - priv->reg_sr = reg_sr; - at91_write(priv, AT91_IDR, - AT91_IRQ_MB_RX | AT91_IRQ_ERR_FRAME); - napi_schedule(&priv->napi); - } - - /* Transmission complete interrupt */ - if (reg_sr & AT91_IRQ_MB_TX) - at91_irq_tx(dev, reg_sr); - - at91_irq_err(dev); - - exit: - return handled; -} - -static int at91_open(struct net_device *dev) -{ - struct at91_priv *priv = netdev_priv(dev); - int err; - - clk_enable(priv->clk); - - /* check or determine and set bittime */ - err = open_candev(dev); - if (err) - goto out; - - /* register interrupt handler */ - if (request_irq(dev->irq, at91_irq, IRQF_SHARED, - dev->name, dev)) { - err = -EAGAIN; - goto out_close; - } - - /* start chip and queuing */ - at91_chip_start(dev); - napi_enable(&priv->napi); - netif_start_queue(dev); - - return 0; - - out_close: - close_candev(dev); - out: - clk_disable(priv->clk); - - return err; -} - -/* - * stop CAN bus activity - */ -static int at91_close(struct net_device *dev) -{ - struct at91_priv *priv = netdev_priv(dev); - - netif_stop_queue(dev); - napi_disable(&priv->napi); - at91_chip_stop(dev, CAN_STATE_STOPPED); - - free_irq(dev->irq, dev); - clk_disable(priv->clk); - - close_candev(dev); - - return 0; -} - -static int at91_set_mode(struct net_device *dev, enum can_mode mode) -{ - switch (mode) { - case CAN_MODE_START: - at91_chip_start(dev); - netif_wake_queue(dev); - break; - - default: - return -EOPNOTSUPP; - } - - return 0; -} - -static const struct net_device_ops at91_netdev_ops = { - .ndo_open = at91_open, - .ndo_stop = at91_close, - .ndo_start_xmit = at91_start_xmit, -}; - -static int __init at91_can_probe(struct platform_device *pdev) -{ - struct net_device *dev; - struct at91_priv *priv; - struct resource *res; - struct clk *clk; - void __iomem *addr; - int err, irq; - - clk = clk_get(&pdev->dev, "can_clk"); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "no clock defined\n"); - err = -ENODEV; - goto exit; - } - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - irq = platform_get_irq(pdev, 0); - if (!res || !irq) { - err = -ENODEV; - goto exit_put; - } - - if (!request_mem_region(res->start, - resource_size(res), - pdev->name)) { - err = -EBUSY; - goto exit_put; - } - - addr = ioremap_nocache(res->start, resource_size(res)); - if (!addr) { - err = -ENOMEM; - goto exit_release; - } - - dev = alloc_candev(sizeof(struct at91_priv)); - if (!dev) { - err = -ENOMEM; - goto exit_iounmap; - } - - dev->netdev_ops = &at91_netdev_ops; - dev->irq = irq; - dev->flags |= IFF_ECHO; - - priv = netdev_priv(dev); - priv->can.clock.freq = clk_get_rate(clk); - priv->can.bittiming_const = &at91_bittiming_const; - priv->can.do_set_bittiming = at91_set_bittiming; - priv->can.do_set_mode = at91_set_mode; - priv->reg_base = addr; - priv->dev = dev; - priv->clk = clk; - priv->pdata = pdev->dev.platform_data; - - netif_napi_add(dev, &priv->napi, at91_poll, AT91_NAPI_WEIGHT); - - dev_set_drvdata(&pdev->dev, dev); - SET_NETDEV_DEV(dev, &pdev->dev); - - err = register_candev(dev); - if (err) { - dev_err(&pdev->dev, "registering netdev failed\n"); - goto exit_free; - } - - dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%d)\n", - priv->reg_base, dev->irq); - - return 0; - - exit_free: - free_netdev(dev); - exit_iounmap: - iounmap(addr); - exit_release: - release_mem_region(res->start, resource_size(res)); - exit_put: - clk_put(clk); - exit: - return err; -} - -static int __devexit at91_can_remove(struct platform_device *pdev) -{ - struct net_device *dev = platform_get_drvdata(pdev); - struct at91_priv *priv = 
netdev_priv(dev); - struct resource *res; - - unregister_netdev(dev); - - platform_set_drvdata(pdev, NULL); - - free_netdev(dev); - - iounmap(priv->reg_base); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - - clk_put(priv->clk); - - return 0; -} - -static struct platform_driver at91_can_driver = { - .probe = at91_can_probe, - .remove = __devexit_p(at91_can_remove), - .driver = { - .name = DRV_NAME, - .owner = THIS_MODULE, - }, -}; - -static int __init at91_can_module_init(void) -{ - printk(KERN_INFO "%s netdevice driver\n", DRV_NAME); - return platform_driver_register(&at91_can_driver); -} - -static void __exit at91_can_module_exit(void) -{ - platform_driver_unregister(&at91_can_driver); - printk(KERN_INFO "%s: driver removed\n", DRV_NAME); -} - -module_init(at91_can_module_init); -module_exit(at91_can_module_exit); - -MODULE_AUTHOR("Marc Kleine-Budde "); -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION(DRV_NAME " CAN netdevice driver"); diff --git a/trunk/drivers/net/davinci_emac.c b/trunk/drivers/net/davinci_emac.c index 65a2d0ba64e2..d465eaa796c4 100644 --- a/trunk/drivers/net/davinci_emac.c +++ b/trunk/drivers/net/davinci_emac.c @@ -200,9 +200,6 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1"; /** NOTE:: For DM646x the IN_VECTOR has changed */ #define EMAC_DM646X_MAC_IN_VECTOR_RX_INT_VEC BIT(EMAC_DEF_RX_CH) #define EMAC_DM646X_MAC_IN_VECTOR_TX_INT_VEC BIT(16 + EMAC_DEF_TX_CH) -#define EMAC_DM646X_MAC_IN_VECTOR_HOST_INT BIT(26) -#define EMAC_DM646X_MAC_IN_VECTOR_STATPEND_INT BIT(27) - /* CPPI bit positions */ #define EMAC_CPPI_SOP_BIT BIT(31) @@ -2170,11 +2167,7 @@ static int emac_poll(struct napi_struct *napi, int budget) emac_int_enable(priv); } - mask = EMAC_DM644X_MAC_IN_VECTOR_HOST_INT; - if (priv->version == EMAC_VERSION_2) - mask = EMAC_DM646X_MAC_IN_VECTOR_HOST_INT; - - if (unlikely(status & mask)) { + if (unlikely(status & EMAC_DM644X_MAC_IN_VECTOR_HOST_INT)) { u32 ch, cause; dev_err(emac_dev, "DaVinci EMAC: Fatal Hardware Error\n"); netif_stop_queue(ndev); diff --git a/trunk/drivers/net/virtio_net.c b/trunk/drivers/net/virtio_net.c index d445845f2779..5c498d2b043f 100644 --- a/trunk/drivers/net/virtio_net.c +++ b/trunk/drivers/net/virtio_net.c @@ -1,4 +1,4 @@ -/* A network driver using virtio. +/* A simple network driver using virtio. * * Copyright 2007 Rusty Russell IBM Corporation * @@ -48,9 +48,19 @@ struct virtnet_info struct napi_struct napi; unsigned int status; + /* The skb we couldn't send because buffers were full. */ + struct sk_buff *last_xmit_skb; + + /* If we need to free in a timer, this is it. */ + struct timer_list xmit_free_timer; + /* Number of input buffers, and max we've ever had. */ unsigned int num, max; + /* For cleaning up after transmission. */ + struct tasklet_struct tasklet; + bool free_in_tasklet; + /* I like... big packets and I cannot lie! */ bool big_packets; @@ -68,17 +78,9 @@ struct virtnet_info struct page *pages; }; -struct skb_vnet_hdr { - union { - struct virtio_net_hdr hdr; - struct virtio_net_hdr_mrg_rxbuf mhdr; - }; - unsigned int num_sg; -}; - -static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) +static inline void *skb_vnet_hdr(struct sk_buff *skb) { - return (struct skb_vnet_hdr *)skb->cb; + return (struct virtio_net_hdr *)skb->cb; } static void give_a_page(struct virtnet_info *vi, struct page *page) @@ -117,13 +119,17 @@ static void skb_xmit_done(struct virtqueue *svq) /* We were probably waiting for more output buffers. 
*/ netif_wake_queue(vi->dev); + + /* Make sure we re-xmit last_xmit_skb: if there are no more packets + * queued, start_xmit won't be called. */ + tasklet_schedule(&vi->tasklet); } static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { struct virtnet_info *vi = netdev_priv(dev); - struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); + struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); int err; int i; @@ -134,6 +140,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, } if (vi->mergeable_rx_bufs) { + struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); unsigned int copy; char *p = page_address(skb_shinfo(skb)->frags[0].page); @@ -141,8 +148,8 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, len = PAGE_SIZE; len -= sizeof(struct virtio_net_hdr_mrg_rxbuf); - memcpy(&hdr->mhdr, p, sizeof(hdr->mhdr)); - p += sizeof(hdr->mhdr); + memcpy(hdr, p, sizeof(*mhdr)); + p += sizeof(*mhdr); copy = len; if (copy > skb_tailroom(skb)) @@ -157,13 +164,13 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, skb_shinfo(skb)->nr_frags--; } else { skb_shinfo(skb)->frags[0].page_offset += - sizeof(hdr->mhdr) + copy; + sizeof(*mhdr) + copy; skb_shinfo(skb)->frags[0].size = len; skb->data_len += len; skb->len += len; } - while (--hdr->mhdr.num_buffers) { + while (--mhdr->num_buffers) { struct sk_buff *nskb; i = skb_shinfo(skb)->nr_frags; @@ -177,7 +184,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len); if (!nskb) { pr_debug("%s: rx error: %d buffers missing\n", - dev->name, hdr->mhdr.num_buffers); + dev->name, mhdr->num_buffers); dev->stats.rx_length_errors++; goto drop; } @@ -198,7 +205,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, skb->len += len; } } else { - len -= sizeof(hdr->hdr); + len -= sizeof(struct virtio_net_hdr); if (len <= MAX_PACKET_LEN) trim_pages(vi, skb); @@ -216,11 +223,9 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; - if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { pr_debug("Needs csum!\n"); - if (!skb_partial_csum_set(skb, - hdr->hdr.csum_start, - hdr->hdr.csum_offset)) + if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset)) goto frame_err; } @@ -228,9 +233,9 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); - if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); - switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; @@ -243,14 +248,14 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, default: if (net_ratelimit()) printk(KERN_WARNING "%s: bad gso type %u.\n", - dev->name, hdr->hdr.gso_type); + dev->name, hdr->gso_type); goto frame_err; } - if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) + if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; + skb_shinfo(skb)->gso_size = hdr->gso_size; if (skb_shinfo(skb)->gso_size == 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: zero gso size.\n", @@ -280,8 +285,8 @@ static bool 
try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) bool oom = false; sg_init_table(sg, 2+MAX_SKB_FRAGS); - do { - struct skb_vnet_hdr *hdr; + for (;;) { + struct virtio_net_hdr *hdr; skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN); if (unlikely(!skb)) { @@ -293,7 +298,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) skb_put(skb, MAX_PACKET_LEN); hdr = skb_vnet_hdr(skb); - sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); + sg_set_buf(sg, hdr, sizeof(*hdr)); if (vi->big_packets) { for (i = 0; i < MAX_SKB_FRAGS; i++) { @@ -323,7 +328,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) break; } vi->num++; - } while (err >= num); + } if (unlikely(vi->num > vi->max)) vi->max = vi->num; vi->rvq->vq_ops->kick(vi->rvq); @@ -341,7 +346,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) if (!vi->mergeable_rx_bufs) return try_fill_recv_maxbufs(vi, gfp); - do { + for (;;) { skb_frag_t *f; skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN); @@ -375,7 +380,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) break; } vi->num++; - } while (err > 0); + } if (unlikely(vi->num > vi->max)) vi->max = vi->num; vi->rvq->vq_ops->kick(vi->rvq); @@ -443,26 +448,42 @@ static int virtnet_poll(struct napi_struct *napi, int budget) return received; } -static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) +static void free_old_xmit_skbs(struct virtnet_info *vi) { struct sk_buff *skb; - unsigned int len, tot_sgs = 0; + unsigned int len; while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); __skb_unlink(skb, &vi->send); vi->dev->stats.tx_bytes += skb->len; vi->dev->stats.tx_packets++; - tot_sgs += skb_vnet_hdr(skb)->num_sg; kfree_skb(skb); } - return tot_sgs; +} + +/* If the virtio transport doesn't always notify us when all in-flight packets + * are consumed, we fall back to using this function on a timer to free them. 
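+ * The timer re-arms itself every HZ/10 until the send queue is
+ * empty; see the mod_timer() calls in xmit_skb() and xmit_free().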
*/ +static void xmit_free(unsigned long data) +{ + struct virtnet_info *vi = (void *)data; + + netif_tx_lock(vi->dev); + + free_old_xmit_skbs(vi); + + if (!skb_queue_empty(&vi->send)) + mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); + + netif_tx_unlock(vi->dev); } static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { + int num, err; struct scatterlist sg[2+MAX_SKB_FRAGS]; - struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); + struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); + struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; sg_init_table(sg, 2+MAX_SKB_FRAGS); @@ -470,89 +491,108 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); if (skb->ip_summed == CHECKSUM_PARTIAL) { - hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->hdr.csum_start = skb->csum_start - skb_headroom(skb); - hdr->hdr.csum_offset = skb->csum_offset; + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + hdr->csum_start = skb->csum_start - skb_headroom(skb); + hdr->csum_offset = skb->csum_offset; } else { - hdr->hdr.flags = 0; - hdr->hdr.csum_offset = hdr->hdr.csum_start = 0; + hdr->flags = 0; + hdr->csum_offset = hdr->csum_start = 0; } if (skb_is_gso(skb)) { - hdr->hdr.hdr_len = skb_headlen(skb); - hdr->hdr.gso_size = skb_shinfo(skb)->gso_size; + hdr->hdr_len = skb_headlen(skb); + hdr->gso_size = skb_shinfo(skb)->gso_size; if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) - hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; + hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN) - hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; + hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else { - hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; - hdr->hdr.gso_size = hdr->hdr.hdr_len = 0; + hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; + hdr->gso_size = hdr->hdr_len = 0; } - hdr->mhdr.num_buffers = 0; + mhdr->num_buffers = 0; /* Encode metadata header at front. */ if (vi->mergeable_rx_bufs) - sg_set_buf(sg, &hdr->mhdr, sizeof(hdr->mhdr)); + sg_set_buf(sg, mhdr, sizeof(*mhdr)); else - sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); + sg_set_buf(sg, hdr, sizeof(*hdr)); - hdr->num_sg = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; - return vi->svq->vq_ops->add_buf(vi->svq, sg, hdr->num_sg, 0, skb); + num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; + + err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); + if (err >= 0 && !vi->free_in_tasklet) + mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); + + return err; +} + +static void xmit_tasklet(unsigned long data) +{ + struct virtnet_info *vi = (void *)data; + + netif_tx_lock_bh(vi->dev); + if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) >= 0) { + vi->svq->vq_ops->kick(vi->svq); + vi->last_xmit_skb = NULL; + } + if (vi->free_in_tasklet) + free_old_xmit_skbs(vi); + netif_tx_unlock_bh(vi->dev); } static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); - int capacity; again: /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(vi); + /* If we has a buffer left over from last time, send it now. 
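+	 * Sending the deferred skb before queueing a new one keeps the
+	 * packets in order.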
*/ + if (unlikely(vi->last_xmit_skb) && + xmit_skb(vi, vi->last_xmit_skb) < 0) + goto stop_queue; + + vi->last_xmit_skb = NULL; + /* Put new one in send queue and do transmit */ - __skb_queue_head(&vi->send, skb); - capacity = xmit_skb(vi, skb); - - /* This can happen with OOM and indirect buffers. */ - if (unlikely(capacity < 0)) { - netif_stop_queue(dev); - dev_warn(&dev->dev, "Unexpected full queue\n"); - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { - vi->svq->vq_ops->disable_cb(vi->svq); - netif_start_queue(dev); - goto again; + if (likely(skb)) { + __skb_queue_head(&vi->send, skb); + if (xmit_skb(vi, skb) < 0) { + vi->last_xmit_skb = skb; + skb = NULL; + goto stop_queue; } - return NETDEV_TX_BUSY; } - +done: vi->svq->vq_ops->kick(vi->svq); - /* Don't wait up for transmitted skbs to be freed. */ - skb_orphan(skb); - nf_reset(skb); - - /* Apparently nice girls don't return TX_BUSY; stop the queue - * before it gets out of hand. Naturally, this wastes entries. */ - if (capacity < 2+MAX_SKB_FRAGS) { - netif_stop_queue(dev); - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { - /* More just got used, free them then recheck. */ - capacity += free_old_xmit_skbs(vi); - if (capacity >= 2+MAX_SKB_FRAGS) { - netif_start_queue(dev); - vi->svq->vq_ops->disable_cb(vi->svq); - } - } - } - return NETDEV_TX_OK; + +stop_queue: + pr_debug("%s: virtio not prepared to send\n", dev->name); + netif_stop_queue(dev); + + /* Activate callback for using skbs: if this returns false it + * means some were used in the meantime. */ + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { + vi->svq->vq_ops->disable_cb(vi->svq); + netif_start_queue(dev); + goto again; + } + if (skb) { + /* Drop this skb: we only queue one. */ + vi->dev->stats.tx_dropped++; + kfree_skb(skb); + } + goto done; } static int virtnet_set_mac_address(struct net_device *dev, void *p) @@ -885,6 +925,10 @@ static int virtnet_probe(struct virtio_device *vdev) vi->pages = NULL; INIT_DELAYED_WORK(&vi->refill, refill_work); + /* If they give us a callback when all buffers are done, we don't need + * the timer. */ + vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); + /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) @@ -916,6 +960,11 @@ static int virtnet_probe(struct virtio_device *vdev) skb_queue_head_init(&vi->recv); skb_queue_head_init(&vi->send); + tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi); + + if (!vi->free_in_tasklet) + setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi); + err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); @@ -956,6 +1005,9 @@ static void virtnet_remove(struct virtio_device *vdev) /* Stop all the virtqueues. */ vdev->config->reset(vdev); + if (!vi->free_in_tasklet) + del_timer_sync(&vi->xmit_free_timer); + /* Free our skbs in send and recv queues, if any. 
*/ while ((skb = __skb_dequeue(&vi->recv)) != NULL) { kfree_skb(skb); @@ -989,6 +1041,7 @@ static unsigned int features[] = { VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, + VIRTIO_F_NOTIFY_ON_EMPTY, }; static struct virtio_driver virtio_net = { diff --git a/trunk/drivers/pci/hotplug/acpiphp_ibm.c b/trunk/drivers/pci/hotplug/acpiphp_ibm.c index a9d926b7d805..e7be66dbac21 100644 --- a/trunk/drivers/pci/hotplug/acpiphp_ibm.c +++ b/trunk/drivers/pci/hotplug/acpiphp_ibm.c @@ -406,7 +406,6 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle, __func__, status); return retval; } - info->hardware_id.string[sizeof(info->hardware_id.length) - 1] = '\0'; if (info->current_status && (info->valid & ACPI_VALID_HID) && (!strcmp(info->hardware_id.string, IBM_HARDWARE_ID1) || diff --git a/trunk/fs/buffer.c b/trunk/fs/buffer.c index 6fa530256bfd..24afd7422ae8 100644 --- a/trunk/fs/buffer.c +++ b/trunk/fs/buffer.c @@ -280,7 +280,7 @@ void invalidate_bdev(struct block_device *bdev) EXPORT_SYMBOL(invalidate_bdev); /* - * Kick the writeback threads then try to free up some ZONE_NORMAL memory. + * Kick pdflush then try to free up some ZONE_NORMAL memory. */ static void free_more_memory(void) { @@ -1709,9 +1709,9 @@ static int __block_write_full_page(struct inode *inode, struct page *page, /* * If it's a fully non-blocking write attempt and we cannot * lock the buffer then redirty the page. Note that this can - * potentially cause a busy-wait loop from writeback threads - * and kswapd activity, but those code paths have their own - * higher-level throttling. + * potentially cause a busy-wait loop from pdflush and kswapd + * activity, but those code paths have their own higher-level + * throttling. */ if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); @@ -3208,7 +3208,7 @@ EXPORT_SYMBOL(block_sync_page); * still running obsolete flush daemons, so we terminate them here. * * Use of bdflush() is deprecated and will be removed in a future kernel. - * The `flush-X' kernel threads fully replace bdflush daemons and this call. + * The `pdflush' kernel threads fully replace bdflush daemons and this call. 
*/ SYSCALL_DEFINE2(bdflush, int, func, long, data) { diff --git a/trunk/fs/cifs/Kconfig b/trunk/fs/cifs/Kconfig index 80f352596807..6994a0f54f02 100644 --- a/trunk/fs/cifs/Kconfig +++ b/trunk/fs/cifs/Kconfig @@ -2,7 +2,6 @@ config CIFS tristate "CIFS support (advanced network filesystem, SMBFS successor)" depends on INET select NLS - select SLOW_WORK help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block diff --git a/trunk/fs/cifs/cifsfs.c b/trunk/fs/cifs/cifsfs.c index 9a5e4f5f3122..90c5b39f0313 100644 --- a/trunk/fs/cifs/cifsfs.c +++ b/trunk/fs/cifs/cifsfs.c @@ -64,6 +64,9 @@ unsigned int multiuser_mount = 0; unsigned int extended_security = CIFSSEC_DEF; /* unsigned int ntlmv2_support = 0; */ unsigned int sign_CIFS_PDUs = 1; +extern struct task_struct *oplockThread; /* remove sparse warning */ +struct task_struct *oplockThread = NULL; +/* extern struct task_struct * dnotifyThread; remove sparse warning */ static const struct super_operations cifs_super_ops; unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; module_param(CIFSMaxBufSize, int, 0); @@ -969,12 +972,89 @@ cifs_destroy_mids(void) kmem_cache_destroy(cifs_oplock_cachep); } +static int cifs_oplock_thread(void *dummyarg) +{ + struct oplock_q_entry *oplock_item; + struct cifsTconInfo *pTcon; + struct inode *inode; + __u16 netfid; + int rc, waitrc = 0; + + set_freezable(); + do { + if (try_to_freeze()) + continue; + + spin_lock(&cifs_oplock_lock); + if (list_empty(&cifs_oplock_list)) { + spin_unlock(&cifs_oplock_lock); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(39*HZ); + } else { + oplock_item = list_entry(cifs_oplock_list.next, + struct oplock_q_entry, qhead); + cFYI(1, ("found oplock item to write out")); + pTcon = oplock_item->tcon; + inode = oplock_item->pinode; + netfid = oplock_item->netfid; + spin_unlock(&cifs_oplock_lock); + DeleteOplockQEntry(oplock_item); + /* can not grab inode sem here since it would + deadlock when oplock received on delete + since vfs_unlink holds the i_mutex across + the call */ + /* mutex_lock(&inode->i_mutex);*/ + if (S_ISREG(inode->i_mode)) { +#ifdef CONFIG_CIFS_EXPERIMENTAL + if (CIFS_I(inode)->clientCanCacheAll == 0) + break_lease(inode, FMODE_READ); + else if (CIFS_I(inode)->clientCanCacheRead == 0) + break_lease(inode, FMODE_WRITE); +#endif + rc = filemap_fdatawrite(inode->i_mapping); + if (CIFS_I(inode)->clientCanCacheRead == 0) { + waitrc = filemap_fdatawait( + inode->i_mapping); + invalidate_remote_inode(inode); + } + if (rc == 0) + rc = waitrc; + } else + rc = 0; + /* mutex_unlock(&inode->i_mutex);*/ + if (rc) + CIFS_I(inode)->write_behind_rc = rc; + cFYI(1, ("Oplock flush inode %p rc %d", + inode, rc)); + + /* releasing stale oplock after recent reconnect + of smb session using a now incorrect file + handle is not a data integrity issue but do + not bother sending an oplock release if session + to server still is disconnected since oplock + already released by the server in that case */ + if (!pTcon->need_reconnect) { + rc = CIFSSMBLock(0, pTcon, netfid, + 0 /* len */ , 0 /* offset */, 0, + 0, LOCKING_ANDX_OPLOCK_RELEASE, + false /* wait flag */); + cFYI(1, ("Oplock release rc = %d", rc)); + } + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); /* yield in case q were corrupt */ + } + } while (!kthread_should_stop()); + + return 0; +} + static int __init init_cifs(void) { int rc = 0; cifs_proc_init(); INIT_LIST_HEAD(&cifs_tcp_ses_list); + INIT_LIST_HEAD(&cifs_oplock_list); #ifdef 
CONFIG_CIFS_EXPERIMENTAL INIT_LIST_HEAD(&GlobalDnotifyReqList); INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); @@ -1003,6 +1083,7 @@ init_cifs(void) rwlock_init(&GlobalSMBSeslock); rwlock_init(&cifs_tcp_ses_lock); spin_lock_init(&GlobalMid_Lock); + spin_lock_init(&cifs_oplock_lock); if (cifs_max_pending < 2) { cifs_max_pending = 2; @@ -1037,13 +1118,16 @@ init_cifs(void) if (rc) goto out_unregister_key_type; #endif - rc = slow_work_register_user(); - if (rc) - goto out_unregister_resolver_key; + oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd"); + if (IS_ERR(oplockThread)) { + rc = PTR_ERR(oplockThread); + cERROR(1, ("error %d create oplock thread", rc)); + goto out_unregister_dfs_key_type; + } return 0; - out_unregister_resolver_key: + out_unregister_dfs_key_type: #ifdef CONFIG_CIFS_DFS_UPCALL unregister_key_type(&key_type_dns_resolver); out_unregister_key_type: @@ -1080,6 +1164,7 @@ exit_cifs(void) cifs_destroy_inodecache(); cifs_destroy_mids(); cifs_destroy_request_bufs(); + kthread_stop(oplockThread); } MODULE_AUTHOR("Steve French "); diff --git a/trunk/fs/cifs/cifsglob.h b/trunk/fs/cifs/cifsglob.h index 5d0fde18039c..6cfc81a32703 100644 --- a/trunk/fs/cifs/cifsglob.h +++ b/trunk/fs/cifs/cifsglob.h @@ -18,7 +18,6 @@ */ #include #include -#include #include "cifs_fs_sb.h" #include "cifsacl.h" /* @@ -347,16 +346,14 @@ struct cifsFileInfo { /* lock scope id (0 if none) */ struct file *pfile; /* needed for writepage */ struct inode *pInode; /* needed for oplock break */ - struct vfsmount *mnt; struct mutex lock_mutex; struct list_head llist; /* list of byte range locks we have. */ bool closePend:1; /* file is marked to close */ bool invalidHandle:1; /* file closed via session abend */ - bool oplock_break_cancelled:1; + bool messageMode:1; /* for pipes: message vs byte mode */ atomic_t count; /* reference count */ struct mutex fh_mutex; /* prevents reopen race after dead ses*/ struct cifs_search_info srch_inf; - struct slow_work oplock_break; /* slow_work job for oplock breaks */ }; /* Take a reference on the file private data */ @@ -368,10 +365,8 @@ static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file) /* Release a reference on the file private data */ static inline void cifsFileInfo_put(struct cifsFileInfo *cifs_file) { - if (atomic_dec_and_test(&cifs_file->count)) { - iput(cifs_file->pInode); + if (atomic_dec_and_test(&cifs_file->count)) kfree(cifs_file); - } } /* @@ -387,6 +382,7 @@ struct cifsInodeInfo { unsigned long time; /* jiffies of last update/check of inode */ bool clientCanCacheRead:1; /* read oplock */ bool clientCanCacheAll:1; /* read and writebehind oplock */ + bool oplockPending:1; bool delete_pending:1; /* DELETE_ON_CLOSE is set */ u64 server_eof; /* current file size on server */ u64 uniqueid; /* server inode number */ @@ -589,9 +585,9 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_LANMAN 0x10010 #define CIFSSEC_MUST_PLNTXT 0x20020 #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0xBF0BF /* allows weak security but also krb5 */ +#define CIFSSEC_MASK 0xAF0AF /* allows weak security but also krb5 */ #else -#define CIFSSEC_MASK 0xB70B7 /* current flags supported if weak */ +#define CIFSSEC_MASK 0xA70A7 /* current flags supported if weak */ #endif /* UPCALL */ #else /* do not allow weak pw hash */ #ifdef CONFIG_CIFS_UPCALL @@ -673,6 +669,12 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock; */ GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; +/* Global list of oplocks */ +GLOBAL_EXTERN struct list_head cifs_oplock_list; + +/* Protects the 
diff --git a/trunk/fs/cifs/cifsglob.h b/trunk/fs/cifs/cifsglob.h
index 5d0fde18039c..6cfc81a32703 100644
--- a/trunk/fs/cifs/cifsglob.h
+++ b/trunk/fs/cifs/cifsglob.h
@@ -18,7 +18,6 @@
  */
 #include
 #include
-#include
 #include "cifs_fs_sb.h"
 #include "cifsacl.h"
 /*
@@ -347,16 +346,14 @@ struct cifsFileInfo {
 		/* lock scope id (0 if none) */
 	struct file *pfile; /* needed for writepage */
 	struct inode *pInode; /* needed for oplock break */
-	struct vfsmount *mnt;
 	struct mutex lock_mutex;
 	struct list_head llist; /* list of byte range locks we have. */
 	bool closePend:1;	/* file is marked to close */
 	bool invalidHandle:1;	/* file closed via session abend */
-	bool oplock_break_cancelled:1;
+	bool messageMode:1;	/* for pipes: message vs byte mode */
 	atomic_t count;		/* reference count */
 	struct mutex fh_mutex; /* prevents reopen race after dead ses*/
 	struct cifs_search_info srch_inf;
-	struct slow_work oplock_break; /* slow_work job for oplock breaks */
 };
 
 /* Take a reference on the file private data */
@@ -368,10 +365,8 @@ static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 /* Release a reference on the file private data */
 static inline void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
-	if (atomic_dec_and_test(&cifs_file->count)) {
-		iput(cifs_file->pInode);
+	if (atomic_dec_and_test(&cifs_file->count))
 		kfree(cifs_file);
-	}
 }
 
 /*
@@ -387,6 +382,7 @@ struct cifsInodeInfo {
 	unsigned long time;	/* jiffies of last update/check of inode */
 	bool clientCanCacheRead:1;	/* read oplock */
 	bool clientCanCacheAll:1;	/* read and writebehind oplock */
+	bool oplockPending:1;
 	bool delete_pending:1;	/* DELETE_ON_CLOSE is set */
 	u64 server_eof;		/* current file size on server */
 	u64 uniqueid;		/* server inode number */
@@ -589,9 +585,9 @@ require use of the stronger protocol */
 #define   CIFSSEC_MUST_LANMAN	0x10010
 #define   CIFSSEC_MUST_PLNTXT	0x20020
 #ifdef CONFIG_CIFS_UPCALL
-#define   CIFSSEC_MASK          0xBF0BF /* allows weak security but also krb5 */
+#define   CIFSSEC_MASK          0xAF0AF /* allows weak security but also krb5 */
 #else
-#define   CIFSSEC_MASK          0xB70B7 /* current flags supported if weak */
+#define   CIFSSEC_MASK          0xA70A7 /* current flags supported if weak */
 #endif /* UPCALL */
 #else /* do not allow weak pw hash */
 #ifdef CONFIG_CIFS_UPCALL
@@ -673,6 +669,12 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock;
  */
GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;
 
+/* Global list of oplocks */
+GLOBAL_EXTERN struct list_head cifs_oplock_list;
+
+/* Protects the cifs_oplock_list */
+GLOBAL_EXTERN spinlock_t cifs_oplock_lock;
+
 /* Outstanding dir notify requests */
 GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
 /* DirNotify response queue */
@@ -723,4 +725,3 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
 GLOBAL_EXTERN unsigned int cifs_min_small;  /* min size of small buf pool */
 GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
-extern const struct slow_work_ops cifs_oplock_break_ops;
diff --git a/trunk/fs/cifs/cifsproto.h b/trunk/fs/cifs/cifsproto.h
index 6928c24d1d42..da8fbf565991 100644
--- a/trunk/fs/cifs/cifsproto.h
+++ b/trunk/fs/cifs/cifsproto.h
@@ -86,17 +86,18 @@ extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
 			const int stage,
 			const struct nls_table *nls_cp);
 extern __u16 GetNextMid(struct TCP_Server_Info *server);
+extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16,
+					 struct cifsTconInfo *);
+extern void DeleteOplockQEntry(struct oplock_q_entry *);
+extern void DeleteTconOplockQEntries(struct cifsTconInfo *);
 extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
 extern u64 cifs_UnixTimeToNT(struct timespec);
 extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
 				      int offset);
-extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
-				__u16 fileHandle, struct file *file,
-				struct vfsmount *mnt, unsigned int oflags);
 extern int cifs_posix_open(char *full_path, struct inode **pinode,
-			   struct vfsmount *mnt, int mode, int oflags,
-			   __u32 *poplock, __u16 *pnetfid, int xid);
+			   struct super_block *sb, int mode, int oflags,
+			   int *poplock, __u16 *pnetfid, int xid);
 extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
 				     FILE_UNIX_BASIC_INFO *info,
 				     struct cifs_sb_info *cifs_sb);
diff --git a/trunk/fs/cifs/cifssmb.c b/trunk/fs/cifs/cifssmb.c
index 941441d3e386..301e307e1279 100644
--- a/trunk/fs/cifs/cifssmb.c
+++ b/trunk/fs/cifs/cifssmb.c
@@ -94,7 +94,6 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
 	list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
 		open_file->invalidHandle = true;
-		open_file->oplock_break_cancelled = true;
 	}
 	write_unlock(&GlobalSMBSeslock);
 	/* BB Add call to invalidate_inodes(sb) for all superblocks mounted
diff --git a/trunk/fs/cifs/connect.c b/trunk/fs/cifs/connect.c
index 43003e0bef18..d49682433c20 100644
--- a/trunk/fs/cifs/connect.c
+++ b/trunk/fs/cifs/connect.c
@@ -1670,6 +1670,7 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
 	CIFSSMBTDis(xid, tcon);
 	_FreeXid(xid);
 
+	DeleteTconOplockQEntries(tcon);
 	tconInfoFree(tcon);
 	cifs_put_smb_ses(ses);
 }
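The cifsglob.h hunk above reverts cifsFileInfo_put() to a bare atomic_dec_and_test() that frees the structure on the final put (dropping the iput() of the cached inode along with the vfsmount pinning). For illustration, here is a compressed sketch of that get/put reference-counting idiom in portable C; the struct, fields, and helper names are invented, not the CIFS ones:

	/* Minimal get/put refcount sketch, modelled loosely on
	 * cifsFileInfo_get()/cifsFileInfo_put(). Hypothetical names. */
	#include <stdatomic.h>
	#include <stdlib.h>

	struct file_info {
		atomic_int count;
		int netfid;
	};

	static struct file_info *file_info_new(int netfid)
	{
		struct file_info *f = calloc(1, sizeof(*f));

		if (f) {
			atomic_init(&f->count, 1); /* creator holds first ref */
			f->netfid = netfid;
		}
		return f;
	}

	static void file_info_get(struct file_info *f)
	{
		atomic_fetch_add(&f->count, 1);
	}

	static void file_info_put(struct file_info *f)
	{
		/* last put frees; cf. atomic_dec_and_test() in the patch */
		if (atomic_fetch_sub(&f->count, 1) == 1)
			free(f);
	}

	int main(void)
	{
		struct file_info *f = file_info_new(42);

		file_info_get(f);	/* second user */
		file_info_put(f);	/* object survives */
		file_info_put(f);	/* final reference: freed here */
		return 0;
	}

The invariant is that every stored pointer pairs one get with exactly one put; the creator's initial reference covers the pointer returned from the constructor.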
diff --git a/trunk/fs/cifs/dir.c b/trunk/fs/cifs/dir.c
index 627a60a6c1b1..a6424cfc0121 100644
--- a/trunk/fs/cifs/dir.c
+++ b/trunk/fs/cifs/dir.c
@@ -24,7 +24,6 @@
 #include
 #include
 #include
-#include
 #include "cifsfs.h"
 #include "cifspdu.h"
 #include "cifsglob.h"
@@ -130,45 +129,44 @@ build_path_from_dentry(struct dentry *direntry)
 	return full_path;
 }
 
-struct cifsFileInfo *
-cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
-		  struct file *file, struct vfsmount *mnt, unsigned int oflags)
+static void
+cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle,
+		   struct cifsTconInfo *tcon, bool write_only)
 {
 	int oplock = 0;
 	struct cifsFileInfo *pCifsFile;
 	struct cifsInodeInfo *pCifsInode;
-	struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb);
 
 	pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
+
 	if (pCifsFile == NULL)
-		return pCifsFile;
+		return;
 
 	if (oplockEnabled)
 		oplock = REQ_OPLOCK;
 
 	pCifsFile->netfid = fileHandle;
 	pCifsFile->pid = current->tgid;
-	pCifsFile->pInode = igrab(newinode);
-	pCifsFile->mnt = mnt;
-	pCifsFile->pfile = file;
+	pCifsFile->pInode = newinode;
 	pCifsFile->invalidHandle = false;
 	pCifsFile->closePend = false;
 	mutex_init(&pCifsFile->fh_mutex);
 	mutex_init(&pCifsFile->lock_mutex);
 	INIT_LIST_HEAD(&pCifsFile->llist);
 	atomic_set(&pCifsFile->count, 1);
-	slow_work_init(&pCifsFile->oplock_break, &cifs_oplock_break_ops);
+	/* set the following in open now
+			pCifsFile->pfile = file; */
 	write_lock(&GlobalSMBSeslock);
-	list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
+	list_add(&pCifsFile->tlist, &tcon->openFileList);
 	pCifsInode = CIFS_I(newinode);
 	if (pCifsInode) {
 		/* if readable file instance put first in list*/
-		if (oflags & FMODE_READ)
-			list_add(&pCifsFile->flist, &pCifsInode->openFileList);
-		else
+		if (write_only)
 			list_add_tail(&pCifsFile->flist,
 				      &pCifsInode->openFileList);
+		else
+			list_add(&pCifsFile->flist, &pCifsInode->openFileList);
 
 		if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
 			pCifsInode->clientCanCacheAll = true;
@@ -178,18 +176,18 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
 			pCifsInode->clientCanCacheRead = true;
 	}
 	write_unlock(&GlobalSMBSeslock);
-
-	return pCifsFile;
 }
 
 int cifs_posix_open(char *full_path, struct inode **pinode,
-		    struct vfsmount *mnt, int mode, int oflags,
-		    __u32 *poplock, __u16 *pnetfid, int xid)
+		    struct super_block *sb, int mode, int oflags,
+		    int *poplock, __u16 *pnetfid, int xid)
 {
 	int rc;
+	__u32 oplock;
+	bool write_only = false;
 	FILE_UNIX_BASIC_INFO *presp_data;
 	__u32 posix_flags = 0;
-	struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 	struct cifs_fattr fattr;
 
 	cFYI(1, ("posix open %s", full_path));
@@ -225,9 +223,12 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
 	if (oflags & O_DIRECT)
 		posix_flags |= SMB_O_DIRECT;
 
+	if (!(oflags & FMODE_READ))
+		write_only = true;
+
 	mode &= ~current_umask();
 	rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode,
-			pnetfid, presp_data, poplock, full_path,
+			pnetfid, presp_data, &oplock, full_path,
 			cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 	if (rc)
@@ -243,7 +244,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
 
 	/* get new inode and set it up */
 	if (*pinode == NULL) {
-		*pinode = cifs_iget(mnt->mnt_sb, &fattr);
+		*pinode = cifs_iget(sb, &fattr);
 		if (!*pinode) {
 			rc = -ENOMEM;
 			goto posix_open_ret;
@@ -252,7 +253,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
 		cifs_fattr_to_inode(*pinode, &fattr);
 	}
 
-	cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags);
+	cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only);
 
 posix_open_ret:
 	kfree(presp_data);
@@ -279,7 +280,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	int rc = -ENOENT;
 	int xid;
 	int create_options = CREATE_NOT_DIR;
-	__u32 oplock = 0;
+	int oplock = 0;
 	int oflags;
 	bool posix_create = false;
 	/*
@@ -297,6 +298,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	FILE_ALL_INFO *buf = NULL;
 	struct inode *newinode = NULL;
 	int disposition = FILE_OVERWRITE_IF;
+	bool write_only = false;
 
 	xid = GetXid();
 
@@ -321,7 +323,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
-		rc = cifs_posix_open(full_path, &newinode, nd->path.mnt,
+		rc = cifs_posix_open(full_path, &newinode, inode->i_sb,
 				     mode, oflags, &oplock, &fileHandle, xid);
 		/* EIO could indicate that (posix open) operation is not
 		   supported, despite what server claimed in capability
@@ -349,8 +351,11 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 		desiredAccess = 0;
 		if (oflags & FMODE_READ)
 			desiredAccess |= GENERIC_READ; /* is this too little? */
-		if (oflags & FMODE_WRITE)
+		if (oflags & FMODE_WRITE) {
 			desiredAccess |= GENERIC_WRITE;
+			if (!(oflags & FMODE_READ))
+				write_only = true;
+		}
 
 		if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 			disposition = FILE_CREATE;
@@ -465,8 +470,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			/* mknod case - do not leave file open */
 			CIFSSMBClose(xid, tcon, fileHandle);
 		} else if (!(posix_create) && (newinode)) {
-			cifs_new_fileinfo(newinode, fileHandle, NULL,
-					  nd->path.mnt, oflags);
+			cifs_fill_fileinfo(newinode, fileHandle,
+					   cifs_sb->tcon, write_only);
 		}
 cifs_create_out:
 	kfree(buf);
@@ -606,7 +611,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 {
 	int xid;
 	int rc = 0; /* to get around spurious gcc warning, set to zero here */
-	__u32 oplock = 0;
+	int oplock = 0;
 	__u16 fileHandle = 0;
 	bool posix_open = false;
 	struct cifs_sb_info *cifs_sb;
@@ -678,7 +683,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
 	     (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
 	     (nd->intent.open.flags & O_CREAT)) {
-		rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+		rc = cifs_posix_open(full_path, &newInode,
+				parent_dir_inode->i_sb,
 				nd->intent.open.create_mode,
 				nd->intent.open.flags, &oplock,
 				&fileHandle, xid);
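cifs_fill_fileinfo() above keeps readable handles at the head of the per-inode open-file list and write-only handles at the tail, so later scans for a handle that can service reads stop at the first element. A small sketch of that ordering policy with a hypothetical singly linked list (not the kernel list_head API):

	/* Readable-first list ordering, cf. list_add() vs list_add_tail()
	 * in cifs_fill_fileinfo(). Types and names are illustrative. */
	#include <stdbool.h>
	#include <stdio.h>

	struct handle { bool write_only; struct handle *next; };

	static struct handle *open_list;

	static void add_handle(struct handle *h)
	{
		if (h->write_only) {		/* cf. list_add_tail() */
			struct handle **p = &open_list;

			while (*p)
				p = &(*p)->next;
			h->next = NULL;
			*p = h;
		} else {			/* cf. list_add() */
			h->next = open_list;
			open_list = h;
		}
	}

	int main(void)
	{
		struct handle w = { .write_only = true };
		struct handle r = { .write_only = false };

		add_handle(&w);
		add_handle(&r);
		printf("first handle readable: %s\n",
		       open_list->write_only ? "no" : "yes");
		return 0;
	}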
diff --git a/trunk/fs/cifs/file.c b/trunk/fs/cifs/file.c
index 429337eb7afe..fa7beac8b80e 100644
--- a/trunk/fs/cifs/file.c
+++ b/trunk/fs/cifs/file.c
@@ -30,7 +30,6 @@
 #include
 #include
 #include
-#include
 #include
 #include "cifsfs.h"
 #include "cifspdu.h"
@@ -40,6 +39,27 @@
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
 
+static inline struct cifsFileInfo *cifs_init_private(
+	struct cifsFileInfo *private_data, struct inode *inode,
+	struct file *file, __u16 netfid)
+{
+	memset(private_data, 0, sizeof(struct cifsFileInfo));
+	private_data->netfid = netfid;
+	private_data->pid = current->tgid;
+	mutex_init(&private_data->fh_mutex);
+	mutex_init(&private_data->lock_mutex);
+	INIT_LIST_HEAD(&private_data->llist);
+	private_data->pfile = file; /* needed for writepage */
+	private_data->pInode = inode;
+	private_data->invalidHandle = false;
+	private_data->closePend = false;
+	/* Initialize reference count to one.  The private data is
+	freed on the release of the last reference */
+	atomic_set(&private_data->count, 1);
+
+	return private_data;
+}
+
 static inline int cifs_convert_flags(unsigned int flags)
 {
 	if ((flags & O_ACCMODE) == O_RDONLY)
@@ -103,11 +123,9 @@ static inline int cifs_get_disposition(unsigned int flags)
 }
 
 /* all arguments to this function must be checked for validity in caller */
-static inline int
-cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
-			     struct cifsInodeInfo *pCifsInode,
-			     struct cifsFileInfo *pCifsFile, __u32 oplock,
-			     u16 netfid)
+static inline int cifs_posix_open_inode_helper(struct inode *inode,
+		struct file *file, struct cifsInodeInfo *pCifsInode,
+		struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
 {
 
 	write_lock(&GlobalSMBSeslock);
@@ -201,6 +219,17 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 	struct timespec temp;
 	int rc;
 
+	/* want handles we can use to read with first
+	   in the list so we do not have to walk the
+	   list to search for one in write_begin */
+	if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
+		list_add_tail(&pCifsFile->flist,
+			      &pCifsInode->openFileList);
+	} else {
+		list_add(&pCifsFile->flist,
+			 &pCifsInode->openFileList);
+	}
+	write_unlock(&GlobalSMBSeslock);
 	if (pCifsInode->clientCanCacheRead) {
 		/* we have the inode open somewhere else
 		   no need to discard cache data */
@@ -250,8 +279,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 int cifs_open(struct inode *inode, struct file *file)
 {
 	int rc = -EACCES;
-	int xid;
-	__u32 oplock;
+	int xid, oplock;
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *tcon;
 	struct cifsFileInfo *pCifsFile;
@@ -296,7 +324,7 @@ int cifs_open(struct inode *inode, struct file *file)
 			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		int oflags = (int) cifs_posix_convert_flags(file->f_flags);
 		/* can not refresh inode info since size could be stale */
-		rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
+		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
 				     cifs_sb->mnt_file_mode /* ignored */,
 				     oflags, &oplock, &netfid, xid);
 		if (rc == 0) {
@@ -386,17 +414,24 @@ int cifs_open(struct inode *inode, struct file *file)
 		cFYI(1, ("cifs_open returned 0x%x", rc));
 		goto out;
 	}
-
-	pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
-				      file->f_flags);
-	file->private_data = pCifsFile;
+	file->private_data =
+		kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 	if (file->private_data == NULL) {
 		rc = -ENOMEM;
 		goto out;
 	}
+	pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
+	write_lock(&GlobalSMBSeslock);
+	list_add(&pCifsFile->tlist, &tcon->openFileList);
 
-	rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
-				    &oplock, buf, full_path, xid);
+	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
+	if (pCifsInode) {
+		rc = cifs_open_inode_helper(inode, file, pCifsInode,
+					    pCifsFile, tcon,
+					    &oplock, buf, full_path, xid);
+	} else {
+		write_unlock(&GlobalSMBSeslock);
+	}
 
 	if (oplock & CIFS_CREATE_ACTION) {
 		/* time to set mode which we can not set earlier due to
@@ -439,8 +474,7 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile)
 static int cifs_reopen_file(struct file *file, bool can_flush)
 {
 	int rc = -EACCES;
-	int xid;
-	__u32 oplock;
+	int xid, oplock;
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *tcon;
 	struct cifsFileInfo *pCifsFile;
@@ -509,7 +543,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
 			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		int oflags = (int) cifs_posix_convert_flags(file->f_flags);
 		/* can not refresh inode info since size could be stale */
-		rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
+		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
 				     cifs_sb->mnt_file_mode /* ignored */,
 				     oflags, &oplock, &netfid, xid);
 		if (rc == 0) {
@@ -2274,73 +2308,6 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
 	return rc;
 }
 
-static void
-cifs_oplock_break(struct slow_work *work)
-{
-	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
-						  oplock_break);
-	struct inode *inode = cfile->pInode;
-	struct cifsInodeInfo *cinode = CIFS_I(inode);
-	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->mnt->mnt_sb);
-	int rc, waitrc = 0;
-
-	if (inode && S_ISREG(inode->i_mode)) {
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-		if (cinode->clientCanCacheAll == 0)
-			break_lease(inode, FMODE_READ);
-		else if (cinode->clientCanCacheRead == 0)
-			break_lease(inode, FMODE_WRITE);
-#endif
-		rc = filemap_fdatawrite(inode->i_mapping);
-		if (cinode->clientCanCacheRead == 0) {
-			waitrc = filemap_fdatawait(inode->i_mapping);
-			invalidate_remote_inode(inode);
-		}
-		if (!rc)
-			rc = waitrc;
-		if (rc)
-			cinode->write_behind_rc = rc;
-		cFYI(1, ("Oplock flush inode %p rc %d", inode, rc));
-	}
-
-	/*
-	 * releasing stale oplock after recent reconnect of smb session using
-	 * a now incorrect file handle is not a data integrity issue but do
-	 * not bother sending an oplock release if session to server still is
-	 * disconnected since oplock already released by the server
-	 */
-	if (!cfile->closePend && !cfile->oplock_break_cancelled) {
-		rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0,
-				 LOCKING_ANDX_OPLOCK_RELEASE, false);
-		cFYI(1, ("Oplock release rc = %d", rc));
-	}
-}
-
-static int
-cifs_oplock_break_get(struct slow_work *work)
-{
-	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
-						  oplock_break);
-	mntget(cfile->mnt);
-	cifsFileInfo_get(cfile);
-	return 0;
-}
-
-static void
-cifs_oplock_break_put(struct slow_work *work)
-{
-	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
-						  oplock_break);
-	mntput(cfile->mnt);
-	cifsFileInfo_put(cfile);
-}
-
-const struct slow_work_ops cifs_oplock_break_ops = {
-	.get_ref	= cifs_oplock_break_get,
-	.put_ref	= cifs_oplock_break_put,
-	.execute	= cifs_oplock_break,
-};
-
 const struct address_space_operations cifs_addr_ops = {
 	.readpage = cifs_readpage,
 	.readpages = cifs_readpages,
diff --git a/trunk/fs/cifs/misc.c b/trunk/fs/cifs/misc.c
index 0241b25ac33f..e079a9190ec4 100644
--- a/trunk/fs/cifs/misc.c
+++ b/trunk/fs/cifs/misc.c
@@ -32,6 +32,7 @@
 
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
+extern struct task_struct *oplockThread;
 
 /* The xid serves as a useful identifier for each incoming vfs request,
    in a similar way to the mid which is useful to track each sent smb,
@@ -499,7 +500,6 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
 	struct cifsTconInfo *tcon;
 	struct cifsInodeInfo *pCifsInode;
 	struct cifsFileInfo *netfile;
-	int rc;
 
 	cFYI(1, ("Checking for oplock break or dnotify response"));
 	if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) &&
@@ -562,40 +562,30 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
 				continue;
 
 			cifs_stats_inc(&tcon->num_oplock_brks);
-			read_lock(&GlobalSMBSeslock);
+			write_lock(&GlobalSMBSeslock);
 			list_for_each(tmp2, &tcon->openFileList) {
 				netfile = list_entry(tmp2, struct cifsFileInfo,
 						     tlist);
 				if (pSMB->Fid != netfile->netfid)
 					continue;
 
-				/*
-				 * don't do anything if file is about to be
-				 * closed anyway.
-				 */
-				if (netfile->closePend) {
-					read_unlock(&GlobalSMBSeslock);
-					read_unlock(&cifs_tcp_ses_lock);
-					return true;
-				}
-
+				write_unlock(&GlobalSMBSeslock);
+				read_unlock(&cifs_tcp_ses_lock);
 				cFYI(1, ("file id match, oplock break"));
 				pCifsInode = CIFS_I(netfile->pInode);
 				pCifsInode->clientCanCacheAll = false;
 				if (pSMB->OplockLevel == 0)
 					pCifsInode->clientCanCacheRead = false;
-				rc = slow_work_enqueue(&netfile->oplock_break);
-				if (rc) {
-					cERROR(1, ("failed to enqueue oplock "
-						   "break: %d\n", rc));
-				} else {
-					netfile->oplock_break_cancelled = false;
-				}
-				read_unlock(&GlobalSMBSeslock);
-				read_unlock(&cifs_tcp_ses_lock);
+				pCifsInode->oplockPending = true;
+				AllocOplockQEntry(netfile->pInode,
+						  netfile->netfid, tcon);
+				cFYI(1, ("about to wake up oplock thread"));
+				if (oplockThread)
+					wake_up_process(oplockThread);
+
 				return true;
 			}
-			read_unlock(&GlobalSMBSeslock);
+			write_unlock(&GlobalSMBSeslock);
 			read_unlock(&cifs_tcp_ses_lock);
 			cFYI(1, ("No matching file for oplock break"));
 			return true;
diff --git a/trunk/fs/cifs/readdir.c b/trunk/fs/cifs/readdir.c
index 1f098ca71636..f823a4a208a7 100644
--- a/trunk/fs/cifs/readdir.c
+++ b/trunk/fs/cifs/readdir.c
@@ -146,7 +146,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 	}
 }
 
-static void
+void
 cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
 		       struct cifs_sb_info *cifs_sb)
 {
@@ -161,7 +161,7 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
 	cifs_fill_common_info(fattr, cifs_sb);
 }
 
-static void
+void
 cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info,
 		       struct cifs_sb_info *cifs_sb)
 {
diff --git a/trunk/fs/cifs/transport.c b/trunk/fs/cifs/transport.c
index 07b8e71544ee..1da4ab250eae 100644
--- a/trunk/fs/cifs/transport.c
+++ b/trunk/fs/cifs/transport.c
@@ -103,6 +103,56 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 	mempool_free(midEntry, cifs_mid_poolp);
 }
 
+struct oplock_q_entry *
+AllocOplockQEntry(struct inode *pinode, __u16 fid, struct cifsTconInfo *tcon)
+{
+	struct oplock_q_entry *temp;
+	if ((pinode == NULL) || (tcon == NULL)) {
+		cERROR(1, ("Null parms passed to AllocOplockQEntry"));
+		return NULL;
+	}
+	temp = (struct oplock_q_entry *) kmem_cache_alloc(cifs_oplock_cachep,
+							 GFP_KERNEL);
+	if (temp == NULL)
+		return temp;
+	else {
+		temp->pinode = pinode;
+		temp->tcon = tcon;
+		temp->netfid = fid;
+		spin_lock(&cifs_oplock_lock);
+		list_add_tail(&temp->qhead, &cifs_oplock_list);
+		spin_unlock(&cifs_oplock_lock);
+	}
+	return temp;
+}
+
+void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry)
+{
+	spin_lock(&cifs_oplock_lock);
+	/* should we check if list empty first? */
+	list_del(&oplockEntry->qhead);
+	spin_unlock(&cifs_oplock_lock);
+	kmem_cache_free(cifs_oplock_cachep, oplockEntry);
+}
+
+
+void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
+{
+	struct oplock_q_entry *temp;
+
+	if (tcon == NULL)
+		return;
+
+	spin_lock(&cifs_oplock_lock);
+	list_for_each_entry(temp, &cifs_oplock_list, qhead) {
+		if ((temp->tcon) && (temp->tcon == tcon)) {
+			list_del(&temp->qhead);
+			kmem_cache_free(cifs_oplock_cachep, temp);
+		}
+	}
+	spin_unlock(&cifs_oplock_lock);
+}
+
 static int
 smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 {
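DeleteTconOplockQEntries() above unlinks and frees entries while walking cifs_oplock_list. The standard idiom for delete-during-iteration is to capture the next node before freeing the current one (list_for_each_entry_safe() in kernel code, which this loop does not use). A minimal userspace sketch of that shape, with a hypothetical singly linked queue rather than the kernel list_head API:

	/* Delete-while-iterating done safely: never dereference a node
	 * after freeing it. Types and names are illustrative only. */
	#include <stdlib.h>

	struct q_entry { int tcon_id; struct q_entry *next; };

	static void delete_tcon_entries(struct q_entry **head, int tcon_id)
	{
		struct q_entry **pp = head;

		while (*pp) {
			struct q_entry *cur = *pp;

			if (cur->tcon_id == tcon_id) {
				*pp = cur->next; /* unlink first... */
				free(cur);	 /* ...then free; cur is dead */
			} else {
				pp = &cur->next;
			}
		}
	}

	int main(void)
	{
		struct q_entry *head = NULL;

		for (int i = 0; i < 4; i++) {
			struct q_entry *e = malloc(sizeof(*e));

			e->tcon_id = i % 2;
			e->next = head;
			head = e;
		}
		delete_tcon_entries(&head, 1); /* frees both id==1 entries */
		delete_tcon_entries(&head, 0); /* frees the rest */
		return 0;
	}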
diff --git a/trunk/fs/fs-writeback.c b/trunk/fs/fs-writeback.c
index 9d5360c4c2af..8e1e5e19d21e 100644
--- a/trunk/fs/fs-writeback.c
+++ b/trunk/fs/fs-writeback.c
@@ -41,9 +41,8 @@ struct wb_writeback_args {
 	long nr_pages;
 	struct super_block *sb;
 	enum writeback_sync_modes sync_mode;
-	int for_kupdate:1;
-	int range_cyclic:1;
-	int for_background:1;
+	int for_kupdate;
+	int range_cyclic;
 };
 
 /*
@@ -250,25 +249,14 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
  * completion. Caller need not hold sb s_umount semaphore.
  *
 */
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
 	struct wb_writeback_args args = {
-		.sb		= sb,
 		.sync_mode	= WB_SYNC_NONE,
 		.nr_pages	= nr_pages,
 		.range_cyclic	= 1,
 	};
 
-	/*
-	 * We treat @nr_pages=0 as the special case to do background writeback,
-	 * ie. to sync pages until the background dirty threshold is reached.
-	 */
-	if (!nr_pages) {
-		args.nr_pages = LONG_MAX;
-		args.for_background = 1;
-	}
-
 	bdi_alloc_queue_work(bdi, &args);
 }
 
@@ -322,7 +310,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
 	 * For inodes being constantly redirtied, dirtied_when can get stuck.
 	 * It _appears_ to be in the future, but is actually in distant past.
 	 * This test is necessary to prevent such wrapped-around relative times
-	 * from permanently stopping the whole bdi writeback.
+	 * from permanently stopping the whole pdflush writeback.
 	 */
 	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
 #endif
@@ -336,38 +324,13 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 			       struct list_head *dispatch_queue,
 				unsigned long *older_than_this)
 {
-	LIST_HEAD(tmp);
-	struct list_head *pos, *node;
-	struct super_block *sb = NULL;
-	struct inode *inode;
-	int do_sb_sort = 0;
-
 	while (!list_empty(delaying_queue)) {
-		inode = list_entry(delaying_queue->prev, struct inode, i_list);
+		struct inode *inode = list_entry(delaying_queue->prev,
+						struct inode, i_list);
 		if (older_than_this &&
 		    inode_dirtied_after(inode, *older_than_this))
 			break;
-		if (sb && sb != inode->i_sb)
-			do_sb_sort = 1;
-		sb = inode->i_sb;
-		list_move(&inode->i_list, &tmp);
-	}
-
-	/* just one sb in list, splice to dispatch_queue and we're done */
-	if (!do_sb_sort) {
-		list_splice(&tmp, dispatch_queue);
-		return;
-	}
-
-	/* Move inodes from one superblock together */
-	while (!list_empty(&tmp)) {
-		inode = list_entry(tmp.prev, struct inode, i_list);
-		sb = inode->i_sb;
-		list_for_each_prev_safe(pos, node, &tmp) {
-			inode = list_entry(pos, struct inode, i_list);
-			if (inode->i_sb == sb)
-				list_move(&inode->i_list, dispatch_queue);
-		}
+		list_move(&inode->i_list, dispatch_queue);
 	}
 }
 
@@ -476,18 +439,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	spin_lock(&inode_lock);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
-		if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
-			/*
-			 * More pages get dirtied by a fast dirtier.
-			 */
-			goto select_queue;
-		} else if (inode->i_state & I_DIRTY) {
-			/*
-			 * At least XFS will redirty the inode during the
-			 * writeback (delalloc) and on io completion (isize).
-			 */
-			redirty_tail(inode);
-		} else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		if (!(inode->i_state & I_DIRTY) &&
+		    mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 			/*
 			 * We didn't write back all the pages.  nfs_writepages()
 			 * sometimes bales out without doing anything. Redirty
@@ -509,7 +462,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			 * soon as the queue becomes uncongested.
 			 */
 			inode->i_state |= I_DIRTY_PAGES;
-select_queue:
 			if (wbc->nr_to_write <= 0) {
 				/*
 				 * slice used up: queue for next turn
@@ -532,6 +484,12 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 				inode->i_state |= I_DIRTY_PAGES;
 				redirty_tail(inode);
 			}
+		} else if (inode->i_state & I_DIRTY) {
+			/*
+			 * Someone redirtied the inode while were writing back
+			 * the pages.
+			 */
+			redirty_tail(inode);
 		} else if (atomic_read(&inode->i_count)) {
 			/*
 			 * The inode is clean, inuse
@@ -548,17 +506,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block **psb)
-{
-	struct super_block *sb = *psb;
-
-	if (sb) {
-		up_read(&sb->s_umount);
-		put_super(sb);
-		*psb = NULL;
-	}
-}
-
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
@@ -568,19 +515,10 @@ static void unpin_sb_for_writeback(struct super_block **psb)
  * 1 if we failed.
  */
 static int pin_sb_for_writeback(struct writeback_control *wbc,
-				struct inode *inode, struct super_block **psb)
+				struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 
-	/*
-	 * If this sb is already pinned, nothing more to do. If not and
-	 * *psb is non-NULL, unpin the old one first
-	 */
-	if (sb == *psb)
-		return 0;
-	else if (*psb)
-		unpin_sb_for_writeback(psb);
-
 	/*
 	 * Caller must already hold the ref for this
 	 */
@@ -594,7 +532,7 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
 	if (down_read_trylock(&sb->s_umount)) {
 		if (sb->s_root) {
 			spin_unlock(&sb_lock);
-			goto pinned;
+			return 0;
 		}
 		/*
 		 * umounted, drop rwsem again and fall through to failure
		 */
 		up_read(&sb->s_umount);
 	}
 
 	sb->s_count--;
 	spin_unlock(&sb_lock);
 	return 1;
-pinned:
-	*psb = sb;
-	return 0;
+}
+
+static void unpin_sb_for_writeback(struct writeback_control *wbc,
+				   struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		return;
+
+	up_read(&sb->s_umount);
+	put_super(sb);
 }
 
 static void writeback_inodes_wb(struct bdi_writeback *wb,
 				struct writeback_control *wbc)
 {
-	struct super_block *sb = wbc->sb, *pin_sb = NULL;
+	struct super_block *sb = wbc->sb;
 	const int is_blkdev_sb = sb_is_blkdev_sb(sb);
 	const unsigned long start = jiffies;	/* livelock avoidance */
 
@@ -672,7 +619,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		if (inode_dirtied_after(inode, start))
 			break;
 
-		if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
+		if (pin_sb_for_writeback(wbc, inode)) {
 			requeue_io(inode);
 			continue;
 		}
@@ -681,6 +628,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
+		unpin_sb_for_writeback(wbc, inode);
 		if (wbc->pages_skipped != pages_skipped) {
 			/*
 			 * writeback is not making progress due to locked
@@ -700,8 +648,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		wbc->more_io = 1;
 	}
 
-	unpin_sb_for_writeback(&pin_sb);
-
 	spin_unlock(&inode_lock);
 	/* Leave any unwritten inodes on b_io */
 }
@@ -760,7 +706,6 @@ static long wb_writeback(struct bdi_writeback *wb,
 	};
 	unsigned long oldest_jif;
 	long wrote = 0;
-	struct inode *inode;
 
 	if (wbc.for_kupdate) {
 		wbc.older_than_this = &oldest_jif;
@@ -774,16 +719,20 @@ static long wb_writeback(struct bdi_writeback *wb,
 
 	for (;;) {
 		/*
-		 * Stop writeback when nr_pages has been consumed
+		 * Don't flush anything for non-integrity writeback where
+		 * no nr_pages was given
 		 */
-		if (args->nr_pages <= 0)
+		if (!args->for_kupdate && args->nr_pages <= 0 &&
+		    args->sync_mode == WB_SYNC_NONE)
 			break;
 
 		/*
-		 * For background writeout, stop when we are below the
-		 * background dirty threshold
+		 * If no specific pages were given and this is just a
+		 * periodic background writeout and we are below the
+		 * background dirty threshold, don't do anything
 		 */
-		if (args->for_background && !over_bground_thresh())
+		if (args->for_kupdate && args->nr_pages <= 0 &&
+		    !over_bground_thresh())
 			break;
 
 		wbc.more_io = 0;
@@ -795,32 +744,13 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
 		/*
-		 * If we consumed everything, see if we have more
-		 */
-		if (wbc.nr_to_write <= 0)
-			continue;
-		/*
-		 * Didn't write everything and we don't have more IO, bail
+		 * If we ran out of stuff to write, bail unless more_io got set
 		 */
-		if (!wbc.more_io)
+		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
+			if (wbc.more_io && !wbc.for_kupdate)
+				continue;
 			break;
-		/*
-		 * Did we write something? Try for more
-		 */
-		if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
-			continue;
-		/*
-		 * Nothing written. Wait for some inode to
-		 * become available for writeback. Otherwise
-		 * we'll just busyloop.
-		 */
-		spin_lock(&inode_lock);
-		if (!list_empty(&wb->b_more_io))  {
-			inode = list_entry(wb->b_more_io.prev,
-						struct inode, i_list);
-			inode_wait_for_writeback(inode);
 		}
-		spin_unlock(&inode_lock);
 	}
 
 	return wrote;
@@ -1130,6 +1060,9 @@ EXPORT_SYMBOL(__mark_inode_dirty);
 * If older_than_this is non-NULL, then only write out inodes which
 * had their first dirtying at a time earlier than *older_than_this.
 *
+ * If we're a pdflush thread, then implement pdflush collision avoidance
+ * against the entire list.
+ *
 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
 * This function assumes that the blockdev superblock's inodes are backed by
 * a variety of queues, so all inodes are searched.  For other superblocks,
@@ -1208,7 +1141,7 @@ void writeback_inodes_sb(struct super_block *sb)
 	nr_to_write = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
+	bdi_writeback_all(sb, nr_to_write);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
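The fs-writeback.c hunks above gate periodic background writeout on over_bground_thresh(), i.e. whether dirty pages exceed a fixed percentage of dirtyable memory (dirty_background_ratio, 10 by default per the page-writeback.c diff below). A rough standalone model of that decision; the numbers and helper name are illustrative only, not the kernel's actual accounting:

	/* Background-writeback trigger, modelled on dirty_background_ratio.
	 * Illustrative sketch; not the kernel's real bookkeeping. */
	#include <stdbool.h>
	#include <stdio.h>

	static bool over_background_thresh(unsigned long dirty_pages,
					   unsigned long dirtyable_pages,
					   unsigned int background_ratio)
	{
		unsigned long thresh = dirtyable_pages * background_ratio / 100;

		return dirty_pages > thresh;
	}

	int main(void)
	{
		/* 10% of 1,000,000 pages = 100,000, so 120,000 dirty
		 * pages would wake the flusher */
		printf("%d\n", over_background_thresh(120000, 1000000, 10));
		return 0;
	}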
diff --git a/trunk/include/linux/backing-dev.h b/trunk/include/linux/backing-dev.h
index b449e738533a..0ee33c2e6129 100644
--- a/trunk/include/linux/backing-dev.h
+++ b/trunk/include/linux/backing-dev.h
@@ -101,8 +101,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages);
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
 int bdi_writeback_task(struct bdi_writeback *wb);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 
diff --git a/trunk/mm/page-writeback.c b/trunk/mm/page-writeback.c
index a3b14090b1fb..d99664e8607e 100644
--- a/trunk/mm/page-writeback.c
+++ b/trunk/mm/page-writeback.c
@@ -44,21 +44,18 @@ static long ratelimit_pages = 32;
 /*
 * When balance_dirty_pages decides that the caller needs to perform some
 * non-background writeback, this is how many pages it will attempt to write.
- * It should be somewhat larger than dirtied pages to ensure that reasonably
+ * It should be somewhat larger than RATELIMIT_PAGES to ensure that reasonably
 * large amounts of I/O are submitted.
 */
-static inline long sync_writeback_pages(unsigned long dirtied)
+static inline long sync_writeback_pages(void)
 {
-	if (dirtied < ratelimit_pages)
-		dirtied = ratelimit_pages;
-
-	return dirtied + dirtied / 2;
+	return ratelimit_pages + ratelimit_pages / 2;
 }
 
 /* The following parameters are exported via /proc/sys/vm */
 
 /*
- * Start background writeback (via writeback threads) at this percentage
+ * Start background writeback (via pdflush) at this percentage
 */
 int dirty_background_ratio = 10;
 
@@ -477,11 +474,10 @@ get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to perform writeback if the system is over `vm_dirty_ratio'.
- * If we're over `background_thresh' then the writeback threads are woken to
- * perform some writeout.
+ * If we're over `background_thresh' then pdflush is woken to perform some
+ * writeout.
 */
-static void balance_dirty_pages(struct address_space *mapping,
-				unsigned long write_chunk)
+static void balance_dirty_pages(struct address_space *mapping)
 {
 	long nr_reclaimable, bdi_nr_reclaimable;
 	long nr_writeback, bdi_nr_writeback;
@@ -489,6 +485,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
 	unsigned long pages_written = 0;
+	unsigned long write_chunk = sync_writeback_pages();
 	unsigned long pause = 1;
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 
@@ -582,7 +579,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	bdi->dirty_exceeded = 0;
 
 	if (writeback_in_progress(bdi))
-		return;
+		return;		/* pdflush is already working this queue */
 
 	/*
 	 * In laptop mode, we wait until hitting the higher threshold before
@@ -593,10 +590,10 @@ static void balance_dirty_pages(struct address_space *mapping,
 	 * background_thresh, to keep the amount of dirty memory low.
 	 */
 	if ((laptop_mode && pages_written) ||
-	    (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
-			       + global_page_state(NR_UNSTABLE_NFS))
+	    (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
+					  + global_page_state(NR_UNSTABLE_NFS))
 					  > background_thresh)))
-		bdi_start_writeback(bdi, NULL, 0);
+		bdi_start_writeback(bdi, nr_writeback);
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -643,10 +640,9 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 	p =  &__get_cpu_var(bdp_ratelimits);
 	*p += nr_pages_dirtied;
 	if (unlikely(*p >= ratelimit)) {
-		ratelimit = sync_writeback_pages(*p);
 		*p = 0;
 		preempt_enable();
-		balance_dirty_pages(mapping, ratelimit);
+		balance_dirty_pages(mapping);
 		return;
 	}
 	preempt_enable();
diff --git a/trunk/mm/shmem.c b/trunk/mm/shmem.c
index ccf446a9faa1..98631c26c200 100644
--- a/trunk/mm/shmem.c
+++ b/trunk/mm/shmem.c
@@ -1046,9 +1046,8 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	 * sync from ever calling shmem_writepage; but a stacking filesystem
 	 * may use the ->writepage of its underlying filesystem, in which case
 	 * tmpfs should write out to swap only in response to memory pressure,
-	 * and not for the writeback threads or sync. However, in those cases,
-	 * we do still want to check if there's a redundant swappage to be
-	 * discarded.
+	 * and not for pdflush or sync.  However, in those cases, we do still
+	 * want to check if there's a redundant swappage to be discarded.
	 */
 	if (wbc->for_reclaim)
 		swap = get_swap_page();
diff --git a/trunk/mm/vmscan.c b/trunk/mm/vmscan.c
index 64e438898832..1219ceb8a9b2 100644
--- a/trunk/mm/vmscan.c
+++ b/trunk/mm/vmscan.c
@@ -1709,10 +1709,10 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
 *
 * If the caller is !__GFP_FS then the probability of a failure is reasonably
 * high - the zone may be full of dirty or under-writeback pages, which this
- * caller can't do much about.  We kick the writeback threads and take explicit
- * naps in the hope that some of these pages can be written.  But if the
- * allocating task holds filesystem locks which prevent writeout this might not
- * work, and the allocation attempt will fail.
+ * caller can't do much about.  We kick pdflush and take explicit naps in the
+ * hope that some of these pages can be written.  But if the allocating task
+ * holds filesystem locks which prevent writeout this might not work, and the
+ * allocation attempt will fail.
 *
 * returns:	0, if no pages reclaimed
 * 		else, the number of pages reclaimed
diff --git a/trunk/net/ax25/af_ax25.c b/trunk/net/ax25/af_ax25.c
index fbcac76fdc0d..d6b1b054e294 100644
--- a/trunk/net/ax25/af_ax25.c
+++ b/trunk/net/ax25/af_ax25.c
@@ -358,7 +358,6 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
 	ax25_dev *ax25_dev;
 	ax25_cb *ax25;
 	unsigned int k;
-	int ret = 0;
 
 	if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
 		return -EFAULT;
@@ -389,63 +388,57 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
 	case AX25_WINDOW:
 		if (ax25->modulus == AX25_MODULUS) {
 			if (ax25_ctl.arg < 1 || ax25_ctl.arg > 7)
-				goto einval_put;
+				return -EINVAL;
 		} else {
 			if (ax25_ctl.arg < 1 || ax25_ctl.arg > 63)
-				goto einval_put;
+				return -EINVAL;
 		}
 		ax25->window = ax25_ctl.arg;
 		break;
 
 	case AX25_T1:
 		if (ax25_ctl.arg < 1)
-			goto einval_put;
+			return -EINVAL;
 		ax25->rtt = (ax25_ctl.arg * HZ) / 2;
 		ax25->t1 = ax25_ctl.arg * HZ;
 		break;
 
 	case AX25_T2:
 		if (ax25_ctl.arg < 1)
-			goto einval_put;
+			return -EINVAL;
 		ax25->t2 = ax25_ctl.arg * HZ;
 		break;
 
 	case AX25_N2:
 		if (ax25_ctl.arg < 1 || ax25_ctl.arg > 31)
-			goto einval_put;
+			return -EINVAL;
 		ax25->n2count = 0;
 		ax25->n2 = ax25_ctl.arg;
 		break;
 
 	case AX25_T3:
 		if (ax25_ctl.arg < 0)
-			goto einval_put;
+			return -EINVAL;
 		ax25->t3 = ax25_ctl.arg * HZ;
 		break;
 
 	case AX25_IDLE:
 		if (ax25_ctl.arg < 0)
-			goto einval_put;
+			return -EINVAL;
 		ax25->idle = ax25_ctl.arg * 60 * HZ;
 		break;
 
 	case AX25_PACLEN:
 		if (ax25_ctl.arg < 16 || ax25_ctl.arg > 65535)
-			goto einval_put;
+			return -EINVAL;
 		ax25->paclen = ax25_ctl.arg;
 		break;
 
 	default:
-		goto einval_put;
+		return -EINVAL;
 	}
 
-out_put:
-	ax25_cb_put(ax25);
-	return ret;
-
-einval_put:
-	ret = -EINVAL;
-	goto out_put;
+	return 0;
 }
 
 static void ax25_fillin_cb_from_dev(ax25_cb *ax25, ax25_dev *ax25_dev)
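The ax25_ctl_ioctl() hunk above trades a centralized "goto" exit for direct returns. The goto form it removes is the common kernel idiom when a reference or lock must be released on every path out of a function; a compressed, self-contained sketch of that shape, with hypothetical helpers standing in for ax25_cb_put() and friends:

	/* Centralized-exit error handling: one put, many error paths.
	 * All names here are invented for illustration. */
	#include <stdio.h>

	static int refs;
	static void obj_get(void) { refs++; }
	static void obj_put(void) { refs--; }

	static int do_ioctl(long arg)
	{
		int ret = 0;

		obj_get();		/* reference taken up front */

		if (arg < 1) {
			ret = -22;	/* -EINVAL */
			goto out_put;	/* every error path reaches the put */
		}
		/* ... apply the setting ... */

	out_put:
		obj_put();		/* single exit keeps get/put balanced */
		return ret;
	}

	int main(void)
	{
		printf("rc=%d refs=%d\n", do_ioctl(0), refs);
		return 0;
	}

The design tradeoff is locality versus balance: direct returns read more simply per branch, while the single labelled exit makes it harder to leak a reference when a new error path is added later.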