diff --git a/[refs] b/[refs] index daf97e4237cf..d2b7b41b0df8 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 5e686019df425a4fd8003ce7f6eaccbe537331d8 +refs/heads/master: 45ff34d32a19e9008e7202ba2a7c0d0f40420228 diff --git a/trunk/arch/x86/include/asm/tsc.h b/trunk/arch/x86/include/asm/tsc.h index 1ca132fc0d03..c0427295e8f5 100644 --- a/trunk/arch/x86/include/asm/tsc.h +++ b/trunk/arch/x86/include/asm/tsc.h @@ -59,7 +59,5 @@ extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); extern int notsc_setup(char *); -extern void save_sched_clock_state(void); -extern void restore_sched_clock_state(void); #endif /* _ASM_X86_TSC_H */ diff --git a/trunk/arch/x86/kernel/tsc.c b/trunk/arch/x86/kernel/tsc.c index d632934cb638..ce8e50239332 100644 --- a/trunk/arch/x86/kernel/tsc.c +++ b/trunk/arch/x86/kernel/tsc.c @@ -626,44 +626,6 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) local_irq_restore(flags); } -static unsigned long long cyc2ns_suspend; - -void save_sched_clock_state(void) -{ - if (!sched_clock_stable) - return; - - cyc2ns_suspend = sched_clock(); -} - -/* - * Even on processors with invariant TSC, TSC gets reset in some the - * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to - * arbitrary value (still sync'd across cpu's) during resume from such sleep - * states. To cope up with this, recompute the cyc2ns_offset for each cpu so - * that sched_clock() continues from the point where it was left off during - * suspend. - */ -void restore_sched_clock_state(void) -{ - unsigned long long offset; - unsigned long flags; - int cpu; - - if (!sched_clock_stable) - return; - - local_irq_save(flags); - - get_cpu_var(cyc2ns_offset) = 0; - offset = cyc2ns_suspend - sched_clock(); - - for_each_possible_cpu(cpu) - per_cpu(cyc2ns_offset, cpu) = offset; - - local_irq_restore(flags); -} - #ifdef CONFIG_CPU_FREQ /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency diff --git a/trunk/arch/x86/power/cpu.c b/trunk/arch/x86/power/cpu.c index 87bb35e34ef1..e7e8c5f54956 100644 --- a/trunk/arch/x86/power/cpu.c +++ b/trunk/arch/x86/power/cpu.c @@ -113,7 +113,6 @@ static void __save_processor_state(struct saved_context *ctxt) void save_processor_state(void) { __save_processor_state(&saved_context); - save_sched_clock_state(); } #ifdef CONFIG_X86_32 EXPORT_SYMBOL(save_processor_state); @@ -230,7 +229,6 @@ static void __restore_processor_state(struct saved_context *ctxt) void restore_processor_state(void) { __restore_processor_state(&saved_context); - restore_sched_clock_state(); } #ifdef CONFIG_X86_32 EXPORT_SYMBOL(restore_processor_state); diff --git a/trunk/drivers/hwmon/coretemp.c b/trunk/drivers/hwmon/coretemp.c index c070c9714cbe..de8111114f46 100644 --- a/trunk/drivers/hwmon/coretemp.c +++ b/trunk/drivers/hwmon/coretemp.c @@ -518,7 +518,6 @@ static struct notifier_block coretemp_cpu_notifier __refdata = { static int __init coretemp_init(void) { int i, err = -ENODEV; - struct pdev_entry *p, *n; /* quick check if we run Intel */ if (cpu_data(0).x86_vendor != X86_VENDOR_INTEL) diff --git a/trunk/drivers/xen/events.c b/trunk/drivers/xen/events.c index 13365ba35218..72f91bff29c7 100644 --- a/trunk/drivers/xen/events.c +++ b/trunk/drivers/xen/events.c @@ -112,7 +112,6 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) #define VALID_EVTCHN(chn) ((chn) != 0) static struct irq_chip xen_dynamic_chip; -static struct irq_chip xen_percpu_chip; /* Constructor for packed IRQ information. 
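The tsc.c comment being removed above explains the problem this code solved: across some ACPI sleep states the TSC restarts at an arbitrary value, so sched_clock() would jump backwards unless cyc2ns_offset is recomputed on resume. A minimal userspace sketch of that arithmetic follows; scale, CYC2NS_SHIFT, tsc and the single global offset are illustrative stand-ins, not the kernel's per-CPU symbols.

/*
 * Model of the save/restore_sched_clock_state() math: record sched_clock()
 * at suspend, then pick an offset after resume so the clock continues from
 * that value even though the raw cycle counter was reset.
 */
#include <stdint.h>
#include <stdio.h>

#define CYC2NS_SHIFT 10

static uint64_t scale = 1024;   /* ~1 ns per cycle, for the example only */
static uint64_t offset;         /* models per-CPU cyc2ns_offset          */
static uint64_t tsc;            /* fake TSC register                     */

static uint64_t sched_clock_ns(void)
{
    return ((tsc * scale) >> CYC2NS_SHIFT) + offset;
}

int main(void)
{
    uint64_t suspend_ns;

    tsc = 5000000;                      /* system has been up for a while */
    suspend_ns = sched_clock_ns();      /* save_sched_clock_state()       */

    tsc = 100;                          /* firmware reset the TSC in S3   */

    offset = 0;                         /* restore_sched_clock_state()    */
    offset = suspend_ns - sched_clock_ns();

    printf("clock resumes at %llu ns (saved %llu ns)\n",
           (unsigned long long)sched_clock_ns(),
           (unsigned long long)suspend_ns);
    return 0;
}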
*/ static struct irq_info mk_unbound_info(void) @@ -378,7 +377,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) irq = find_unbound_irq(); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_edge_irq, "event"); + handle_level_irq, "event"); evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_evtchn_info(evtchn); @@ -404,8 +403,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) if (irq < 0) goto out; - set_irq_chip_and_handler_name(irq, &xen_percpu_chip, - handle_percpu_irq, "ipi"); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "ipi"); bind_ipi.vcpu = cpu; if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, @@ -445,8 +444,8 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) irq = find_unbound_irq(); - set_irq_chip_and_handler_name(irq, &xen_percpu_chip, - handle_percpu_irq, "virq"); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "virq"); evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_virq_info(evtchn, virq); @@ -965,16 +964,6 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { .retrigger = retrigger_dynirq, }; -static struct irq_chip xen_percpu_chip __read_mostly = { - .name = "xen-percpu", - - .disable = disable_dynirq, - .mask = disable_dynirq, - .unmask = enable_dynirq, - - .ack = ack_dynirq, -}; - int xen_set_callback_via(uint64_t via) { struct xen_hvm_param a; diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.c b/trunk/fs/xfs/linux-2.6/xfs_aops.c index b552f816de15..15412fe15c3a 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_aops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_aops.c @@ -852,8 +852,8 @@ xfs_convert_page( SetPageUptodate(page); if (count) { - if (--wbc->nr_to_write <= 0 && - wbc->sync_mode == WB_SYNC_NONE) + wbc->nr_to_write--; + if (wbc->nr_to_write <= 0) done = 1; } xfs_start_page_writeback(page, !page_dirty, count); @@ -1068,7 +1068,7 @@ xfs_vm_writepage( * by themselves. */ if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) - goto redirty; + goto out_fail; /* * We need a transaction if there are delalloc or unwritten buffers @@ -1080,7 +1080,7 @@ xfs_vm_writepage( */ xfs_count_page_state(page, &delalloc, &unwritten); if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) - goto redirty; + goto out_fail; /* Is this page beyond the end of the file? 
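The test a few lines up, (current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC, is the usual idiom for "first flag set and second flag clear", i.e. direct reclaim as opposed to kswapd. A tiny standalone illustration of the idiom, with made-up flag values rather than the kernel's:

#include <stdio.h>

#define FLAG_MEMALLOC 0x1   /* illustrative values only */
#define FLAG_KSWAPD   0x2

static int is_direct_reclaim(unsigned int flags)
{
    return (flags & (FLAG_MEMALLOC | FLAG_KSWAPD)) == FLAG_MEMALLOC;
}

int main(void)
{
    printf("%d %d %d\n",
           is_direct_reclaim(FLAG_MEMALLOC),                 /* 1 */
           is_direct_reclaim(FLAG_MEMALLOC | FLAG_KSWAPD),   /* 0 */
           is_direct_reclaim(0));                            /* 0 */
    return 0;
}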
*/ offset = i_size_read(inode); @@ -1245,15 +1245,12 @@ xfs_vm_writepage( if (iohead) xfs_cancel_ioend(iohead); - if (err == -EAGAIN) - goto redirty; - xfs_aops_discard_page(page); ClearPageUptodate(page); unlock_page(page); return err; -redirty: +out_fail: redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; diff --git a/trunk/fs/xfs/linux-2.6/xfs_super.c b/trunk/fs/xfs/linux-2.6/xfs_super.c index a4e07974955b..15c35b62ff14 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_super.c +++ b/trunk/fs/xfs/linux-2.6/xfs_super.c @@ -1226,7 +1226,6 @@ xfs_fs_statfs( struct xfs_inode *ip = XFS_I(dentry->d_inode); __uint64_t fakeinos, id; xfs_extlen_t lsize; - __int64_t ffree; statp->f_type = XFS_SB_MAGIC; statp->f_namelen = MAXNAMELEN - 1; @@ -1250,11 +1249,7 @@ xfs_fs_statfs( statp->f_files = min_t(typeof(statp->f_files), statp->f_files, mp->m_maxicount); - - /* make sure statp->f_ffree does not underflow */ - ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); - statp->f_ffree = max_t(__int64_t, ffree, 0); - + statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); spin_unlock(&mp->m_sb_lock); if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || @@ -1407,7 +1402,7 @@ xfs_fs_freeze( xfs_save_resvblks(mp); xfs_quiesce_attr(mp); - return -xfs_fs_log_dummy(mp, SYNC_WAIT); + return -xfs_fs_log_dummy(mp); } STATIC int diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index d59c4a65d492..dfcbd98d1599 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -34,7 +34,6 @@ #include "xfs_inode_item.h" #include "xfs_quota.h" #include "xfs_trace.h" -#include "xfs_fsops.h" #include #include @@ -341,6 +340,38 @@ xfs_sync_attr( XFS_ICI_NO_TAG, 0, NULL); } +STATIC int +xfs_commit_dummy_trans( + struct xfs_mount *mp, + uint flags) +{ + struct xfs_inode *ip = mp->m_rootip; + struct xfs_trans *tp; + int error; + + /* + * Put a dummy transaction in the log to tell recovery + * that all others are OK. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + /* the log force ensures this transaction is pushed to disk */ + xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); + return error; +} + STATIC int xfs_sync_fsdata( struct xfs_mount *mp) @@ -401,7 +432,7 @@ xfs_quiesce_data( /* mark the log as covered if needed */ if (xfs_log_need_covered(mp)) - error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); + error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); /* flush data-only devices */ if (mp->m_rtdev_targp) @@ -532,7 +563,7 @@ xfs_flush_inodes( /* * Every sync period we need to unpin all items, reclaim inodes and sync * disk quotas. We might need to cover the log to indicate that the - * filesystem is idle and not frozen. + * filesystem is idle. 
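Stepping back to the xfs_fs_statfs() hunk earlier in this series: the clamp that the change drops exists because f_files has already been limited to m_maxicount, so it can be smaller than the count of allocated, in-use inodes, and the plain subtraction can go negative (and then wrap when stored in the unsigned f_ffree). A short sketch with illustrative numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t f_files   = 1000;  /* already clamped to m_maxicount */
    uint64_t sb_icount = 1100;  /* inodes allocated on disk       */
    uint64_t sb_ifree  =   20;  /* of those, currently free       */
    int64_t  ffree;

    ffree = (int64_t)(f_files - (sb_icount - sb_ifree));
    printf("raw result:     %lld\n", (long long)ffree);                  /* -80 */
    printf("clamped result: %lld\n", (long long)(ffree < 0 ? 0 : ffree)); /*  0 */
    return 0;
}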
*/ STATIC void xfs_sync_worker( @@ -546,9 +577,8 @@ xfs_sync_worker( xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); - if (mp->m_super->s_frozen == SB_UNFROZEN && - xfs_log_need_covered(mp)) - error = xfs_fs_log_dummy(mp, 0); + if (xfs_log_need_covered(mp)) + error = xfs_commit_dummy_trans(mp, 0); } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); diff --git a/trunk/fs/xfs/xfs_fsops.c b/trunk/fs/xfs/xfs_fsops.c index 43b1d5699335..dbca5f5c37ba 100644 --- a/trunk/fs/xfs/xfs_fsops.c +++ b/trunk/fs/xfs/xfs_fsops.c @@ -604,36 +604,31 @@ xfs_reserve_blocks( return 0; } -/* - * Dump a transaction into the log that contains no real change. This is needed - * to be able to make the log dirty or stamp the current tail LSN into the log - * during the covering operation. - * - * We cannot use an inode here for this - that will push dirty state back up - * into the VFS and then periodic inode flushing will prevent log covering from - * making progress. Hence we log a field in the superblock instead. - */ int xfs_fs_log_dummy( - xfs_mount_t *mp, - int flags) + xfs_mount_t *mp) { xfs_trans_t *tp; + xfs_inode_t *ip; int error; tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return error; } - /* log the UUID because it is an unchanging field */ - xfs_mod_sb(tp, XFS_SB_UUID); - if (flags & SYNC_WAIT) - xfs_trans_set_sync(tp); - return xfs_trans_commit(tp, 0); + ip = mp->m_rootip; + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; } int diff --git a/trunk/fs/xfs/xfs_fsops.h b/trunk/fs/xfs/xfs_fsops.h index a786c5212c1e..88435e0a77c9 100644 --- a/trunk/fs/xfs/xfs_fsops.h +++ b/trunk/fs/xfs/xfs_fsops.h @@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, xfs_fsop_resblks_t *outval); extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); -extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags); +extern int xfs_fs_log_dummy(xfs_mount_t *mp); #endif /* __XFS_FSOPS_H__ */ diff --git a/trunk/fs/xfs/xfs_ialloc.c b/trunk/fs/xfs/xfs_ialloc.c index 5371d2dc360e..abf80ae1e95b 100644 --- a/trunk/fs/xfs/xfs_ialloc.c +++ b/trunk/fs/xfs/xfs_ialloc.c @@ -1213,6 +1213,7 @@ xfs_imap_lookup( struct xfs_inobt_rec_incore rec; struct xfs_btree_cur *cur; struct xfs_buf *agbp; + xfs_agino_t startino; int error; int i; @@ -1226,13 +1227,13 @@ xfs_imap_lookup( } /* - * Lookup the inode record for the given agino. If the record cannot be - * found, then it's an invalid inode number and we should abort. Once - * we have a record, we need to ensure it contains the inode number - * we are looking up. + * derive and lookup the exact inode record for the given agino. If the + * record cannot be found, then it's an invalid inode number and we + * should abort. 
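The startino rounding used just below assumes inode chunks are allocated as aligned groups of XFS_IALLOC_INODES inodes, so the owning inobt record can be found by masking the low bits off the agino. A worked example of that mask arithmetic; the chunk size of 64 is typical but is used here purely for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t chunk    = 64;                    /* inodes per inobt record  */
    uint32_t agino    = 4660;                  /* 0x1234 within the AG     */
    uint32_t startino = agino & ~(chunk - 1);  /* round down to the chunk  */

    printf("agino %u -> record startino %u, index within chunk %u\n",
           agino, startino, agino - startino); /* 4608 and 52 */
    return 0;
}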
*/ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); + startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); + error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i); if (!error) { if (i) error = xfs_inobt_get_rec(cur, &rec, &i); @@ -1245,11 +1246,6 @@ xfs_imap_lookup( if (error) return error; - /* check that the returned record contains the required inode */ - if (rec.ir_startino > agino || - rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) - return EINVAL; - /* for untrusted inodes check it is allocated first */ if ((flags & XFS_IGET_UNTRUSTED) && (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c index 34798f391c49..68415cb4f23c 100644 --- a/trunk/fs/xfs/xfs_inode.c +++ b/trunk/fs/xfs/xfs_inode.c @@ -1914,11 +1914,6 @@ xfs_iunlink_remove( return 0; } -/* - * A big issue when freeing the inode cluster is is that we _cannot_ skip any - * inodes that are in memory - they all must be marked stale and attached to - * the cluster buffer. - */ STATIC void xfs_ifree_cluster( xfs_inode_t *free_ip, @@ -1950,6 +1945,8 @@ xfs_ifree_cluster( } for (j = 0; j < nbufs; j++, inum += ninodes) { + int found = 0; + blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), XFS_INO_TO_AGBNO(mp, inum)); @@ -1968,9 +1965,7 @@ xfs_ifree_cluster( /* * Walk the inodes already attached to the buffer and mark them * stale. These will all have the flush locks held, so an - * in-memory inode walk can't lock them. By marking them all - * stale first, we will not attempt to lock them in the loop - * below as the XFS_ISTALE flag will be set. + * in-memory inode walk can't lock them. */ lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); while (lip) { @@ -1982,11 +1977,11 @@ xfs_ifree_cluster( &iip->ili_flush_lsn, &iip->ili_item.li_lsn); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); + found++; } lip = lip->li_bio_list; } - /* * For each inode in memory attempt to add it to the inode * buffer and set it up for being staled on buffer IO @@ -1998,7 +1993,6 @@ xfs_ifree_cluster( * even trying to lock them. */ for (i = 0; i < ninodes; i++) { -retry: read_lock(&pag->pag_ici_lock); ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, (inum + i))); @@ -2009,36 +2003,38 @@ xfs_ifree_cluster( continue; } - /* - * Don't try to lock/unlock the current inode, but we - * _cannot_ skip the other inodes that we did not find - * in the list attached to the buffer and are not - * already marked stale. If we can't lock it, back off - * and retry. - */ + /* don't try to lock/unlock the current inode */ if (ip != free_ip && !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { read_unlock(&pag->pag_ici_lock); - delay(1); - goto retry; + continue; } read_unlock(&pag->pag_ici_lock); - xfs_iflock(ip); + if (!xfs_iflock_nowait(ip)) { + if (ip != free_ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; + } + xfs_iflags_set(ip, XFS_ISTALE); + if (xfs_inode_clean(ip)) { + ASSERT(ip != free_ip); + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; + } - /* - * we don't need to attach clean inodes or those only - * with unlogged changes (which we throw away, anyway). 
- */ iip = ip->i_itemp; - if (!iip || xfs_inode_clean(ip)) { + if (!iip) { + /* inode with unlogged changes only */ ASSERT(ip != free_ip); ip->i_update_core = 0; xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); continue; } + found++; iip->ili_last_fields = iip->ili_format.ilf_fields; iip->ili_format.ilf_fields = 0; @@ -2053,7 +2049,8 @@ xfs_ifree_cluster( xfs_iunlock(ip, XFS_ILOCK_EXCL); } - xfs_trans_stale_inode_buf(tp, bp); + if (found) + xfs_trans_stale_inode_buf(tp, bp); xfs_trans_binval(tp, bp); } diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index 33f718f92a48..925d572bf0f4 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -3015,8 +3015,7 @@ _xfs_log_force( XFS_STATS_INC(xs_log_force); - if (log->l_cilp) - xlog_cil_force(log); + xlog_cil_push(log, 1); spin_lock(&log->l_icloglock); @@ -3168,7 +3167,7 @@ _xfs_log_force_lsn( XFS_STATS_INC(xs_log_force); if (log->l_cilp) { - lsn = xlog_cil_force_lsn(log, lsn); + lsn = xlog_cil_push_lsn(log, lsn); if (lsn == NULLCOMMITLSN) return 0; } @@ -3725,7 +3724,7 @@ xfs_log_force_umount( * call below. */ if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) - xlog_cil_force(log); + xlog_cil_push(log, 1); /* * We must hold both the GRANT lock and the LOG lock, diff --git a/trunk/fs/xfs/xfs_log_cil.c b/trunk/fs/xfs/xfs_log_cil.c index ed575fb4b495..31e4ea2d19ac 100644 --- a/trunk/fs/xfs/xfs_log_cil.c +++ b/trunk/fs/xfs/xfs_log_cil.c @@ -68,7 +68,6 @@ xlog_cil_init( ctx->sequence = 1; ctx->cil = cil; cil->xc_ctx = ctx; - cil->xc_current_sequence = ctx->sequence; cil->xc_log = log; log->l_cilp = cil; @@ -270,10 +269,15 @@ xlog_cil_insert( static void xlog_cil_format_items( struct log *log, - struct xfs_log_vec *log_vector) + struct xfs_log_vec *log_vector, + struct xlog_ticket *ticket, + xfs_lsn_t *start_lsn) { struct xfs_log_vec *lv; + if (start_lsn) + *start_lsn = log->l_cilp->xc_ctx->sequence; + ASSERT(log_vector); for (lv = log_vector; lv; lv = lv->lv_next) { void *ptr; @@ -297,24 +301,9 @@ xlog_cil_format_items( ptr += vec->i_len; } ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); - } -} - -static void -xlog_cil_insert_items( - struct log *log, - struct xfs_log_vec *log_vector, - struct xlog_ticket *ticket, - xfs_lsn_t *start_lsn) -{ - struct xfs_log_vec *lv; - - if (start_lsn) - *start_lsn = log->l_cilp->xc_ctx->sequence; - ASSERT(log_vector); - for (lv = log_vector; lv; lv = lv->lv_next) xlog_cil_insert(log, ticket, lv->lv_item, lv); + } } static void @@ -331,6 +320,80 @@ xlog_cil_free_logvec( } } +/* + * Commit a transaction with the given vector to the Committed Item List. + * + * To do this, we need to format the item, pin it in memory if required and + * account for the space used by the transaction. Once we have done that we + * need to release the unused reservation for the transaction, attach the + * transaction to the checkpoint context so we carry the busy extents through + * to checkpoint completion, and then unlock all the items in the transaction. + * + * For more specific information about the order of operations in + * xfs_log_commit_cil() please refer to the comments in + * xfs_trans_commit_iclog(). + * + * Called with the context lock already held in read mode to lock out + * background commit, returns without it held once background commits are + * allowed again. 
+ */ +int +xfs_log_commit_cil( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_log_vec *log_vector, + xfs_lsn_t *commit_lsn, + int flags) +{ + struct log *log = mp->m_log; + int log_flags = 0; + int push = 0; + + if (flags & XFS_TRANS_RELEASE_LOG_RES) + log_flags = XFS_LOG_REL_PERM_RESERV; + + if (XLOG_FORCED_SHUTDOWN(log)) { + xlog_cil_free_logvec(log_vector); + return XFS_ERROR(EIO); + } + + /* lock out background commit */ + down_read(&log->l_cilp->xc_ctx_lock); + xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); + + /* check we didn't blow the reservation */ + if (tp->t_ticket->t_curr_res < 0) + xlog_print_tic_res(log->l_mp, tp->t_ticket); + + /* attach the transaction to the CIL if it has any busy extents */ + if (!list_empty(&tp->t_busy)) { + spin_lock(&log->l_cilp->xc_cil_lock); + list_splice_init(&tp->t_busy, + &log->l_cilp->xc_ctx->busy_extents); + spin_unlock(&log->l_cilp->xc_cil_lock); + } + + tp->t_commit_lsn = *commit_lsn; + xfs_log_done(mp, tp->t_ticket, NULL, log_flags); + xfs_trans_unreserve_and_mod_sb(tp); + + /* check for background commit before unlock */ + if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) + push = 1; + up_read(&log->l_cilp->xc_ctx_lock); + + /* + * We need to push CIL every so often so we don't cache more than we + * can fit in the log. The limit really is that a checkpoint can't be + * more than half the log (the current checkpoint is not allowed to + * overwrite the previous checkpoint), but commit latency and memory + * usage limit this to a smaller size in most cases. + */ + if (push) + xlog_cil_push(log, 0); + return 0; +} + /* * Mark all items committed and clear busy extents. We free the log vector * chains in a separate pass so that we unpin the log items as quickly as @@ -364,23 +427,13 @@ xlog_cil_committed( } /* - * Push the Committed Item List to the log. If @push_seq flag is zero, then it - * is a background flush and so we can chose to ignore it. Otherwise, if the - * current sequence is the same as @push_seq we need to do a flush. If - * @push_seq is less than the current sequence, then it has already been - * flushed and we don't need to do anything - the caller will wait for it to - * complete if necessary. - * - * @push_seq is a value rather than a flag because that allows us to do an - * unlocked check of the sequence number for a match. Hence we can allows log - * forces to run racily and not issue pushes for the same sequence twice. If we - * get a race between multiple pushes for the same sequence they will block on - * the first one and then abort, hence avoiding needless pushes. + * Push the Committed Item List to the log. If the push_now flag is not set, + * then it is a background flush and so we can chose to ignore it. 
*/ -STATIC int +int xlog_cil_push( struct log *log, - xfs_lsn_t push_seq) + int push_now) { struct xfs_cil *cil = log->l_cilp; struct xfs_log_vec *lv; @@ -400,14 +453,12 @@ xlog_cil_push( if (!cil) return 0; - ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence); - new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); new_ctx->ticket = xlog_cil_ticket_alloc(log); /* lock out transaction commit, but don't block on background push */ if (!down_write_trylock(&cil->xc_ctx_lock)) { - if (!push_seq) + if (!push_now) goto out_free_ticket; down_write(&cil->xc_ctx_lock); } @@ -418,11 +469,7 @@ xlog_cil_push( goto out_skip; /* check for spurious background flush */ - if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) - goto out_skip; - - /* check for a previously pushed seqeunce */ - if (push_seq < cil->xc_ctx->sequence) + if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) goto out_skip; /* @@ -467,13 +514,6 @@ xlog_cil_push( new_ctx->cil = cil; cil->xc_ctx = new_ctx; - /* - * mirror the new sequence into the cil structure so that we can do - * unlocked checks against the current sequence in log forces without - * risking deferencing a freed context pointer. - */ - cil->xc_current_sequence = new_ctx->sequence; - /* * The switch is now done, so we can drop the context lock and move out * of a shared context. We can't just go straight to the commit record, @@ -585,102 +625,6 @@ xlog_cil_push( return XFS_ERROR(EIO); } -/* - * Commit a transaction with the given vector to the Committed Item List. - * - * To do this, we need to format the item, pin it in memory if required and - * account for the space used by the transaction. Once we have done that we - * need to release the unused reservation for the transaction, attach the - * transaction to the checkpoint context so we carry the busy extents through - * to checkpoint completion, and then unlock all the items in the transaction. - * - * For more specific information about the order of operations in - * xfs_log_commit_cil() please refer to the comments in - * xfs_trans_commit_iclog(). - * - * Called with the context lock already held in read mode to lock out - * background commit, returns without it held once background commits are - * allowed again. - */ -int -xfs_log_commit_cil( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_log_vec *log_vector, - xfs_lsn_t *commit_lsn, - int flags) -{ - struct log *log = mp->m_log; - int log_flags = 0; - int push = 0; - - if (flags & XFS_TRANS_RELEASE_LOG_RES) - log_flags = XFS_LOG_REL_PERM_RESERV; - - if (XLOG_FORCED_SHUTDOWN(log)) { - xlog_cil_free_logvec(log_vector); - return XFS_ERROR(EIO); - } - - /* - * do all the hard work of formatting items (including memory - * allocation) outside the CIL context lock. This prevents stalling CIL - * pushes when we are low on memory and a transaction commit spends a - * lot of time in memory reclaim. 
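The comment being removed here, together with the xfs_log_commit_cil()/xlog_cil_push() pair earlier in this hunk, describes a commit path that does its expensive formatting before taking the CIL context lock shared, and a background push that takes the same lock exclusive (or skips the push if it cannot). A rough userspace model of that split, using pthread primitives as stand-ins for the kernel's rw_semaphore; every name below is illustrative:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_rwlock_t ctx_lock = PTHREAD_RWLOCK_INITIALIZER;
static char *cil_items[16];
static int cil_count;

static void commit_cil(const char *payload)
{
    /* expensive work (allocation, copying) done before taking the lock */
    char *formatted = strdup(payload);

    pthread_rwlock_rdlock(&ctx_lock);   /* lock out the background push  */
    /*
     * Single-threaded demo; the real code also serialises list insertion
     * with a separate spinlock (xc_cil_lock) because many committers can
     * hold the context lock shared at once.
     */
    cil_items[cil_count++] = formatted;
    pthread_rwlock_unlock(&ctx_lock);
}

static void background_push(void)
{
    int i;

    /* a purely background push may simply skip a busy context */
    if (pthread_rwlock_trywrlock(&ctx_lock) != 0)
        return;
    for (i = 0; i < cil_count; i++) {
        printf("checkpointing %s\n", cil_items[i]);
        free(cil_items[i]);
    }
    cil_count = 0;
    pthread_rwlock_unlock(&ctx_lock);
}

int main(void)
{
    commit_cil("inode 42");
    commit_cil("buffer 7");
    background_push();
    return 0;
}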
- */ - xlog_cil_format_items(log, log_vector); - - /* lock out background commit */ - down_read(&log->l_cilp->xc_ctx_lock); - xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn); - - /* check we didn't blow the reservation */ - if (tp->t_ticket->t_curr_res < 0) - xlog_print_tic_res(log->l_mp, tp->t_ticket); - - /* attach the transaction to the CIL if it has any busy extents */ - if (!list_empty(&tp->t_busy)) { - spin_lock(&log->l_cilp->xc_cil_lock); - list_splice_init(&tp->t_busy, - &log->l_cilp->xc_ctx->busy_extents); - spin_unlock(&log->l_cilp->xc_cil_lock); - } - - tp->t_commit_lsn = *commit_lsn; - xfs_log_done(mp, tp->t_ticket, NULL, log_flags); - xfs_trans_unreserve_and_mod_sb(tp); - - /* - * Once all the items of the transaction have been copied to the CIL, - * the items can be unlocked and freed. - * - * This needs to be done before we drop the CIL context lock because we - * have to update state in the log items and unlock them before they go - * to disk. If we don't, then the CIL checkpoint can race with us and - * we can run checkpoint completion before we've updated and unlocked - * the log items. This affects (at least) processing of stale buffers, - * inodes and EFIs. - */ - xfs_trans_free_items(tp, *commit_lsn, 0); - - /* check for background commit before unlock */ - if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) - push = 1; - - up_read(&log->l_cilp->xc_ctx_lock); - - /* - * We need to push CIL every so often so we don't cache more than we - * can fit in the log. The limit really is that a checkpoint can't be - * more than half the log (the current checkpoint is not allowed to - * overwrite the previous checkpoint), but commit latency and memory - * usage limit this to a smaller size in most cases. - */ - if (push) - xlog_cil_push(log, 0); - return 0; -} - /* * Conditionally push the CIL based on the sequence passed in. * @@ -695,34 +639,39 @@ xfs_log_commit_cil( * commit lsn is there. It'll be empty, so this is broken for now. */ xfs_lsn_t -xlog_cil_force_lsn( +xlog_cil_push_lsn( struct log *log, - xfs_lsn_t sequence) + xfs_lsn_t push_seq) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx; xfs_lsn_t commit_lsn = NULLCOMMITLSN; - ASSERT(sequence <= cil->xc_current_sequence); - - /* - * check to see if we need to force out the current context. - * xlog_cil_push() handles racing pushes for the same sequence, - * so no need to deal with it here. - */ - if (sequence == cil->xc_current_sequence) - xlog_cil_push(log, sequence); +restart: + down_write(&cil->xc_ctx_lock); + ASSERT(push_seq <= cil->xc_ctx->sequence); + + /* check to see if we need to force out the current context */ + if (push_seq == cil->xc_ctx->sequence) { + up_write(&cil->xc_ctx_lock); + xlog_cil_push(log, 1); + goto restart; + } /* * See if we can find a previous sequence still committing. + * We can drop the flush lock as soon as we have the cil lock + * because we are now only comparing contexts protected by + * the cil lock. + * * We need to wait for all previous sequence commits to complete * before allowing the force of push_seq to go ahead. Hence block * on commits for those as well. 
*/ -restart: spin_lock(&cil->xc_cil_lock); + up_write(&cil->xc_ctx_lock); list_for_each_entry(ctx, &cil->xc_committing, committing) { - if (ctx->sequence > sequence) + if (ctx->sequence > push_seq) continue; if (!ctx->commit_lsn) { /* @@ -732,7 +681,7 @@ xlog_cil_force_lsn( sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); goto restart; } - if (ctx->sequence != sequence) + if (ctx->sequence != push_seq) continue; /* found it! */ commit_lsn = ctx->commit_lsn; diff --git a/trunk/fs/xfs/xfs_log_priv.h b/trunk/fs/xfs/xfs_log_priv.h index ced52b98b322..8c072618965c 100644 --- a/trunk/fs/xfs/xfs_log_priv.h +++ b/trunk/fs/xfs/xfs_log_priv.h @@ -422,7 +422,6 @@ struct xfs_cil { struct rw_semaphore xc_ctx_lock; struct list_head xc_committing; sv_t xc_commit_wait; - xfs_lsn_t xc_current_sequence; }; /* @@ -563,16 +562,8 @@ int xlog_cil_init(struct log *log); void xlog_cil_init_post_recovery(struct log *log); void xlog_cil_destroy(struct log *log); -/* - * CIL force routines - */ -xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence); - -static inline void -xlog_cil_force(struct log *log) -{ - xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); -} +int xlog_cil_push(struct log *log, int push_now); +xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); /* * Unmount record type is used as a pseudo transaction type for the ticket. diff --git a/trunk/fs/xfs/xfs_trans.c b/trunk/fs/xfs/xfs_trans.c index 1c47edaea0d2..fdca7416c754 100644 --- a/trunk/fs/xfs/xfs_trans.c +++ b/trunk/fs/xfs/xfs_trans.c @@ -1167,7 +1167,7 @@ xfs_trans_del_item( * Unlock all of the items of a transaction and free all the descriptors * of that transaction. */ -void +STATIC void xfs_trans_free_items( struct xfs_trans *tp, xfs_lsn_t commit_lsn, @@ -1653,6 +1653,9 @@ xfs_trans_commit_cil( return error; current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + + /* xfs_trans_free_items() unlocks them first */ + xfs_trans_free_items(tp, *commit_lsn, 0); xfs_trans_free(tp); return 0; } diff --git a/trunk/fs/xfs/xfs_trans_priv.h b/trunk/fs/xfs/xfs_trans_priv.h index 62da86c90de5..e2d93d8ead7b 100644 --- a/trunk/fs/xfs/xfs_trans_priv.h +++ b/trunk/fs/xfs/xfs_trans_priv.h @@ -25,8 +25,7 @@ struct xfs_trans; void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); -void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, - int flags); + void xfs_trans_item_committed(struct xfs_log_item *lip, xfs_lsn_t commit_lsn, int aborted); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index ab661ebc4895..806d1b227a21 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -3752,8 +3752,6 @@ static void task_fork_fair(struct task_struct *p) raw_spin_lock_irqsave(&rq->lock, flags); - update_rq_clock(rq); - if (unlikely(task_cpu(p) != this_cpu)) __set_task_cpu(p, this_cpu); diff --git a/trunk/mm/page-writeback.c b/trunk/mm/page-writeback.c index a803f5e33471..c09ef5219cbe 100644 --- a/trunk/mm/page-writeback.c +++ b/trunk/mm/page-writeback.c @@ -985,16 +985,22 @@ int write_cache_pages(struct address_space *mapping, } } - /* - * We stop writing back only if we are not doing - * integrity sync. In case of integrity sync we have to - * keep going until we have written all the pages - * we tagged for writeback prior to entering this loop. 
-			 */
-			if (--wbc->nr_to_write <= 0 &&
-			    wbc->sync_mode == WB_SYNC_NONE) {
-				done = 1;
-				break;
+			if (wbc->nr_to_write > 0) {
+				if (--wbc->nr_to_write == 0 &&
+				    wbc->sync_mode == WB_SYNC_NONE) {
+					/*
+					 * We stop writing back only if we are
+					 * not doing integrity sync. In case of
+					 * integrity sync we have to keep going
+					 * because someone may be concurrently
+					 * dirtying pages, and we might have
+					 * synced a lot of newly appeared dirty
+					 * pages, but have not synced all of the
+					 * old dirty pages.
+					 */
+					done = 1;
+					break;
+				}
 			}
 		}
 		pagevec_release(&pvec);
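Both sides of this write_cache_pages() hunk, like the xfs_aops.c hunk earlier, encode the same rule: exhausting the nr_to_write budget only terminates background (WB_SYNC_NONE) writeback, while integrity sync keeps writing. A condensed standalone sketch of that decision, following the simpler single-test form from the '-' side; the types and names are simplified stand-ins:

#include <stdio.h>

enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

struct writeback_control {
    long nr_to_write;
    enum sync_mode sync_mode;
};

static int should_stop(struct writeback_control *wbc)
{
    return --wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE;
}

int main(void)
{
    struct writeback_control bg  = { 2, WB_SYNC_NONE };
    struct writeback_control all = { 2, WB_SYNC_ALL };
    int page;

    for (page = 0; page < 4; page++)
        printf("background: page %d -> stop=%d\n", page, should_stop(&bg));
    for (page = 0; page < 4; page++)
        printf("integrity:  page %d -> stop=%d\n", page, should_stop(&all));
    return 0;
}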