diff --git a/[refs] b/[refs]
index 21c10e83af2e..aa95bca88bb9 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 3fff0179e33cd7d0a688dab65700c46ad089e934
+refs/heads/master: fb8ec18c316d869271137c97320dbfd2def56569
diff --git a/trunk/block/blk-core.c b/trunk/block/blk-core.c
index ca69f3d94100..29bcfac6c688 100644
--- a/trunk/block/blk-core.c
+++ b/trunk/block/blk-core.c
@@ -69,7 +69,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
 	int rw = rq_data_dir(rq);
 	int cpu;
 
-	if (!blk_fs_request(rq) || !disk || !blk_queue_io_stat(disk->queue))
+	if (!blk_fs_request(rq) || !disk || !blk_do_io_stat(disk->queue))
 		return;
 
 	cpu = part_stat_lock();
@@ -1667,7 +1667,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	struct gendisk *disk = req->rq_disk;
 
-	if (!disk || !blk_queue_io_stat(disk->queue))
+	if (!disk || !blk_do_io_stat(disk->queue))
 		return;
 
 	if (blk_fs_request(req)) {
@@ -1686,7 +1686,7 @@ static void blk_account_io_done(struct request *req)
 {
 	struct gendisk *disk = req->rq_disk;
 
-	if (!disk || !blk_queue_io_stat(disk->queue))
+	if (!disk || !blk_do_io_stat(disk->queue))
 		return;
 
 	/*
diff --git a/trunk/block/blk.h b/trunk/block/blk.h
index 6e1ed40534e9..0dce92c37496 100644
--- a/trunk/block/blk.h
+++ b/trunk/block/blk.h
@@ -108,4 +108,12 @@ static inline int blk_cpu_to_group(int cpu)
 #endif
 }
 
+static inline int blk_do_io_stat(struct request_queue *q)
+{
+	if (q)
+		return blk_queue_io_stat(q);
+
+	return 0;
+}
+
 #endif
diff --git a/trunk/drivers/virtio/virtio_pci.c b/trunk/drivers/virtio/virtio_pci.c
index 330aacbdec1f..bef6b45e8a5c 100644
--- a/trunk/drivers/virtio/virtio_pci.c
+++ b/trunk/drivers/virtio/virtio_pci.c
@@ -192,7 +192,7 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 		drv = container_of(vp_dev->vdev.dev.driver,
 				   struct virtio_driver, driver);
 
-		if (drv && drv->config_changed)
+		if (drv->config_changed)
 			drv->config_changed(&vp_dev->vdev);
 	}
 
diff --git a/trunk/include/linux/module.h b/trunk/include/linux/module.h
index f3b8329eb5b8..4f7ea12463d3 100644
--- a/trunk/include/linux/module.h
+++ b/trunk/include/linux/module.h
@@ -219,6 +219,11 @@ void *__symbol_get_gpl(const char *symbol);
 
 #endif
 
+struct module_ref
+{
+	local_t count;
+} ____cacheline_aligned;
+
 enum module_state
 {
 	MODULE_STATE_LIVE,
@@ -339,11 +344,8 @@ struct module
 	/* Destruction function. */
 	void (*exit)(void);
 
-#ifdef CONFIG_SMP
-	char *refptr;
-#else
-	local_t ref;
-#endif
+	/* Reference counts */
+	struct module_ref ref[NR_CPUS];
 #endif
 };
 #ifndef MODULE_ARCH_INIT
@@ -393,22 +395,13 @@ void __symbol_put(const char *symbol);
 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
 void symbol_put_addr(void *addr);
 
-static inline local_t *__module_ref_addr(struct module *mod, int cpu)
-{
-#ifdef CONFIG_SMP
-	return (local_t *) (mod->refptr + per_cpu_offset(cpu));
-#else
-	return &mod->ref;
-#endif
-}
-
 /* Sometimes we know we already have a refcount, and it's easier not
    to handle the error case (which only happens with rmmod --wait). */
 static inline void __module_get(struct module *module)
 {
 	if (module) {
 		BUG_ON(module_refcount(module) == 0);
-		local_inc(__module_ref_addr(module, get_cpu()));
+		local_inc(&module->ref[get_cpu()].count);
 		put_cpu();
 	}
 }
@@ -420,7 +413,7 @@ static inline int try_module_get(struct module *module)
 	if (module) {
 		unsigned int cpu = get_cpu();
 		if (likely(module_is_live(module)))
-			local_inc(__module_ref_addr(module, cpu));
+			local_inc(&module->ref[cpu].count);
 		else
 			ret = 0;
 		put_cpu();
diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c
index ba22484a987e..e8b51d41dd72 100644
--- a/trunk/kernel/module.c
+++ b/trunk/kernel/module.c
@@ -573,13 +573,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
 /* Init the unload section of the module. */
 static void module_unload_init(struct module *mod)
 {
-	int cpu;
+	unsigned int i;
 
 	INIT_LIST_HEAD(&mod->modules_which_use_me);
-	for_each_possible_cpu(cpu)
-		local_set(__module_ref_addr(mod, cpu), 0);
+	for (i = 0; i < NR_CPUS; i++)
+		local_set(&mod->ref[i].count, 0);
 	/* Hold reference count during initialization. */
-	local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
+	local_set(&mod->ref[raw_smp_processor_id()].count, 1);
 	/* Backwards compatibility macros put refcount during init. */
 	mod->waiter = current;
 }
@@ -717,11 +717,10 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
 
 unsigned int module_refcount(struct module *mod)
 {
-	unsigned int total = 0;
-	int cpu;
+	unsigned int i, total = 0;
 
-	for_each_possible_cpu(cpu)
-		total += local_read(__module_ref_addr(mod, cpu));
+	for (i = 0; i < NR_CPUS; i++)
+		total += local_read(&mod->ref[i].count);
 	return total;
 }
 EXPORT_SYMBOL(module_refcount);
@@ -895,7 +894,7 @@ void module_put(struct module *module)
 {
 	if (module) {
 		unsigned int cpu = get_cpu();
-		local_dec(__module_ref_addr(module, cpu));
+		local_dec(&module->ref[cpu].count);
 		/* Maybe they're waiting for us to drop reference? */
 		if (unlikely(!module_is_live(module)))
 			wake_up_process(module->waiter);
@@ -1465,10 +1464,7 @@ static void free_module(struct module *mod)
 	kfree(mod->args);
 	if (mod->percpu)
 		percpu_modfree(mod->percpu);
-#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
-	if (mod->refptr)
-		percpu_modfree(mod->refptr);
-#endif
+
 	/* Free lock-classes: */
 	lockdep_free_key_range(mod->module_core, mod->core_size);
 
@@ -2015,14 +2011,6 @@ static noinline struct module *load_module(void __user *umod,
 	if (err < 0)
 		goto free_mod;
 
-#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
-	mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
-				      mod->name);
-	if (!mod->refptr) {
-		err = -ENOMEM;
-		goto free_mod;
-	}
-#endif
 	if (pcpuindex) {
 		/* We have a special allocation for this section. */
 		percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
@@ -2030,7 +2018,7 @@ static noinline struct module *load_module(void __user *umod,
 					 mod->name);
 		if (!percpu) {
 			err = -ENOMEM;
-			goto free_percpu;
+			goto free_mod;
 		}
 		sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
 		mod->percpu = percpu;
@@ -2294,9 +2282,6 @@ static noinline struct module *load_module(void __user *umod,
 free_percpu:
 	if (percpu)
 		percpu_modfree(percpu);
-#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
-	percpu_modfree(mod->refptr);
-#endif
 free_mod:
 	kfree(args);
 free_hdr:
diff --git a/trunk/mm/mlock.c b/trunk/mm/mlock.c
index 028ec482fdd4..2904a347e476 100644
--- a/trunk/mm/mlock.c
+++ b/trunk/mm/mlock.c
@@ -294,10 +294,14 @@ static inline int __mlock_posix_error_return(long retval)
  *
  * return number of pages [> 0] to be removed from locked_vm on success
  * of "special" vmas.
+ *
+ * return negative error if vma spanning @start-@range disappears while
+ * mmap semaphore is dropped.  Unlikely?
  */
 long mlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	int nr_pages = (end - start) / PAGE_SIZE;
 	BUG_ON(!(vma->vm_flags & VM_LOCKED));
 
@@ -310,8 +314,20 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current))) {
+		long error;
+		downgrade_write(&mm->mmap_sem);
+
+		error = __mlock_vma_pages_range(vma, start, end, 1);
 
-		return __mlock_vma_pages_range(vma, start, end, 1);
+		up_read(&mm->mmap_sem);
+		/* vma can change or disappear */
+		down_write(&mm->mmap_sem);
+		vma = find_vma(mm, start);
+		/* non-NULL vma must contain @start, but need to check @end */
+		if (!vma || end > vma->vm_end)
+			return -ENOMEM;
+
+		return 0;	/* hide other errors from mmap(), et al */
 	}
 
 	/*
@@ -422,14 +438,41 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	vma->vm_flags = newflags;
 
 	if (lock) {
+		/*
+		 * mmap_sem is currently held for write.  Downgrade the write
+		 * lock to a read lock so that other faults, mmap scans, ...
+		 * while we fault in all pages.
+		 */
+		downgrade_write(&mm->mmap_sem);
+
 		ret = __mlock_vma_pages_range(vma, start, end, 1);
 
-		if (ret > 0) {
+		/*
+		 * Need to reacquire mmap sem in write mode, as our callers
+		 * expect this.  We have no support for atomically upgrading
+		 * a sem to write, so we need to check for ranges while sem
+		 * is unlocked.
+		 */
+		up_read(&mm->mmap_sem);
+		/* vma can change or disappear */
+		down_write(&mm->mmap_sem);
+		*prev = find_vma(mm, start);
+		/* non-NULL *prev must contain @start, but need to check @end */
+		if (!(*prev) || end > (*prev)->vm_end)
+			ret = -ENOMEM;
+		else if (ret > 0) {
 			mm->locked_vm -= ret;
 			ret = 0;
 		} else
 			ret = __mlock_posix_error_return(ret); /* translate if needed */
 	} else {
+		/*
+		 * TODO: for unlocking, pages will already be resident, so
+		 * we don't need to wait for allocations/reclaim/pagein, ...
+		 * However, unlocking a very large region can still take a
+		 * while.  Should we downgrade the semaphore for both lock
+		 * AND unlock ?
+		 */
 		__mlock_vma_pages_range(vma, start, end, 0);
 	}
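For context, below is a minimal userspace sketch of the locking pattern the mm/mlock.c hunks above introduce: release the exclusive mmap semaphore while the slow page-faulting work runs, then reacquire it for writing and revalidate the range, because the mapping may have changed while the lock was dropped. This is illustrative only, not kernel code: the kernel's rwsem supports downgrade_write(), which pthread rwlocks do not, so the sketch simply unlocks and relocks, and names such as map_lock, region_still_valid() and populate_region() are hypothetical stand-ins rather than kernel APIs.

/* build: cc -pthread sketch.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;

/* stand-in for the find_vma() + range re-check done after relocking */
static bool region_still_valid(unsigned long start, unsigned long end)
{
	(void)start;
	(void)end;
	return true;	/* a real version would re-look-up the region */
}

/* called with map_lock held for writing, like mlock_fixup() with mmap_sem */
static int populate_region(unsigned long start, unsigned long end)
{
	int ret = 0;

	/* kernel: downgrade_write(); pthreads has no downgrade, so unlock/relock */
	pthread_rwlock_unlock(&map_lock);
	pthread_rwlock_rdlock(&map_lock);

	/* ... slow work under the shared lock: fault in [start, end) ... */

	/* reacquire exclusively and revalidate, mirroring the diff above */
	pthread_rwlock_unlock(&map_lock);
	pthread_rwlock_wrlock(&map_lock);
	if (!region_still_valid(start, end))
		ret = -1;	/* the kernel returns -ENOMEM here */

	return ret;	/* caller still expects the write lock held */
}

int main(void)
{
	pthread_rwlock_wrlock(&map_lock);
	printf("populate_region: %d\n", populate_region(0x1000, 0x2000));
	pthread_rwlock_unlock(&map_lock);
	return 0;
}

The design point the hunk's comments call out is that there is no atomic read-to-write upgrade, so anything observed before the exclusive lock is reacquired must be revalidated afterwards.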