From ca2bfea08739ba30bf50c15ad77de92cf8584c8f Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Tue, 3 Jul 2012 12:55:31 +0100
Subject: [PATCH] --- yaml ---

r: 311571
b: refs/heads/master
c: 0d200aefd4ac51787b6b80de1bb7ce93bccd59f6
h: refs/heads/master
i:
  311569: f5bc08a19cc4fe5a5ee228406f65ebf025b7ade4
  311567: 6b700bb14d1a9915133b090223739182b39cf15b
v: v3
---
 [refs]                                        |  2 +-
 trunk/Documentation/prctl/no_new_privs.txt    | 50 -------------
 trunk/arch/arm/kernel/vmlinux.lds.S           |  2 -
 trunk/arch/arm/mm/mmu.c                       | 74 -------------------
 trunk/arch/powerpc/kvm/book3s_hv_rmhandlers.S |  2 +-
 trunk/arch/powerpc/xmon/xmon.c                |  2 +-
 trunk/drivers/gpu/drm/drm_edid.c              | 27 +------
 trunk/drivers/gpu/drm/i915/i915_dma.c         | 37 ++--------
 trunk/drivers/gpu/drm/radeon/radeon_gart.c    | 13 +---
 trunk/drivers/gpu/drm/radeon/radeon_gem.c     | 10 +--
 trunk/drivers/gpu/drm/radeon/si.c             |  4 +-
 trunk/drivers/md/dm-thin.c                    |  7 ++
 trunk/drivers/md/md.c                         |  8 +-
 trunk/drivers/md/multipath.c                  |  3 +-
 trunk/drivers/md/raid1.c                      | 13 ++--
 trunk/drivers/md/raid10.c                     | 26 +++----
 trunk/drivers/md/raid5.c                      | 67 +++++------
 trunk/security/security.c                     |  1 -
 18 files changed, 70 insertions(+), 278 deletions(-)
 delete mode 100644 trunk/Documentation/prctl/no_new_privs.txt

diff --git a/[refs] b/[refs]
index cc6fb7c94d1a..7ad575d7c05e 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 73e6080547429a3cf16f2cceba54891d345f44c2
+refs/heads/master: 0d200aefd4ac51787b6b80de1bb7ce93bccd59f6
diff --git a/trunk/Documentation/prctl/no_new_privs.txt b/trunk/Documentation/prctl/no_new_privs.txt
deleted file mode 100644
index cb705ec69abe..000000000000
--- a/trunk/Documentation/prctl/no_new_privs.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-The execve system call can grant a newly-started program privileges that
-its parent did not have.  The most obvious examples are setuid/setgid
-programs and file capabilities.  To prevent the parent program from
-gaining these privileges as well, the kernel and user code must be
-careful to prevent the parent from doing anything that could subvert the
-child.  For example:
-
- - The dynamic loader handles LD_* environment variables differently if
-   a program is setuid.
-
- - chroot is disallowed to unprivileged processes, since it would allow
-   /etc/passwd to be replaced from the point of view of a process that
-   inherited chroot.
-
- - The exec code has special handling for ptrace.
-
-These are all ad-hoc fixes.  The no_new_privs bit (since Linux 3.5) is a
-new, generic mechanism to make it safe for a process to modify its
-execution environment in a manner that persists across execve.  Any task
-can set no_new_privs.  Once the bit is set, it is inherited across fork,
-clone, and execve and cannot be unset.  With no_new_privs set, execve
-promises not to grant the privilege to do anything that could not have
-been done without the execve call.  For example, the setuid and setgid
-bits will no longer change the uid or gid; file capabilities will not
-add to the permitted set, and LSMs will not relax constraints after
-execve.
-
-Note that no_new_privs does not prevent privilege changes that do not
-involve execve.  An appropriately privileged task can still call
-setuid(2) and receive SCM_RIGHTS datagrams.
-
-There are two main use cases for no_new_privs so far:
-
- - Filters installed for the seccomp mode 2 sandbox persist across
-   execve and can change the behavior of newly-executed programs.
-   Unprivileged users are therefore only allowed to install such filters
-   if no_new_privs is set.
-
- - By itself, no_new_privs can be used to reduce the attack surface
-   available to an unprivileged user.  If everything running with a
-   given uid has no_new_privs set, then that uid will be unable to
-   escalate its privileges by directly attacking setuid, setgid, and
-   fcap-using binaries; it will need to compromise something without the
-   no_new_privs bit set first.
-
-In the future, other potentially dangerous kernel features could become
-available to unprivileged tasks if no_new_privs is set.  In principle,
-several options to unshare(2) and clone(2) would be safe when
-no_new_privs is set, and no_new_privs + chroot is considerable less
-dangerous than chroot by itself.
diff --git a/trunk/arch/arm/kernel/vmlinux.lds.S b/trunk/arch/arm/kernel/vmlinux.lds.S
index 36ff15bbfdd4..43a31fb06318 100644
--- a/trunk/arch/arm/kernel/vmlinux.lds.S
+++ b/trunk/arch/arm/kernel/vmlinux.lds.S
@@ -183,9 +183,7 @@ SECTIONS
 	}
 #endif
 
-#ifdef CONFIG_SMP
 	PERCPU_SECTION(L1_CACHE_BYTES)
-#endif
 
 #ifdef CONFIG_XIP_KERNEL
 	__data_loc = ALIGN(4);		/* location in binary */
diff --git a/trunk/arch/arm/mm/mmu.c b/trunk/arch/arm/mm/mmu.c
index cf4528d51774..e5dad60b558b 100644
--- a/trunk/arch/arm/mm/mmu.c
+++ b/trunk/arch/arm/mm/mmu.c
@@ -791,79 +791,6 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
 	}
 }
 
-#ifndef CONFIG_ARM_LPAE
-
-/*
- * The Linux PMD is made of two consecutive section entries covering 2MB
- * (see definition in include/asm/pgtable-2level.h).  However a call to
- * create_mapping() may optimize static mappings by using individual
- * 1MB section mappings.  This leaves the actual PMD potentially half
- * initialized if the top or bottom section entry isn't used, leaving it
- * open to problems if a subsequent ioremap() or vmalloc() tries to use
- * the virtual space left free by that unused section entry.
- *
- * Let's avoid the issue by inserting dummy vm entries covering the unused
- * PMD halves once the static mappings are in place.
- */
-
-static void __init pmd_empty_section_gap(unsigned long addr)
-{
-	struct vm_struct *vm;
-
-	vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm));
-	vm->addr = (void *)addr;
-	vm->size = SECTION_SIZE;
-	vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
-	vm->caller = pmd_empty_section_gap;
-	vm_area_add_early(vm);
-}
-
-static void __init fill_pmd_gaps(void)
-{
-	struct vm_struct *vm;
-	unsigned long addr, next = 0;
-	pmd_t *pmd;
-
-	/* we're still single threaded hence no lock needed here */
-	for (vm = vmlist; vm; vm = vm->next) {
-		if (!(vm->flags & VM_ARM_STATIC_MAPPING))
-			continue;
-		addr = (unsigned long)vm->addr;
-		if (addr < next)
-			continue;
-
-		/*
-		 * Check if this vm starts on an odd section boundary.
-		 * If so and the first section entry for this PMD is free
-		 * then we block the corresponding virtual address.
-		 */
-		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
-			pmd = pmd_off_k(addr);
-			if (pmd_none(*pmd))
-				pmd_empty_section_gap(addr & PMD_MASK);
-		}
-
-		/*
-		 * Then check if this vm ends on an odd section boundary.
-		 * If so and the second section entry for this PMD is empty
-		 * then we block the corresponding virtual address.
-		 */
-		addr += vm->size;
-		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
-			pmd = pmd_off_k(addr) + 1;
-			if (pmd_none(*pmd))
-				pmd_empty_section_gap(addr);
-		}
-
-		/* no need to look at any vm entry until we hit the next PMD */
-		next = (addr + PMD_SIZE - 1) & PMD_MASK;
-	}
-}
-
-#else
-#define fill_pmd_gaps() do { } while (0)
-#endif
-
 static void * __initdata vmalloc_min =
 	(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
 
@@ -1145,7 +1072,6 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 	 */
 	if (mdesc->map_io)
 		mdesc->map_io();
-	fill_pmd_gaps();
 
 	/*
 	 * Finally flush the caches and tlb to ensure that we're in a
diff --git a/trunk/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/trunk/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index a1044f43becd..a84aafce2a12 100644
--- a/trunk/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/trunk/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -810,7 +810,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	lwz	r3,VCORE_NAPPING_THREADS(r5)
 	lwz	r4,VCPU_PTID(r9)
 	li	r0,1
-	sld	r0,r0,r4
+	sldi	r0,r0,r4
 	andc.	r3,r3,r0		/* no sense IPI'ing ourselves */
 	beq	43f
 	mulli	r4,r4,PACA_SIZE		/* get paca for thread 0 */
diff --git a/trunk/arch/powerpc/xmon/xmon.c b/trunk/arch/powerpc/xmon/xmon.c
index eab3492a45c5..0f3ab06d2222 100644
--- a/trunk/arch/powerpc/xmon/xmon.c
+++ b/trunk/arch/powerpc/xmon/xmon.c
@@ -971,7 +971,7 @@ static int cpu_cmd(void)
 		/* print cpus waiting or in xmon */
 		printf("cpus stopped:");
 		count = 0;
-		for_each_possible_cpu(cpu) {
+		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
 			if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 				if (count == 0)
 					printf(" %x", cpu);
diff --git a/trunk/drivers/gpu/drm/drm_edid.c b/trunk/drivers/gpu/drm/drm_edid.c
index a8743c399e83..5873e481e5d2 100644
--- a/trunk/drivers/gpu/drm/drm_edid.c
+++ b/trunk/drivers/gpu/drm/drm_edid.c
@@ -1039,24 +1039,6 @@ mode_in_range(const struct drm_display_mode *mode, struct edid *edid,
 	return true;
 }
 
-static bool valid_inferred_mode(const struct drm_connector *connector,
-				const struct drm_display_mode *mode)
-{
-	struct drm_display_mode *m;
-	bool ok = false;
-
-	list_for_each_entry(m, &connector->probed_modes, head) {
-		if (mode->hdisplay == m->hdisplay &&
-		    mode->vdisplay == m->vdisplay &&
-		    drm_mode_vrefresh(mode) == drm_mode_vrefresh(m))
-			return false; /* duplicated */
-		if (mode->hdisplay <= m->hdisplay &&
-		    mode->vdisplay <= m->vdisplay)
-			ok = true;
-	}
-	return ok;
-}
-
 static int
 drm_dmt_modes_for_range(struct drm_connector *connector, struct edid *edid,
 			struct detailed_timing *timing)
@@ -1066,8 +1048,7 @@ drm_dmt_modes_for_range(struct drm_connector *connector, struct edid *edid,
 	struct drm_device *dev = connector->dev;
 
 	for (i = 0; i < drm_num_dmt_modes; i++) {
-		if (mode_in_range(drm_dmt_modes + i, edid, timing) &&
-		    valid_inferred_mode(connector, drm_dmt_modes + i)) {
+		if (mode_in_range(drm_dmt_modes + i, edid, timing)) {
 			newmode = drm_mode_duplicate(dev, &drm_dmt_modes[i]);
 			if (newmode) {
 				drm_mode_probed_add(connector, newmode);
@@ -1107,8 +1088,7 @@ drm_gtf_modes_for_range(struct drm_connector *connector, struct edid *edid,
 			return modes;
 
 		fixup_mode_1366x768(newmode);
-		if (!mode_in_range(newmode, edid, timing) ||
-		    !valid_inferred_mode(connector, newmode)) {
+		if (!mode_in_range(newmode, edid, timing)) {
 			drm_mode_destroy(dev, newmode);
 			continue;
 		}
@@ -1136,8 +1116,7 @@ drm_cvt_modes_for_range(struct drm_connector *connector, struct edid *edid,
 			return modes;
 
 		fixup_mode_1366x768(newmode);
-		if (!mode_in_range(newmode, edid, timing) ||
-		    !valid_inferred_mode(connector, newmode)) {
+		if (!mode_in_range(newmode, edid, timing)) {
 			drm_mode_destroy(dev, newmode);
 			continue;
 		}
diff --git a/trunk/drivers/gpu/drm/i915/i915_dma.c b/trunk/drivers/gpu/drm/i915/i915_dma.c
index 36822b924eb1..f94792626b94 100644
--- a/trunk/drivers/gpu/drm/i915/i915_dma.c
+++ b/trunk/drivers/gpu/drm/i915/i915_dma.c
@@ -1401,27 +1401,6 @@ i915_mtrr_setup(struct drm_i915_private *dev_priv, unsigned long base,
 	}
 }
 
-static void i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv)
-{
-	struct apertures_struct *ap;
-	struct pci_dev *pdev = dev_priv->dev->pdev;
-	bool primary;
-
-	ap = alloc_apertures(1);
-	if (!ap)
-		return;
-
-	ap->ranges[0].base = dev_priv->dev->agp->base;
-	ap->ranges[0].size =
-		dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
-	primary =
-		pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
-
-	remove_conflicting_framebuffers(ap, "inteldrmfb", primary);
-
-	kfree(ap);
-}
-
 /**
  * i915_driver_load - setup chip and create an initial config
  * @dev: DRM device
@@ -1467,15 +1446,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		goto free_priv;
 	}
 
-	dev_priv->mm.gtt = intel_gtt_get();
-	if (!dev_priv->mm.gtt) {
-		DRM_ERROR("Failed to initialize GTT\n");
-		ret = -ENODEV;
-		goto put_bridge;
-	}
-
-	i915_kick_out_firmware_fb(dev_priv);
-
 	pci_set_master(dev->pdev);
 
 	/* overlay on gen2 is broken and can't address above 1G */
@@ -1501,6 +1471,13 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		goto put_bridge;
 	}
 
+	dev_priv->mm.gtt = intel_gtt_get();
+	if (!dev_priv->mm.gtt) {
+		DRM_ERROR("Failed to initialize GTT\n");
+		ret = -ENODEV;
+		goto out_rmmap;
+	}
+
 	aperture_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
 
 	dev_priv->mm.gtt_mapping =
diff --git a/trunk/drivers/gpu/drm/radeon/radeon_gart.c b/trunk/drivers/gpu/drm/radeon/radeon_gart.c
index 84b648a7ddd8..59d44937dd9f 100644
--- a/trunk/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/trunk/drivers/gpu/drm/radeon/radeon_gart.c
@@ -289,9 +289,8 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 	rdev->vm_manager.enabled = false;
 
 	/* mark first vm as always in use, it's the system one */
-	/* allocate enough for 2 full VM pts */
 	r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
-				      rdev->vm_manager.max_pfn * 8 * 2,
+				      rdev->vm_manager.max_pfn * 8,
 				      RADEON_GEM_DOMAIN_VRAM);
 	if (r) {
 		dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
@@ -634,15 +633,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	mutex_init(&vm->mutex);
 	INIT_LIST_HEAD(&vm->list);
 	INIT_LIST_HEAD(&vm->va);
-	/* SI requires equal sized PTs for all VMs, so always set
-	 * last_pfn to max_pfn.  cayman allows variable sized
-	 * pts so we can grow then as needed.  Once we switch
-	 * to two level pts we can unify this again.
-	 */
-	if (rdev->family >= CHIP_TAHITI)
-		vm->last_pfn = rdev->vm_manager.max_pfn;
-	else
-		vm->last_pfn = 0;
+	vm->last_pfn = 0;
 
 	/* map the ib pool buffer at 0 in virtual address space, set
 	 * read only
diff --git a/trunk/drivers/gpu/drm/radeon/radeon_gem.c b/trunk/drivers/gpu/drm/radeon/radeon_gem.c
index 21ec9f5653ce..f28bd4b7ef98 100644
--- a/trunk/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/trunk/drivers/gpu/drm/radeon/radeon_gem.c
@@ -292,7 +292,6 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
 int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp)
 {
-	struct radeon_device *rdev = dev->dev_private;
 	struct drm_radeon_gem_busy *args = data;
 	struct drm_gem_object *gobj;
 	struct radeon_bo *robj;
@@ -318,14 +317,13 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	drm_gem_object_unreference_unlocked(gobj);
-	r = radeon_gem_handle_lockup(rdev, r);
+	r = radeon_gem_handle_lockup(robj->rdev, r);
 	return r;
 }
 
 int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *filp)
 {
-	struct radeon_device *rdev = dev->dev_private;
 	struct drm_radeon_gem_wait_idle *args = data;
 	struct drm_gem_object *gobj;
 	struct radeon_bo *robj;
@@ -338,10 +336,10 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	robj = gem_to_radeon_bo(gobj);
 	r = radeon_bo_wait(robj, NULL, false);
 	/* callback hw specific functions if any */
-	if (rdev->asic->ioctl_wait_idle)
-		robj->rdev->asic->ioctl_wait_idle(rdev, robj);
+	if (robj->rdev->asic->ioctl_wait_idle)
+		robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
 	drm_gem_object_unreference_unlocked(gobj);
-	r = radeon_gem_handle_lockup(rdev, r);
+	r = radeon_gem_handle_lockup(robj->rdev, r);
 	return r;
 }
diff --git a/trunk/drivers/gpu/drm/radeon/si.c b/trunk/drivers/gpu/drm/radeon/si.c
index 0b0279291a73..c7b61f16ecfd 100644
--- a/trunk/drivers/gpu/drm/radeon/si.c
+++ b/trunk/drivers/gpu/drm/radeon/si.c
@@ -2365,12 +2365,12 @@ int si_pcie_gart_enable(struct radeon_device *rdev)
 	WREG32(0x15DC, 0);
 
 	/* empty context1-15 */
-	/* FIXME start with 4G, once using 2 level pt switch to full
+	/* FIXME start with 1G, once using 2 level pt switch to full
 	 * vm size space
 	 */
 	/* set vm size, must be a multiple of 4 */
 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
-	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
+	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
 	for (i = 1; i < 16; i++) {
 		if (i < 8)
 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
diff --git a/trunk/drivers/md/dm-thin.c b/trunk/drivers/md/dm-thin.c
index 37fdaf81bd1f..ce59824fb414 100644
--- a/trunk/drivers/md/dm-thin.c
+++ b/trunk/drivers/md/dm-thin.c
@@ -2292,6 +2292,13 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct
 	if (r)
 		return r;
 
+	r = dm_pool_commit_metadata(pool->pmd);
+	if (r) {
+		DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
+		      __func__, r);
+		return r;
+	}
+
 	r = dm_pool_reserve_metadata_snap(pool->pmd);
 	if (r)
 		DMWARN("reserve_metadata_snap message failed.");
diff --git a/trunk/drivers/md/md.c b/trunk/drivers/md/md.c
index a4c219e3c859..1c2f9048e1ae 100644
--- a/trunk/drivers/md/md.c
+++ b/trunk/drivers/md/md.c
@@ -5784,7 +5784,8 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
 		super_types[mddev->major_version].
 			validate_super(mddev, rdev);
 		if ((info->state & (1<<MD_DISK_SYNC)) &&
-		     rdev->raid_disk != info->raid_disk) {
+		    (!test_bit(In_sync, &rdev->flags) ||
+		     rdev->raid_disk != info->raid_disk)) {
 			/* This was a hot-add request, but events doesn't
 			 * match, so reject it.
 			 */
@@ -6750,7 +6751,7 @@ struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev
 	thread->tsk = kthread_run(md_thread, thread,
 				  "%s_%s",
 				  mdname(thread->mddev),
-				  name);
+				  name ?: mddev->pers->name);
 	if (IS_ERR(thread->tsk)) {
 		kfree(thread);
 		return NULL;
@@ -7297,7 +7298,6 @@ void md_do_sync(struct mddev *mddev)
 	int skipped = 0;
 	struct md_rdev *rdev;
 	char *desc;
-	struct blk_plug plug;
 
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7447,7 +7447,6 @@ void md_do_sync(struct mddev *mddev)
 	}
 	mddev->curr_resync_completed = j;
 
-	blk_start_plug(&plug);
 	while (j < max_sectors) {
 		sector_t sectors;
 
@@ -7553,7 +7552,6 @@ void md_do_sync(struct mddev *mddev)
 	 * this also signals 'finished resyncing' to md_stop
 	 */
  out:
-	blk_finish_plug(&plug);
 	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
 	/* tell personality that we are finished */
diff --git a/trunk/drivers/md/multipath.c b/trunk/drivers/md/multipath.c
index 61a1833ebaf3..9339e67fcc79 100644
--- a/trunk/drivers/md/multipath.c
+++ b/trunk/drivers/md/multipath.c
@@ -474,8 +474,7 @@ static int multipath_run (struct mddev *mddev)
 	}
 
 	{
-		mddev->thread = md_register_thread(multipathd, mddev,
-						   "multipath");
+		mddev->thread = md_register_thread(multipathd, mddev, NULL);
 		if (!mddev->thread) {
 			printk(KERN_ERR "multipath: couldn't allocate thread"
 				" for %s\n", mdname(mddev));
diff --git a/trunk/drivers/md/raid1.c b/trunk/drivers/md/raid1.c
index 8c2754f835ef..a9c7981ddd24 100644
--- a/trunk/drivers/md/raid1.c
+++ b/trunk/drivers/md/raid1.c
@@ -517,8 +517,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		int bad_sectors;
 
 		int disk = start_disk + i;
-		if (disk >= conf->raid_disks * 2)
-			disk -= conf->raid_disks * 2;
+		if (disk >= conf->raid_disks)
+			disk -= conf->raid_disks;
 
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
@@ -883,6 +883,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	struct md_rdev *blocked_rdev;
+	int plugged;
 	int first_clone;
 	int sectors_handled;
 	int max_sectors;
@@ -1033,6 +1034,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	 * the bad blocks.  Each set of writes gets it's own r1bio
 	 * with a set of bios attached.
 	 */
+	plugged = mddev_check_plugged(mddev);
 
 	disks = conf->raid_disks * 2;
  retry_write:
@@ -1189,8 +1191,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 		bio_list_add(&conf->pending_bio_list, mbio);
 		conf->pending_count++;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!mddev_check_plugged(mddev))
-			md_wakeup_thread(mddev->thread);
 	}
 	/* Mustn't call r1_bio_write_done before this next test,
 	 * as it could result in the bio being freed.
@@ -1213,6 +1213,9 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 
 	/* In case raid1d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
+
+	if (do_sync || !bitmap || !plugged)
+		md_wakeup_thread(mddev->thread);
 }
 
 static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2618,7 +2621,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 		goto abort;
 	}
 	err = -ENOMEM;
-	conf->thread = md_register_thread(raid1d, mddev, "raid1");
+	conf->thread = md_register_thread(raid1d, mddev, NULL);
 	if (!conf->thread) {
 		printk(KERN_ERR
 		       "md/raid1:%s: couldn't allocate thread\n",
diff --git a/trunk/drivers/md/raid10.c b/trunk/drivers/md/raid10.c
index acf5a828c7e1..99ae6068e456 100644
--- a/trunk/drivers/md/raid10.c
+++ b/trunk/drivers/md/raid10.c
@@ -1039,6 +1039,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
+	int plugged;
 	int sectors_handled;
 	int max_sectors;
 	int sectors;
@@ -1238,6 +1239,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	 * of r10_bios is recored in bio->bi_phys_segments just as with
 	 * the read case.
 	 */
+	plugged = mddev_check_plugged(mddev);
 
 	r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
 	raid10_find_phys(conf, r10_bio);
@@ -1394,8 +1396,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 			bio_list_add(&conf->pending_bio_list, mbio);
 			conf->pending_count++;
 			spin_unlock_irqrestore(&conf->device_lock, flags);
-			if (!mddev_check_plugged(mddev, 0, 0))
-				md_wakeup_thread(mddev->thread);
 
 			if (!r10_bio->devs[i].repl_bio)
 				continue;
@@ -1423,8 +1423,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 			bio_list_add(&conf->pending_bio_list, mbio);
 			conf->pending_count++;
 			spin_unlock_irqrestore(&conf->device_lock, flags);
-			if (!mddev_check_plugged(mddev))
-				md_wakeup_thread(mddev->thread);
 		}
 
 	/* Don't remove the bias on 'remaining' (one_write_done) until
@@ -1450,6 +1448,9 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
+
+	if (do_sync || !mddev->bitmap || !plugged)
+		md_wakeup_thread(mddev->thread);
 }
 
 static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2309,7 +2310,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			if (r10_sync_page_io(rdev,
 					     r10_bio->devs[sl].addr +
 					     sect,
-					     s, conf->tmppage, WRITE)
+					     s<<9, conf->tmppage, WRITE)
 			    == 0) {
 				/* Well, this device is dead */
 				printk(KERN_NOTICE
@@ -2348,7 +2349,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			switch (r10_sync_page_io(rdev,
 					     r10_bio->devs[sl].addr +
 					     sect,
-					     s, conf->tmppage,
+					     s<<9, conf->tmppage,
 						 READ)) {
 			case 0:
 				/* Well, this device is dead */
@@ -2511,7 +2512,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
 	slot = r10_bio->read_slot;
 	printk_ratelimited(
 		KERN_ERR
-		"md/raid10:%s: %s: redirecting "
+		"md/raid10:%s: %s: redirecting"
 		"sector %llu to another mirror\n",
 		mdname(mddev),
 		bdevname(rdev->bdev, b),
@@ -2660,8 +2661,7 @@ static void raid10d(struct mddev *mddev)
 	blk_start_plug(&plug);
 	for (;;) {
 
-		if (atomic_read(&mddev->plug_cnt) == 0)
-			flush_pending_writes(conf);
+		flush_pending_writes(conf);
 
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (list_empty(head)) {
@@ -2890,12 +2890,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 				/* want to reconstruct this device */
 				rb2 = r10_bio;
 				sect = raid10_find_virt(conf, sector_nr, i);
-				if (sect >= mddev->resync_max_sectors) {
-					/* last stripe is not complete - don't
-					 * try to recover this sector.
-					 */
-					continue;
-				}
 				/* Unless we are doing a full sync, or a replacement
 				 * we only need to recover the block if it is set in
 				 * the bitmap
@@ -3427,7 +3421,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	spin_lock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
 
-	conf->thread = md_register_thread(raid10d, mddev, "raid10");
+	conf->thread = md_register_thread(raid10d, mddev, NULL);
 	if (!conf->thread)
 		goto out;
diff --git a/trunk/drivers/md/raid5.c b/trunk/drivers/md/raid5.c
index 04348d76bb30..d26767246d26 100644
--- a/trunk/drivers/md/raid5.c
+++ b/trunk/drivers/md/raid5.c
@@ -196,14 +196,12 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
 		BUG_ON(!list_empty(&sh->lru));
 		BUG_ON(atomic_read(&conf->active_stripes)==0);
 		if (test_bit(STRIPE_HANDLE, &sh->state)) {
-			if (test_bit(STRIPE_DELAYED, &sh->state) &&
-			    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+			if (test_bit(STRIPE_DELAYED, &sh->state))
 				list_add_tail(&sh->lru, &conf->delayed_list);
 			else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 				   sh->bm_seq - conf->seq_write > 0)
 				list_add_tail(&sh->lru, &conf->bitmap_list);
 			else {
-				clear_bit(STRIPE_DELAYED, &sh->state);
 				clear_bit(STRIPE_BIT_DELAY, &sh->state);
 				list_add_tail(&sh->lru, &conf->handle_list);
 			}
@@ -608,12 +606,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				 * a chance*/
 				md_check_recovery(conf->mddev);
 			}
-			/*
-			 * Because md_wait_for_blocked_rdev
-			 * will dec nr_pending, we must
-			 * increment it first.
-			 */
-			atomic_inc(&rdev->nr_pending);
 			md_wait_for_blocked_rdev(rdev, conf->mddev);
 		} else {
 			/* Acknowledged bad block - skip the write */
@@ -1745,7 +1737,6 @@ static void raid5_end_read_request(struct bio * bi, int error)
 	} else {
 		const char *bdn = bdevname(rdev->bdev, b);
 		int retry = 0;
-		int set_bad = 0;
 
 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
 		atomic_inc(&rdev->read_errors);
@@ -1757,8 +1748,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				mdname(conf->mddev),
 				(unsigned long long)s,
 				bdn);
-		else if (conf->mddev->degraded >= conf->max_degraded) {
-			set_bad = 1;
+		else if (conf->mddev->degraded >= conf->max_degraded)
 			printk_ratelimited(
 				KERN_WARNING
 				"md/raid:%s: read error not correctable "
@@ -1766,9 +1756,8 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				mdname(conf->mddev),
 				(unsigned long long)s,
 				bdn);
-		} else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
+		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
 			/* Oh, no!!! */
-			set_bad = 1;
 			printk_ratelimited(
 				KERN_WARNING
 				"md/raid:%s: read error NOT corrected!! "
" @@ -1776,7 +1765,7 @@ static void raid5_end_read_request(struct bio * bi, int error) mdname(conf->mddev), (unsigned long long)s, bdn); - } else if (atomic_read(&rdev->read_errors) + else if (atomic_read(&rdev->read_errors) > conf->max_nr_stripes) printk(KERN_WARNING "md/raid:%s: Too many read errors, failing device %s.\n", @@ -1788,11 +1777,7 @@ static void raid5_end_read_request(struct bio * bi, int error) else { clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); - if (!(set_bad - && test_bit(In_sync, &rdev->flags) - && rdev_set_badblocks( - rdev, sh->sector, STRIPE_SECTORS, 0))) - md_error(conf->mddev, rdev); + md_error(conf->mddev, rdev); } } rdev_dec_pending(rdev, conf->mddev); @@ -3597,18 +3582,8 @@ static void handle_stripe(struct stripe_head *sh) finish: /* wait for this device to become unblocked */ - if (unlikely(s.blocked_rdev)) { - if (conf->mddev->external) - md_wait_for_blocked_rdev(s.blocked_rdev, - conf->mddev); - else - /* Internal metadata will immediately - * be written by raid5d, so we don't - * need to wait here. - */ - rdev_dec_pending(s.blocked_rdev, - conf->mddev); - } + if (conf->mddev->external && unlikely(s.blocked_rdev)) + md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); if (s.handle_bad_blocks) for (i = disks; i--; ) { @@ -3906,6 +3881,8 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) raid_bio->bi_next = (void*)rdev; align_bi->bi_bdev = rdev->bdev; align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); + /* No reshape active, so we can trust rdev->data_offset */ + align_bi->bi_sector += rdev->data_offset; if (!bio_fits_rdev(align_bi) || is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, @@ -3916,9 +3893,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) return 0; } - /* No reshape active, so we can trust rdev->data_offset */ - align_bi->bi_sector += rdev->data_offset; - spin_lock_irq(&conf->device_lock); wait_event_lock_irq(conf->wait_for_stripe, conf->quiesce == 0, @@ -3997,6 +3971,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) struct stripe_head *sh; const int rw = bio_data_dir(bi); int remaining; + int plugged; if (unlikely(bi->bi_rw & REQ_FLUSH)) { md_flush_request(mddev, bi); @@ -4015,6 +3990,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) bi->bi_next = NULL; bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ + plugged = mddev_check_plugged(mddev); for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { DEFINE_WAIT(w); int previous; @@ -4116,7 +4092,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ((bi->bi_rw & REQ_SYNC) && !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); - mddev_check_plugged(mddev); release_stripe(sh); } else { /* cannot get stripe for read-ahead, just give-up */ @@ -4124,7 +4099,10 @@ static void make_request(struct mddev *mddev, struct bio * bi) finish_wait(&conf->wait_for_overlap, &w); break; } + } + if (!plugged) + md_wakeup_thread(mddev->thread); spin_lock_irq(&conf->device_lock); remaining = raid5_dec_bi_phys_segments(bi); @@ -4845,7 +4823,6 @@ static struct r5conf *setup_conf(struct mddev *mddev) int raid_disk, memory, max_disks; struct md_rdev *rdev; struct disk_info *disk; - char pers_name[6]; if (mddev->new_level != 5 && mddev->new_level != 4 @@ -4969,8 +4946,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) printk(KERN_INFO "md/raid:%s: allocated %dkB\n", mdname(mddev), memory); 
-	sprintf(pers_name, "raid%d", mddev->new_level);
-	conf->thread = md_register_thread(raid5d, mddev, pers_name);
+	conf->thread = md_register_thread(raid5d, mddev, NULL);
 	if (!conf->thread) {
 		printk(KERN_ERR
 		       "md/raid:%s: couldn't allocate thread.\n",
@@ -5489,9 +5465,10 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	if (rdev->saved_raid_disk >= 0 &&
 	    rdev->saved_raid_disk >= first &&
 	    conf->disks[rdev->saved_raid_disk].rdev == NULL)
-		first = rdev->saved_raid_disk;
-
-	for (disk = first; disk <= last; disk++) {
+		disk = rdev->saved_raid_disk;
+	else
+		disk = first;
+	for ( ; disk <= last ; disk++) {
 		p = conf->disks + disk;
 		if (p->rdev == NULL) {
 			clear_bit(In_sync, &rdev->flags);
@@ -5500,11 +5477,8 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			if (rdev->saved_raid_disk != disk)
 				conf->fullsync = 1;
 			rcu_assign_pointer(p->rdev, rdev);
-			goto out;
+			break;
 		}
-	}
-	for (disk = first; disk <= last; disk++) {
-		p = conf->disks + disk;
 		if (test_bit(WantReplacement, &p->rdev->flags) &&
 		    p->replacement == NULL) {
 			clear_bit(In_sync, &rdev->flags);
@@ -5516,7 +5490,6 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			break;
 		}
 	}
-out:
 	print_raid5_conf(conf);
 	return err;
 }
diff --git a/trunk/security/security.c b/trunk/security/security.c
index 860aeb349cb3..3efc9b12aef4 100644
--- a/trunk/security/security.c
+++ b/trunk/security/security.c
@@ -23,7 +23,6 @@
 #include
 #include
 #include
-#include
 #include
 
 #define MAX_LSM_EVM_XATTR	2
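
For reference, the Documentation/prctl/no_new_privs.txt file removed above is
exercised from userspace through the PR_SET_NO_NEW_PRIVS prctl.  The program
below is a minimal sketch, not part of this patch; it assumes a Linux 3.5+
kernel, and defines the raw prctl number 38 (from include/linux/prctl.h) in
case the libc headers do not yet carry the constant:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38	/* from include/linux/prctl.h (Linux 3.5) */
#endif

int main(void)
{
	/* Once set, the bit is inherited across fork, clone and execve,
	 * and can never be cleared again. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) {
		perror("prctl(PR_SET_NO_NEW_PRIVS)");
		return 1;
	}

	/* From here on, execve() will not grant setuid/setgid or
	 * file-capability privileges, and seccomp mode 2 filters may
	 * be installed by an unprivileged process. */
	return 0;
}

The bit can be read back with prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), which
returns 1 once it has been set.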